// SPDX-License-Identifier: GPL-2.0
#include <linux/moduleparam.h>

#include "x86_ops.h"
#include "vmx.h"
#include "mmu.h"
#include "nested.h"
#include "pmu.h"
#include "posted_intr.h"
#include "tdx.h"
#include "tdx_arch.h"

#ifdef CONFIG_KVM_INTEL_TDX
static_assert(offsetof(struct vcpu_vmx, vt) == offsetof(struct vcpu_tdx, vt));

static void vt_disable_virtualization_cpu(void)
{
        /* Note, TDX *and* VMX need to be disabled if TDX is enabled. */
        if (enable_tdx)
                tdx_disable_virtualization_cpu();
        vmx_disable_virtualization_cpu();
}

static __init int vt_hardware_setup(void)
{
        int ret;

        ret = vmx_hardware_setup();
        if (ret)
                return ret;

        /*
         * Update vt_x86_ops::vm_size here so it is ready before
         * kvm_ops_update() is called in kvm_x86_vendor_init().
         *
         * Note, the actual bringing up of TDX must be done after
         * kvm_ops_update() because enabling TDX requires enabling
         * hardware virtualization first, i.e., all online CPUs must
         * be in post-VMXON state.  This means the @vm_size here may
         * be updated to TDX's size, but TDX may still fail to enable
         * at a later time.
         *
         * The VMX/VT code could update kvm_x86_ops::vm_size again
         * after bringing up TDX, but this would require exporting
         * either kvm_x86_ops or kvm_ops_update() from the base KVM
         * module, which looks like overkill.  Anyway, the worst case
         * here is that KVM may allocate a couple more bytes than
         * needed for each VM.
         */
        if (enable_tdx) {
                vt_x86_ops.vm_size = max_t(unsigned int, vt_x86_ops.vm_size,
                                           sizeof(struct kvm_tdx));
                /*
                 * Note, TDX may fail to initialize later in vt_init(),
                 * in which case it is not necessary to set up these
                 * callbacks.  But making them valid here even when TDX
                 * fails to init later is fine, because these callbacks
                 * won't be called if the VM isn't a TDX guest.
                 */
                vt_x86_ops.link_external_spt = tdx_sept_link_private_spt;
                vt_x86_ops.set_external_spte = tdx_sept_set_private_spte;
                vt_x86_ops.free_external_spt = tdx_sept_free_private_spt;
                vt_x86_ops.remove_external_spte = tdx_sept_remove_private_spte;
                vt_x86_ops.protected_apic_has_interrupt = tdx_protected_apic_has_interrupt;
        }

        return 0;
}

static int vt_vm_init(struct kvm *kvm)
{
        if (is_td(kvm))
                return tdx_vm_init(kvm);

        return vmx_vm_init(kvm);
}

static void vt_vm_pre_destroy(struct kvm *kvm)
{
        if (is_td(kvm))
                return tdx_mmu_release_hkid(kvm);
}

static void vt_vm_destroy(struct kvm *kvm)
{
        if (is_td(kvm))
                return tdx_vm_destroy(kvm);

        vmx_vm_destroy(kvm);
}

static int vt_vcpu_precreate(struct kvm *kvm)
{
        if (is_td(kvm))
                return 0;

        return vmx_vcpu_precreate(kvm);
}

static int vt_vcpu_create(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu))
                return tdx_vcpu_create(vcpu);

        return vmx_vcpu_create(vcpu);
}

static void vt_vcpu_free(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu)) {
                tdx_vcpu_free(vcpu);
                return;
        }

        vmx_vcpu_free(vcpu);
}

static void vt_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
        if (is_td_vcpu(vcpu)) {
                tdx_vcpu_reset(vcpu, init_event);
                return;
        }

        vmx_vcpu_reset(vcpu, init_event);
}

static void vt_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
        if (is_td_vcpu(vcpu)) {
                tdx_vcpu_load(vcpu, cpu);
                return;
        }

        vmx_vcpu_load(vcpu, cpu);
}

static void vt_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
{
        /*
         * Basic TDX does not support the PML feature.  KVM does not enable
         * PML in the TD's VMCS, nor does it allocate or flush a PML buffer
         * for TDX.
         */
        if (WARN_ON_ONCE(is_td_vcpu(vcpu)))
                return;

        vmx_update_cpu_dirty_logging(vcpu);
}

static void vt_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu)) {
                tdx_prepare_switch_to_guest(vcpu);
                return;
        }

        vmx_prepare_switch_to_guest(vcpu);
}

static void vt_vcpu_put(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu)) {
                tdx_vcpu_put(vcpu);
                return;
        }

        vmx_vcpu_put(vcpu);
}

static int vt_vcpu_pre_run(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu))
                return tdx_vcpu_pre_run(vcpu);

        return vmx_vcpu_pre_run(vcpu);
}

static fastpath_t vt_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
{
        if (is_td_vcpu(vcpu))
                return tdx_vcpu_run(vcpu, force_immediate_exit);

        return vmx_vcpu_run(vcpu, force_immediate_exit);
}

static int vt_handle_exit(struct kvm_vcpu *vcpu,
                          enum exit_fastpath_completion fastpath)
{
        if (is_td_vcpu(vcpu))
                return tdx_handle_exit(vcpu, fastpath);

        return vmx_handle_exit(vcpu, fastpath);
}

static int vt_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
        if (unlikely(is_td_vcpu(vcpu)))
                return tdx_set_msr(vcpu, msr_info);

        return vmx_set_msr(vcpu, msr_info);
}

/*
 * The kvm parameter can be NULL (module initialization, or invocation before
 * VM creation).  Be sure to check the kvm parameter before using it.
 */
static bool vt_has_emulated_msr(struct kvm *kvm, u32 index)
{
        if (kvm && is_td(kvm))
                return tdx_has_emulated_msr(index);

        return vmx_has_emulated_msr(kvm, index);
}

static int vt_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
        if (unlikely(is_td_vcpu(vcpu)))
                return tdx_get_msr(vcpu, msr_info);

        return vmx_get_msr(vcpu, msr_info);
}

static void vt_msr_filter_changed(struct kvm_vcpu *vcpu)
{
        /*
         * TDX doesn't allow the VMM to configure interception of MSR
         * accesses.  A TDX guest requests MSR accesses by calling TDVMCALL.
         * The MSR filters will be applied when handling the TDVMCALL for
         * RDMSR/WRMSR if userspace has set any.
         */
        if (is_td_vcpu(vcpu))
                return;

        vmx_msr_filter_changed(vcpu);
}

static int vt_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
{
        if (is_td_vcpu(vcpu))
                return tdx_complete_emulated_msr(vcpu, err);

        return vmx_complete_emulated_msr(vcpu, err);
}

#ifdef CONFIG_KVM_SMM
static int vt_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
        if (KVM_BUG_ON(is_td_vcpu(vcpu), vcpu->kvm))
                return 0;

        return vmx_smi_allowed(vcpu, for_injection);
}

static int vt_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
{
        if (KVM_BUG_ON(is_td_vcpu(vcpu), vcpu->kvm))
                return 0;

        return vmx_enter_smm(vcpu, smram);
}

static int vt_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
{
        if (KVM_BUG_ON(is_td_vcpu(vcpu), vcpu->kvm))
                return 0;

        return vmx_leave_smm(vcpu, smram);
}

static void vt_enable_smi_window(struct kvm_vcpu *vcpu)
{
        if (KVM_BUG_ON(is_td_vcpu(vcpu), vcpu->kvm))
                return;

        /* RSM will cause a vmexit anyway. */
        vmx_enable_smi_window(vcpu);
}
#endif

static int vt_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
                                        void *insn, int insn_len)
{
        /*
         * For TDX, this can only be triggered for MMIO emulation.  Let the
         * guest retry after installing the SPTE with the suppress #VE bit
         * cleared, so that the guest will receive a #VE on the retry.  The
         * guest is expected to call TDG.VP.VMCALL<MMIO> to request that the
         * VMM do the MMIO emulation on #VE.
         */
        if (is_td_vcpu(vcpu))
                return X86EMUL_RETRY_INSTR;

        return vmx_check_emulate_instruction(vcpu, emul_type, insn, insn_len);
}

static bool vt_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
{
        /*
         * INIT and SIPI are always blocked for TDX, i.e., INIT handling and
         * the OP vcpu_deliver_sipi_vector() won't be called.
         */
        if (is_td_vcpu(vcpu))
                return true;

        return vmx_apic_init_signal_blocked(vcpu);
}

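/*
 * For a TD, the local APIC is always virtualized in x2APIC mode with APICv
 * active (see vt_refresh_apicv_exec_ctrl()), and KVM can't access the APIC
 * state directly.  The APIC-related wrappers below therefore do nothing for
 * TD vCPUs, except that interrupt delivery is handed to tdx_deliver_interrupt().
 */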
static void vt_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
{
        /* Only x2APIC mode is supported for TD. */
        if (is_td_vcpu(vcpu))
                return;

        return vmx_set_virtual_apic_mode(vcpu);
}

static void vt_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
{
        if (is_td_vcpu(vcpu))
                return;

        return vmx_hwapic_isr_update(vcpu, max_isr);
}

static int vt_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu))
                return -1;

        return vmx_sync_pir_to_irr(vcpu);
}

static void vt_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
                                 int trig_mode, int vector)
{
        if (is_td_vcpu(apic->vcpu)) {
                tdx_deliver_interrupt(apic, delivery_mode, trig_mode,
                                      vector);
                return;
        }

        vmx_deliver_interrupt(apic, delivery_mode, trig_mode, vector);
}

static void vt_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_vcpu_after_set_cpuid(vcpu);
}

static void vt_update_exception_bitmap(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_update_exception_bitmap(vcpu);
}

static u64 vt_get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
        if (is_td_vcpu(vcpu))
                return 0;

        return vmx_get_segment_base(vcpu, seg);
}

static void vt_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var,
                           int seg)
{
        if (is_td_vcpu(vcpu)) {
                memset(var, 0, sizeof(*var));
                return;
        }

        vmx_get_segment(vcpu, var, seg);
}

static void vt_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var,
                           int seg)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_set_segment(vcpu, var, seg);
}

static int vt_get_cpl(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu))
                return 0;

        return vmx_get_cpl(vcpu);
}

static int vt_get_cpl_no_cache(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu))
                return 0;

        return vmx_get_cpl_no_cache(vcpu);
}

static void vt_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
{
        if (is_td_vcpu(vcpu)) {
                *db = 0;
                *l = 0;
                return;
        }

        vmx_get_cs_db_l_bits(vcpu, db, l);
}

static bool vt_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
        if (is_td_vcpu(vcpu))
                return true;

        return vmx_is_valid_cr0(vcpu, cr0);
}

static void vt_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_set_cr0(vcpu, cr0);
}

static bool vt_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
        if (is_td_vcpu(vcpu))
                return true;

        return vmx_is_valid_cr4(vcpu, cr4);
}

static void vt_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_set_cr4(vcpu, cr4);
}

static int vt_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
        if (is_td_vcpu(vcpu))
                return 0;

        return vmx_set_efer(vcpu, efer);
}

static void vt_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{
        if (is_td_vcpu(vcpu)) {
                memset(dt, 0, sizeof(*dt));
                return;
        }

        vmx_get_idt(vcpu, dt);
}

static void vt_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_set_idt(vcpu, dt);
}

static void vt_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{
        if (is_td_vcpu(vcpu)) {
                memset(dt, 0, sizeof(*dt));
                return;
        }

        vmx_get_gdt(vcpu, dt);
}

static void vt_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_set_gdt(vcpu, dt);
}

static void vt_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_set_dr6(vcpu, val);
}

static void vt_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_set_dr7(vcpu, val);
}

static void vt_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
{
        /*
         * MOV-DR exiting is always cleared for TD guests, even in debug mode.
         * Thus KVM_DEBUGREG_WONT_EXIT can never be set and it should never
         * reach here for a TD vCPU.
         */
        if (is_td_vcpu(vcpu))
                return;

        vmx_sync_dirty_debug_regs(vcpu);
}

static void vt_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
{
        if (WARN_ON_ONCE(is_td_vcpu(vcpu)))
                return;

        vmx_cache_reg(vcpu, reg);
}

static unsigned long vt_get_rflags(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu))
                return 0;

        return vmx_get_rflags(vcpu);
}

static void vt_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_set_rflags(vcpu, rflags);
}

static bool vt_get_if_flag(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu))
                return false;

        return vmx_get_if_flag(vcpu);
}

static void vt_flush_tlb_all(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu)) {
                tdx_flush_tlb_all(vcpu);
                return;
        }

        vmx_flush_tlb_all(vcpu);
}

static void vt_flush_tlb_current(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu)) {
                tdx_flush_tlb_current(vcpu);
                return;
        }

        vmx_flush_tlb_current(vcpu);
}

static void vt_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_flush_tlb_gva(vcpu, addr);
}

static void vt_flush_tlb_guest(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_flush_tlb_guest(vcpu);
}

static void vt_inject_nmi(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu)) {
                tdx_inject_nmi(vcpu);
                return;
        }

        vmx_inject_nmi(vcpu);
}

static int vt_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
        /*
         * The TDX module manages NMI windows and NMI reinjection, and hides
         * NMI blocking; all KVM can do is throw an NMI over the wall.
         */
        if (is_td_vcpu(vcpu))
                return true;

        return vmx_nmi_allowed(vcpu, for_injection);
}

static bool vt_get_nmi_mask(struct kvm_vcpu *vcpu)
{
        /*
         * KVM can't get the NMI blocking status for a TDX guest, so assume
         * NMIs are always unmasked.
         */
        if (is_td_vcpu(vcpu))
                return false;

        return vmx_get_nmi_mask(vcpu);
}

static void vt_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_set_nmi_mask(vcpu, masked);
}

static void vt_enable_nmi_window(struct kvm_vcpu *vcpu)
{
        /* Refer to the comments in tdx_inject_nmi(). */
        if (is_td_vcpu(vcpu))
                return;

        vmx_enable_nmi_window(vcpu);
}

static void vt_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
                            int pgd_level)
{
        if (is_td_vcpu(vcpu)) {
                tdx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
                return;
        }

        vmx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
}

static void vt_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_set_interrupt_shadow(vcpu, mask);
}

static u32 vt_get_interrupt_shadow(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu))
                return 0;

        return vmx_get_interrupt_shadow(vcpu);
}

static void vt_patch_hypercall(struct kvm_vcpu *vcpu,
                               unsigned char *hypercall)
{
        /*
         * Because guest memory is protected, the guest can't be patched.
         * The TD kernel is modified to use TDG.VP.VMCALL for hypercalls.
         */
        if (is_td_vcpu(vcpu))
                return;

        vmx_patch_hypercall(vcpu, hypercall);
}

static void vt_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_inject_irq(vcpu, reinjected);
}

static void vt_inject_exception(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_inject_exception(vcpu);
}

static void vt_cancel_injection(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_cancel_injection(vcpu);
}

static int vt_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
        if (is_td_vcpu(vcpu))
                return tdx_interrupt_allowed(vcpu);

        return vmx_interrupt_allowed(vcpu, for_injection);
}

static void vt_enable_irq_window(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_enable_irq_window(vcpu);
}

static void vt_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info, u32 *error_code)
{
        *intr_info = 0;
        *error_code = 0;

        if (is_td_vcpu(vcpu))
                return;

        vmx_get_entry_info(vcpu, intr_info, error_code);
}

static void vt_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
                             u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code)
{
        if (is_td_vcpu(vcpu)) {
                tdx_get_exit_info(vcpu, reason, info1, info2, intr_info,
                                  error_code);
                return;
        }

        vmx_get_exit_info(vcpu, reason, info1, info2, intr_info, error_code);
}

static void vt_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_update_cr8_intercept(vcpu, tpr, irr);
}

static void vt_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_set_apic_access_page_addr(vcpu);
}

static void vt_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu)) {
                KVM_BUG_ON(!kvm_vcpu_apicv_active(vcpu), vcpu->kvm);
                return;
        }

        vmx_refresh_apicv_exec_ctrl(vcpu);
}

static void vt_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_load_eoi_exitmap(vcpu, eoi_exit_bitmap);
}

static int vt_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
        if (is_td(kvm))
                return 0;

        return vmx_set_tss_addr(kvm, addr);
}

static int vt_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
{
        if (is_td(kvm))
                return 0;

        return vmx_set_identity_map_addr(kvm, ident_addr);
}

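/*
 * A TD's TSC offset and multiplier are fixed by the TDX module once the TD
 * is created, and TDX doesn't support L2 guests yet, so the TSC-related
 * wrappers below are no-ops (or return 0) for TD vCPUs.
 */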
static u64 vt_get_l2_tsc_offset(struct kvm_vcpu *vcpu)
{
        /* TDX doesn't support L2 guests at the moment. */
        if (is_td_vcpu(vcpu))
                return 0;

        return vmx_get_l2_tsc_offset(vcpu);
}

static u64 vt_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
{
        /* TDX doesn't support L2 guests at the moment. */
        if (is_td_vcpu(vcpu))
                return 0;

        return vmx_get_l2_tsc_multiplier(vcpu);
}

static void vt_write_tsc_offset(struct kvm_vcpu *vcpu)
{
        /* In TDX, the TSC offset can't be changed. */
        if (is_td_vcpu(vcpu))
                return;

        vmx_write_tsc_offset(vcpu);
}

static void vt_write_tsc_multiplier(struct kvm_vcpu *vcpu)
{
        /* In TDX, the TSC multiplier can't be changed. */
        if (is_td_vcpu(vcpu))
                return;

        vmx_write_tsc_multiplier(vcpu);
}

#ifdef CONFIG_X86_64
static int vt_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
                           bool *expired)
{
        /* The VMX-preemption timer isn't available for TDX. */
        if (is_td_vcpu(vcpu))
                return -EINVAL;

        return vmx_set_hv_timer(vcpu, guest_deadline_tsc, expired);
}

static void vt_cancel_hv_timer(struct kvm_vcpu *vcpu)
{
        /* The VMX-preemption timer can't be set.  See vt_set_hv_timer(). */
        if (is_td_vcpu(vcpu))
                return;

        vmx_cancel_hv_timer(vcpu);
}
#endif

static void vt_setup_mce(struct kvm_vcpu *vcpu)
{
        if (is_td_vcpu(vcpu))
                return;

        vmx_setup_mce(vcpu);
}

static int vt_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
{
        if (!is_td(kvm))
                return -ENOTTY;

        return tdx_vm_ioctl(kvm, argp);
}

static int vt_vcpu_mem_enc_ioctl(struct kvm_vcpu *vcpu, void __user *argp)
{
        if (!is_td_vcpu(vcpu))
                return -EINVAL;

        return tdx_vcpu_ioctl(vcpu, argp);
}

static int vt_gmem_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn)
{
        if (is_td(kvm))
                return tdx_gmem_private_max_mapping_level(kvm, pfn);

        return 0;
}

#define vt_op(name) vt_##name
#define vt_op_tdx_only(name) vt_##name
#else /* CONFIG_KVM_INTEL_TDX */
#define vt_op(name) vmx_##name
#define vt_op_tdx_only(name) NULL
#endif /* CONFIG_KVM_INTEL_TDX */

#define VMX_REQUIRED_APICV_INHIBITS                             \
        (BIT(APICV_INHIBIT_REASON_DISABLED) |                   \
         BIT(APICV_INHIBIT_REASON_ABSENT) |                     \
         BIT(APICV_INHIBIT_REASON_HYPERV) |                     \
         BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |                   \
         BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) |        \
         BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |           \
         BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED))
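
/*
 * The vt_op() and vt_op_tdx_only() macros defined above keep this table
 * usable with and without CONFIG_KVM_INTEL_TDX: when TDX support is compiled
 * out, the vt_* wrappers are dropped, the plain vmx_* implementations are
 * used directly, and TDX-only callbacks are left NULL.
 */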
struct kvm_x86_ops vt_x86_ops __initdata = {
        .name = KBUILD_MODNAME,

        .check_processor_compatibility = vmx_check_processor_compat,

        .hardware_unsetup = vmx_hardware_unsetup,

        .enable_virtualization_cpu = vmx_enable_virtualization_cpu,
        .disable_virtualization_cpu = vt_op(disable_virtualization_cpu),
        .emergency_disable_virtualization_cpu = vmx_emergency_disable_virtualization_cpu,

        .has_emulated_msr = vt_op(has_emulated_msr),

        .vm_size = sizeof(struct kvm_vmx),

        .vm_init = vt_op(vm_init),
        .vm_destroy = vt_op(vm_destroy),
        .vm_pre_destroy = vt_op_tdx_only(vm_pre_destroy),

        .vcpu_precreate = vt_op(vcpu_precreate),
        .vcpu_create = vt_op(vcpu_create),
        .vcpu_free = vt_op(vcpu_free),
        .vcpu_reset = vt_op(vcpu_reset),

        .prepare_switch_to_guest = vt_op(prepare_switch_to_guest),
        .vcpu_load = vt_op(vcpu_load),
        .vcpu_put = vt_op(vcpu_put),

        .update_exception_bitmap = vt_op(update_exception_bitmap),
        .get_feature_msr = vmx_get_feature_msr,
        .get_msr = vt_op(get_msr),
        .set_msr = vt_op(set_msr),

        .get_segment_base = vt_op(get_segment_base),
        .get_segment = vt_op(get_segment),
        .set_segment = vt_op(set_segment),
        .get_cpl = vt_op(get_cpl),
        .get_cpl_no_cache = vt_op(get_cpl_no_cache),
        .get_cs_db_l_bits = vt_op(get_cs_db_l_bits),
        .is_valid_cr0 = vt_op(is_valid_cr0),
        .set_cr0 = vt_op(set_cr0),
        .is_valid_cr4 = vt_op(is_valid_cr4),
        .set_cr4 = vt_op(set_cr4),
        .set_efer = vt_op(set_efer),
        .get_idt = vt_op(get_idt),
        .set_idt = vt_op(set_idt),
        .get_gdt = vt_op(get_gdt),
        .set_gdt = vt_op(set_gdt),
        .set_dr6 = vt_op(set_dr6),
        .set_dr7 = vt_op(set_dr7),
        .sync_dirty_debug_regs = vt_op(sync_dirty_debug_regs),
        .cache_reg = vt_op(cache_reg),
        .get_rflags = vt_op(get_rflags),
        .set_rflags = vt_op(set_rflags),
        .get_if_flag = vt_op(get_if_flag),

        .flush_tlb_all = vt_op(flush_tlb_all),
        .flush_tlb_current = vt_op(flush_tlb_current),
        .flush_tlb_gva = vt_op(flush_tlb_gva),
        .flush_tlb_guest = vt_op(flush_tlb_guest),

        .vcpu_pre_run = vt_op(vcpu_pre_run),
        .vcpu_run = vt_op(vcpu_run),
        .handle_exit = vt_op(handle_exit),
        .skip_emulated_instruction = vmx_skip_emulated_instruction,
        .update_emulated_instruction = vmx_update_emulated_instruction,
        .set_interrupt_shadow = vt_op(set_interrupt_shadow),
        .get_interrupt_shadow = vt_op(get_interrupt_shadow),
        .patch_hypercall = vt_op(patch_hypercall),
        .inject_irq = vt_op(inject_irq),
        .inject_nmi = vt_op(inject_nmi),
        .inject_exception = vt_op(inject_exception),
        .cancel_injection = vt_op(cancel_injection),
        .interrupt_allowed = vt_op(interrupt_allowed),
        .nmi_allowed = vt_op(nmi_allowed),
        .get_nmi_mask = vt_op(get_nmi_mask),
        .set_nmi_mask = vt_op(set_nmi_mask),
        .enable_nmi_window = vt_op(enable_nmi_window),
        .enable_irq_window = vt_op(enable_irq_window),
        .update_cr8_intercept = vt_op(update_cr8_intercept),

        .x2apic_icr_is_split = false,
        .set_virtual_apic_mode = vt_op(set_virtual_apic_mode),
        .set_apic_access_page_addr = vt_op(set_apic_access_page_addr),
        .refresh_apicv_exec_ctrl = vt_op(refresh_apicv_exec_ctrl),
        .load_eoi_exitmap = vt_op(load_eoi_exitmap),
        .apicv_pre_state_restore = pi_apicv_pre_state_restore,
        .required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS,
        .hwapic_isr_update = vt_op(hwapic_isr_update),
        .sync_pir_to_irr = vt_op(sync_pir_to_irr),
        .deliver_interrupt = vt_op(deliver_interrupt),
        .dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,

        .set_tss_addr = vt_op(set_tss_addr),
        .set_identity_map_addr = vt_op(set_identity_map_addr),
        .get_mt_mask = vmx_get_mt_mask,

        .get_exit_info = vt_op(get_exit_info),
        .get_entry_info = vt_op(get_entry_info),

        .vcpu_after_set_cpuid = vt_op(vcpu_after_set_cpuid),

        .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,

        .get_l2_tsc_offset = vt_op(get_l2_tsc_offset),
        .get_l2_tsc_multiplier = vt_op(get_l2_tsc_multiplier),
        .write_tsc_offset = vt_op(write_tsc_offset),
        .write_tsc_multiplier = vt_op(write_tsc_multiplier),

        .load_mmu_pgd = vt_op(load_mmu_pgd),

        .check_intercept = vmx_check_intercept,
        .handle_exit_irqoff = vmx_handle_exit_irqoff,

        .update_cpu_dirty_logging = vt_op(update_cpu_dirty_logging),

        .nested_ops = &vmx_nested_ops,

        .pi_update_irte = vmx_pi_update_irte,
        .pi_start_assignment = vmx_pi_start_assignment,

#ifdef CONFIG_X86_64
        .set_hv_timer = vt_op(set_hv_timer),
        .cancel_hv_timer = vt_op(cancel_hv_timer),
#endif

        .setup_mce = vt_op(setup_mce),

#ifdef CONFIG_KVM_SMM
        .smi_allowed = vt_op(smi_allowed),
        .enter_smm = vt_op(enter_smm),
        .leave_smm = vt_op(leave_smm),
        .enable_smi_window = vt_op(enable_smi_window),
#endif

        .check_emulate_instruction = vt_op(check_emulate_instruction),
        .apic_init_signal_blocked = vt_op(apic_init_signal_blocked),
        .migrate_timers = vmx_migrate_timers,

        .msr_filter_changed = vt_op(msr_filter_changed),
        .complete_emulated_msr = vt_op(complete_emulated_msr),

        .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,

        .get_untagged_addr = vmx_get_untagged_addr,

        .mem_enc_ioctl = vt_op_tdx_only(mem_enc_ioctl),
        .vcpu_mem_enc_ioctl = vt_op_tdx_only(vcpu_mem_enc_ioctl),

        .private_max_mapping_level = vt_op_tdx_only(gmem_private_max_mapping_level)
};

struct kvm_x86_init_ops vt_init_ops __initdata = {
        .hardware_setup = vt_op(hardware_setup),
        .handle_intel_pt_intr = NULL,

        .runtime_ops = &vt_x86_ops,
        .pmu_ops = &intel_pmu_ops,
};

static void __exit vt_exit(void)
{
        kvm_exit();
        tdx_cleanup();
        vmx_exit();
}
module_exit(vt_exit);

static int __init vt_init(void)
{
        unsigned vcpu_size, vcpu_align;
        int r;

        r = vmx_init();
        if (r)
                return r;

        /* tdx_init() has been taken */
        r = tdx_bringup();
        if (r)
                goto err_tdx_bringup;

        /*
         * TDX and VMX have different vCPU structures.  Calculate the
         * maximum size/align so that kvm_init() can use the larger
         * values to create the kmem_vcpu_cache.
         */
        vcpu_size = sizeof(struct vcpu_vmx);
        vcpu_align = __alignof__(struct vcpu_vmx);
        if (enable_tdx) {
                vcpu_size = max_t(unsigned, vcpu_size,
                                  sizeof(struct vcpu_tdx));
                vcpu_align = max_t(unsigned, vcpu_align,
                                   __alignof__(struct vcpu_tdx));
                kvm_caps.supported_vm_types |= BIT(KVM_X86_TDX_VM);
        }

        /*
         * Common KVM initialization _must_ come last; after this, /dev/kvm
         * is exposed to userspace!
         */
        r = kvm_init(vcpu_size, vcpu_align, THIS_MODULE);
        if (r)
                goto err_kvm_init;

        return 0;

err_kvm_init:
        tdx_cleanup();
err_tdx_bringup:
        vmx_exit();
        return r;
}
module_init(vt_init);