/*
 * x86/vmx.c : Framework for testing nested virtualization
 *	This is a framework for testing nested VMX in KVM, which
 *	started as a GSoC 2013 project. All test cases should be
 *	located in x86/vmx_tests.c; framework-related functions
 *	should be in this file.
 *
 * How to write test cases?
 *	Add the test suite's callbacks to the array "vmx_tests". You can
 *	provide:
 *		1. an init function used to initialize the test suite,
 *		2. a main function for the code running in the L2 guest,
 *		3. an exit_handler to handle VM-exits from L2 to L1,
 *		4. a syscall handler to handle L2 syscall VM-exits,
 *		5. a vmenter-fail handler to handle direct failures of VM-entry,
 *		6. guest_regs, which is loaded on VM-entry and saved on
 *		   VM-exit; you can read and modify it in the exit_handler.
 *	If no special function is needed for a test suite, use the
 *	corresponding basic_* functions as callbacks. More handlers
 *	can be added to "vmx_tests"; see "struct vmx_test" and
 *	test_run() for details.
 *
 * Currently the VMX test framework only sets up one VCPU and one
 * concurrent guest test environment, with the same paging for L2
 * and L1. When EPT is used, only 1:1 mappings from guest frame
 * to physical frame are installed.
 *
 * Author : Arthur Chunqi Li <yzt356@gmail.com>
 */
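/*
 * Illustrative sketch only (not a real entry): roughly how a minimal
 * V1-style test suite might be added to the "vmx_tests" array in
 * x86/vmx_tests.c. The my_* callback names are hypothetical; NULL or
 * the basic_* callbacks (e.g. basic_guest_main, basic_exit_handler)
 * can be used wherever no special handling is needed.
 *
 *	{ "example", NULL, my_guest_main, my_exit_handler, NULL, {0} },
 */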
#include "libcflat.h"
#include "processor.h"
#include "alloc_page.h"
#include "vm.h"
#include "vmalloc.h"
#include "desc.h"
#include "vmx.h"
#include "msr.h"
#include "smp.h"
#include "apic.h"

u64 *bsp_vmxon_region;
struct vmcs *vmcs_root;
u32 vpid_cnt;
void *guest_stack, *guest_syscall_stack;
u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
struct regs regs;

struct vmx_test *current;

#define MAX_TEST_TEARDOWN_STEPS 10

struct test_teardown_step {
	test_teardown_func func;
	void *data;
};

static int teardown_count;
static struct test_teardown_step teardown_steps[MAX_TEST_TEARDOWN_STEPS];

static test_guest_func v2_guest_main;

u64 hypercall_field;
bool launched;
static int matched;
static int guest_finished;
static int in_guest;

union vmx_basic basic;
union vmx_ctrl_msr ctrl_pin_rev;
union vmx_ctrl_msr ctrl_cpu_rev[2];
union vmx_ctrl_msr ctrl_exit_rev;
union vmx_ctrl_msr ctrl_enter_rev;
union vmx_ept_vpid ept_vpid;

extern struct descriptor_table_ptr gdt64_desc;
extern struct descriptor_table_ptr idt_descr;
extern struct descriptor_table_ptr tss_descr;
extern void *vmx_return;
extern void *entry_sysenter;
extern void *guest_entry;

static volatile u32 stage;

static jmp_buf abort_target;

struct vmcs_field {
	u64 mask;
	u64 encoding;
};

#define MASK(_bits) GENMASK_ULL((_bits) - 1, 0)
#define MASK_NATURAL MASK(sizeof(unsigned long) * 8)

static struct vmcs_field vmcs_fields[] = {
	{ MASK(16), VPID },
	{ MASK(16), PINV },
	{ MASK(16), EPTP_IDX },

	{ MASK(16), GUEST_SEL_ES },
	{ MASK(16), GUEST_SEL_CS },
	{ MASK(16), GUEST_SEL_SS },
	{ MASK(16), GUEST_SEL_DS },
	{ MASK(16), GUEST_SEL_FS },
	{ MASK(16), GUEST_SEL_GS },
	{ MASK(16), GUEST_SEL_LDTR },
	{ MASK(16), GUEST_SEL_TR },
	{ MASK(16), GUEST_INT_STATUS },

	{ MASK(16), HOST_SEL_ES },
	{ MASK(16), HOST_SEL_CS },
	{ MASK(16), HOST_SEL_SS },
	{ MASK(16), HOST_SEL_DS },
	{ MASK(16), HOST_SEL_FS },
	{ MASK(16), HOST_SEL_GS },
	{ MASK(16), HOST_SEL_TR },

	{ MASK(64), IO_BITMAP_A },
	{ MASK(64), IO_BITMAP_B },
	{ MASK(64), MSR_BITMAP },
	{ MASK(64), EXIT_MSR_ST_ADDR },
	{ MASK(64), EXIT_MSR_LD_ADDR },
	{ MASK(64), ENTER_MSR_LD_ADDR },
	{ MASK(64), VMCS_EXEC_PTR },
	{ MASK(64), TSC_OFFSET },
	{ MASK(64), APIC_VIRT_ADDR },
	{ MASK(64), APIC_ACCS_ADDR },
	{ MASK(64), EPTP },

	{ MASK(64), INFO_PHYS_ADDR },

	{ MASK(64), VMCS_LINK_PTR },
	{ MASK(64), GUEST_DEBUGCTL },
	{ MASK(64), GUEST_EFER },
	{ MASK(64), GUEST_PAT },
	{ MASK(64), GUEST_PERF_GLOBAL_CTRL },
	{ MASK(64), GUEST_PDPTE },

	{ MASK(64), HOST_PAT },
	{ MASK(64), HOST_EFER },
	{ MASK(64), HOST_PERF_GLOBAL_CTRL },

	{ MASK(32), PIN_CONTROLS },
	{ MASK(32), CPU_EXEC_CTRL0 },
	{ MASK(32), EXC_BITMAP },
	{ MASK(32), PF_ERROR_MASK },
	{ MASK(32), PF_ERROR_MATCH },
	{ MASK(32), CR3_TARGET_COUNT },
	{ MASK(32), EXI_CONTROLS },
	{ MASK(32), EXI_MSR_ST_CNT },
	{ MASK(32), EXI_MSR_LD_CNT },
	{ MASK(32), ENT_CONTROLS },
	{ MASK(32), ENT_MSR_LD_CNT },
	{ MASK(32), ENT_INTR_INFO },
	{ MASK(32), ENT_INTR_ERROR },
	{ MASK(32), ENT_INST_LEN },
	{ MASK(32), TPR_THRESHOLD },
	{ MASK(32), CPU_EXEC_CTRL1 },

	{ MASK(32), VMX_INST_ERROR },
	{ MASK(32), EXI_REASON },
	{ MASK(32), EXI_INTR_INFO },
	{ MASK(32), EXI_INTR_ERROR },
	{ MASK(32), IDT_VECT_INFO },
	{ MASK(32), IDT_VECT_ERROR },
	{ MASK(32), EXI_INST_LEN },
	{ MASK(32), EXI_INST_INFO },

	{ MASK(32), GUEST_LIMIT_ES },
	{ MASK(32), GUEST_LIMIT_CS },
	{ MASK(32), GUEST_LIMIT_SS },
	{ MASK(32), GUEST_LIMIT_DS },
	{ MASK(32), GUEST_LIMIT_FS },
	{ MASK(32), GUEST_LIMIT_GS },
	{ MASK(32), GUEST_LIMIT_LDTR },
	{ MASK(32), GUEST_LIMIT_TR },
	{ MASK(32), GUEST_LIMIT_GDTR },
	{ MASK(32), GUEST_LIMIT_IDTR },
	{ 0x1d0ff, GUEST_AR_ES },
	{ 0x1f0ff, GUEST_AR_CS },
	{ 0x1d0ff, GUEST_AR_SS },
	{ 0x1d0ff, GUEST_AR_DS },
	{ 0x1d0ff, GUEST_AR_FS },
	{ 0x1d0ff, GUEST_AR_GS },
	{ 0x1d0ff, GUEST_AR_LDTR },
	{ 0x1d0ff, GUEST_AR_TR },
	{ MASK(32), GUEST_INTR_STATE },
	{ MASK(32), GUEST_ACTV_STATE },
	{ MASK(32), GUEST_SMBASE },
	{ MASK(32), GUEST_SYSENTER_CS },
	{ MASK(32), PREEMPT_TIMER_VALUE },

	{ MASK(32), HOST_SYSENTER_CS },

	{ MASK_NATURAL, CR0_MASK },
	{ MASK_NATURAL, CR4_MASK },
	{ MASK_NATURAL, CR0_READ_SHADOW },
	{ MASK_NATURAL, CR4_READ_SHADOW },
	{ MASK_NATURAL, CR3_TARGET_0 },
	{ MASK_NATURAL, CR3_TARGET_1 },
	{ MASK_NATURAL, CR3_TARGET_2 },
	{ MASK_NATURAL, CR3_TARGET_3 },

	{ MASK_NATURAL, EXI_QUALIFICATION },
	{ MASK_NATURAL, IO_RCX },
	{ MASK_NATURAL, IO_RSI },
	{ MASK_NATURAL, IO_RDI },
	{ MASK_NATURAL, IO_RIP },
	{ MASK_NATURAL, GUEST_LINEAR_ADDRESS },

	{ MASK_NATURAL, GUEST_CR0 },
	{ MASK_NATURAL, GUEST_CR3 },
	{ MASK_NATURAL, GUEST_CR4 },
	{ MASK_NATURAL, GUEST_BASE_ES },
	{ MASK_NATURAL, GUEST_BASE_CS },
	{ MASK_NATURAL, GUEST_BASE_SS },
	{ MASK_NATURAL, GUEST_BASE_DS },
	{ MASK_NATURAL, GUEST_BASE_FS },
	{ MASK_NATURAL, GUEST_BASE_GS },
	{ MASK_NATURAL, GUEST_BASE_LDTR },
	{ MASK_NATURAL, GUEST_BASE_TR },
	{ MASK_NATURAL, GUEST_BASE_GDTR },
	{ MASK_NATURAL, GUEST_BASE_IDTR },
	{ MASK_NATURAL, GUEST_DR7 },
	{ MASK_NATURAL, GUEST_RSP },
	{ MASK_NATURAL, GUEST_RIP },
	{ MASK_NATURAL, GUEST_RFLAGS },
	{ MASK_NATURAL, GUEST_PENDING_DEBUG },
	{ MASK_NATURAL, GUEST_SYSENTER_ESP },
	{ MASK_NATURAL, GUEST_SYSENTER_EIP },

	{ MASK_NATURAL, HOST_CR0 },
	{ MASK_NATURAL, HOST_CR3 },
	{ MASK_NATURAL, HOST_CR4 },
	{ MASK_NATURAL, HOST_BASE_FS },
	{ MASK_NATURAL, HOST_BASE_GS },
	{ MASK_NATURAL, HOST_BASE_TR },
	{ MASK_NATURAL, HOST_BASE_GDTR },
	{ MASK_NATURAL, HOST_BASE_IDTR },
	{ MASK_NATURAL, HOST_SYSENTER_ESP },
	{ MASK_NATURAL, HOST_SYSENTER_EIP },
	{ MASK_NATURAL, HOST_RSP },
	{ MASK_NATURAL, HOST_RIP },
};

enum vmcs_field_type {
	VMCS_FIELD_TYPE_CONTROL = 0,
	VMCS_FIELD_TYPE_READ_ONLY_DATA = 1,
	VMCS_FIELD_TYPE_GUEST = 2,
	VMCS_FIELD_TYPE_HOST = 3,
	VMCS_FIELD_TYPES,
};

static inline int vmcs_field_type(struct vmcs_field *f)
{
	return (f->encoding >> VMCS_FIELD_TYPE_SHIFT) & 0x3;
}

static int vmcs_field_readonly(struct vmcs_field *f)
{
	u64 ia32_vmx_misc;

	ia32_vmx_misc = rdmsr(MSR_IA32_VMX_MISC);
	return !(ia32_vmx_misc & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS) &&
		(vmcs_field_type(f) == VMCS_FIELD_TYPE_READ_ONLY_DATA);
}

static inline u64 vmcs_field_value(struct vmcs_field *f, u8 cookie)
{
	u64 value;

	/* Incorporate the cookie and the field encoding into the value. */
	value = cookie;
	value |= (f->encoding << 8);
	value |= 0xdeadbeefull << 32;

	return value & f->mask;
}

static void set_vmcs_field(struct vmcs_field *f, u8 cookie)
{
	vmcs_write(f->encoding, vmcs_field_value(f, cookie));
}

static bool check_vmcs_field(struct vmcs_field *f, u8 cookie)
{
	u64 expected;
	u64 actual;
	int ret;

	if (f->encoding == VMX_INST_ERROR) {
		printf("Skipping volatile field %lx\n", f->encoding);
		return true;
	}

	ret = vmcs_read_checking(f->encoding, &actual);
	assert(!(ret & X86_EFLAGS_CF));
	/* Skip VMCS fields that aren't recognized by the CPU */
	if (ret & X86_EFLAGS_ZF)
		return true;

	if (vmcs_field_readonly(f)) {
		printf("Skipping read-only field %lx\n", f->encoding);
		return true;
	}

	expected = vmcs_field_value(f, cookie);
	actual &= f->mask;

	if (expected == actual)
		return true;

	printf("FAIL: VMWRITE/VMREAD %lx (expected: %lx, actual: %lx)\n",
	       f->encoding, (unsigned long) expected, (unsigned long) actual);

	return false;
}

static void set_all_vmcs_fields(u8 cookie)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(vmcs_fields); i++)
		set_vmcs_field(&vmcs_fields[i], cookie);
}

static bool check_all_vmcs_fields(u8 cookie)
{
	bool pass = true;
	int i;

	for (i = 0; i < ARRAY_SIZE(vmcs_fields); i++) {
		if (!check_vmcs_field(&vmcs_fields[i], cookie))
			pass = false;
	}

	return pass;
}

static u32 find_vmcs_max_index(void)
{
	u32 idx, width, type, enc;
	u64 actual;
	int ret;

	/* scan backwards and stop when found */
	for (idx = (1 << 9) - 1; idx >= 0; idx--) {

		/* try all combinations of width and type */
		for (type = 0; type < (1 << 2); type++) {
			for (width = 0; width < (1 << 2) ; width++) {
				enc = (idx << VMCS_FIELD_INDEX_SHIFT) |
				      (type << VMCS_FIELD_TYPE_SHIFT) |
				      (width << VMCS_FIELD_WIDTH_SHIFT);

				ret = vmcs_read_checking(enc, &actual);
				assert(!(ret & X86_EFLAGS_CF));
				if (!(ret & X86_EFLAGS_ZF))
					return idx;
			}
		}
	}
	/* some VMCS fields should exist */
	assert(0);
	return 0;
}
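/*
 * A short worked example of the encoding layout that vmcs_field_type() and
 * find_vmcs_max_index() rely on (per the SDM: bits 14:13 = width,
 * bits 11:10 = type, bits 9:1 = index, bit 0 = access type):
 *
 *	GUEST_SEL_SS (0x0804):
 *		width = (0x0804 >> VMCS_FIELD_WIDTH_SHIFT) & 0x3   = 0 (16-bit)
 *		type  = (0x0804 >> VMCS_FIELD_TYPE_SHIFT)  & 0x3   = 2 (guest state)
 *		index = (0x0804 >> VMCS_FIELD_INDEX_SHIFT) & 0x1ff = 2
 */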
static void test_vmwrite_vmread(void)
{
	struct vmcs *vmcs = alloc_page();
	u32 vmcs_enum_max, max_index = 0;

	vmcs->hdr.revision_id = basic.revision;
	assert(!vmcs_clear(vmcs));
	assert(!make_vmcs_current(vmcs));

	set_all_vmcs_fields(0x42);
	report(check_all_vmcs_fields(0x42), "VMWRITE/VMREAD");

	vmcs_enum_max = (rdmsr(MSR_IA32_VMX_VMCS_ENUM) & VMCS_FIELD_INDEX_MASK)
			>> VMCS_FIELD_INDEX_SHIFT;
	max_index = find_vmcs_max_index();
	report(vmcs_enum_max == max_index,
	       "VMX_VMCS_ENUM.MAX_INDEX expected: %x, actual: %x",
	       max_index, vmcs_enum_max);

	assert(!vmcs_clear(vmcs));
	free_page(vmcs);
}

ulong finish_fault;
u8 sentinel;
bool handler_called;

static void pf_handler(struct ex_regs *regs)
{
	/*
	 * check that RIP was not improperly advanced and that the
	 * flags value was preserved.
	 */
	report(regs->rip < finish_fault, "RIP has not been advanced!");
	report(((u8)regs->rflags == ((sentinel | 2) & 0xd7)),
	       "The low byte of RFLAGS was preserved!");
	regs->rip = finish_fault;
	handler_called = true;
}

static void prep_flags_test_env(void **vpage, struct vmcs **vmcs, handler *old)
{
	/*
	 * get an unbacked address that will cause a #PF
	 */
	*vpage = alloc_vpage();

	/*
	 * set up VMCS so we have something to read from
	 */
	*vmcs = alloc_page();

	memset(*vmcs, 0, PAGE_SIZE);
	(*vmcs)->hdr.revision_id = basic.revision;
	assert(!vmcs_clear(*vmcs));
	assert(!make_vmcs_current(*vmcs));

	*old = handle_exception(PF_VECTOR, &pf_handler);
}

static void test_read_sentinel(void)
{
	void *vpage;
	struct vmcs *vmcs;
	handler old;

	prep_flags_test_env(&vpage, &vmcs, &old);

	/*
	 * set the proper label
	 */
	extern char finish_read_fault;

	finish_fault = (ulong)&finish_read_fault;

	/*
	 * execute the vmread instruction that will cause a #PF
	 */
	handler_called = false;
	asm volatile ("movb %[byte], %%ah\n\t"
		      "sahf\n\t"
		      "vmread %[enc], %[val]; finish_read_fault:"
		      : [val] "=m" (*(u64 *)vpage)
		      : [byte] "Krm" (sentinel),
			[enc] "r" ((u64)GUEST_SEL_SS)
		      : "cc", "ah");
	report(handler_called, "The #PF handler was invoked");

	/*
	 * restore the old #PF handler
	 */
	handle_exception(PF_VECTOR, old);
}

static void test_vmread_flags_touch(void)
{
	/*
	 * set up the sentinel value in the flags register. we
	 * choose these two values because they candy-stripe
	 * the 5 flags that sahf sets.
	 */
	sentinel = 0x91;
	test_read_sentinel();

	sentinel = 0x45;
	test_read_sentinel();
}
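/*
 * For reference, how the two sentinel values used above and below decompose
 * into the SAHF-visible flags (SF = bit 7, ZF = bit 6, AF = bit 4,
 * PF = bit 2, CF = bit 0):
 *
 *	0x91 = 1001 0001b -> SF, AF and CF set; ZF and PF clear
 *	0x45 = 0100 0101b -> ZF, PF and CF set; SF and AF clear
 *
 * The #PF handler then compares the low byte of the saved RFLAGS against
 * (sentinel | 2) & 0xd7, i.e. the five SAHF flags plus the always-set bit 1.
 */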
static void test_write_sentinel(void)
{
	void *vpage;
	struct vmcs *vmcs;
	handler old;

	prep_flags_test_env(&vpage, &vmcs, &old);

	/*
	 * set the proper label
	 */
	extern char finish_write_fault;

	finish_fault = (ulong)&finish_write_fault;

	/*
	 * execute the vmwrite instruction that will cause a #PF
	 */
	handler_called = false;
	asm volatile ("movb %[byte], %%ah\n\t"
		      "sahf\n\t"
		      "vmwrite %[val], %[enc]; finish_write_fault:"
		      : [val] "=m" (*(u64 *)vpage)
		      : [byte] "Krm" (sentinel),
			[enc] "r" ((u64)GUEST_SEL_SS)
		      : "cc", "ah");
	report(handler_called, "The #PF handler was invoked");

	/*
	 * restore the old #PF handler
	 */
	handle_exception(PF_VECTOR, old);
}

static void test_vmwrite_flags_touch(void)
{
	/*
	 * set up the sentinel value in the flags register. we
	 * choose these two values because they candy-stripe
	 * the 5 flags that sahf sets.
	 */
	sentinel = 0x91;
	test_write_sentinel();

	sentinel = 0x45;
	test_write_sentinel();
}

static void test_vmcs_high(void)
{
	struct vmcs *vmcs = alloc_page();

	vmcs->hdr.revision_id = basic.revision;
	assert(!vmcs_clear(vmcs));
	assert(!make_vmcs_current(vmcs));

	vmcs_write(TSC_OFFSET, 0x0123456789ABCDEFull);
	report(vmcs_read(TSC_OFFSET) == 0x0123456789ABCDEFull,
	       "VMREAD TSC_OFFSET after VMWRITE TSC_OFFSET");
	report(vmcs_read(TSC_OFFSET_HI) == 0x01234567ull,
	       "VMREAD TSC_OFFSET_HI after VMWRITE TSC_OFFSET");
	vmcs_write(TSC_OFFSET_HI, 0x76543210ul);
	report(vmcs_read(TSC_OFFSET_HI) == 0x76543210ul,
	       "VMREAD TSC_OFFSET_HI after VMWRITE TSC_OFFSET_HI");
	report(vmcs_read(TSC_OFFSET) == 0x7654321089ABCDEFull,
	       "VMREAD TSC_OFFSET after VMWRITE TSC_OFFSET_HI");

	assert(!vmcs_clear(vmcs));
	free_page(vmcs);
}

static void test_vmcs_lifecycle(void)
{
	struct vmcs *vmcs[2] = {};
	int i;

	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
		vmcs[i] = alloc_page();
		vmcs[i]->hdr.revision_id = basic.revision;
	}

#define VMPTRLD(_i) do { \
	assert(_i < ARRAY_SIZE(vmcs)); \
	assert(!make_vmcs_current(vmcs[_i])); \
	printf("VMPTRLD VMCS%d\n", (_i)); \
} while (0)

#define VMCLEAR(_i) do { \
	assert(_i < ARRAY_SIZE(vmcs)); \
	assert(!vmcs_clear(vmcs[_i])); \
	printf("VMCLEAR VMCS%d\n", (_i)); \
} while (0)

	VMCLEAR(0);
	VMPTRLD(0);
	set_all_vmcs_fields(0);
	report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]");

	VMCLEAR(0);
	VMPTRLD(0);
	report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]");

	VMCLEAR(1);
	report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]");

	VMPTRLD(1);
	set_all_vmcs_fields(1);
	report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VMCS1]");

	VMPTRLD(0);
	report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0,VMCS1]");
	VMPTRLD(1);
	report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VMCS1]");
	VMPTRLD(1);
	report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VMCS1]");

	VMCLEAR(0);
	report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS1]");

	/* VMPTRLD should not erase VMWRITEs to the current VMCS */
	set_all_vmcs_fields(2);
	VMPTRLD(1);
	report(check_all_vmcs_fields(2), "current:VMCS1 active:[VMCS1]");

	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
		VMCLEAR(i);
		free_page(vmcs[i]);
	}

#undef VMPTRLD
#undef VMCLEAR
}

void vmx_set_test_stage(u32 s)
{
	barrier();
	stage = s;
	barrier();
}

u32 vmx_get_test_stage(void)
{
	u32 s;

	barrier();
	s = stage;
	barrier();
	return s;
}

void vmx_inc_test_stage(void)
{
	barrier();
	stage++;
	barrier();
}
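/*
 * Illustrative sketch of how the stage counter above is typically used to
 * synchronize a test between L1 and L2; the example_* function names are
 * hypothetical and not part of the framework.
 *
 *	static void example_guest_main(void)		// runs in L2
 *	{
 *		vmx_set_test_stage(0);
 *		asm volatile("vmcall");			// force a VM-exit
 *		report(vmx_get_test_stage() == 1, "L1 advanced the stage");
 *	}
 *
 *	static int example_exit_handler(union exit_reason exit_reason)	// runs in L1
 *	{
 *		if (exit_reason.basic == VMX_VMCALL &&
 *		    vmx_get_test_stage() == 0) {
 *			vmx_inc_test_stage();
 *			vmcs_write(GUEST_RIP, vmcs_read(GUEST_RIP) +
 *					      vmcs_read(EXI_INST_LEN));
 *		}
 *		return VMX_TEST_RESUME;
 *	}
 */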
"VMX_EXC_NMI", 656 [VMX_EXTINT] = "VMX_EXTINT", 657 [VMX_TRIPLE_FAULT] = "VMX_TRIPLE_FAULT", 658 [VMX_INIT] = "VMX_INIT", 659 [VMX_SIPI] = "VMX_SIPI", 660 [VMX_SMI_IO] = "VMX_SMI_IO", 661 [VMX_SMI_OTHER] = "VMX_SMI_OTHER", 662 [VMX_INTR_WINDOW] = "VMX_INTR_WINDOW", 663 [VMX_NMI_WINDOW] = "VMX_NMI_WINDOW", 664 [VMX_TASK_SWITCH] = "VMX_TASK_SWITCH", 665 [VMX_CPUID] = "VMX_CPUID", 666 [VMX_GETSEC] = "VMX_GETSEC", 667 [VMX_HLT] = "VMX_HLT", 668 [VMX_INVD] = "VMX_INVD", 669 [VMX_INVLPG] = "VMX_INVLPG", 670 [VMX_RDPMC] = "VMX_RDPMC", 671 [VMX_RDTSC] = "VMX_RDTSC", 672 [VMX_RSM] = "VMX_RSM", 673 [VMX_VMCALL] = "VMX_VMCALL", 674 [VMX_VMCLEAR] = "VMX_VMCLEAR", 675 [VMX_VMLAUNCH] = "VMX_VMLAUNCH", 676 [VMX_VMPTRLD] = "VMX_VMPTRLD", 677 [VMX_VMPTRST] = "VMX_VMPTRST", 678 [VMX_VMREAD] = "VMX_VMREAD", 679 [VMX_VMRESUME] = "VMX_VMRESUME", 680 [VMX_VMWRITE] = "VMX_VMWRITE", 681 [VMX_VMXOFF] = "VMX_VMXOFF", 682 [VMX_VMXON] = "VMX_VMXON", 683 [VMX_CR] = "VMX_CR", 684 [VMX_DR] = "VMX_DR", 685 [VMX_IO] = "VMX_IO", 686 [VMX_RDMSR] = "VMX_RDMSR", 687 [VMX_WRMSR] = "VMX_WRMSR", 688 [VMX_FAIL_STATE] = "VMX_FAIL_STATE", 689 [VMX_FAIL_MSR] = "VMX_FAIL_MSR", 690 [VMX_MWAIT] = "VMX_MWAIT", 691 [VMX_MTF] = "VMX_MTF", 692 [VMX_MONITOR] = "VMX_MONITOR", 693 [VMX_PAUSE] = "VMX_PAUSE", 694 [VMX_FAIL_MCHECK] = "VMX_FAIL_MCHECK", 695 [VMX_TPR_THRESHOLD] = "VMX_TPR_THRESHOLD", 696 [VMX_APIC_ACCESS] = "VMX_APIC_ACCESS", 697 [VMX_EOI_INDUCED] = "VMX_EOI_INDUCED", 698 [VMX_GDTR_IDTR] = "VMX_GDTR_IDTR", 699 [VMX_LDTR_TR] = "VMX_LDTR_TR", 700 [VMX_EPT_VIOLATION] = "VMX_EPT_VIOLATION", 701 [VMX_EPT_MISCONFIG] = "VMX_EPT_MISCONFIG", 702 [VMX_INVEPT] = "VMX_INVEPT", 703 [VMX_PREEMPT] = "VMX_PREEMPT", 704 [VMX_INVVPID] = "VMX_INVVPID", 705 [VMX_WBINVD] = "VMX_WBINVD", 706 [VMX_XSETBV] = "VMX_XSETBV", 707 [VMX_APIC_WRITE] = "VMX_APIC_WRITE", 708 [VMX_RDRAND] = "VMX_RDRAND", 709 [VMX_INVPCID] = "VMX_INVPCID", 710 [VMX_VMFUNC] = "VMX_VMFUNC", 711 [VMX_RDSEED] = "VMX_RDSEED", 712 [VMX_PML_FULL] = "VMX_PML_FULL", 713 [VMX_XSAVES] = "VMX_XSAVES", 714 [VMX_XRSTORS] = "VMX_XRSTORS", 715 }; 716 717 const char *exit_reason_description(u64 reason) 718 { 719 if (reason >= ARRAY_SIZE(exit_reason_descriptions)) 720 return "(unknown)"; 721 return exit_reason_descriptions[reason] ? : "(unused)"; 722 } 723 724 void print_vmexit_info(union exit_reason exit_reason) 725 { 726 u64 guest_rip, guest_rsp; 727 ulong exit_qual = vmcs_read(EXI_QUALIFICATION); 728 guest_rip = vmcs_read(GUEST_RIP); 729 guest_rsp = vmcs_read(GUEST_RSP); 730 printf("VMEXIT info:\n"); 731 printf("\tvmexit reason = %u\n", exit_reason.basic); 732 printf("\tfailed vmentry = %u\n", !!exit_reason.failed_vmentry); 733 printf("\texit qualification = %#lx\n", exit_qual); 734 printf("\tguest_rip = %#lx\n", guest_rip); 735 printf("\tRAX=%#lx RBX=%#lx RCX=%#lx RDX=%#lx\n", 736 regs.rax, regs.rbx, regs.rcx, regs.rdx); 737 printf("\tRSP=%#lx RBP=%#lx RSI=%#lx RDI=%#lx\n", 738 guest_rsp, regs.rbp, regs.rsi, regs.rdi); 739 printf("\tR8 =%#lx R9 =%#lx R10=%#lx R11=%#lx\n", 740 regs.r8, regs.r9, regs.r10, regs.r11); 741 printf("\tR12=%#lx R13=%#lx R14=%#lx R15=%#lx\n", 742 regs.r12, regs.r13, regs.r14, regs.r15); 743 } 744 745 void print_vmentry_failure_info(struct vmentry_result *result) 746 { 747 if (result->entered) 748 return; 749 750 if (result->vm_fail) { 751 printf("VM-Fail on %s: ", result->instr); 752 switch (result->flags & VMX_ENTRY_FLAGS) { 753 case X86_EFLAGS_CF: 754 printf("current-VMCS pointer is not valid.\n"); 755 break; 756 case X86_EFLAGS_ZF: 757 printf("error number is %ld. 
			       vmcs_read(VMX_INST_ERROR));
			break;
		default:
			printf("unexpected flags %lx!\n", result->flags);
		}
	} else {
		u64 qual = vmcs_read(EXI_QUALIFICATION);

		printf("VM-Exit failure on %s (reason=%#x, qual=%#lx): ",
		       result->instr, result->exit_reason.full, qual);

		switch (result->exit_reason.basic) {
		case VMX_FAIL_STATE:
			printf("invalid guest state\n");
			break;
		case VMX_FAIL_MSR:
			printf("MSR loading\n");
			break;
		case VMX_FAIL_MCHECK:
			printf("machine-check event\n");
			break;
		default:
			printf("unexpected basic exit reason %u\n",
			       result->exit_reason.basic);
		}

		if (!result->exit_reason.failed_vmentry)
			printf("\tVMX_ENTRY_FAILURE BIT NOT SET!\n");

		if (result->exit_reason.full & 0x7fff0000)
			printf("\tRESERVED BITS SET!\n");
	}
}

/*
 * VMCLEAR should ensure that all VMCS state is flushed to the VMCS
 * region in memory.
 */
static void test_vmclear_flushing(void)
{
	struct vmcs *vmcs[3] = {};
	int i;

	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
		vmcs[i] = alloc_page();
	}

	vmcs[0]->hdr.revision_id = basic.revision;
	assert(!vmcs_clear(vmcs[0]));
	assert(!make_vmcs_current(vmcs[0]));
	set_all_vmcs_fields(0x86);

	assert(!vmcs_clear(vmcs[0]));
	memcpy(vmcs[1], vmcs[0], basic.size);
	assert(!make_vmcs_current(vmcs[1]));
	report(check_all_vmcs_fields(0x86),
	       "test vmclear flush (current VMCS)");

	set_all_vmcs_fields(0x87);
	assert(!make_vmcs_current(vmcs[0]));
	assert(!vmcs_clear(vmcs[1]));
	memcpy(vmcs[2], vmcs[1], basic.size);
	assert(!make_vmcs_current(vmcs[2]));
	report(check_all_vmcs_fields(0x87),
	       "test vmclear flush (!current VMCS)");

	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
		assert(!vmcs_clear(vmcs[i]));
		free_page(vmcs[i]);
	}
}

static void test_vmclear(void)
{
	struct vmcs *tmp_root;
	int width = cpuid_maxphyaddr();

	/*
	 * Note - the tests below do not necessarily have a
	 * valid VMCS, but that's OK since the invalid VMCS
	 * is only used for a specific test and is discarded
	 * without touching its contents.
	 */

	/* Unaligned page access */
	tmp_root = (struct vmcs *)((intptr_t)vmcs_root + 1);
	report(vmcs_clear(tmp_root) == 1, "test vmclear with unaligned vmcs");

	/* gpa bits beyond physical address width are set */
	tmp_root = (struct vmcs *)((intptr_t)vmcs_root |
				   ((u64)1 << (width+1)));
	report(vmcs_clear(tmp_root) == 1,
	       "test vmclear with vmcs address bits set beyond physical address width");

	/* Pass VMXON region */
	tmp_root = (struct vmcs *)bsp_vmxon_region;
	report(vmcs_clear(tmp_root) == 1, "test vmclear with vmxon region");

	/* Valid VMCS */
	report(vmcs_clear(vmcs_root) == 0,
	       "test vmclear with valid vmcs region");

	test_vmclear_flushing();
}

static void __attribute__((__used__)) guest_main(void)
{
	if (current->v2)
		v2_guest_main();
	else
		current->guest_main();
}

/* guest_entry */
asm(
	".align	4, 0x90\n\t"
	".globl	entry_guest\n\t"
	"guest_entry:\n\t"
	"	call guest_main\n\t"
	"	mov $1, %edi\n\t"
	"	call hypercall\n\t"
);

/* EPT paging structure related functions */

/* split_large_ept_entry: Split a 2M/1G large page into 512 smaller PTEs.
		@ptep : large page table entry to split
		@level : level of ptep (2 or 3)
 */
static void split_large_ept_entry(unsigned long *ptep, int level)
{
	unsigned long *new_pt;
	unsigned long gpa;
	unsigned long pte;
	unsigned long prototype;
	int i;

	pte = *ptep;
	assert(pte & EPT_PRESENT);
	assert(pte & EPT_LARGE_PAGE);
	assert(level == 2 || level == 3);

	new_pt = alloc_page();
	assert(new_pt);

	prototype = pte & ~EPT_ADDR_MASK;
	if (level == 2)
		prototype &= ~EPT_LARGE_PAGE;

	gpa = pte & EPT_ADDR_MASK;
	for (i = 0; i < EPT_PGDIR_ENTRIES; i++) {
		new_pt[i] = prototype | gpa;
		gpa += 1ul << EPT_LEVEL_SHIFT(level - 1);
	}

	pte &= ~EPT_LARGE_PAGE;
	pte &= ~EPT_ADDR_MASK;
	pte |= virt_to_phys(new_pt);

	*ptep = pte;
}

/* install_ept_entry : Install a page to a given level in EPT
		@pml4 : addr of pml4 table
		@pte_level : level of PTE to set
		@guest_addr : guest physical address
		@pte : pte value to set
		@pt_page : address of page table, NULL for a new page
 */
void install_ept_entry(unsigned long *pml4,
		       int pte_level,
		       unsigned long guest_addr,
		       unsigned long pte,
		       unsigned long *pt_page)
{
	int level;
	unsigned long *pt = pml4;
	unsigned offset;

	/* EPT only uses 48 bits of GPA. */
	assert(guest_addr < (1ul << 48));

	for (level = EPT_PAGE_LEVEL; level > pte_level; --level) {
		offset = (guest_addr >> EPT_LEVEL_SHIFT(level))
				& EPT_PGDIR_MASK;
		if (!(pt[offset] & (EPT_PRESENT))) {
			unsigned long *new_pt = pt_page;
			if (!new_pt)
				new_pt = alloc_page();
			else
				pt_page = 0;
			memset(new_pt, 0, PAGE_SIZE);
			pt[offset] = virt_to_phys(new_pt)
					| EPT_RA | EPT_WA | EPT_EA;
		} else if (pt[offset] & EPT_LARGE_PAGE)
			split_large_ept_entry(&pt[offset], level);
		pt = phys_to_virt(pt[offset] & EPT_ADDR_MASK);
	}
	offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) & EPT_PGDIR_MASK;
	pt[offset] = pte;
}

/* Map a page, @perm is the permission of the page */
void install_ept(unsigned long *pml4,
		 unsigned long phys,
		 unsigned long guest_addr,
		 u64 perm)
{
	install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0);
}

/* Map a 1G-size page */
void install_1g_ept(unsigned long *pml4,
		    unsigned long phys,
		    unsigned long guest_addr,
		    u64 perm)
{
	install_ept_entry(pml4, 3, guest_addr,
			  (phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
}

/* Map a 2M-size page */
void install_2m_ept(unsigned long *pml4,
		    unsigned long phys,
		    unsigned long guest_addr,
		    u64 perm)
{
	install_ept_entry(pml4, 2, guest_addr,
			  (phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
}
/* setup_ept_range : Set up a range of 1:1 mapped pages in the EPT paging
   structures.
		@start : start address of the guest pages
		@len : length of the address range to be mapped
		@map_1g : whether 1G page mappings are used
		@map_2m : whether 2M page mappings are used
		@perm : permission for every page
 */
void setup_ept_range(unsigned long *pml4, unsigned long start,
		     unsigned long len, int map_1g, int map_2m, u64 perm)
{
	u64 phys = start;
	u64 max = (u64)len + (u64)start;

	if (map_1g) {
		while (phys + PAGE_SIZE_1G <= max) {
			install_1g_ept(pml4, phys, phys, perm);
			phys += PAGE_SIZE_1G;
		}
	}
	if (map_2m) {
		while (phys + PAGE_SIZE_2M <= max) {
			install_2m_ept(pml4, phys, phys, perm);
			phys += PAGE_SIZE_2M;
		}
	}
	while (phys + PAGE_SIZE <= max) {
		install_ept(pml4, phys, phys, perm);
		phys += PAGE_SIZE;
	}
}
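/*
 * Illustrative sketch (under this file's 1:1 mapping convention) of how a
 * test might build EPT tables: identity-map the first 2 GiB with large
 * pages where the hardware supports them, then override a single 4K page
 * with read-only permissions. The pml4 and data_page names are
 * hypothetical.
 *
 *	unsigned long *pml4 = alloc_page();
 *	void *data_page = alloc_page();
 *
 *	setup_ept_range(pml4, 0, 2ul << 30, ept_1g_supported(),
 *			ept_2m_supported(), EPT_RA | EPT_WA | EPT_EA);
 *	install_ept(pml4, virt_to_phys(data_page),
 *		    (unsigned long)data_page, EPT_RA);
 */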
/* get_ept_pte : Get the PTE of a given level in EPT;
   @level == 1 means get the lowest (leaf) level. */
bool get_ept_pte(unsigned long *pml4, unsigned long guest_addr, int level,
		 unsigned long *pte)
{
	int l;
	unsigned long *pt = pml4, iter_pte;
	unsigned offset;

	assert(level >= 1 && level <= 4);

	for (l = EPT_PAGE_LEVEL; ; --l) {
		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
		iter_pte = pt[offset];
		if (l == level)
			break;
		if (l < 4 && (iter_pte & EPT_LARGE_PAGE))
			return false;
		if (!(iter_pte & (EPT_PRESENT)))
			return false;
		pt = (unsigned long *)(iter_pte & EPT_ADDR_MASK);
	}
	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
	if (pte)
		*pte = pt[offset];
	return true;
}

static void clear_ept_ad_pte(unsigned long *pml4, unsigned long guest_addr)
{
	int l;
	unsigned long *pt = pml4;
	u64 pte;
	unsigned offset;

	for (l = EPT_PAGE_LEVEL; ; --l) {
		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
		pt[offset] &= ~(EPT_ACCESS_FLAG|EPT_DIRTY_FLAG);
		pte = pt[offset];
		if (l == 1 || (l < 4 && (pte & EPT_LARGE_PAGE)))
			break;
		pt = (unsigned long *)(pte & EPT_ADDR_MASK);
	}
}

/* clear_ept_ad : Clear EPT A/D bits for the page table walk and the
   final GPA of a guest address. */
void clear_ept_ad(unsigned long *pml4, u64 guest_cr3,
		  unsigned long guest_addr)
{
	int l;
	unsigned long *pt = (unsigned long *)guest_cr3, gpa;
	u64 pte, offset_in_page;
	unsigned offset;

	for (l = EPT_PAGE_LEVEL; ; --l) {
		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;

		clear_ept_ad_pte(pml4, (u64) &pt[offset]);
		pte = pt[offset];
		if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK)))
			break;
		if (!(pte & PT_PRESENT_MASK))
			return;
		pt = (unsigned long *)(pte & PT_ADDR_MASK);
	}

	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
	offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1);
	gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page);
	clear_ept_ad_pte(pml4, gpa);
}

/* check_ept_ad : Check the content of the EPT A/D bits for the page table
   walk and the final GPA of a guest address. */
void check_ept_ad(unsigned long *pml4, u64 guest_cr3,
		  unsigned long guest_addr, int expected_gpa_ad,
		  int expected_pt_ad)
{
	int l;
	unsigned long *pt = (unsigned long *)guest_cr3, gpa;
	u64 ept_pte, pte, offset_in_page;
	unsigned offset;
	bool bad_pt_ad = false;

	for (l = EPT_PAGE_LEVEL; ; --l) {
		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;

		if (!get_ept_pte(pml4, (u64) &pt[offset], 1, &ept_pte)) {
			printf("EPT - guest level %d page table is not mapped.\n", l);
			return;
		}

		if (!bad_pt_ad) {
			bad_pt_ad |= (ept_pte & (EPT_ACCESS_FLAG|EPT_DIRTY_FLAG)) != expected_pt_ad;
			if (bad_pt_ad)
				report(false,
				       "EPT - guest level %d page table A=%d/D=%d",
				       l,
				       !!(expected_pt_ad & EPT_ACCESS_FLAG),
				       !!(expected_pt_ad & EPT_DIRTY_FLAG));
		}

		pte = pt[offset];
		if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK)))
			break;
		if (!(pte & PT_PRESENT_MASK))
			return;
		pt = (unsigned long *)(pte & PT_ADDR_MASK);
	}

	if (!bad_pt_ad)
		report(true, "EPT - guest page table structures A=%d/D=%d",
		       !!(expected_pt_ad & EPT_ACCESS_FLAG),
		       !!(expected_pt_ad & EPT_DIRTY_FLAG));

	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
	offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1);
	gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page);

	if (!get_ept_pte(pml4, gpa, 1, &ept_pte)) {
		report(false, "EPT - guest physical address is not mapped");
		return;
	}
	report((ept_pte & (EPT_ACCESS_FLAG | EPT_DIRTY_FLAG)) == expected_gpa_ad,
	       "EPT - guest physical address A=%d/D=%d",
	       !!(expected_gpa_ad & EPT_ACCESS_FLAG),
	       !!(expected_gpa_ad & EPT_DIRTY_FLAG));
}

void ept_sync(int type, u64 eptp)
{
	switch (type) {
	case INVEPT_SINGLE:
		if (ept_vpid.val & EPT_CAP_INVEPT_SINGLE) {
			invept(INVEPT_SINGLE, eptp);
			break;
		}
		/* else fall through */
	case INVEPT_GLOBAL:
		if (ept_vpid.val & EPT_CAP_INVEPT_ALL) {
			invept(INVEPT_GLOBAL, eptp);
			break;
		}
		/* else fall through */
	default:
		printf("WARNING: invept is not supported!\n");
	}
}

void set_ept_pte(unsigned long *pml4, unsigned long guest_addr,
		 int level, u64 pte_val)
{
	int l;
	unsigned long *pt = pml4;
	unsigned offset;

	assert(level >= 1 && level <= 4);

	for (l = EPT_PAGE_LEVEL; ; --l) {
		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
		if (l == level)
			break;
		assert(pt[offset] & EPT_PRESENT);
		pt = (unsigned long *)(pt[offset] & EPT_ADDR_MASK);
	}
	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
	pt[offset] = pte_val;
}
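/*
 * Illustrative sketch: after modifying a leaf EPT entry, the cached
 * mappings must be invalidated; ept_sync() above picks whichever INVEPT
 * type the hardware reports as supported. The pml4, gpa and eptp names
 * are hypothetical.
 *
 *	unsigned long pte;
 *
 *	if (get_ept_pte(pml4, gpa, 1, &pte)) {
 *		set_ept_pte(pml4, gpa, 1, pte & ~EPT_WA);  // revoke write access
 *		ept_sync(INVEPT_SINGLE, eptp);
 *	}
 */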
bool ept_2m_supported(void)
{
	return ept_vpid.val & EPT_CAP_2M_PAGE;
}

bool ept_1g_supported(void)
{
	return ept_vpid.val & EPT_CAP_1G_PAGE;
}

bool ept_huge_pages_supported(int level)
{
	if (level == 2)
		return ept_2m_supported();
	else if (level == 3)
		return ept_1g_supported();
	else
		return false;
}

bool ept_execute_only_supported(void)
{
	return ept_vpid.val & EPT_CAP_WT;
}

bool ept_ad_bits_supported(void)
{
	return ept_vpid.val & EPT_CAP_AD_FLAG;
}

void vpid_sync(int type, u16 vpid)
{
	switch(type) {
	case INVVPID_CONTEXT_GLOBAL:
		if (ept_vpid.val & VPID_CAP_INVVPID_CXTGLB) {
			invvpid(INVVPID_CONTEXT_GLOBAL, vpid, 0);
			break;
		}
	case INVVPID_ALL:
		if (ept_vpid.val & VPID_CAP_INVVPID_ALL) {
			invvpid(INVVPID_ALL, vpid, 0);
			break;
		}
	default:
		printf("WARNING: invvpid is not supported\n");
	}
}

static void init_vmcs_ctrl(void)
{
	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
	/* 26.2.1.1 */
	vmcs_write(PIN_CONTROLS, ctrl_pin);
	/* Disable VMEXIT of IO instruction */
	vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
	if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
		ctrl_cpu[1] = (ctrl_cpu[1] | ctrl_cpu_rev[1].set) &
			      ctrl_cpu_rev[1].clr;
		vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
	}
	vmcs_write(CR3_TARGET_COUNT, 0);
	vmcs_write(VPID, ++vpid_cnt);
}

static void init_vmcs_host(void)
{
	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
	/* 26.2.1.2 */
	vmcs_write(HOST_EFER, rdmsr(MSR_EFER));

	/* 26.2.1.3 */
	vmcs_write(ENT_CONTROLS, ctrl_enter);
	vmcs_write(EXI_CONTROLS, ctrl_exit);

	/* 26.2.2 */
	vmcs_write(HOST_CR0, read_cr0());
	vmcs_write(HOST_CR3, read_cr3());
	vmcs_write(HOST_CR4, read_cr4());
	vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
	vmcs_write(HOST_SYSENTER_CS, KERNEL_CS);

	/* 26.2.3 */
	vmcs_write(HOST_SEL_CS, KERNEL_CS);
	vmcs_write(HOST_SEL_SS, KERNEL_DS);
	vmcs_write(HOST_SEL_DS, KERNEL_DS);
	vmcs_write(HOST_SEL_ES, KERNEL_DS);
	vmcs_write(HOST_SEL_FS, KERNEL_DS);
	vmcs_write(HOST_SEL_GS, KERNEL_DS);
	vmcs_write(HOST_SEL_TR, TSS_MAIN);
	vmcs_write(HOST_BASE_TR, tss_descr.base);
	vmcs_write(HOST_BASE_GDTR, gdt64_desc.base);
	vmcs_write(HOST_BASE_IDTR, idt_descr.base);
	vmcs_write(HOST_BASE_FS, 0);
	vmcs_write(HOST_BASE_GS, 0);

	/* Set other vmcs area */
	vmcs_write(PF_ERROR_MASK, 0);
	vmcs_write(PF_ERROR_MATCH, 0);
	vmcs_write(VMCS_LINK_PTR, ~0ul);
	vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
	vmcs_write(HOST_RIP, (u64)(&vmx_return));
}

static void init_vmcs_guest(void)
{
	/* 26.3 CHECKING AND LOADING GUEST STATE */
	ulong guest_cr0, guest_cr4, guest_cr3;
	/* 26.3.1.1 */
	guest_cr0 = read_cr0();
	guest_cr4 = read_cr4();
	guest_cr3 = read_cr3();
	if (ctrl_enter & ENT_GUEST_64) {
		guest_cr0 |= X86_CR0_PG;
		guest_cr4 |= X86_CR4_PAE;
	}
	if ((ctrl_enter & ENT_GUEST_64) == 0)
		guest_cr4 &= (~X86_CR4_PCIDE);
	if (guest_cr0 & X86_CR0_PG)
		guest_cr0 |= X86_CR0_PE;
	vmcs_write(GUEST_CR0, guest_cr0);
	vmcs_write(GUEST_CR3, guest_cr3);
	vmcs_write(GUEST_CR4, guest_cr4);
	vmcs_write(GUEST_SYSENTER_CS, KERNEL_CS);
	vmcs_write(GUEST_SYSENTER_ESP,
		   (u64)(guest_syscall_stack + PAGE_SIZE - 1));
	vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
	vmcs_write(GUEST_DR7, 0);
	vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));

	/* 26.3.1.2 */
	vmcs_write(GUEST_SEL_CS, KERNEL_CS);
	vmcs_write(GUEST_SEL_SS, KERNEL_DS);
	vmcs_write(GUEST_SEL_DS, KERNEL_DS);
	vmcs_write(GUEST_SEL_ES, KERNEL_DS);
	vmcs_write(GUEST_SEL_FS, KERNEL_DS);
	vmcs_write(GUEST_SEL_GS, KERNEL_DS);
	vmcs_write(GUEST_SEL_TR, TSS_MAIN);
	vmcs_write(GUEST_SEL_LDTR, 0);

	vmcs_write(GUEST_BASE_CS, 0);
	vmcs_write(GUEST_BASE_ES, 0);
	vmcs_write(GUEST_BASE_SS, 0);
	vmcs_write(GUEST_BASE_DS, 0);
	vmcs_write(GUEST_BASE_FS, 0);
	vmcs_write(GUEST_BASE_GS, 0);
	vmcs_write(GUEST_BASE_TR, tss_descr.base);
	vmcs_write(GUEST_BASE_LDTR, 0);

	vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
	vmcs_write(GUEST_LIMIT_TR, tss_descr.limit);

	vmcs_write(GUEST_AR_CS, 0xa09b);
	vmcs_write(GUEST_AR_DS, 0xc093);
	vmcs_write(GUEST_AR_ES, 0xc093);
	vmcs_write(GUEST_AR_FS, 0xc093);
	vmcs_write(GUEST_AR_GS, 0xc093);
	vmcs_write(GUEST_AR_SS, 0xc093);
	vmcs_write(GUEST_AR_LDTR, 0x82);
	vmcs_write(GUEST_AR_TR, 0x8b);

	/* 26.3.1.3 */
	vmcs_write(GUEST_BASE_GDTR, gdt64_desc.base);
	vmcs_write(GUEST_BASE_IDTR, idt_descr.base);
	vmcs_write(GUEST_LIMIT_GDTR, gdt64_desc.limit);
	vmcs_write(GUEST_LIMIT_IDTR, idt_descr.limit);

	/* 26.3.1.4 */
	vmcs_write(GUEST_RIP, (u64)(&guest_entry));
	vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1));
	vmcs_write(GUEST_RFLAGS, X86_EFLAGS_FIXED);

	/* 26.3.1.5 */
	vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE);
	vmcs_write(GUEST_INTR_STATE, 0);
}
int init_vmcs(struct vmcs **vmcs)
{
	*vmcs = alloc_page();
	(*vmcs)->hdr.revision_id = basic.revision;
	/* vmclear first to init vmcs */
	if (vmcs_clear(*vmcs)) {
		printf("%s : vmcs_clear error\n", __func__);
		return 1;
	}

	if (make_vmcs_current(*vmcs)) {
		printf("%s : make_vmcs_current error\n", __func__);
		return 1;
	}

	/* All settings of the pin/exit/entry/cpu
	   control fields should be placed here. */
	ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
	ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
	ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
	/* Disable IO instruction VMEXIT now */
	ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
	ctrl_cpu[1] = 0;

	ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
	ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
	ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
	ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;

	init_vmcs_ctrl();
	init_vmcs_host();
	init_vmcs_guest();
	return 0;
}
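/*
 * Illustrative sketch of the adjustment pattern used above: a test that
 * wants extra execution controls ORs in the bits it needs and then
 * re-applies the allowed-0/allowed-1 masks read from the capability MSRs,
 * exactly as init_vmcs() does for the defaults.
 *
 *	ctrl_cpu[0] |= CPU_SECONDARY;
 *	ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;
 *	vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
 */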
void enable_vmx(void)
{
	bool vmx_enabled =
		rdmsr(MSR_IA32_FEATURE_CONTROL) &
		FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;

	if (!vmx_enabled) {
		wrmsr(MSR_IA32_FEATURE_CONTROL,
		      FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX |
		      FEATURE_CONTROL_LOCKED);
	}
}

static void init_vmx_caps(void)
{
	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
			: MSR_IA32_VMX_PINBASED_CTLS);
	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
			: MSR_IA32_VMX_EXIT_CTLS);
	ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
			: MSR_IA32_VMX_ENTRY_CTLS);
	ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
			: MSR_IA32_VMX_PROCBASED_CTLS);
	if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0)
		ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
	else
		ctrl_cpu_rev[1].val = 0;
	if ((ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID)) != 0)
		ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
	else
		ept_vpid.val = 0;
}

void init_vmx(u64 *vmxon_region)
{
	ulong fix_cr0_set, fix_cr0_clr;
	ulong fix_cr4_set, fix_cr4_clr;

	fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0);
	fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1);
	fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0);
	fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);

	write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
	write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);

	*vmxon_region = basic.revision;
}

static void alloc_bsp_vmx_pages(void)
{
	bsp_vmxon_region = alloc_page();
	guest_stack = alloc_page();
	guest_syscall_stack = alloc_page();
	vmcs_root = alloc_page();
}

static void init_bsp_vmx(void)
{
	init_vmx_caps();
	alloc_bsp_vmx_pages();
	init_vmx(bsp_vmxon_region);
}

static void do_vmxon_off(void *data)
{
	vmx_on();
	vmx_off();
}

static void do_write_feature_control(void *data)
{
	wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
}

static int test_vmx_feature_control(void)
{
	u64 ia32_feature_control;
	bool vmx_enabled;
	bool feature_control_locked;

	ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
	vmx_enabled =
		ia32_feature_control & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
	feature_control_locked =
		ia32_feature_control & FEATURE_CONTROL_LOCKED;

	if (vmx_enabled && feature_control_locked) {
		printf("VMX enabled and locked by BIOS\n");
		return 0;
	} else if (feature_control_locked) {
		printf("ERROR: VMX locked out by BIOS!?\n");
		return 1;
	}

	wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
	report(test_for_exception(GP_VECTOR, &do_vmxon_off, NULL),
	       "test vmxon with FEATURE_CONTROL cleared");

	wrmsr(MSR_IA32_FEATURE_CONTROL, FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX);
	report(test_for_exception(GP_VECTOR, &do_vmxon_off, NULL),
	       "test vmxon without FEATURE_CONTROL lock");

	wrmsr(MSR_IA32_FEATURE_CONTROL,
	      FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX |
	      FEATURE_CONTROL_LOCKED);

	ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
	vmx_enabled =
		ia32_feature_control & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
	report(vmx_enabled, "test enable VMX in FEATURE_CONTROL");

	report(test_for_exception(GP_VECTOR, &do_write_feature_control, NULL),
	       "test FEATURE_CONTROL lock bit");

	return !vmx_enabled;
}
static int test_vmxon(void)
{
	int ret, ret1;
	u64 *vmxon_region;
	int width = cpuid_maxphyaddr();

	/* Unaligned page access */
	vmxon_region = (u64 *)((intptr_t)bsp_vmxon_region + 1);
	ret1 = _vmx_on(vmxon_region);
	report(ret1, "test vmxon with unaligned vmxon region");
	if (!ret1) {
		ret = 1;
		goto out;
	}

	/* gpa bits beyond physical address width are set */
	vmxon_region = (u64 *)((intptr_t)bsp_vmxon_region | ((u64)1 << (width+1)));
	ret1 = _vmx_on(vmxon_region);
	report(ret1, "test vmxon with bits set beyond physical address width");
	if (!ret1) {
		ret = 1;
		goto out;
	}

	/* invalid revision identifier */
	*bsp_vmxon_region = 0xba9da9;
	ret1 = vmx_on();
	report(ret1, "test vmxon with invalid revision identifier");
	if (!ret1) {
		ret = 1;
		goto out;
	}

	/* and finally a valid region */
	*bsp_vmxon_region = basic.revision;
	ret = vmx_on();
	report(!ret, "test vmxon with valid vmxon region");

out:
	return ret;
}

static void test_vmptrld(void)
{
	struct vmcs *vmcs, *tmp_root;
	int width = cpuid_maxphyaddr();

	vmcs = alloc_page();
	vmcs->hdr.revision_id = basic.revision;

	/* Unaligned page access */
	tmp_root = (struct vmcs *)((intptr_t)vmcs + 1);
	report(make_vmcs_current(tmp_root) == 1,
	       "test vmptrld with unaligned vmcs");

	/* gpa bits beyond physical address width are set */
	tmp_root = (struct vmcs *)((intptr_t)vmcs |
				   ((u64)1 << (width+1)));
	report(make_vmcs_current(tmp_root) == 1,
	       "test vmptrld with vmcs address bits set beyond physical address width");

	/* Pass VMXON region */
	assert(!vmcs_clear(vmcs));
	assert(!make_vmcs_current(vmcs));
	tmp_root = (struct vmcs *)bsp_vmxon_region;
	report(make_vmcs_current(tmp_root) == 1,
	       "test vmptrld with vmxon region");
	report(vmcs_read(VMX_INST_ERROR) == VMXERR_VMPTRLD_VMXON_POINTER,
	       "test vmptrld with vmxon region vm-instruction error");

	report(make_vmcs_current(vmcs) == 0,
	       "test vmptrld with valid vmcs region");
}

static void test_vmptrst(void)
{
	int ret;
	struct vmcs *vmcs1, *vmcs2;

	vmcs1 = alloc_page();
	init_vmcs(&vmcs1);
	ret = vmcs_save(&vmcs2);
	report((!ret) && (vmcs1 == vmcs2), "test vmptrst");
}

struct vmx_ctl_msr {
	const char *name;
	u32 index, true_index;
	u32 default1;
} vmx_ctl_msr[] = {
	{ "MSR_IA32_VMX_PINBASED_CTLS", MSR_IA32_VMX_PINBASED_CTLS,
	  MSR_IA32_VMX_TRUE_PIN, 0x16 },
	{ "MSR_IA32_VMX_PROCBASED_CTLS", MSR_IA32_VMX_PROCBASED_CTLS,
	  MSR_IA32_VMX_TRUE_PROC, 0x401e172 },
	{ "MSR_IA32_VMX_PROCBASED_CTLS2", MSR_IA32_VMX_PROCBASED_CTLS2,
	  MSR_IA32_VMX_PROCBASED_CTLS2, 0 },
	{ "MSR_IA32_VMX_EXIT_CTLS", MSR_IA32_VMX_EXIT_CTLS,
	  MSR_IA32_VMX_TRUE_EXIT, 0x36dff },
	{ "MSR_IA32_VMX_ENTRY_CTLS", MSR_IA32_VMX_ENTRY_CTLS,
	  MSR_IA32_VMX_TRUE_ENTRY, 0x11ff },
};

static void test_vmx_caps(void)
{
	u64 val, default1, fixed0, fixed1;
	union vmx_ctrl_msr ctrl, true_ctrl;
	unsigned int n;
	bool ok;

	printf("\nTest suite: VMX capability reporting\n");

	report((basic.revision & (1ul << 31)) == 0 &&
	       basic.size > 0 && basic.size <= 4096 &&
	       (basic.type == 0 || basic.type == 6) &&
	       basic.reserved1 == 0 && basic.reserved2 == 0,
	       "MSR_IA32_VMX_BASIC");

	val = rdmsr(MSR_IA32_VMX_MISC);
	report((!(ctrl_cpu_rev[1].clr & CPU_URG) || val & (1ul << 5)) &&
	       ((val >> 16) & 0x1ff) <= 256 &&
	       (val & 0x80007e00) == 0,
	       "MSR_IA32_VMX_MISC");

	for (n = 0; n < ARRAY_SIZE(vmx_ctl_msr); n++) {
		ctrl.val = rdmsr(vmx_ctl_msr[n].index);
		default1 = vmx_ctl_msr[n].default1;
		ok = (ctrl.set & default1) == default1;
		ok = ok && (ctrl.set & ~ctrl.clr) == 0;
		if (ok && basic.ctrl) {
			true_ctrl.val = rdmsr(vmx_ctl_msr[n].true_index);
			ok = ctrl.clr == true_ctrl.clr;
			ok = ok && ctrl.set == (true_ctrl.set | default1);
		}
		report(ok, "%s", vmx_ctl_msr[n].name);
	}
report(ok, "%s", vmx_ctl_msr[n].name); 1660 } 1661 1662 fixed0 = rdmsr(MSR_IA32_VMX_CR0_FIXED0); 1663 fixed1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1); 1664 report(((fixed0 ^ fixed1) & ~fixed1) == 0, 1665 "MSR_IA32_VMX_IA32_VMX_CR0_FIXED0/1"); 1666 1667 fixed0 = rdmsr(MSR_IA32_VMX_CR4_FIXED0); 1668 fixed1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1); 1669 report(((fixed0 ^ fixed1) & ~fixed1) == 0, 1670 "MSR_IA32_VMX_IA32_VMX_CR4_FIXED0/1"); 1671 1672 val = rdmsr(MSR_IA32_VMX_VMCS_ENUM); 1673 report((val & VMCS_FIELD_INDEX_MASK) >= 0x2a && 1674 (val & 0xfffffffffffffc01Ull) == 0, 1675 "MSR_IA32_VMX_VMCS_ENUM"); 1676 1677 fixed0 = -1ull; 1678 fixed0 &= ~(EPT_CAP_WT | 1679 EPT_CAP_PWL4 | 1680 EPT_CAP_PWL5 | 1681 EPT_CAP_UC | 1682 EPT_CAP_WB | 1683 EPT_CAP_2M_PAGE | 1684 EPT_CAP_1G_PAGE | 1685 EPT_CAP_INVEPT | 1686 EPT_CAP_AD_FLAG | 1687 EPT_CAP_ADV_EPT_INFO | 1688 EPT_CAP_INVEPT_SINGLE | 1689 EPT_CAP_INVEPT_ALL | 1690 VPID_CAP_INVVPID | 1691 VPID_CAP_INVVPID_ADDR | 1692 VPID_CAP_INVVPID_CXTGLB | 1693 VPID_CAP_INVVPID_ALL | 1694 VPID_CAP_INVVPID_CXTLOC); 1695 1696 val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); 1697 report((val & fixed0) == 0, 1698 "MSR_IA32_VMX_EPT_VPID_CAP"); 1699 } 1700 1701 /* This function can only be called in guest */ 1702 void __attribute__((__used__)) hypercall(u32 hypercall_no) 1703 { 1704 u64 val = 0; 1705 val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT; 1706 hypercall_field = val; 1707 asm volatile("vmcall\n\t"); 1708 } 1709 1710 static bool is_hypercall(union exit_reason exit_reason) 1711 { 1712 return exit_reason.basic == VMX_VMCALL && 1713 (hypercall_field & HYPERCALL_BIT); 1714 } 1715 1716 static int handle_hypercall(void) 1717 { 1718 ulong hypercall_no; 1719 1720 hypercall_no = hypercall_field & HYPERCALL_MASK; 1721 hypercall_field = 0; 1722 switch (hypercall_no) { 1723 case HYPERCALL_VMEXIT: 1724 return VMX_TEST_VMEXIT; 1725 case HYPERCALL_VMABORT: 1726 return VMX_TEST_VMABORT; 1727 case HYPERCALL_VMSKIP: 1728 return VMX_TEST_VMSKIP; 1729 default: 1730 printf("ERROR : Invalid hypercall number : %ld\n", hypercall_no); 1731 } 1732 return VMX_TEST_EXIT; 1733 } 1734 1735 static void continue_abort(void) 1736 { 1737 assert(!in_guest); 1738 printf("Host was here when guest aborted:\n"); 1739 dump_stack(); 1740 longjmp(abort_target, 1); 1741 abort(); 1742 } 1743 1744 void __abort_test(void) 1745 { 1746 if (in_guest) 1747 hypercall(HYPERCALL_VMABORT); 1748 else 1749 longjmp(abort_target, 1); 1750 abort(); 1751 } 1752 1753 static void continue_skip(void) 1754 { 1755 assert(!in_guest); 1756 longjmp(abort_target, 1); 1757 abort(); 1758 } 1759 1760 void test_skip(const char *msg) 1761 { 1762 printf("%s skipping test: %s\n", in_guest ? "Guest" : "Host", msg); 1763 if (in_guest) 1764 hypercall(HYPERCALL_VMABORT); 1765 else 1766 longjmp(abort_target, 1); 1767 abort(); 1768 } 1769 1770 static int exit_handler(union exit_reason exit_reason) 1771 { 1772 int ret; 1773 1774 current->exits++; 1775 regs.rflags = vmcs_read(GUEST_RFLAGS); 1776 if (is_hypercall(exit_reason)) 1777 ret = handle_hypercall(); 1778 else 1779 ret = current->exit_handler(exit_reason); 1780 vmcs_write(GUEST_RFLAGS, regs.rflags); 1781 1782 return ret; 1783 } 1784 1785 /* 1786 * Tries to enter the guest, populates @result with VM-Fail, VM-Exit, entered, 1787 * etc... 
static void continue_abort(void)
{
	assert(!in_guest);
	printf("Host was here when guest aborted:\n");
	dump_stack();
	longjmp(abort_target, 1);
	abort();
}

void __abort_test(void)
{
	if (in_guest)
		hypercall(HYPERCALL_VMABORT);
	else
		longjmp(abort_target, 1);
	abort();
}

static void continue_skip(void)
{
	assert(!in_guest);
	longjmp(abort_target, 1);
	abort();
}

void test_skip(const char *msg)
{
	printf("%s skipping test: %s\n", in_guest ? "Guest" : "Host", msg);
	if (in_guest)
		hypercall(HYPERCALL_VMSKIP);
	else
		longjmp(abort_target, 1);
	abort();
}

static int exit_handler(union exit_reason exit_reason)
{
	int ret;

	current->exits++;
	regs.rflags = vmcs_read(GUEST_RFLAGS);
	if (is_hypercall(exit_reason))
		ret = handle_hypercall();
	else
		ret = current->exit_handler(exit_reason);
	vmcs_write(GUEST_RFLAGS, regs.rflags);

	return ret;
}

/*
 * Tries to enter the guest, populates @result with VM-Fail, VM-Exit, entered,
 * etc...
 */
static void vmx_enter_guest(struct vmentry_result *result)
{
	memset(result, 0, sizeof(*result));

	in_guest = 1;
	asm volatile (
		"mov %[HOST_RSP], %%rdi\n\t"
		"vmwrite %%rsp, %%rdi\n\t"
		LOAD_GPR_C
		"cmpb $0, %[launched]\n\t"
		"jne 1f\n\t"
		"vmlaunch\n\t"
		"jmp 2f\n\t"
		"1: "
		"vmresume\n\t"
		"2: "
		SAVE_GPR_C
		"pushf\n\t"
		"pop %%rdi\n\t"
		"mov %%rdi, %[vm_fail_flags]\n\t"
		"movl $1, %[vm_fail]\n\t"
		"jmp 3f\n\t"
		"vmx_return:\n\t"
		SAVE_GPR_C
		"3: \n\t"
		: [vm_fail]"+m"(result->vm_fail),
		  [vm_fail_flags]"=m"(result->flags)
		: [launched]"m"(launched), [HOST_RSP]"i"(HOST_RSP)
		: "rdi", "memory", "cc"
	);
	in_guest = 0;

	result->vmlaunch = !launched;
	result->instr = launched ? "vmresume" : "vmlaunch";
	result->exit_reason.full = result->vm_fail ? 0xdead :
						     vmcs_read(EXI_REASON);
	result->entered = !result->vm_fail &&
			  !result->exit_reason.failed_vmentry;
}

static int vmx_run(void)
{
	struct vmentry_result result;
	u32 ret;

	while (1) {
		vmx_enter_guest(&result);
		if (result.entered) {
			/*
			 * VMCS isn't in "launched" state if there's been any
			 * entry failure (early or otherwise).
			 */
			launched = 1;
			ret = exit_handler(result.exit_reason);
		} else if (current->entry_failure_handler) {
			ret = current->entry_failure_handler(&result);
		} else {
			ret = VMX_TEST_EXIT;
		}

		switch (ret) {
		case VMX_TEST_RESUME:
			continue;
		case VMX_TEST_VMEXIT:
			guest_finished = 1;
			return 0;
		case VMX_TEST_EXIT:
			break;
		default:
			printf("ERROR : Invalid %s_handler return val %d.\n",
			       result.entered ? "exit" : "entry_failure",
			       ret);
			break;
		}

		if (result.entered)
			print_vmexit_info(result.exit_reason);
		else
			print_vmentry_failure_info(&result);
		abort();
	}
}

static void run_teardown_step(struct test_teardown_step *step)
{
	step->func(step->data);
}
static int test_run(struct vmx_test *test)
{
	int r;

	/* Validate V2 interface. */
	if (test->v2) {
		int ret = 0;
		if (test->init || test->guest_main || test->exit_handler ||
		    test->syscall_handler) {
			report(0, "V2 test cannot specify V1 callbacks.");
			ret = 1;
		}
		if (ret)
			return ret;
	}

	if (test->name == NULL)
		test->name = "(no name)";
	if (vmx_on()) {
		printf("%s : vmxon failed.\n", __func__);
		return 1;
	}

	init_vmcs(&(test->vmcs));
	/* Calling test->init() directly is OK here; init_vmcs() has already
	   done the VMCS init, VMCLEAR and VMPTRLD. */
	if (test->init && test->init(test->vmcs) != VMX_TEST_START)
		goto out;
	teardown_count = 0;
	v2_guest_main = NULL;
	test->exits = 0;
	current = test;
	regs = test->guest_regs;
	vmcs_write(GUEST_RFLAGS, regs.rflags | X86_EFLAGS_FIXED);
	launched = 0;
	guest_finished = 0;
	printf("\nTest suite: %s\n", test->name);

	r = setjmp(abort_target);
	if (r) {
		assert(!in_guest);
		goto out;
	}

	if (test->v2)
		test->v2();
	else
		vmx_run();

	while (teardown_count > 0)
		run_teardown_step(&teardown_steps[--teardown_count]);

	if (launched && !guest_finished)
		report(0, "Guest didn't run to completion.");

out:
	if (vmx_off()) {
		printf("%s : vmxoff failed.\n", __func__);
		return 1;
	}
	return 0;
}

/*
 * Add a teardown step. Executed after the test's main function returns.
 * Teardown steps are executed in reverse order.
 */
void test_add_teardown(test_teardown_func func, void *data)
{
	struct test_teardown_step *step;

	TEST_ASSERT_MSG(teardown_count < MAX_TEST_TEARDOWN_STEPS,
			"There are already %d teardown steps.",
			teardown_count);
	step = &teardown_steps[teardown_count++];
	step->func = func;
	step->data = data;
}

/*
 * Set the target of the first enter_guest call. Can only be called once per
 * test. Must be called before the first enter_guest call.
 */
void test_set_guest(test_guest_func func)
{
	assert(current->v2);
	TEST_ASSERT_MSG(!v2_guest_main, "Already set guest func.");
	v2_guest_main = func;
}
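/*
 * Illustrative sketch of a minimal V2-style test body, under the interface
 * above; the example_* names are hypothetical. example_v2_test() would be
 * wired up as the .v2 callback of an entry in "vmx_tests".
 *
 *	static void example_guest(void)
 *	{
 *		report(true, "ran in L2");
 *	}
 *
 *	static void example_v2_test(void)
 *	{
 *		void *page = alloc_page();
 *
 *		test_add_teardown(free_page, page);
 *		test_set_guest(example_guest);
 *		enter_guest();		// runs example_guest() to completion
 *	}
 */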
static void check_for_guest_termination(union exit_reason exit_reason)
{
	if (is_hypercall(exit_reason)) {
		int ret;

		ret = handle_hypercall();
		switch (ret) {
		case VMX_TEST_VMEXIT:
			guest_finished = 1;
			break;
		case VMX_TEST_VMABORT:
			continue_abort();
			break;
		case VMX_TEST_VMSKIP:
			continue_skip();
			break;
		default:
			printf("ERROR : Invalid handle_hypercall return %d.\n",
			       ret);
			abort();
		}
	}
}

/*
 * Enters the guest (or launches it for the first time). It is an error to
 * call this once the guest has returned (i.e., run past the end of its
 * guest() function).
 */
void __enter_guest(u8 abort_flag, struct vmentry_result *result)
{
	TEST_ASSERT_MSG(v2_guest_main,
			"Never called test_set_guest_func!");

	TEST_ASSERT_MSG(!guest_finished,
			"Called enter_guest() after guest returned.");

	vmx_enter_guest(result);

	if (result->vm_fail) {
		if (abort_flag & ABORT_ON_EARLY_VMENTRY_FAIL)
			goto do_abort;
		return;
	}
	if (result->exit_reason.failed_vmentry) {
		if ((abort_flag & ABORT_ON_INVALID_GUEST_STATE) ||
		    result->exit_reason.basic != VMX_FAIL_STATE)
			goto do_abort;
		return;
	}

	launched = 1;
	check_for_guest_termination(result->exit_reason);
	return;

do_abort:
	print_vmentry_failure_info(result);
	abort();
}

void enter_guest_with_bad_controls(void)
{
	struct vmentry_result result;

	TEST_ASSERT_MSG(v2_guest_main,
			"Never called test_set_guest_func!");

	TEST_ASSERT_MSG(!guest_finished,
			"Called enter_guest() after guest returned.");

	__enter_guest(ABORT_ON_INVALID_GUEST_STATE, &result);
	report(result.vm_fail, "VM-Fail occurred as expected");
	report((result.flags & VMX_ENTRY_FLAGS) == X86_EFLAGS_ZF,
	       "FLAGS set correctly on VM-Fail");
	report(vmcs_read(VMX_INST_ERROR) == VMXERR_ENTRY_INVALID_CONTROL_FIELD,
	       "VM-Inst Error # is %d (VM entry with invalid control field(s))",
	       VMXERR_ENTRY_INVALID_CONTROL_FIELD);
}

void enter_guest(void)
{
	struct vmentry_result result;

	__enter_guest(ABORT_ON_EARLY_VMENTRY_FAIL |
		      ABORT_ON_INVALID_GUEST_STATE, &result);
}

extern struct vmx_test vmx_tests[];

static bool
test_wanted(const char *name, const char *filters[], int filter_count)
{
	int i;
	bool positive = false;
	bool match = false;
	char clean_name[strlen(name) + 1];
	char *c;
	const char *n;

	printf("filter = %s, test = %s\n", filters[0], name);

	/* Replace spaces with underscores. */
	n = name;
	c = &clean_name[0];
	do *c++ = (*n == ' ') ? '_' : *n;
	while (*n++);

	for (i = 0; i < filter_count; i++) {
		const char *filter = filters[i];

		if (filter[0] == '-') {
			if (simple_glob(clean_name, filter + 1))
				return false;
		} else {
			positive = true;
			match |= simple_glob(clean_name, filter);
		}
	}

	if (!positive || match) {
		matched++;
		return true;
	} else {
		return false;
	}
}
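/*
 * Example of the filter semantics implemented by test_wanted(): spaces in
 * test names are treated as underscores, a leading '-' negates a pattern,
 * and plain patterns are globs requiring at least one positive match. With
 * the (hypothetical) command-line filters {"vmenter", "ept*", "-ept_access*"},
 * the names "vmenter" and "ept misconfig" are selected, while
 * "ept access test - data read" is excluded by the negative pattern.
 */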
int main(int argc, const char *argv[])
{
	int i = 0;

	setup_vm();
	hypercall_field = 0;

	/* We want xAPIC mode to test MMIO passthrough from L1 (us) to L2. */
	smp_reset_apic();

	argv++;
	argc--;

	if (!this_cpu_has(X86_FEATURE_VMX)) {
		printf("WARNING: vmx not supported, add '-cpu host'\n");
		goto exit;
	}
	init_bsp_vmx();
	if (test_wanted("test_vmx_feature_control", argv, argc)) {
		/* Sets MSR_IA32_FEATURE_CONTROL to 0x5 */
		if (test_vmx_feature_control() != 0)
			goto exit;
	} else {
		enable_vmx();
	}

	if (test_wanted("test_vmxon", argv, argc)) {
		/* Enables VMX */
		if (test_vmxon() != 0)
			goto exit;
	} else {
		if (vmx_on()) {
			report(0, "vmxon");
			goto exit;
		}
	}

	if (test_wanted("test_vmptrld", argv, argc))
		test_vmptrld();
	if (test_wanted("test_vmclear", argv, argc))
		test_vmclear();
	if (test_wanted("test_vmptrst", argv, argc))
		test_vmptrst();
	if (test_wanted("test_vmwrite_vmread", argv, argc))
		test_vmwrite_vmread();
	if (test_wanted("test_vmcs_high", argv, argc))
		test_vmcs_high();
	if (test_wanted("test_vmcs_lifecycle", argv, argc))
		test_vmcs_lifecycle();
	if (test_wanted("test_vmx_caps", argv, argc))
		test_vmx_caps();
	if (test_wanted("test_vmread_flags_touch", argv, argc))
		test_vmread_flags_touch();
	if (test_wanted("test_vmwrite_flags_touch", argv, argc))
		test_vmwrite_flags_touch();

	/* Balance vmxon from test_vmxon. */
	vmx_off();

	for (; vmx_tests[i].name != NULL; i++) {
		if (!test_wanted(vmx_tests[i].name, argv, argc))
			continue;
		if (test_run(&vmx_tests[i]))
			goto exit;
	}

	if (!matched)
		report(matched, "command line didn't match any tests!");

exit:
	return report_summary();
}