1 /* 2 * x86/vmx.c : Framework for testing nested virtualization 3 * This is a framework to test nested VMX for KVM, which 4 * started as a project of GSoC 2013. All test cases should 5 * be located in x86/vmx_tests.c and framework related 6 * functions should be in this file. 7 * 8 * How to write test cases? 9 * Add callbacks of test suite in variant "vmx_tests". You can 10 * write: 11 * 1. init function used for initializing test suite 12 * 2. main function for codes running in L2 guest, 13 * 3. exit_handler to handle vmexit of L2 to L1 14 * 4. syscall handler to handle L2 syscall vmexit 15 * 5. vmenter fail handler to handle direct failure of vmenter 16 * 6. guest_regs is loaded when vmenter and saved when 17 * vmexit, you can read and set it in exit_handler 18 * If no special function is needed for a test suite, use 19 * coressponding basic_* functions as callback. More handlers 20 * can be added to "vmx_tests", see details of "struct vmx_test" 21 * and function test_run(). 22 * 23 * Currently, vmx test framework only set up one VCPU and one 24 * concurrent guest test environment with same paging for L2 and 25 * L1. For usage of EPT, only 1:1 mapped paging is used from VFN 26 * to PFN. 
27 * 28 * Author : Arthur Chunqi Li <yzt356@gmail.com> 29 */ 30 31 #include "libcflat.h" 32 #include "processor.h" 33 #include "alloc_page.h" 34 #include "vm.h" 35 #include "vmalloc.h" 36 #include "desc.h" 37 #include "vmx.h" 38 #include "msr.h" 39 #include "smp.h" 40 #include "apic.h" 41 42 u64 *bsp_vmxon_region; 43 struct vmcs *vmcs_root; 44 u32 vpid_cnt; 45 u64 guest_stack_top, guest_syscall_stack_top; 46 u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2]; 47 struct regs regs; 48 49 struct vmx_test *current; 50 51 #define MAX_TEST_TEARDOWN_STEPS 10 52 53 struct test_teardown_step { 54 test_teardown_func func; 55 void *data; 56 }; 57 58 static int teardown_count; 59 static struct test_teardown_step teardown_steps[MAX_TEST_TEARDOWN_STEPS]; 60 61 static test_guest_func v2_guest_main; 62 63 u64 hypercall_field; 64 bool launched; 65 static int matched; 66 static int guest_finished; 67 static int in_guest; 68 69 union vmx_basic basic; 70 union vmx_ctrl_msr ctrl_pin_rev; 71 union vmx_ctrl_msr ctrl_cpu_rev[2]; 72 union vmx_ctrl_msr ctrl_exit_rev; 73 union vmx_ctrl_msr ctrl_enter_rev; 74 union vmx_ept_vpid ept_vpid; 75 76 extern struct descriptor_table_ptr gdt_descr; 77 extern struct descriptor_table_ptr idt_descr; 78 extern void *vmx_return; 79 extern void *entry_sysenter; 80 extern void *guest_entry; 81 82 static volatile u32 stage; 83 84 static jmp_buf abort_target; 85 86 struct vmcs_field { 87 u64 mask; 88 u64 encoding; 89 }; 90 91 #define MASK(_bits) GENMASK_ULL((_bits) - 1, 0) 92 #define MASK_NATURAL MASK(sizeof(unsigned long) * 8) 93 94 static struct vmcs_field vmcs_fields[] = { 95 { MASK(16), VPID }, 96 { MASK(16), PINV }, 97 { MASK(16), EPTP_IDX }, 98 99 { MASK(16), GUEST_SEL_ES }, 100 { MASK(16), GUEST_SEL_CS }, 101 { MASK(16), GUEST_SEL_SS }, 102 { MASK(16), GUEST_SEL_DS }, 103 { MASK(16), GUEST_SEL_FS }, 104 { MASK(16), GUEST_SEL_GS }, 105 { MASK(16), GUEST_SEL_LDTR }, 106 { MASK(16), GUEST_SEL_TR }, 107 { MASK(16), GUEST_INT_STATUS }, 108 109 { MASK(16), 
HOST_SEL_ES }, 110 { MASK(16), HOST_SEL_CS }, 111 { MASK(16), HOST_SEL_SS }, 112 { MASK(16), HOST_SEL_DS }, 113 { MASK(16), HOST_SEL_FS }, 114 { MASK(16), HOST_SEL_GS }, 115 { MASK(16), HOST_SEL_TR }, 116 117 { MASK(64), IO_BITMAP_A }, 118 { MASK(64), IO_BITMAP_B }, 119 { MASK(64), MSR_BITMAP }, 120 { MASK(64), EXIT_MSR_ST_ADDR }, 121 { MASK(64), EXIT_MSR_LD_ADDR }, 122 { MASK(64), ENTER_MSR_LD_ADDR }, 123 { MASK(64), VMCS_EXEC_PTR }, 124 { MASK(64), TSC_OFFSET }, 125 { MASK(64), APIC_VIRT_ADDR }, 126 { MASK(64), APIC_ACCS_ADDR }, 127 { MASK(64), EPTP }, 128 129 { MASK(64), INFO_PHYS_ADDR }, 130 131 { MASK(64), VMCS_LINK_PTR }, 132 { MASK(64), GUEST_DEBUGCTL }, 133 { MASK(64), GUEST_EFER }, 134 { MASK(64), GUEST_PAT }, 135 { MASK(64), GUEST_PERF_GLOBAL_CTRL }, 136 { MASK(64), GUEST_PDPTE }, 137 138 { MASK(64), HOST_PAT }, 139 { MASK(64), HOST_EFER }, 140 { MASK(64), HOST_PERF_GLOBAL_CTRL }, 141 142 { MASK(32), PIN_CONTROLS }, 143 { MASK(32), CPU_EXEC_CTRL0 }, 144 { MASK(32), EXC_BITMAP }, 145 { MASK(32), PF_ERROR_MASK }, 146 { MASK(32), PF_ERROR_MATCH }, 147 { MASK(32), CR3_TARGET_COUNT }, 148 { MASK(32), EXI_CONTROLS }, 149 { MASK(32), EXI_MSR_ST_CNT }, 150 { MASK(32), EXI_MSR_LD_CNT }, 151 { MASK(32), ENT_CONTROLS }, 152 { MASK(32), ENT_MSR_LD_CNT }, 153 { MASK(32), ENT_INTR_INFO }, 154 { MASK(32), ENT_INTR_ERROR }, 155 { MASK(32), ENT_INST_LEN }, 156 { MASK(32), TPR_THRESHOLD }, 157 { MASK(32), CPU_EXEC_CTRL1 }, 158 159 { MASK(32), VMX_INST_ERROR }, 160 { MASK(32), EXI_REASON }, 161 { MASK(32), EXI_INTR_INFO }, 162 { MASK(32), EXI_INTR_ERROR }, 163 { MASK(32), IDT_VECT_INFO }, 164 { MASK(32), IDT_VECT_ERROR }, 165 { MASK(32), EXI_INST_LEN }, 166 { MASK(32), EXI_INST_INFO }, 167 168 { MASK(32), GUEST_LIMIT_ES }, 169 { MASK(32), GUEST_LIMIT_CS }, 170 { MASK(32), GUEST_LIMIT_SS }, 171 { MASK(32), GUEST_LIMIT_DS }, 172 { MASK(32), GUEST_LIMIT_FS }, 173 { MASK(32), GUEST_LIMIT_GS }, 174 { MASK(32), GUEST_LIMIT_LDTR }, 175 { MASK(32), GUEST_LIMIT_TR }, 176 { MASK(32), 
GUEST_LIMIT_GDTR }, 177 { MASK(32), GUEST_LIMIT_IDTR }, 178 { 0x1d0ff, GUEST_AR_ES }, 179 { 0x1f0ff, GUEST_AR_CS }, 180 { 0x1d0ff, GUEST_AR_SS }, 181 { 0x1d0ff, GUEST_AR_DS }, 182 { 0x1d0ff, GUEST_AR_FS }, 183 { 0x1d0ff, GUEST_AR_GS }, 184 { 0x1d0ff, GUEST_AR_LDTR }, 185 { 0x1d0ff, GUEST_AR_TR }, 186 { MASK(32), GUEST_INTR_STATE }, 187 { MASK(32), GUEST_ACTV_STATE }, 188 { MASK(32), GUEST_SMBASE }, 189 { MASK(32), GUEST_SYSENTER_CS }, 190 { MASK(32), PREEMPT_TIMER_VALUE }, 191 192 { MASK(32), HOST_SYSENTER_CS }, 193 194 { MASK_NATURAL, CR0_MASK }, 195 { MASK_NATURAL, CR4_MASK }, 196 { MASK_NATURAL, CR0_READ_SHADOW }, 197 { MASK_NATURAL, CR4_READ_SHADOW }, 198 { MASK_NATURAL, CR3_TARGET_0 }, 199 { MASK_NATURAL, CR3_TARGET_1 }, 200 { MASK_NATURAL, CR3_TARGET_2 }, 201 { MASK_NATURAL, CR3_TARGET_3 }, 202 203 { MASK_NATURAL, EXI_QUALIFICATION }, 204 { MASK_NATURAL, IO_RCX }, 205 { MASK_NATURAL, IO_RSI }, 206 { MASK_NATURAL, IO_RDI }, 207 { MASK_NATURAL, IO_RIP }, 208 { MASK_NATURAL, GUEST_LINEAR_ADDRESS }, 209 210 { MASK_NATURAL, GUEST_CR0 }, 211 { MASK_NATURAL, GUEST_CR3 }, 212 { MASK_NATURAL, GUEST_CR4 }, 213 { MASK_NATURAL, GUEST_BASE_ES }, 214 { MASK_NATURAL, GUEST_BASE_CS }, 215 { MASK_NATURAL, GUEST_BASE_SS }, 216 { MASK_NATURAL, GUEST_BASE_DS }, 217 { MASK_NATURAL, GUEST_BASE_FS }, 218 { MASK_NATURAL, GUEST_BASE_GS }, 219 { MASK_NATURAL, GUEST_BASE_LDTR }, 220 { MASK_NATURAL, GUEST_BASE_TR }, 221 { MASK_NATURAL, GUEST_BASE_GDTR }, 222 { MASK_NATURAL, GUEST_BASE_IDTR }, 223 { MASK_NATURAL, GUEST_DR7 }, 224 { MASK_NATURAL, GUEST_RSP }, 225 { MASK_NATURAL, GUEST_RIP }, 226 { MASK_NATURAL, GUEST_RFLAGS }, 227 { MASK_NATURAL, GUEST_PENDING_DEBUG }, 228 { MASK_NATURAL, GUEST_SYSENTER_ESP }, 229 { MASK_NATURAL, GUEST_SYSENTER_EIP }, 230 231 { MASK_NATURAL, HOST_CR0 }, 232 { MASK_NATURAL, HOST_CR3 }, 233 { MASK_NATURAL, HOST_CR4 }, 234 { MASK_NATURAL, HOST_BASE_FS }, 235 { MASK_NATURAL, HOST_BASE_GS }, 236 { MASK_NATURAL, HOST_BASE_TR }, 237 { MASK_NATURAL, 
HOST_BASE_GDTR }, 238 { MASK_NATURAL, HOST_BASE_IDTR }, 239 { MASK_NATURAL, HOST_SYSENTER_ESP }, 240 { MASK_NATURAL, HOST_SYSENTER_EIP }, 241 { MASK_NATURAL, HOST_RSP }, 242 { MASK_NATURAL, HOST_RIP }, 243 }; 244 245 enum vmcs_field_type { 246 VMCS_FIELD_TYPE_CONTROL = 0, 247 VMCS_FIELD_TYPE_READ_ONLY_DATA = 1, 248 VMCS_FIELD_TYPE_GUEST = 2, 249 VMCS_FIELD_TYPE_HOST = 3, 250 VMCS_FIELD_TYPES, 251 }; 252 253 static inline int vmcs_field_type(struct vmcs_field *f) 254 { 255 return (f->encoding >> VMCS_FIELD_TYPE_SHIFT) & 0x3; 256 } 257 258 static int vmcs_field_readonly(struct vmcs_field *f) 259 { 260 u64 ia32_vmx_misc; 261 262 ia32_vmx_misc = rdmsr(MSR_IA32_VMX_MISC); 263 return !(ia32_vmx_misc & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS) && 264 (vmcs_field_type(f) == VMCS_FIELD_TYPE_READ_ONLY_DATA); 265 } 266 267 static inline u64 vmcs_field_value(struct vmcs_field *f, u8 cookie) 268 { 269 u64 value; 270 271 /* Incorporate the cookie and the field encoding into the value. */ 272 value = cookie; 273 value |= (f->encoding << 8); 274 value |= 0xdeadbeefull << 32; 275 276 return value & f->mask; 277 } 278 279 static void set_vmcs_field(struct vmcs_field *f, u8 cookie) 280 { 281 vmcs_write(f->encoding, vmcs_field_value(f, cookie)); 282 } 283 284 static bool check_vmcs_field(struct vmcs_field *f, u8 cookie) 285 { 286 u64 expected; 287 u64 actual; 288 int ret; 289 290 if (f->encoding == VMX_INST_ERROR) { 291 printf("Skipping volatile field %lx\n", f->encoding); 292 return true; 293 } 294 295 ret = vmcs_read_checking(f->encoding, &actual); 296 assert(!(ret & X86_EFLAGS_CF)); 297 /* Skip VMCS fields that aren't recognized by the CPU */ 298 if (ret & X86_EFLAGS_ZF) 299 return true; 300 301 if (vmcs_field_readonly(f)) { 302 printf("Skipping read-only field %lx\n", f->encoding); 303 return true; 304 } 305 306 expected = vmcs_field_value(f, cookie); 307 actual &= f->mask; 308 309 if (expected == actual) 310 return true; 311 312 printf("FAIL: VMWRITE/VMREAD %lx (expected: %lx, 
actual: %lx)\n", 313 f->encoding, (unsigned long) expected, (unsigned long) actual); 314 315 return false; 316 } 317 318 static void set_all_vmcs_fields(u8 cookie) 319 { 320 int i; 321 322 for (i = 0; i < ARRAY_SIZE(vmcs_fields); i++) 323 set_vmcs_field(&vmcs_fields[i], cookie); 324 } 325 326 static bool check_all_vmcs_fields(u8 cookie) 327 { 328 bool pass = true; 329 int i; 330 331 for (i = 0; i < ARRAY_SIZE(vmcs_fields); i++) { 332 if (!check_vmcs_field(&vmcs_fields[i], cookie)) 333 pass = false; 334 } 335 336 return pass; 337 } 338 339 static u32 find_vmcs_max_index(void) 340 { 341 u32 idx, width, type, enc; 342 u64 actual; 343 int ret; 344 345 /* scan backwards and stop when found */ 346 for (idx = (1 << 9) - 1; idx >= 0; idx--) { 347 348 /* try all combinations of width and type */ 349 for (type = 0; type < (1 << 2); type++) { 350 for (width = 0; width < (1 << 2) ; width++) { 351 enc = (idx << VMCS_FIELD_INDEX_SHIFT) | 352 (type << VMCS_FIELD_TYPE_SHIFT) | 353 (width << VMCS_FIELD_WIDTH_SHIFT); 354 355 ret = vmcs_read_checking(enc, &actual); 356 assert(!(ret & X86_EFLAGS_CF)); 357 if (!(ret & X86_EFLAGS_ZF)) 358 return idx; 359 } 360 } 361 } 362 /* some VMCS fields should exist */ 363 assert(0); 364 return 0; 365 } 366 367 static void test_vmwrite_vmread(void) 368 { 369 struct vmcs *vmcs = alloc_page(); 370 u32 vmcs_enum_max, max_index = 0; 371 372 vmcs->hdr.revision_id = basic.revision; 373 assert(!vmcs_clear(vmcs)); 374 assert(!make_vmcs_current(vmcs)); 375 376 set_all_vmcs_fields(0x42); 377 report(check_all_vmcs_fields(0x42), "VMWRITE/VMREAD"); 378 379 vmcs_enum_max = (rdmsr(MSR_IA32_VMX_VMCS_ENUM) & VMCS_FIELD_INDEX_MASK) 380 >> VMCS_FIELD_INDEX_SHIFT; 381 max_index = find_vmcs_max_index(); 382 report(vmcs_enum_max == max_index, 383 "VMX_VMCS_ENUM.MAX_INDEX expected: %x, actual: %x", 384 max_index, vmcs_enum_max); 385 386 assert(!vmcs_clear(vmcs)); 387 free_page(vmcs); 388 } 389 390 ulong finish_fault; 391 u8 sentinel; 392 bool handler_called; 393 394 
static void pf_handler(struct ex_regs *regs) 395 { 396 /* 397 * check that RIP was not improperly advanced and that the 398 * flags value was preserved. 399 */ 400 report(regs->rip < finish_fault, "RIP has not been advanced!"); 401 report(((u8)regs->rflags == ((sentinel | 2) & 0xd7)), 402 "The low byte of RFLAGS was preserved!"); 403 regs->rip = finish_fault; 404 handler_called = true; 405 406 } 407 408 static void prep_flags_test_env(void **vpage, struct vmcs **vmcs, handler *old) 409 { 410 /* 411 * get an unbacked address that will cause a #PF 412 */ 413 *vpage = alloc_vpage(); 414 415 /* 416 * set up VMCS so we have something to read from 417 */ 418 *vmcs = alloc_page(); 419 420 memset(*vmcs, 0, PAGE_SIZE); 421 (*vmcs)->hdr.revision_id = basic.revision; 422 assert(!vmcs_clear(*vmcs)); 423 assert(!make_vmcs_current(*vmcs)); 424 425 *old = handle_exception(PF_VECTOR, &pf_handler); 426 } 427 428 static noinline void test_read_sentinel(void) 429 { 430 void *vpage; 431 struct vmcs *vmcs; 432 handler old; 433 434 prep_flags_test_env(&vpage, &vmcs, &old); 435 436 /* 437 * set the proper label 438 */ 439 extern char finish_read_fault; 440 441 finish_fault = (ulong)&finish_read_fault; 442 443 /* 444 * execute the vmread instruction that will cause a #PF 445 */ 446 handler_called = false; 447 asm volatile ("movb %[byte], %%ah\n\t" 448 "sahf\n\t" 449 "vmread %[enc], %[val]; finish_read_fault:" 450 : [val] "=m" (*(u64 *)vpage) 451 : [byte] "Krm" (sentinel), 452 [enc] "r" ((u64)GUEST_SEL_SS) 453 : "cc", "ah"); 454 report(handler_called, "The #PF handler was invoked"); 455 456 /* 457 * restore the old #PF handler 458 */ 459 handle_exception(PF_VECTOR, old); 460 } 461 462 static void test_vmread_flags_touch(void) 463 { 464 /* 465 * set up the sentinel value in the flags register. we 466 * choose these two values because they candy-stripe 467 * the 5 flags that sahf sets. 
468 */ 469 sentinel = 0x91; 470 test_read_sentinel(); 471 472 sentinel = 0x45; 473 test_read_sentinel(); 474 } 475 476 static noinline void test_write_sentinel(void) 477 { 478 void *vpage; 479 struct vmcs *vmcs; 480 handler old; 481 482 prep_flags_test_env(&vpage, &vmcs, &old); 483 484 /* 485 * set the proper label 486 */ 487 extern char finish_write_fault; 488 489 finish_fault = (ulong)&finish_write_fault; 490 491 /* 492 * execute the vmwrite instruction that will cause a #PF 493 */ 494 handler_called = false; 495 asm volatile ("movb %[byte], %%ah\n\t" 496 "sahf\n\t" 497 "vmwrite %[val], %[enc]; finish_write_fault:" 498 : [val] "=m" (*(u64 *)vpage) 499 : [byte] "Krm" (sentinel), 500 [enc] "r" ((u64)GUEST_SEL_SS) 501 : "cc", "ah"); 502 report(handler_called, "The #PF handler was invoked"); 503 504 /* 505 * restore the old #PF handler 506 */ 507 handle_exception(PF_VECTOR, old); 508 } 509 510 static void test_vmwrite_flags_touch(void) 511 { 512 /* 513 * set up the sentinel value in the flags register. we 514 * choose these two values because they candy-stripe 515 * the 5 flags that sahf sets. 
516 */ 517 sentinel = 0x91; 518 test_write_sentinel(); 519 520 sentinel = 0x45; 521 test_write_sentinel(); 522 } 523 524 525 static void test_vmcs_high(void) 526 { 527 struct vmcs *vmcs = alloc_page(); 528 529 vmcs->hdr.revision_id = basic.revision; 530 assert(!vmcs_clear(vmcs)); 531 assert(!make_vmcs_current(vmcs)); 532 533 vmcs_write(TSC_OFFSET, 0x0123456789ABCDEFull); 534 report(vmcs_read(TSC_OFFSET) == 0x0123456789ABCDEFull, 535 "VMREAD TSC_OFFSET after VMWRITE TSC_OFFSET"); 536 report(vmcs_read(TSC_OFFSET_HI) == 0x01234567ull, 537 "VMREAD TSC_OFFSET_HI after VMWRITE TSC_OFFSET"); 538 vmcs_write(TSC_OFFSET_HI, 0x76543210ul); 539 report(vmcs_read(TSC_OFFSET_HI) == 0x76543210ul, 540 "VMREAD TSC_OFFSET_HI after VMWRITE TSC_OFFSET_HI"); 541 report(vmcs_read(TSC_OFFSET) == 0x7654321089ABCDEFull, 542 "VMREAD TSC_OFFSET after VMWRITE TSC_OFFSET_HI"); 543 544 assert(!vmcs_clear(vmcs)); 545 free_page(vmcs); 546 } 547 548 static void test_vmcs_lifecycle(void) 549 { 550 struct vmcs *vmcs[2] = {}; 551 int i; 552 553 for (i = 0; i < ARRAY_SIZE(vmcs); i++) { 554 vmcs[i] = alloc_page(); 555 vmcs[i]->hdr.revision_id = basic.revision; 556 } 557 558 #define VMPTRLD(_i) do { \ 559 assert(_i < ARRAY_SIZE(vmcs)); \ 560 assert(!make_vmcs_current(vmcs[_i])); \ 561 printf("VMPTRLD VMCS%d\n", (_i)); \ 562 } while (0) 563 564 #define VMCLEAR(_i) do { \ 565 assert(_i < ARRAY_SIZE(vmcs)); \ 566 assert(!vmcs_clear(vmcs[_i])); \ 567 printf("VMCLEAR VMCS%d\n", (_i)); \ 568 } while (0) 569 570 VMCLEAR(0); 571 VMPTRLD(0); 572 set_all_vmcs_fields(0); 573 report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]"); 574 575 VMCLEAR(0); 576 VMPTRLD(0); 577 report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]"); 578 579 VMCLEAR(1); 580 report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]"); 581 582 VMPTRLD(1); 583 set_all_vmcs_fields(1); 584 report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VCMS1]"); 585 586 VMPTRLD(0); 587 report(check_all_vmcs_fields(0), 
"current:VMCS0 active:[VMCS0,VCMS1]"); 588 VMPTRLD(1); 589 report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VCMS1]"); 590 VMPTRLD(1); 591 report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VCMS1]"); 592 593 VMCLEAR(0); 594 report(check_all_vmcs_fields(1), "current:VMCS1 active:[VCMS1]"); 595 596 /* VMPTRLD should not erase VMWRITEs to the current VMCS */ 597 set_all_vmcs_fields(2); 598 VMPTRLD(1); 599 report(check_all_vmcs_fields(2), "current:VMCS1 active:[VCMS1]"); 600 601 for (i = 0; i < ARRAY_SIZE(vmcs); i++) { 602 VMCLEAR(i); 603 free_page(vmcs[i]); 604 } 605 606 #undef VMPTRLD 607 #undef VMCLEAR 608 } 609 610 void vmx_set_test_stage(u32 s) 611 { 612 barrier(); 613 stage = s; 614 barrier(); 615 } 616 617 u32 vmx_get_test_stage(void) 618 { 619 u32 s; 620 621 barrier(); 622 s = stage; 623 barrier(); 624 return s; 625 } 626 627 void vmx_inc_test_stage(void) 628 { 629 barrier(); 630 stage++; 631 barrier(); 632 } 633 634 /* entry_sysenter */ 635 asm( 636 ".align 4, 0x90\n\t" 637 ".globl entry_sysenter\n\t" 638 "entry_sysenter:\n\t" 639 SAVE_GPR 640 " and $0xf, %rax\n\t" 641 " mov %rax, %rdi\n\t" 642 " call syscall_handler\n\t" 643 LOAD_GPR 644 " vmresume\n\t" 645 ); 646 647 static void __attribute__((__used__)) syscall_handler(u64 syscall_no) 648 { 649 if (current->syscall_handler) 650 current->syscall_handler(syscall_no); 651 } 652 653 static const char * const exit_reason_descriptions[] = { 654 [VMX_EXC_NMI] = "VMX_EXC_NMI", 655 [VMX_EXTINT] = "VMX_EXTINT", 656 [VMX_TRIPLE_FAULT] = "VMX_TRIPLE_FAULT", 657 [VMX_INIT] = "VMX_INIT", 658 [VMX_SIPI] = "VMX_SIPI", 659 [VMX_SMI_IO] = "VMX_SMI_IO", 660 [VMX_SMI_OTHER] = "VMX_SMI_OTHER", 661 [VMX_INTR_WINDOW] = "VMX_INTR_WINDOW", 662 [VMX_NMI_WINDOW] = "VMX_NMI_WINDOW", 663 [VMX_TASK_SWITCH] = "VMX_TASK_SWITCH", 664 [VMX_CPUID] = "VMX_CPUID", 665 [VMX_GETSEC] = "VMX_GETSEC", 666 [VMX_HLT] = "VMX_HLT", 667 [VMX_INVD] = "VMX_INVD", 668 [VMX_INVLPG] = "VMX_INVLPG", 669 [VMX_RDPMC] = "VMX_RDPMC", 670 
[VMX_RDTSC] = "VMX_RDTSC", 671 [VMX_RSM] = "VMX_RSM", 672 [VMX_VMCALL] = "VMX_VMCALL", 673 [VMX_VMCLEAR] = "VMX_VMCLEAR", 674 [VMX_VMLAUNCH] = "VMX_VMLAUNCH", 675 [VMX_VMPTRLD] = "VMX_VMPTRLD", 676 [VMX_VMPTRST] = "VMX_VMPTRST", 677 [VMX_VMREAD] = "VMX_VMREAD", 678 [VMX_VMRESUME] = "VMX_VMRESUME", 679 [VMX_VMWRITE] = "VMX_VMWRITE", 680 [VMX_VMXOFF] = "VMX_VMXOFF", 681 [VMX_VMXON] = "VMX_VMXON", 682 [VMX_CR] = "VMX_CR", 683 [VMX_DR] = "VMX_DR", 684 [VMX_IO] = "VMX_IO", 685 [VMX_RDMSR] = "VMX_RDMSR", 686 [VMX_WRMSR] = "VMX_WRMSR", 687 [VMX_FAIL_STATE] = "VMX_FAIL_STATE", 688 [VMX_FAIL_MSR] = "VMX_FAIL_MSR", 689 [VMX_MWAIT] = "VMX_MWAIT", 690 [VMX_MTF] = "VMX_MTF", 691 [VMX_MONITOR] = "VMX_MONITOR", 692 [VMX_PAUSE] = "VMX_PAUSE", 693 [VMX_FAIL_MCHECK] = "VMX_FAIL_MCHECK", 694 [VMX_TPR_THRESHOLD] = "VMX_TPR_THRESHOLD", 695 [VMX_APIC_ACCESS] = "VMX_APIC_ACCESS", 696 [VMX_EOI_INDUCED] = "VMX_EOI_INDUCED", 697 [VMX_GDTR_IDTR] = "VMX_GDTR_IDTR", 698 [VMX_LDTR_TR] = "VMX_LDTR_TR", 699 [VMX_EPT_VIOLATION] = "VMX_EPT_VIOLATION", 700 [VMX_EPT_MISCONFIG] = "VMX_EPT_MISCONFIG", 701 [VMX_INVEPT] = "VMX_INVEPT", 702 [VMX_PREEMPT] = "VMX_PREEMPT", 703 [VMX_INVVPID] = "VMX_INVVPID", 704 [VMX_WBINVD] = "VMX_WBINVD", 705 [VMX_XSETBV] = "VMX_XSETBV", 706 [VMX_APIC_WRITE] = "VMX_APIC_WRITE", 707 [VMX_RDRAND] = "VMX_RDRAND", 708 [VMX_INVPCID] = "VMX_INVPCID", 709 [VMX_VMFUNC] = "VMX_VMFUNC", 710 [VMX_RDSEED] = "VMX_RDSEED", 711 [VMX_PML_FULL] = "VMX_PML_FULL", 712 [VMX_XSAVES] = "VMX_XSAVES", 713 [VMX_XRSTORS] = "VMX_XRSTORS", 714 }; 715 716 const char *exit_reason_description(u64 reason) 717 { 718 if (reason >= ARRAY_SIZE(exit_reason_descriptions)) 719 return "(unknown)"; 720 return exit_reason_descriptions[reason] ? 
: "(unused)"; 721 } 722 723 void print_vmexit_info(union exit_reason exit_reason) 724 { 725 u64 guest_rip, guest_rsp; 726 ulong exit_qual = vmcs_read(EXI_QUALIFICATION); 727 guest_rip = vmcs_read(GUEST_RIP); 728 guest_rsp = vmcs_read(GUEST_RSP); 729 printf("VMEXIT info:\n"); 730 printf("\tvmexit reason = %u\n", exit_reason.basic); 731 printf("\tfailed vmentry = %u\n", !!exit_reason.failed_vmentry); 732 printf("\texit qualification = %#lx\n", exit_qual); 733 printf("\tguest_rip = %#lx\n", guest_rip); 734 printf("\tRAX=%#lx RBX=%#lx RCX=%#lx RDX=%#lx\n", 735 regs.rax, regs.rbx, regs.rcx, regs.rdx); 736 printf("\tRSP=%#lx RBP=%#lx RSI=%#lx RDI=%#lx\n", 737 guest_rsp, regs.rbp, regs.rsi, regs.rdi); 738 printf("\tR8 =%#lx R9 =%#lx R10=%#lx R11=%#lx\n", 739 regs.r8, regs.r9, regs.r10, regs.r11); 740 printf("\tR12=%#lx R13=%#lx R14=%#lx R15=%#lx\n", 741 regs.r12, regs.r13, regs.r14, regs.r15); 742 } 743 744 void print_vmentry_failure_info(struct vmentry_result *result) 745 { 746 if (result->entered) 747 return; 748 749 if (result->vm_fail) { 750 printf("VM-Fail on %s: ", result->instr); 751 switch (result->flags & VMX_ENTRY_FLAGS) { 752 case X86_EFLAGS_CF: 753 printf("current-VMCS pointer is not valid.\n"); 754 break; 755 case X86_EFLAGS_ZF: 756 printf("error number is %ld. 
See Intel 30.4.\n", 757 vmcs_read(VMX_INST_ERROR)); 758 break; 759 default: 760 printf("unexpected flags %lx!\n", result->flags); 761 } 762 } else { 763 u64 qual = vmcs_read(EXI_QUALIFICATION); 764 765 printf("VM-Exit failure on %s (reason=%#x, qual=%#lx): ", 766 result->instr, result->exit_reason.full, qual); 767 768 switch (result->exit_reason.basic) { 769 case VMX_FAIL_STATE: 770 printf("invalid guest state\n"); 771 break; 772 case VMX_FAIL_MSR: 773 printf("MSR loading\n"); 774 break; 775 case VMX_FAIL_MCHECK: 776 printf("machine-check event\n"); 777 break; 778 default: 779 printf("unexpected basic exit reason %u\n", 780 result->exit_reason.basic); 781 } 782 783 if (!result->exit_reason.failed_vmentry) 784 printf("\tVMX_ENTRY_FAILURE BIT NOT SET!\n"); 785 786 if (result->exit_reason.full & 0x7fff0000) 787 printf("\tRESERVED BITS SET!\n"); 788 } 789 } 790 791 /* 792 * VMCLEAR should ensures all VMCS state is flushed to the VMCS 793 * region in memory. 794 */ 795 static void test_vmclear_flushing(void) 796 { 797 struct vmcs *vmcs[3] = {}; 798 int i; 799 800 for (i = 0; i < ARRAY_SIZE(vmcs); i++) { 801 vmcs[i] = alloc_page(); 802 } 803 804 vmcs[0]->hdr.revision_id = basic.revision; 805 assert(!vmcs_clear(vmcs[0])); 806 assert(!make_vmcs_current(vmcs[0])); 807 set_all_vmcs_fields(0x86); 808 809 assert(!vmcs_clear(vmcs[0])); 810 memcpy(vmcs[1], vmcs[0], basic.size); 811 assert(!make_vmcs_current(vmcs[1])); 812 report(check_all_vmcs_fields(0x86), 813 "test vmclear flush (current VMCS)"); 814 815 set_all_vmcs_fields(0x87); 816 assert(!make_vmcs_current(vmcs[0])); 817 assert(!vmcs_clear(vmcs[1])); 818 memcpy(vmcs[2], vmcs[1], basic.size); 819 assert(!make_vmcs_current(vmcs[2])); 820 report(check_all_vmcs_fields(0x87), 821 "test vmclear flush (!current VMCS)"); 822 823 for (i = 0; i < ARRAY_SIZE(vmcs); i++) { 824 assert(!vmcs_clear(vmcs[i])); 825 free_page(vmcs[i]); 826 } 827 } 828 829 static void test_vmclear(void) 830 { 831 struct vmcs *tmp_root; 832 int width = 
cpuid_maxphyaddr(); 833 834 /* 835 * Note- The tests below do not necessarily have a 836 * valid VMCS, but that's ok since the invalid vmcs 837 * is only used for a specific test and is discarded 838 * without touching its contents 839 */ 840 841 /* Unaligned page access */ 842 tmp_root = (struct vmcs *)((intptr_t)vmcs_root + 1); 843 report(vmcs_clear(tmp_root) == 1, "test vmclear with unaligned vmcs"); 844 845 /* gpa bits beyond physical address width are set*/ 846 tmp_root = (struct vmcs *)((intptr_t)vmcs_root | 847 ((u64)1 << (width+1))); 848 report(vmcs_clear(tmp_root) == 1, 849 "test vmclear with vmcs address bits set beyond physical address width"); 850 851 /* Pass VMXON region */ 852 tmp_root = (struct vmcs *)bsp_vmxon_region; 853 report(vmcs_clear(tmp_root) == 1, "test vmclear with vmxon region"); 854 855 /* Valid VMCS */ 856 report(vmcs_clear(vmcs_root) == 0, 857 "test vmclear with valid vmcs region"); 858 859 test_vmclear_flushing(); 860 } 861 862 static void __attribute__((__used__)) guest_main(void) 863 { 864 if (current->v2) 865 v2_guest_main(); 866 else 867 current->guest_main(); 868 } 869 870 /* guest_entry */ 871 asm( 872 ".align 4, 0x90\n\t" 873 ".globl entry_guest\n\t" 874 "guest_entry:\n\t" 875 " call guest_main\n\t" 876 " mov $1, %edi\n\t" 877 " call hypercall\n\t" 878 ); 879 880 /* EPT paging structure related functions */ 881 /* split_large_ept_entry: Split a 2M/1G large page into 512 smaller PTEs. 
882 @ptep : large page table entry to split 883 @level : level of ptep (2 or 3) 884 */ 885 static void split_large_ept_entry(unsigned long *ptep, int level) 886 { 887 unsigned long *new_pt; 888 unsigned long gpa; 889 unsigned long pte; 890 unsigned long prototype; 891 int i; 892 893 pte = *ptep; 894 assert(pte & EPT_PRESENT); 895 assert(pte & EPT_LARGE_PAGE); 896 assert(level == 2 || level == 3); 897 898 new_pt = alloc_page(); 899 assert(new_pt); 900 901 prototype = pte & ~EPT_ADDR_MASK; 902 if (level == 2) 903 prototype &= ~EPT_LARGE_PAGE; 904 905 gpa = pte & EPT_ADDR_MASK; 906 for (i = 0; i < EPT_PGDIR_ENTRIES; i++) { 907 new_pt[i] = prototype | gpa; 908 gpa += 1ul << EPT_LEVEL_SHIFT(level - 1); 909 } 910 911 pte &= ~EPT_LARGE_PAGE; 912 pte &= ~EPT_ADDR_MASK; 913 pte |= virt_to_phys(new_pt); 914 915 *ptep = pte; 916 } 917 918 /* install_ept_entry : Install a page to a given level in EPT 919 @pml4 : addr of pml4 table 920 @pte_level : level of PTE to set 921 @guest_addr : physical address of guest 922 @pte : pte value to set 923 @pt_page : address of page table, NULL for a new page 924 */ 925 void install_ept_entry(unsigned long *pml4, 926 int pte_level, 927 unsigned long guest_addr, 928 unsigned long pte, 929 unsigned long *pt_page) 930 { 931 int level; 932 unsigned long *pt = pml4; 933 unsigned offset; 934 935 /* EPT only uses 48 bits of GPA. 
*/ 936 assert(guest_addr < (1ul << 48)); 937 938 for (level = EPT_PAGE_LEVEL; level > pte_level; --level) { 939 offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) 940 & EPT_PGDIR_MASK; 941 if (!(pt[offset] & (EPT_PRESENT))) { 942 unsigned long *new_pt = pt_page; 943 if (!new_pt) 944 new_pt = alloc_page(); 945 else 946 pt_page = 0; 947 memset(new_pt, 0, PAGE_SIZE); 948 pt[offset] = virt_to_phys(new_pt) 949 | EPT_RA | EPT_WA | EPT_EA; 950 } else if (pt[offset] & EPT_LARGE_PAGE) 951 split_large_ept_entry(&pt[offset], level); 952 pt = phys_to_virt(pt[offset] & EPT_ADDR_MASK); 953 } 954 offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) & EPT_PGDIR_MASK; 955 pt[offset] = pte; 956 } 957 958 /* Map a page, @perm is the permission of the page */ 959 void install_ept(unsigned long *pml4, 960 unsigned long phys, 961 unsigned long guest_addr, 962 u64 perm) 963 { 964 install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0); 965 } 966 967 /* Map a 1G-size page */ 968 void install_1g_ept(unsigned long *pml4, 969 unsigned long phys, 970 unsigned long guest_addr, 971 u64 perm) 972 { 973 install_ept_entry(pml4, 3, guest_addr, 974 (phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0); 975 } 976 977 /* Map a 2M-size page */ 978 void install_2m_ept(unsigned long *pml4, 979 unsigned long phys, 980 unsigned long guest_addr, 981 u64 perm) 982 { 983 install_ept_entry(pml4, 2, guest_addr, 984 (phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0); 985 } 986 987 /* setup_ept_range : Setup a range of 1:1 mapped page to EPT paging structure. 
988 @start : start address of guest page 989 @len : length of address to be mapped 990 @map_1g : whether 1G page map is used 991 @map_2m : whether 2M page map is used 992 @perm : permission for every page 993 */ 994 void setup_ept_range(unsigned long *pml4, unsigned long start, 995 unsigned long len, int map_1g, int map_2m, u64 perm) 996 { 997 u64 phys = start; 998 u64 max = (u64)len + (u64)start; 999 1000 if (map_1g) { 1001 while (phys + PAGE_SIZE_1G <= max) { 1002 install_1g_ept(pml4, phys, phys, perm); 1003 phys += PAGE_SIZE_1G; 1004 } 1005 } 1006 if (map_2m) { 1007 while (phys + PAGE_SIZE_2M <= max) { 1008 install_2m_ept(pml4, phys, phys, perm); 1009 phys += PAGE_SIZE_2M; 1010 } 1011 } 1012 while (phys + PAGE_SIZE <= max) { 1013 install_ept(pml4, phys, phys, perm); 1014 phys += PAGE_SIZE; 1015 } 1016 } 1017 1018 /* get_ept_pte : Get the PTE of a given level in EPT, 1019 @level == 1 means get the latest level*/ 1020 bool get_ept_pte(unsigned long *pml4, unsigned long guest_addr, int level, 1021 unsigned long *pte) 1022 { 1023 int l; 1024 unsigned long *pt = pml4, iter_pte; 1025 unsigned offset; 1026 1027 assert(level >= 1 && level <= 4); 1028 1029 for (l = EPT_PAGE_LEVEL; ; --l) { 1030 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 1031 iter_pte = pt[offset]; 1032 if (l == level) 1033 break; 1034 if (l < 4 && (iter_pte & EPT_LARGE_PAGE)) 1035 return false; 1036 if (!(iter_pte & (EPT_PRESENT))) 1037 return false; 1038 pt = (unsigned long *)(iter_pte & EPT_ADDR_MASK); 1039 } 1040 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 1041 if (pte) 1042 *pte = pt[offset]; 1043 return true; 1044 } 1045 1046 static void clear_ept_ad_pte(unsigned long *pml4, unsigned long guest_addr) 1047 { 1048 int l; 1049 unsigned long *pt = pml4; 1050 u64 pte; 1051 unsigned offset; 1052 1053 for (l = EPT_PAGE_LEVEL; ; --l) { 1054 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 1055 pt[offset] &= ~(EPT_ACCESS_FLAG|EPT_DIRTY_FLAG); 1056 pte = 
pt[offset]; 1057 if (l == 1 || (l < 4 && (pte & EPT_LARGE_PAGE))) 1058 break; 1059 pt = (unsigned long *)(pte & EPT_ADDR_MASK); 1060 } 1061 } 1062 1063 /* clear_ept_ad : Clear EPT A/D bits for the page table walk and the 1064 final GPA of a guest address. */ 1065 void clear_ept_ad(unsigned long *pml4, u64 guest_cr3, 1066 unsigned long guest_addr) 1067 { 1068 int l; 1069 unsigned long *pt = (unsigned long *)guest_cr3, gpa; 1070 u64 pte, offset_in_page; 1071 unsigned offset; 1072 1073 for (l = EPT_PAGE_LEVEL; ; --l) { 1074 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 1075 1076 clear_ept_ad_pte(pml4, (u64) &pt[offset]); 1077 pte = pt[offset]; 1078 if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK))) 1079 break; 1080 if (!(pte & PT_PRESENT_MASK)) 1081 return; 1082 pt = (unsigned long *)(pte & PT_ADDR_MASK); 1083 } 1084 1085 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 1086 offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1); 1087 gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page); 1088 clear_ept_ad_pte(pml4, gpa); 1089 } 1090 1091 /* check_ept_ad : Check the content of EPT A/D bits for the page table 1092 walk and the final GPA of a guest address. 
*/
void check_ept_ad(unsigned long *pml4, u64 guest_cr3,
		  unsigned long guest_addr, int expected_gpa_ad,
		  int expected_pt_ad)
{
	int l;
	unsigned long *pt = (unsigned long *)guest_cr3, gpa;
	u64 ept_pte, pte, offset_in_page;
	unsigned offset;
	bool bad_pt_ad = false;

	/* Walk the guest page tables, checking the EPT entry backing each one. */
	for (l = EPT_PAGE_LEVEL; ; --l) {
		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;

		if (!get_ept_pte(pml4, (u64) &pt[offset], 1, &ept_pte)) {
			printf("EPT - guest level %d page table is not mapped.\n", l);
			return;
		}

		/* Only the first mismatching level is reported. */
		if (!bad_pt_ad) {
			bad_pt_ad |= (ept_pte & (EPT_ACCESS_FLAG|EPT_DIRTY_FLAG)) != expected_pt_ad;
			if (bad_pt_ad)
				report_fail("EPT - guest level %d page table A=%d/D=%d",
					    l,
					    !!(expected_pt_ad & EPT_ACCESS_FLAG),
					    !!(expected_pt_ad & EPT_DIRTY_FLAG));
		}

		pte = pt[offset];
		if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK)))
			break;
		if (!(pte & PT_PRESENT_MASK))
			return;
		pt = (unsigned long *)(pte & PT_ADDR_MASK);
	}

	if (!bad_pt_ad)
		report_pass("EPT - guest page table structures A=%d/D=%d",
			    !!(expected_pt_ad & EPT_ACCESS_FLAG),
			    !!(expected_pt_ad & EPT_DIRTY_FLAG));

	/* Compose the final guest-physical address and check its EPT entry. */
	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
	offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1);
	gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page);

	if (!get_ept_pte(pml4, gpa, 1, &ept_pte)) {
		report_fail("EPT - guest physical address is not mapped");
		return;
	}
	report((ept_pte & (EPT_ACCESS_FLAG | EPT_DIRTY_FLAG)) == expected_gpa_ad,
	       "EPT - guest physical address A=%d/D=%d",
	       !!(expected_gpa_ad & EPT_ACCESS_FLAG),
	       !!(expected_gpa_ad & EPT_DIRTY_FLAG));
}

/*
 * set_ept_pte : Overwrite the EPT entry that maps @guest_addr at @level
 * with @pte_val.  Every entry above @level must already be present
 * (asserted); nothing is allocated.
 */
void set_ept_pte(unsigned long *pml4, unsigned long guest_addr,
		 int level, u64 pte_val)
{
	int l;
	unsigned long *pt = pml4;
	unsigned offset;

	assert(level >= 1 && level <= 4);

	for (l = EPT_PAGE_LEVEL; ; --l) {
		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
		if (l == level)
			break;
		assert(pt[offset] & EPT_PRESENT);
		pt = (unsigned long *)(pt[offset] & EPT_ADDR_MASK);
	}
	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
	pt[offset] = pte_val;
}

/*
 * Write the execution-control fields of the current VMCS from the
 * ctrl_pin / ctrl_cpu[] globals and assign it the next VPID.
 */
static void init_vmcs_ctrl(void)
{
	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
	/* 26.2.1.1 */
	vmcs_write(PIN_CONTROLS, ctrl_pin);
	/* Disable VMEXIT of IO instruction */
	vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
	if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
		/* Fold the capability MSR's .set bits in and mask with .clr. */
		ctrl_cpu[1] = (ctrl_cpu[1] | ctrl_cpu_rev[1].set) &
			ctrl_cpu_rev[1].clr;
		vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
	}
	vmcs_write(CR3_TARGET_COUNT, 0);
	vmcs_write(VPID, ++vpid_cnt);
}

/*
 * Fill the host-state area (plus entry/exit controls and a few other
 * fields) of the current VMCS from the CPU state this code runs with.
 */
static void init_vmcs_host(void)
{
	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
	/* 26.2.1.2 */
	vmcs_write(HOST_EFER, rdmsr(MSR_EFER));

	/* 26.2.1.3 */
	vmcs_write(ENT_CONTROLS, ctrl_enter);
	vmcs_write(EXI_CONTROLS, ctrl_exit);

	/* 26.2.2 */
	vmcs_write(HOST_CR0, read_cr0());
	vmcs_write(HOST_CR3, read_cr3());
	vmcs_write(HOST_CR4, read_cr4());
	vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
	vmcs_write(HOST_SYSENTER_CS, KERNEL_CS);

	/* 26.2.3 */
	vmcs_write(HOST_SEL_CS, KERNEL_CS);
	vmcs_write(HOST_SEL_SS, KERNEL_DS);
	vmcs_write(HOST_SEL_DS, KERNEL_DS);
	vmcs_write(HOST_SEL_ES, KERNEL_DS);
	vmcs_write(HOST_SEL_FS, KERNEL_DS);
	vmcs_write(HOST_SEL_GS, KERNEL_DS);
	vmcs_write(HOST_SEL_TR, TSS_MAIN);
	vmcs_write(HOST_BASE_TR, get_gdt_entry_base(get_tss_descr()));
	vmcs_write(HOST_BASE_GDTR, gdt_descr.base);
	vmcs_write(HOST_BASE_IDTR, idt_descr.base);
	vmcs_write(HOST_BASE_FS, 0);
	vmcs_write(HOST_BASE_GS, rdmsr(MSR_GS_BASE));

	/* Set other vmcs area */
	vmcs_write(PF_ERROR_MASK, 0);
	vmcs_write(PF_ERROR_MATCH, 0);
	vmcs_write(VMCS_LINK_PTR, ~0ul);
	vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
	/* VM exits resume host execution at the vmx_return label. */
	vmcs_write(HOST_RIP, (u64)(&vmx_return));
}

static void init_vmcs_guest(void)
{
	gdt_entry_t *tss_descr = get_tss_descr();

	/* 26.3 CHECKING AND LOADING GUEST STATE */
	ulong guest_cr0, guest_cr4, guest_cr3;
	/* 26.3.1.1 */
	guest_cr0 = read_cr0();
	guest_cr4 = read_cr4();
	guest_cr3 = read_cr3();
	if (ctrl_enter & ENT_GUEST_64) {
		guest_cr0 |= X86_CR0_PG;
		guest_cr4 |= X86_CR4_PAE;
	}
	if ((ctrl_enter & ENT_GUEST_64) == 0)
		guest_cr4 &= (~X86_CR4_PCIDE);
	if (guest_cr0 & X86_CR0_PG)
		guest_cr0 |= X86_CR0_PE;
	vmcs_write(GUEST_CR0, guest_cr0);
	vmcs_write(GUEST_CR3, guest_cr3);
	vmcs_write(GUEST_CR4, guest_cr4);
	vmcs_write(GUEST_SYSENTER_CS, KERNEL_CS);
	vmcs_write(GUEST_SYSENTER_ESP, guest_syscall_stack_top);
	vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
	vmcs_write(GUEST_DR7, 0);
	vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));

	/* 26.3.1.2 */
	vmcs_write(GUEST_SEL_CS, KERNEL_CS);
	vmcs_write(GUEST_SEL_SS, KERNEL_DS);
	vmcs_write(GUEST_SEL_DS, KERNEL_DS);
	vmcs_write(GUEST_SEL_ES, KERNEL_DS);
	vmcs_write(GUEST_SEL_FS, KERNEL_DS);
	vmcs_write(GUEST_SEL_GS, KERNEL_DS);
	vmcs_write(GUEST_SEL_TR, TSS_MAIN);
	vmcs_write(GUEST_SEL_LDTR, 0);

	vmcs_write(GUEST_BASE_CS, 0);
	vmcs_write(GUEST_BASE_ES, 0);
	vmcs_write(GUEST_BASE_SS, 0);
	vmcs_write(GUEST_BASE_DS, 0);
	vmcs_write(GUEST_BASE_FS, 0);
	vmcs_write(GUEST_BASE_GS, rdmsr(MSR_GS_BASE));
	vmcs_write(GUEST_BASE_TR, get_gdt_entry_base(tss_descr));
	vmcs_write(GUEST_BASE_LDTR, 0);

	vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF); 1272 vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF); 1273 vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF); 1274 vmcs_write(GUEST_LIMIT_LDTR, 0xffff); 1275 vmcs_write(GUEST_LIMIT_TR, get_gdt_entry_limit(tss_descr)); 1276 1277 vmcs_write(GUEST_AR_CS, 0xa09b); 1278 vmcs_write(GUEST_AR_DS, 0xc093); 1279 vmcs_write(GUEST_AR_ES, 0xc093); 1280 vmcs_write(GUEST_AR_FS, 0xc093); 1281 vmcs_write(GUEST_AR_GS, 0xc093); 1282 vmcs_write(GUEST_AR_SS, 0xc093); 1283 vmcs_write(GUEST_AR_LDTR, 0x82); 1284 vmcs_write(GUEST_AR_TR, 0x8b); 1285 1286 /* 26.3.1.3 */ 1287 vmcs_write(GUEST_BASE_GDTR, gdt_descr.base); 1288 vmcs_write(GUEST_BASE_IDTR, idt_descr.base); 1289 vmcs_write(GUEST_LIMIT_GDTR, gdt_descr.limit); 1290 vmcs_write(GUEST_LIMIT_IDTR, idt_descr.limit); 1291 1292 /* 26.3.1.4 */ 1293 vmcs_write(GUEST_RIP, (u64)(&guest_entry)); 1294 vmcs_write(GUEST_RSP, guest_stack_top); 1295 vmcs_write(GUEST_RFLAGS, X86_EFLAGS_FIXED); 1296 1297 /* 26.3.1.5 */ 1298 vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE); 1299 vmcs_write(GUEST_INTR_STATE, 0); 1300 } 1301 1302 int init_vmcs(struct vmcs **vmcs) 1303 { 1304 *vmcs = alloc_page(); 1305 (*vmcs)->hdr.revision_id = basic.revision; 1306 /* vmclear first to init vmcs */ 1307 if (vmcs_clear(*vmcs)) { 1308 printf("%s : vmcs_clear error\n", __func__); 1309 return 1; 1310 } 1311 1312 if (make_vmcs_current(*vmcs)) { 1313 printf("%s : make_vmcs_current error\n", __func__); 1314 return 1; 1315 } 1316 1317 /* All settings to pin/exit/enter/cpu 1318 control fields should be placed here */ 1319 ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI; 1320 ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64; 1321 ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64); 1322 /* DIsable IO instruction VMEXIT now */ 1323 ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP)); 1324 ctrl_cpu[1] = 0; 1325 1326 ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr; 1327 ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr; 1328 ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) 
& ctrl_exit_rev.clr; 1329 ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr; 1330 1331 init_vmcs_ctrl(); 1332 init_vmcs_host(); 1333 init_vmcs_guest(); 1334 return 0; 1335 } 1336 1337 void enable_vmx(void) 1338 { 1339 bool vmx_enabled = 1340 rdmsr(MSR_IA32_FEATURE_CONTROL) & 1341 FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; 1342 1343 if (!vmx_enabled) { 1344 wrmsr(MSR_IA32_FEATURE_CONTROL, 1345 FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX | 1346 FEATURE_CONTROL_LOCKED); 1347 } 1348 } 1349 1350 static void init_vmx_caps(void) 1351 { 1352 basic.val = rdmsr(MSR_IA32_VMX_BASIC); 1353 ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN 1354 : MSR_IA32_VMX_PINBASED_CTLS); 1355 ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT 1356 : MSR_IA32_VMX_EXIT_CTLS); 1357 ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY 1358 : MSR_IA32_VMX_ENTRY_CTLS); 1359 ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC 1360 : MSR_IA32_VMX_PROCBASED_CTLS); 1361 if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0) 1362 ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2); 1363 else 1364 ctrl_cpu_rev[1].val = 0; 1365 if ((ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID)) != 0) 1366 ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); 1367 else 1368 ept_vpid.val = 0; 1369 } 1370 1371 void init_vmx(u64 *vmxon_region) 1372 { 1373 ulong fix_cr0_set, fix_cr0_clr; 1374 ulong fix_cr4_set, fix_cr4_clr; 1375 1376 fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0); 1377 fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1); 1378 fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0); 1379 fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1); 1380 1381 write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set); 1382 write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE); 1383 1384 *vmxon_region = basic.revision; 1385 } 1386 1387 static void alloc_bsp_vmx_pages(void) 1388 { 1389 bsp_vmxon_region = alloc_page(); 1390 guest_stack_top = (uintptr_t)alloc_page() + PAGE_SIZE; 1391 
guest_syscall_stack_top = (uintptr_t)alloc_page() + PAGE_SIZE; 1392 vmcs_root = alloc_page(); 1393 } 1394 1395 static void init_bsp_vmx(void) 1396 { 1397 init_vmx_caps(); 1398 alloc_bsp_vmx_pages(); 1399 init_vmx(bsp_vmxon_region); 1400 } 1401 1402 static void do_vmxon_off(void *data) 1403 { 1404 vmx_on(); 1405 vmx_off(); 1406 } 1407 1408 static void do_write_feature_control(void *data) 1409 { 1410 wrmsr(MSR_IA32_FEATURE_CONTROL, 0); 1411 } 1412 1413 static int test_vmx_feature_control(void) 1414 { 1415 u64 ia32_feature_control; 1416 bool vmx_enabled; 1417 bool feature_control_locked; 1418 1419 ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); 1420 vmx_enabled = 1421 ia32_feature_control & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; 1422 feature_control_locked = 1423 ia32_feature_control & FEATURE_CONTROL_LOCKED; 1424 1425 if (vmx_enabled && feature_control_locked) { 1426 printf("VMX enabled and locked by BIOS\n"); 1427 return 0; 1428 } else if (feature_control_locked) { 1429 printf("ERROR: VMX locked out by BIOS!?\n"); 1430 return 1; 1431 } 1432 1433 wrmsr(MSR_IA32_FEATURE_CONTROL, 0); 1434 report(test_for_exception(GP_VECTOR, &do_vmxon_off, NULL), 1435 "test vmxon with FEATURE_CONTROL cleared"); 1436 1437 wrmsr(MSR_IA32_FEATURE_CONTROL, FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX); 1438 report(test_for_exception(GP_VECTOR, &do_vmxon_off, NULL), 1439 "test vmxon without FEATURE_CONTROL lock"); 1440 1441 wrmsr(MSR_IA32_FEATURE_CONTROL, 1442 FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX | 1443 FEATURE_CONTROL_LOCKED); 1444 1445 ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); 1446 vmx_enabled = 1447 ia32_feature_control & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; 1448 report(vmx_enabled, "test enable VMX in FEATURE_CONTROL"); 1449 1450 report(test_for_exception(GP_VECTOR, &do_write_feature_control, NULL), 1451 "test FEATURE_CONTROL lock bit"); 1452 1453 return !vmx_enabled; 1454 } 1455 1456 static int test_vmxon(void) 1457 { 1458 int ret, ret1; 1459 u64 
*vmxon_region; 1460 int width = cpuid_maxphyaddr(); 1461 1462 /* Unaligned page access */ 1463 vmxon_region = (u64 *)((intptr_t)bsp_vmxon_region + 1); 1464 ret1 = _vmx_on(vmxon_region); 1465 report(ret1, "test vmxon with unaligned vmxon region"); 1466 if (!ret1) { 1467 ret = 1; 1468 goto out; 1469 } 1470 1471 /* gpa bits beyond physical address width are set*/ 1472 vmxon_region = (u64 *)((intptr_t)bsp_vmxon_region | ((u64)1 << (width+1))); 1473 ret1 = _vmx_on(vmxon_region); 1474 report(ret1, "test vmxon with bits set beyond physical address width"); 1475 if (!ret1) { 1476 ret = 1; 1477 goto out; 1478 } 1479 1480 /* invalid revision indentifier */ 1481 *bsp_vmxon_region = 0xba9da9; 1482 ret1 = vmx_on(); 1483 report(ret1, "test vmxon with invalid revision identifier"); 1484 if (!ret1) { 1485 ret = 1; 1486 goto out; 1487 } 1488 1489 /* and finally a valid region */ 1490 *bsp_vmxon_region = basic.revision; 1491 ret = vmx_on(); 1492 report(!ret, "test vmxon with valid vmxon region"); 1493 1494 out: 1495 return ret; 1496 } 1497 1498 static void test_vmptrld(void) 1499 { 1500 struct vmcs *vmcs, *tmp_root; 1501 int width = cpuid_maxphyaddr(); 1502 1503 vmcs = alloc_page(); 1504 vmcs->hdr.revision_id = basic.revision; 1505 1506 /* Unaligned page access */ 1507 tmp_root = (struct vmcs *)((intptr_t)vmcs + 1); 1508 report(make_vmcs_current(tmp_root) == 1, 1509 "test vmptrld with unaligned vmcs"); 1510 1511 /* gpa bits beyond physical address width are set*/ 1512 tmp_root = (struct vmcs *)((intptr_t)vmcs | 1513 ((u64)1 << (width+1))); 1514 report(make_vmcs_current(tmp_root) == 1, 1515 "test vmptrld with vmcs address bits set beyond physical address width"); 1516 1517 /* Pass VMXON region */ 1518 assert(!vmcs_clear(vmcs)); 1519 assert(!make_vmcs_current(vmcs)); 1520 tmp_root = (struct vmcs *)bsp_vmxon_region; 1521 report(make_vmcs_current(tmp_root) == 1, 1522 "test vmptrld with vmxon region"); 1523 report(vmcs_read(VMX_INST_ERROR) == VMXERR_VMPTRLD_VMXON_POINTER, 1524 "test 
vmptrld with vmxon region vm-instruction error"); 1525 1526 report(make_vmcs_current(vmcs) == 0, 1527 "test vmptrld with valid vmcs region"); 1528 } 1529 1530 static void test_vmptrst(void) 1531 { 1532 int ret; 1533 struct vmcs *vmcs1, *vmcs2; 1534 1535 vmcs1 = alloc_page(); 1536 init_vmcs(&vmcs1); 1537 ret = vmcs_save(&vmcs2); 1538 report((!ret) && (vmcs1 == vmcs2), "test vmptrst"); 1539 } 1540 1541 struct vmx_ctl_msr { 1542 const char *name; 1543 u32 index, true_index; 1544 u32 default1; 1545 } vmx_ctl_msr[] = { 1546 { "MSR_IA32_VMX_PINBASED_CTLS", MSR_IA32_VMX_PINBASED_CTLS, 1547 MSR_IA32_VMX_TRUE_PIN, 0x16 }, 1548 { "MSR_IA32_VMX_PROCBASED_CTLS", MSR_IA32_VMX_PROCBASED_CTLS, 1549 MSR_IA32_VMX_TRUE_PROC, 0x401e172 }, 1550 { "MSR_IA32_VMX_PROCBASED_CTLS2", MSR_IA32_VMX_PROCBASED_CTLS2, 1551 MSR_IA32_VMX_PROCBASED_CTLS2, 0 }, 1552 { "MSR_IA32_VMX_EXIT_CTLS", MSR_IA32_VMX_EXIT_CTLS, 1553 MSR_IA32_VMX_TRUE_EXIT, 0x36dff }, 1554 { "MSR_IA32_VMX_ENTRY_CTLS", MSR_IA32_VMX_ENTRY_CTLS, 1555 MSR_IA32_VMX_TRUE_ENTRY, 0x11ff }, 1556 }; 1557 1558 static void test_vmx_caps(void) 1559 { 1560 u64 val, default1, fixed0, fixed1; 1561 union vmx_ctrl_msr ctrl, true_ctrl; 1562 unsigned int n; 1563 bool ok; 1564 1565 printf("\nTest suite: VMX capability reporting\n"); 1566 1567 report((basic.revision & (1ul << 31)) == 0 && 1568 basic.size > 0 && basic.size <= 4096 && 1569 (basic.type == 0 || basic.type == 6) && 1570 basic.reserved1 == 0 && basic.reserved2 == 0, 1571 "MSR_IA32_VMX_BASIC"); 1572 1573 val = rdmsr(MSR_IA32_VMX_MISC); 1574 report((!(ctrl_cpu_rev[1].clr & CPU_URG) || val & (1ul << 5)) && 1575 ((val >> 16) & 0x1ff) <= 256 && 1576 (val & 0x80007e00) == 0, 1577 "MSR_IA32_VMX_MISC"); 1578 1579 for (n = 0; n < ARRAY_SIZE(vmx_ctl_msr); n++) { 1580 ctrl.val = rdmsr(vmx_ctl_msr[n].index); 1581 default1 = vmx_ctl_msr[n].default1; 1582 ok = (ctrl.set & default1) == default1; 1583 ok = ok && (ctrl.set & ~ctrl.clr) == 0; 1584 if (ok && basic.ctrl) { 1585 true_ctrl.val = 
rdmsr(vmx_ctl_msr[n].true_index); 1586 ok = ctrl.clr == true_ctrl.clr; 1587 ok = ok && ctrl.set == (true_ctrl.set | default1); 1588 } 1589 report(ok, "%s", vmx_ctl_msr[n].name); 1590 } 1591 1592 fixed0 = rdmsr(MSR_IA32_VMX_CR0_FIXED0); 1593 fixed1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1); 1594 report(((fixed0 ^ fixed1) & ~fixed1) == 0, 1595 "MSR_IA32_VMX_IA32_VMX_CR0_FIXED0/1"); 1596 1597 fixed0 = rdmsr(MSR_IA32_VMX_CR4_FIXED0); 1598 fixed1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1); 1599 report(((fixed0 ^ fixed1) & ~fixed1) == 0, 1600 "MSR_IA32_VMX_IA32_VMX_CR4_FIXED0/1"); 1601 1602 val = rdmsr(MSR_IA32_VMX_VMCS_ENUM); 1603 report((val & VMCS_FIELD_INDEX_MASK) >= 0x2a && 1604 (val & 0xfffffffffffffc01Ull) == 0, 1605 "MSR_IA32_VMX_VMCS_ENUM"); 1606 1607 fixed0 = -1ull; 1608 fixed0 &= ~(EPT_CAP_EXEC_ONLY | 1609 EPT_CAP_PWL4 | 1610 EPT_CAP_PWL5 | 1611 EPT_CAP_UC | 1612 EPT_CAP_WB | 1613 EPT_CAP_2M_PAGE | 1614 EPT_CAP_1G_PAGE | 1615 EPT_CAP_INVEPT | 1616 EPT_CAP_AD_FLAG | 1617 EPT_CAP_ADV_EPT_INFO | 1618 EPT_CAP_INVEPT_SINGLE | 1619 EPT_CAP_INVEPT_ALL | 1620 VPID_CAP_INVVPID | 1621 VPID_CAP_INVVPID_ADDR | 1622 VPID_CAP_INVVPID_CXTGLB | 1623 VPID_CAP_INVVPID_ALL | 1624 VPID_CAP_INVVPID_CXTLOC); 1625 1626 val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); 1627 report((val & fixed0) == 0, 1628 "MSR_IA32_VMX_EPT_VPID_CAP"); 1629 } 1630 1631 /* This function can only be called in guest */ 1632 void __attribute__((__used__)) hypercall(u32 hypercall_no) 1633 { 1634 u64 val = 0; 1635 val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT; 1636 hypercall_field = val; 1637 asm volatile("vmcall\n\t"); 1638 } 1639 1640 static bool is_hypercall(union exit_reason exit_reason) 1641 { 1642 return exit_reason.basic == VMX_VMCALL && 1643 (hypercall_field & HYPERCALL_BIT); 1644 } 1645 1646 static int handle_hypercall(void) 1647 { 1648 ulong hypercall_no; 1649 1650 hypercall_no = hypercall_field & HYPERCALL_MASK; 1651 hypercall_field = 0; 1652 switch (hypercall_no) { 1653 case HYPERCALL_VMEXIT: 1654 return 
VMX_TEST_VMEXIT; 1655 case HYPERCALL_VMABORT: 1656 return VMX_TEST_VMABORT; 1657 case HYPERCALL_VMSKIP: 1658 return VMX_TEST_VMSKIP; 1659 default: 1660 printf("ERROR : Invalid hypercall number : %ld\n", hypercall_no); 1661 } 1662 return VMX_TEST_EXIT; 1663 } 1664 1665 static void continue_abort(void) 1666 { 1667 assert(!in_guest); 1668 printf("Host was here when guest aborted:\n"); 1669 dump_stack(); 1670 longjmp(abort_target, 1); 1671 abort(); 1672 } 1673 1674 void __abort_test(void) 1675 { 1676 if (in_guest) 1677 hypercall(HYPERCALL_VMABORT); 1678 else 1679 longjmp(abort_target, 1); 1680 abort(); 1681 } 1682 1683 static void continue_skip(void) 1684 { 1685 assert(!in_guest); 1686 longjmp(abort_target, 1); 1687 abort(); 1688 } 1689 1690 void test_skip(const char *msg) 1691 { 1692 printf("%s skipping test: %s\n", in_guest ? "Guest" : "Host", msg); 1693 if (in_guest) 1694 hypercall(HYPERCALL_VMABORT); 1695 else 1696 longjmp(abort_target, 1); 1697 abort(); 1698 } 1699 1700 static int exit_handler(union exit_reason exit_reason) 1701 { 1702 int ret; 1703 1704 current->exits++; 1705 regs.rflags = vmcs_read(GUEST_RFLAGS); 1706 if (is_hypercall(exit_reason)) 1707 ret = handle_hypercall(); 1708 else 1709 ret = current->exit_handler(exit_reason); 1710 vmcs_write(GUEST_RFLAGS, regs.rflags); 1711 1712 return ret; 1713 } 1714 1715 /* 1716 * Tries to enter the guest, populates @result with VM-Fail, VM-Exit, entered, 1717 * etc... 
1718 */ 1719 static noinline void vmx_enter_guest(struct vmentry_result *result) 1720 { 1721 memset(result, 0, sizeof(*result)); 1722 1723 in_guest = 1; 1724 asm volatile ( 1725 "mov %[HOST_RSP], %%rdi\n\t" 1726 "vmwrite %%rsp, %%rdi\n\t" 1727 LOAD_GPR_C 1728 "cmpb $0, %[launched]\n\t" 1729 "jne 1f\n\t" 1730 "vmlaunch\n\t" 1731 "jmp 2f\n\t" 1732 "1: " 1733 "vmresume\n\t" 1734 "2: " 1735 SAVE_GPR_C 1736 "pushf\n\t" 1737 "pop %%rdi\n\t" 1738 "mov %%rdi, %[vm_fail_flags]\n\t" 1739 "movl $1, %[vm_fail]\n\t" 1740 "jmp 3f\n\t" 1741 "vmx_return:\n\t" 1742 SAVE_GPR_C 1743 "3: \n\t" 1744 : [vm_fail]"+m"(result->vm_fail), 1745 [vm_fail_flags]"=m"(result->flags) 1746 : [launched]"m"(launched), [HOST_RSP]"i"(HOST_RSP) 1747 : "rdi", "memory", "cc" 1748 ); 1749 in_guest = 0; 1750 1751 result->vmlaunch = !launched; 1752 result->instr = launched ? "vmresume" : "vmlaunch"; 1753 result->exit_reason.full = result->vm_fail ? 0xdead : 1754 vmcs_read(EXI_REASON); 1755 result->entered = !result->vm_fail && 1756 !result->exit_reason.failed_vmentry; 1757 } 1758 1759 static int vmx_run(void) 1760 { 1761 struct vmentry_result result; 1762 u32 ret; 1763 1764 while (1) { 1765 vmx_enter_guest(&result); 1766 if (result.entered) { 1767 /* 1768 * VMCS isn't in "launched" state if there's been any 1769 * entry failure (early or otherwise). 1770 */ 1771 launched = 1; 1772 ret = exit_handler(result.exit_reason); 1773 } else if (current->entry_failure_handler) { 1774 ret = current->entry_failure_handler(&result); 1775 } else { 1776 ret = VMX_TEST_EXIT; 1777 } 1778 1779 switch (ret) { 1780 case VMX_TEST_RESUME: 1781 continue; 1782 case VMX_TEST_VMEXIT: 1783 guest_finished = 1; 1784 return 0; 1785 case VMX_TEST_EXIT: 1786 break; 1787 default: 1788 printf("ERROR : Invalid %s_handler return val %d.\n", 1789 result.entered ? 
"exit" : "entry_failure", 1790 ret); 1791 break; 1792 } 1793 1794 if (result.entered) 1795 print_vmexit_info(result.exit_reason); 1796 else 1797 print_vmentry_failure_info(&result); 1798 abort(); 1799 } 1800 } 1801 1802 static void run_teardown_step(struct test_teardown_step *step) 1803 { 1804 step->func(step->data); 1805 } 1806 1807 static int test_run(struct vmx_test *test) 1808 { 1809 int r; 1810 1811 /* Validate V2 interface. */ 1812 if (test->v2) { 1813 int ret = 0; 1814 if (test->init || test->guest_main || test->exit_handler || 1815 test->syscall_handler) { 1816 report_fail("V2 test cannot specify V1 callbacks."); 1817 ret = 1; 1818 } 1819 if (ret) 1820 return ret; 1821 } 1822 1823 if (test->name == NULL) 1824 test->name = "(no name)"; 1825 if (vmx_on()) { 1826 printf("%s : vmxon failed.\n", __func__); 1827 return 1; 1828 } 1829 1830 init_vmcs(&(test->vmcs)); 1831 /* Directly call test->init is ok here, init_vmcs has done 1832 vmcs init, vmclear and vmptrld*/ 1833 if (test->init && test->init(test->vmcs) != VMX_TEST_START) 1834 goto out; 1835 teardown_count = 0; 1836 v2_guest_main = NULL; 1837 test->exits = 0; 1838 current = test; 1839 regs = test->guest_regs; 1840 vmcs_write(GUEST_RFLAGS, regs.rflags | X86_EFLAGS_FIXED); 1841 launched = 0; 1842 guest_finished = 0; 1843 printf("\nTest suite: %s\n", test->name); 1844 1845 r = setjmp(abort_target); 1846 if (r) { 1847 assert(!in_guest); 1848 goto out; 1849 } 1850 1851 1852 if (test->v2) 1853 test->v2(); 1854 else 1855 vmx_run(); 1856 1857 while (teardown_count > 0) 1858 run_teardown_step(&teardown_steps[--teardown_count]); 1859 1860 if (launched && !guest_finished) 1861 report_fail("Guest didn't run to completion."); 1862 1863 out: 1864 if (vmx_off()) { 1865 printf("%s : vmxoff failed.\n", __func__); 1866 return 1; 1867 } 1868 return 0; 1869 } 1870 1871 /* 1872 * Add a teardown step. Executed after the test's main function returns. 1873 * Teardown steps executed in reverse order. 
1874 */ 1875 void test_add_teardown(test_teardown_func func, void *data) 1876 { 1877 struct test_teardown_step *step; 1878 1879 TEST_ASSERT_MSG(teardown_count < MAX_TEST_TEARDOWN_STEPS, 1880 "There are already %d teardown steps.", 1881 teardown_count); 1882 step = &teardown_steps[teardown_count++]; 1883 step->func = func; 1884 step->data = data; 1885 } 1886 1887 static void __test_set_guest(test_guest_func func) 1888 { 1889 assert(current->v2); 1890 v2_guest_main = func; 1891 } 1892 1893 /* 1894 * Set the target of the first enter_guest call. Can only be called once per 1895 * test. Must be called before first enter_guest call. 1896 */ 1897 void test_set_guest(test_guest_func func) 1898 { 1899 TEST_ASSERT_MSG(!v2_guest_main, "Already set guest func."); 1900 __test_set_guest(func); 1901 } 1902 1903 /* 1904 * Set the target of the enter_guest call and reset the RIP so 'func' will 1905 * start from the beginning. This can be called multiple times per test. 1906 */ 1907 void test_override_guest(test_guest_func func) 1908 { 1909 __test_set_guest(func); 1910 init_vmcs_guest(); 1911 } 1912 1913 void test_set_guest_finished(void) 1914 { 1915 guest_finished = 1; 1916 } 1917 1918 static void check_for_guest_termination(union exit_reason exit_reason) 1919 { 1920 if (is_hypercall(exit_reason)) { 1921 int ret; 1922 1923 ret = handle_hypercall(); 1924 switch (ret) { 1925 case VMX_TEST_VMEXIT: 1926 guest_finished = 1; 1927 break; 1928 case VMX_TEST_VMABORT: 1929 continue_abort(); 1930 break; 1931 case VMX_TEST_VMSKIP: 1932 continue_skip(); 1933 break; 1934 default: 1935 printf("ERROR : Invalid handle_hypercall return %d.\n", 1936 ret); 1937 abort(); 1938 } 1939 } 1940 } 1941 1942 /* 1943 * Enters the guest (or launches it for the first time). Error to call once the 1944 * guest has returned (i.e., run past the end of its guest() function). 
1945 */ 1946 void __enter_guest(u8 abort_flag, struct vmentry_result *result) 1947 { 1948 TEST_ASSERT_MSG(v2_guest_main, 1949 "Never called test_set_guest_func!"); 1950 1951 TEST_ASSERT_MSG(!guest_finished, 1952 "Called enter_guest() after guest returned."); 1953 1954 vmx_enter_guest(result); 1955 1956 if (result->vm_fail) { 1957 if (abort_flag & ABORT_ON_EARLY_VMENTRY_FAIL) 1958 goto do_abort; 1959 return; 1960 } 1961 if (result->exit_reason.failed_vmentry) { 1962 if ((abort_flag & ABORT_ON_INVALID_GUEST_STATE) || 1963 result->exit_reason.basic != VMX_FAIL_STATE) 1964 goto do_abort; 1965 return; 1966 } 1967 1968 launched = 1; 1969 check_for_guest_termination(result->exit_reason); 1970 return; 1971 1972 do_abort: 1973 print_vmentry_failure_info(result); 1974 abort(); 1975 } 1976 1977 void enter_guest_with_bad_controls(void) 1978 { 1979 struct vmentry_result result; 1980 1981 TEST_ASSERT_MSG(v2_guest_main, 1982 "Never called test_set_guest_func!"); 1983 1984 TEST_ASSERT_MSG(!guest_finished, 1985 "Called enter_guest() after guest returned."); 1986 1987 __enter_guest(ABORT_ON_INVALID_GUEST_STATE, &result); 1988 report(result.vm_fail, "VM-Fail occurred as expected"); 1989 report((result.flags & VMX_ENTRY_FLAGS) == X86_EFLAGS_ZF, 1990 "FLAGS set correctly on VM-Fail"); 1991 report(vmcs_read(VMX_INST_ERROR) == VMXERR_ENTRY_INVALID_CONTROL_FIELD, 1992 "VM-Inst Error # is %d (VM entry with invalid control field(s))", 1993 VMXERR_ENTRY_INVALID_CONTROL_FIELD); 1994 } 1995 1996 void enter_guest(void) 1997 { 1998 struct vmentry_result result; 1999 2000 __enter_guest(ABORT_ON_EARLY_VMENTRY_FAIL | 2001 ABORT_ON_INVALID_GUEST_STATE, &result); 2002 } 2003 2004 extern struct vmx_test vmx_tests[]; 2005 2006 static bool 2007 test_wanted(const char *name, const char *filters[], int filter_count) 2008 { 2009 int i; 2010 bool positive = false; 2011 bool match = false; 2012 char clean_name[strlen(name) + 1]; 2013 char *c; 2014 const char *n; 2015 2016 printf("filter = %s, test = %s\n", 
filters[0], name); 2017 2018 /* Replace spaces with underscores. */ 2019 n = name; 2020 c = &clean_name[0]; 2021 do *c++ = (*n == ' ') ? '_' : *n; 2022 while (*n++); 2023 2024 for (i = 0; i < filter_count; i++) { 2025 const char *filter = filters[i]; 2026 2027 if (filter[0] == '-') { 2028 if (simple_glob(clean_name, filter + 1)) 2029 return false; 2030 } else { 2031 positive = true; 2032 match |= simple_glob(clean_name, filter); 2033 } 2034 } 2035 2036 if (!positive || match) { 2037 matched++; 2038 return true; 2039 } else { 2040 return false; 2041 } 2042 } 2043 2044 int main(int argc, const char *argv[]) 2045 { 2046 int i = 0; 2047 2048 setup_vm(); 2049 hypercall_field = 0; 2050 2051 /* We want xAPIC mode to test MMIO passthrough from L1 (us) to L2. */ 2052 smp_reset_apic(); 2053 2054 argv++; 2055 argc--; 2056 2057 if (!this_cpu_has(X86_FEATURE_VMX)) { 2058 printf("WARNING: vmx not supported, add '-cpu host'\n"); 2059 goto exit; 2060 } 2061 init_bsp_vmx(); 2062 if (test_wanted("test_vmx_feature_control", argv, argc)) { 2063 /* Sets MSR_IA32_FEATURE_CONTROL to 0x5 */ 2064 if (test_vmx_feature_control() != 0) 2065 goto exit; 2066 } else { 2067 enable_vmx(); 2068 } 2069 2070 if (test_wanted("test_vmxon", argv, argc)) { 2071 /* Enables VMX */ 2072 if (test_vmxon() != 0) 2073 goto exit; 2074 } else { 2075 if (vmx_on()) { 2076 report_fail("vmxon"); 2077 goto exit; 2078 } 2079 } 2080 2081 if (test_wanted("test_vmptrld", argv, argc)) 2082 test_vmptrld(); 2083 if (test_wanted("test_vmclear", argv, argc)) 2084 test_vmclear(); 2085 if (test_wanted("test_vmptrst", argv, argc)) 2086 test_vmptrst(); 2087 if (test_wanted("test_vmwrite_vmread", argv, argc)) 2088 test_vmwrite_vmread(); 2089 if (test_wanted("test_vmcs_high", argv, argc)) 2090 test_vmcs_high(); 2091 if (test_wanted("test_vmcs_lifecycle", argv, argc)) 2092 test_vmcs_lifecycle(); 2093 if (test_wanted("test_vmx_caps", argv, argc)) 2094 test_vmx_caps(); 2095 if (test_wanted("test_vmread_flags_touch", argv, argc)) 2096 
test_vmread_flags_touch(); 2097 if (test_wanted("test_vmwrite_flags_touch", argv, argc)) 2098 test_vmwrite_flags_touch(); 2099 2100 /* Balance vmxon from test_vmxon. */ 2101 vmx_off(); 2102 2103 for (; vmx_tests[i].name != NULL; i++) { 2104 if (!test_wanted(vmx_tests[i].name, argv, argc)) 2105 continue; 2106 if (test_run(&vmx_tests[i])) 2107 goto exit; 2108 } 2109 2110 if (!matched) 2111 report(matched, "command line didn't match any tests!"); 2112 2113 exit: 2114 return report_summary(); 2115 } 2116