1 /* 2 * x86/vmx.c : Framework for testing nested virtualization 3 * This is a framework to test nested VMX for KVM, which 4 * started as a project of GSoC 2013. All test cases should 5 * be located in x86/vmx_tests.c and framework related 6 * functions should be in this file. 7 * 8 * How to write test cases? 9 * Add callbacks of test suite in variant "vmx_tests". You can 10 * write: 11 * 1. init function used for initializing test suite 12 * 2. main function for codes running in L2 guest, 13 * 3. exit_handler to handle vmexit of L2 to L1 14 * 4. syscall handler to handle L2 syscall vmexit 15 * 5. vmenter fail handler to handle direct failure of vmenter 16 * 6. guest_regs is loaded when vmenter and saved when 17 * vmexit, you can read and set it in exit_handler 18 * If no special function is needed for a test suite, use 19 * coressponding basic_* functions as callback. More handlers 20 * can be added to "vmx_tests", see details of "struct vmx_test" 21 * and function test_run(). 22 * 23 * Currently, vmx test framework only set up one VCPU and one 24 * concurrent guest test environment with same paging for L2 and 25 * L1. For usage of EPT, only 1:1 mapped paging is used from VFN 26 * to PFN. 27 * 28 * Author : Arthur Chunqi Li <yzt356@gmail.com> 29 */ 30 31 #include "libcflat.h" 32 #include "processor.h" 33 #include "alloc_page.h" 34 #include "vm.h" 35 #include "vmalloc.h" 36 #include "desc.h" 37 #include "vmx.h" 38 #include "msr.h" 39 #include "smp.h" 40 #include "apic.h" 41 42 u64 *bsp_vmxon_region; 43 struct vmcs *vmcs_root; 44 u32 vpid_cnt; 45 u64 guest_stack_top, guest_syscall_stack_top; 46 u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2]; 47 struct regs regs; 48 49 struct vmx_test *current; 50 51 #define MAX_TEST_TEARDOWN_STEPS 10 52 53 struct test_teardown_step { 54 test_teardown_func func; 55 void *data; 56 }; 57 58 static int teardown_count; 59 static struct test_teardown_step teardown_steps[MAX_TEST_TEARDOWN_STEPS]; 60 61 static test_guest_func v2_guest_main; 62 63 u64 hypercall_field; 64 bool launched; 65 static int matched; 66 static int guest_finished; 67 static int in_guest; 68 69 union vmx_basic_msr basic_msr; 70 union vmx_ctrl_msr ctrl_pin_rev; 71 union vmx_ctrl_msr ctrl_cpu_rev[2]; 72 union vmx_ctrl_msr ctrl_exit_rev; 73 union vmx_ctrl_msr ctrl_enter_rev; 74 union vmx_ept_vpid ept_vpid; 75 76 extern struct descriptor_table_ptr gdt_descr; 77 extern struct descriptor_table_ptr idt_descr; 78 extern void *vmx_return; 79 extern void *entry_sysenter; 80 extern void *guest_entry; 81 82 static volatile u32 stage; 83 84 static jmp_buf abort_target; 85 86 struct vmcs_field { 87 u64 mask; 88 u64 encoding; 89 }; 90 91 #define MASK(_bits) GENMASK_ULL((_bits) - 1, 0) 92 #define MASK_NATURAL MASK(sizeof(unsigned long) * 8) 93 94 static struct vmcs_field vmcs_fields[] = { 95 { MASK(16), VPID }, 96 { MASK(16), PINV }, 97 { MASK(16), EPTP_IDX }, 98 99 { MASK(16), GUEST_SEL_ES }, 100 { MASK(16), GUEST_SEL_CS }, 101 { MASK(16), GUEST_SEL_SS }, 102 { MASK(16), GUEST_SEL_DS }, 103 { MASK(16), GUEST_SEL_FS }, 104 { MASK(16), GUEST_SEL_GS }, 105 { MASK(16), GUEST_SEL_LDTR }, 106 { MASK(16), GUEST_SEL_TR }, 107 { MASK(16), GUEST_INT_STATUS }, 108 109 { MASK(16), HOST_SEL_ES }, 110 { MASK(16), HOST_SEL_CS }, 111 { MASK(16), HOST_SEL_SS }, 112 { MASK(16), HOST_SEL_DS }, 113 { MASK(16), HOST_SEL_FS }, 114 { MASK(16), HOST_SEL_GS }, 115 { MASK(16), HOST_SEL_TR }, 116 117 { MASK(64), IO_BITMAP_A }, 118 { MASK(64), IO_BITMAP_B }, 119 { MASK(64), MSR_BITMAP }, 120 { MASK(64), EXIT_MSR_ST_ADDR }, 121 { MASK(64), EXIT_MSR_LD_ADDR }, 122 { MASK(64), ENTER_MSR_LD_ADDR }, 123 { MASK(64), VMCS_EXEC_PTR }, 124 { MASK(64), TSC_OFFSET }, 125 { MASK(64), APIC_VIRT_ADDR }, 126 { MASK(64), APIC_ACCS_ADDR }, 127 { MASK(64), EPTP }, 128 129 { MASK(64), INFO_PHYS_ADDR }, 130 131 { MASK(64), VMCS_LINK_PTR }, 132 { MASK(64), GUEST_DEBUGCTL }, 133 { MASK(64), GUEST_EFER }, 134 { MASK(64), GUEST_PAT }, 135 { MASK(64), GUEST_PERF_GLOBAL_CTRL }, 136 { MASK(64), GUEST_PDPTE }, 137 138 { MASK(64), HOST_PAT }, 139 { MASK(64), HOST_EFER }, 140 { MASK(64), HOST_PERF_GLOBAL_CTRL }, 141 142 { MASK(32), PIN_CONTROLS }, 143 { MASK(32), CPU_EXEC_CTRL0 }, 144 { MASK(32), EXC_BITMAP }, 145 { MASK(32), PF_ERROR_MASK }, 146 { MASK(32), PF_ERROR_MATCH }, 147 { MASK(32), CR3_TARGET_COUNT }, 148 { MASK(32), EXI_CONTROLS }, 149 { MASK(32), EXI_MSR_ST_CNT }, 150 { MASK(32), EXI_MSR_LD_CNT }, 151 { MASK(32), ENT_CONTROLS }, 152 { MASK(32), ENT_MSR_LD_CNT }, 153 { MASK(32), ENT_INTR_INFO }, 154 { MASK(32), ENT_INTR_ERROR }, 155 { MASK(32), ENT_INST_LEN }, 156 { MASK(32), TPR_THRESHOLD }, 157 { MASK(32), CPU_EXEC_CTRL1 }, 158 159 { MASK(32), VMX_INST_ERROR }, 160 { MASK(32), EXI_REASON }, 161 { MASK(32), EXI_INTR_INFO }, 162 { MASK(32), EXI_INTR_ERROR }, 163 { MASK(32), IDT_VECT_INFO }, 164 { MASK(32), IDT_VECT_ERROR }, 165 { MASK(32), EXI_INST_LEN }, 166 { MASK(32), EXI_INST_INFO }, 167 168 { MASK(32), GUEST_LIMIT_ES }, 169 { MASK(32), GUEST_LIMIT_CS }, 170 { MASK(32), GUEST_LIMIT_SS }, 171 { MASK(32), GUEST_LIMIT_DS }, 172 { MASK(32), GUEST_LIMIT_FS }, 173 { MASK(32), GUEST_LIMIT_GS }, 174 { MASK(32), GUEST_LIMIT_LDTR }, 175 { MASK(32), GUEST_LIMIT_TR }, 176 { MASK(32), GUEST_LIMIT_GDTR }, 177 { MASK(32), GUEST_LIMIT_IDTR }, 178 { 0x1d0ff, GUEST_AR_ES }, 179 { 0x1f0ff, GUEST_AR_CS }, 180 { 0x1d0ff, GUEST_AR_SS }, 181 { 0x1d0ff, GUEST_AR_DS }, 182 { 0x1d0ff, GUEST_AR_FS }, 183 { 0x1d0ff, GUEST_AR_GS }, 184 { 0x1d0ff, GUEST_AR_LDTR }, 185 { 0x1d0ff, GUEST_AR_TR }, 186 { MASK(32), GUEST_INTR_STATE }, 187 { MASK(32), GUEST_ACTV_STATE }, 188 { MASK(32), GUEST_SMBASE }, 189 { MASK(32), GUEST_SYSENTER_CS }, 190 { MASK(32), PREEMPT_TIMER_VALUE }, 191 192 { MASK(32), HOST_SYSENTER_CS }, 193 194 { MASK_NATURAL, CR0_MASK }, 195 { MASK_NATURAL, CR4_MASK }, 196 { MASK_NATURAL, CR0_READ_SHADOW }, 197 { MASK_NATURAL, CR4_READ_SHADOW }, 198 { MASK_NATURAL, CR3_TARGET_0 }, 199 { MASK_NATURAL, CR3_TARGET_1 }, 200 { MASK_NATURAL, CR3_TARGET_2 }, 201 { MASK_NATURAL, CR3_TARGET_3 }, 202 203 { MASK_NATURAL, EXI_QUALIFICATION }, 204 { MASK_NATURAL, IO_RCX }, 205 { MASK_NATURAL, IO_RSI }, 206 { MASK_NATURAL, IO_RDI }, 207 { MASK_NATURAL, IO_RIP }, 208 { MASK_NATURAL, GUEST_LINEAR_ADDRESS }, 209 210 { MASK_NATURAL, GUEST_CR0 }, 211 { MASK_NATURAL, GUEST_CR3 }, 212 { MASK_NATURAL, GUEST_CR4 }, 213 { MASK_NATURAL, GUEST_BASE_ES }, 214 { MASK_NATURAL, GUEST_BASE_CS }, 215 { MASK_NATURAL, GUEST_BASE_SS }, 216 { MASK_NATURAL, GUEST_BASE_DS }, 217 { MASK_NATURAL, GUEST_BASE_FS }, 218 { MASK_NATURAL, GUEST_BASE_GS }, 219 { MASK_NATURAL, GUEST_BASE_LDTR }, 220 { MASK_NATURAL, GUEST_BASE_TR }, 221 { MASK_NATURAL, GUEST_BASE_GDTR }, 222 { MASK_NATURAL, GUEST_BASE_IDTR }, 223 { MASK_NATURAL, GUEST_DR7 }, 224 { MASK_NATURAL, GUEST_RSP }, 225 { MASK_NATURAL, GUEST_RIP }, 226 { MASK_NATURAL, GUEST_RFLAGS }, 227 { MASK_NATURAL, GUEST_PENDING_DEBUG }, 228 { MASK_NATURAL, GUEST_SYSENTER_ESP }, 229 { MASK_NATURAL, GUEST_SYSENTER_EIP }, 230 231 { MASK_NATURAL, HOST_CR0 }, 232 { MASK_NATURAL, HOST_CR3 }, 233 { MASK_NATURAL, HOST_CR4 }, 234 { MASK_NATURAL, HOST_BASE_FS }, 235 { MASK_NATURAL, HOST_BASE_GS }, 236 { MASK_NATURAL, HOST_BASE_TR }, 237 { MASK_NATURAL, HOST_BASE_GDTR }, 238 { MASK_NATURAL, HOST_BASE_IDTR }, 239 { MASK_NATURAL, HOST_SYSENTER_ESP }, 240 { MASK_NATURAL, HOST_SYSENTER_EIP }, 241 { MASK_NATURAL, HOST_RSP }, 242 { MASK_NATURAL, HOST_RIP }, 243 }; 244 245 enum vmcs_field_type { 246 VMCS_FIELD_TYPE_CONTROL = 0, 247 VMCS_FIELD_TYPE_READ_ONLY_DATA = 1, 248 VMCS_FIELD_TYPE_GUEST = 2, 249 VMCS_FIELD_TYPE_HOST = 3, 250 VMCS_FIELD_TYPES, 251 }; 252 253 static inline int vmcs_field_type(struct vmcs_field *f) 254 { 255 return (f->encoding >> VMCS_FIELD_TYPE_SHIFT) & 0x3; 256 } 257 258 static int vmcs_field_readonly(struct vmcs_field *f) 259 { 260 u64 ia32_vmx_misc; 261 262 ia32_vmx_misc = rdmsr(MSR_IA32_VMX_MISC); 263 return !(ia32_vmx_misc & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS) && 264 (vmcs_field_type(f) == VMCS_FIELD_TYPE_READ_ONLY_DATA); 265 } 266 267 static inline u64 vmcs_field_value(struct vmcs_field *f, u8 cookie) 268 { 269 u64 value; 270 271 /* Incorporate the cookie and the field encoding into the value. */ 272 value = cookie; 273 value |= (f->encoding << 8); 274 value |= 0xdeadbeefull << 32; 275 276 return value & f->mask; 277 } 278 279 static void set_vmcs_field(struct vmcs_field *f, u8 cookie) 280 { 281 vmcs_write(f->encoding, vmcs_field_value(f, cookie)); 282 } 283 284 static bool check_vmcs_field(struct vmcs_field *f, u8 cookie) 285 { 286 u64 expected; 287 u64 actual; 288 int ret; 289 290 if (f->encoding == VMX_INST_ERROR) { 291 printf("Skipping volatile field %lx\n", f->encoding); 292 return true; 293 } 294 295 ret = vmcs_read_safe(f->encoding, &actual); 296 assert(!(ret & X86_EFLAGS_CF)); 297 /* Skip VMCS fields that aren't recognized by the CPU */ 298 if (ret & X86_EFLAGS_ZF) 299 return true; 300 301 if (vmcs_field_readonly(f)) { 302 printf("Skipping read-only field %lx\n", f->encoding); 303 return true; 304 } 305 306 expected = vmcs_field_value(f, cookie); 307 actual &= f->mask; 308 309 if (expected == actual) 310 return true; 311 312 printf("FAIL: VMWRITE/VMREAD %lx (expected: %lx, actual: %lx)\n", 313 f->encoding, (unsigned long) expected, (unsigned long) actual); 314 315 return false; 316 } 317 318 static void set_all_vmcs_fields(u8 cookie) 319 { 320 int i; 321 322 for (i = 0; i < ARRAY_SIZE(vmcs_fields); i++) 323 set_vmcs_field(&vmcs_fields[i], cookie); 324 } 325 326 static bool check_all_vmcs_fields(u8 cookie) 327 { 328 bool pass = true; 329 int i; 330 331 for (i = 0; i < ARRAY_SIZE(vmcs_fields); i++) { 332 if (!check_vmcs_field(&vmcs_fields[i], cookie)) 333 pass = false; 334 } 335 336 return pass; 337 } 338 339 static u32 find_vmcs_max_index(void) 340 { 341 u32 idx, width, type, enc; 342 u64 actual; 343 int ret; 344 345 /* scan backwards and stop when found */ 346 for (idx = (1 << 9) - 1; idx >= 0; idx--) { 347 348 /* try all combinations of width and type */ 349 for (type = 0; type < (1 << 2); type++) { 350 for (width = 0; width < (1 << 2) ; width++) { 351 enc = (idx << VMCS_FIELD_INDEX_SHIFT) | 352 (type << VMCS_FIELD_TYPE_SHIFT) | 353 (width << VMCS_FIELD_WIDTH_SHIFT); 354 355 ret = vmcs_read_safe(enc, &actual); 356 assert(!(ret & X86_EFLAGS_CF)); 357 if (!(ret & X86_EFLAGS_ZF)) 358 return idx; 359 } 360 } 361 } 362 /* some VMCS fields should exist */ 363 assert(0); 364 return 0; 365 } 366 367 static void test_vmwrite_vmread(void) 368 { 369 struct vmcs *vmcs = alloc_page(); 370 u32 vmcs_enum_max, max_index = 0; 371 372 vmcs->hdr.revision_id = basic_msr.revision; 373 assert(!vmcs_clear(vmcs)); 374 assert(!make_vmcs_current(vmcs)); 375 376 set_all_vmcs_fields(0x42); 377 report(check_all_vmcs_fields(0x42), "VMWRITE/VMREAD"); 378 379 vmcs_enum_max = (rdmsr(MSR_IA32_VMX_VMCS_ENUM) & VMCS_FIELD_INDEX_MASK) 380 >> VMCS_FIELD_INDEX_SHIFT; 381 max_index = find_vmcs_max_index(); 382 report(vmcs_enum_max == max_index, 383 "VMX_VMCS_ENUM.MAX_INDEX expected: %x, actual: %x", 384 max_index, vmcs_enum_max); 385 386 assert(!vmcs_clear(vmcs)); 387 free_page(vmcs); 388 } 389 390 static void __test_vmread_vmwrite_pf(bool vmread, u64 *val, u8 sentinel) 391 { 392 unsigned long flags = sentinel; 393 unsigned int vector; 394 395 /* 396 * Execute VMREAD/VMWRITE with a not-PRESENT memory operand, and verify 397 * a #PF occurred and RFLAGS were not modified. 398 */ 399 if (vmread) 400 asm volatile ("sahf\n\t" 401 ASM_TRY("1f") 402 "vmread %[enc], %[val]\n\t" 403 "1: lahf" 404 : [val] "=m" (*val), 405 [flags] "+a" (flags) 406 : [enc] "r" ((u64)GUEST_SEL_SS) 407 : "cc"); 408 else 409 asm volatile ("sahf\n\t" 410 ASM_TRY("1f") 411 "vmwrite %[val], %[enc]\n\t" 412 "1: lahf" 413 : [val] "=m" (*val), 414 [flags] "+a" (flags) 415 : [enc] "r" ((u64)GUEST_SEL_SS) 416 : "cc"); 417 418 vector = exception_vector(); 419 report(vector == PF_VECTOR, 420 "Expected #PF on %s, got exception '0x%x'\n", 421 vmread ? "VMREAD" : "VMWRITE", vector); 422 423 report((u8)flags == sentinel, 424 "Expected RFLAGS 0x%x, got 0x%x", sentinel, (u8)flags); 425 } 426 427 static void test_vmread_vmwrite_pf(bool vmread) 428 { 429 struct vmcs *vmcs = alloc_page(); 430 void *vpage = alloc_vpage(); 431 432 memset(vmcs, 0, PAGE_SIZE); 433 vmcs->hdr.revision_id = basic_msr.revision; 434 assert(!vmcs_clear(vmcs)); 435 assert(!make_vmcs_current(vmcs)); 436 437 /* 438 * Test with two values to candy-stripe the 5 flags stored/loaded by 439 * SAHF/LAHF. 440 */ 441 __test_vmread_vmwrite_pf(vmread, vpage, 0x91); 442 __test_vmread_vmwrite_pf(vmread, vpage, 0x45); 443 } 444 445 static void test_vmread_flags_touch(void) 446 { 447 test_vmread_vmwrite_pf(true); 448 } 449 450 static void test_vmwrite_flags_touch(void) 451 { 452 test_vmread_vmwrite_pf(false); 453 } 454 455 static void test_vmcs_high(void) 456 { 457 struct vmcs *vmcs = alloc_page(); 458 459 vmcs->hdr.revision_id = basic_msr.revision; 460 assert(!vmcs_clear(vmcs)); 461 assert(!make_vmcs_current(vmcs)); 462 463 vmcs_write(TSC_OFFSET, 0x0123456789ABCDEFull); 464 report(vmcs_read(TSC_OFFSET) == 0x0123456789ABCDEFull, 465 "VMREAD TSC_OFFSET after VMWRITE TSC_OFFSET"); 466 report(vmcs_read(TSC_OFFSET_HI) == 0x01234567ull, 467 "VMREAD TSC_OFFSET_HI after VMWRITE TSC_OFFSET"); 468 vmcs_write(TSC_OFFSET_HI, 0x76543210ul); 469 report(vmcs_read(TSC_OFFSET_HI) == 0x76543210ul, 470 "VMREAD TSC_OFFSET_HI after VMWRITE TSC_OFFSET_HI"); 471 report(vmcs_read(TSC_OFFSET) == 0x7654321089ABCDEFull, 472 "VMREAD TSC_OFFSET after VMWRITE TSC_OFFSET_HI"); 473 474 assert(!vmcs_clear(vmcs)); 475 free_page(vmcs); 476 } 477 478 static void test_vmcs_lifecycle(void) 479 { 480 struct vmcs *vmcs[2] = {}; 481 int i; 482 483 for (i = 0; i < ARRAY_SIZE(vmcs); i++) { 484 vmcs[i] = alloc_page(); 485 vmcs[i]->hdr.revision_id = basic_msr.revision; 486 } 487 488 #define VMPTRLD(_i) do { \ 489 assert(_i < ARRAY_SIZE(vmcs)); \ 490 assert(!make_vmcs_current(vmcs[_i])); \ 491 printf("VMPTRLD VMCS%d\n", (_i)); \ 492 } while (0) 493 494 #define VMCLEAR(_i) do { \ 495 assert(_i < ARRAY_SIZE(vmcs)); \ 496 assert(!vmcs_clear(vmcs[_i])); \ 497 printf("VMCLEAR VMCS%d\n", (_i)); \ 498 } while (0) 499 500 VMCLEAR(0); 501 VMPTRLD(0); 502 set_all_vmcs_fields(0); 503 report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]"); 504 505 VMCLEAR(0); 506 VMPTRLD(0); 507 report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]"); 508 509 VMCLEAR(1); 510 report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]"); 511 512 VMPTRLD(1); 513 set_all_vmcs_fields(1); 514 report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VCMS1]"); 515 516 VMPTRLD(0); 517 report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0,VCMS1]"); 518 VMPTRLD(1); 519 report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VCMS1]"); 520 VMPTRLD(1); 521 report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VCMS1]"); 522 523 VMCLEAR(0); 524 report(check_all_vmcs_fields(1), "current:VMCS1 active:[VCMS1]"); 525 526 /* VMPTRLD should not erase VMWRITEs to the current VMCS */ 527 set_all_vmcs_fields(2); 528 VMPTRLD(1); 529 report(check_all_vmcs_fields(2), "current:VMCS1 active:[VCMS1]"); 530 531 for (i = 0; i < ARRAY_SIZE(vmcs); i++) { 532 VMCLEAR(i); 533 free_page(vmcs[i]); 534 } 535 536 #undef VMPTRLD 537 #undef VMCLEAR 538 } 539 540 void vmx_set_test_stage(u32 s) 541 { 542 barrier(); 543 stage = s; 544 barrier(); 545 } 546 547 u32 vmx_get_test_stage(void) 548 { 549 u32 s; 550 551 barrier(); 552 s = stage; 553 barrier(); 554 return s; 555 } 556 557 void vmx_inc_test_stage(void) 558 { 559 barrier(); 560 stage++; 561 barrier(); 562 } 563 564 /* entry_sysenter */ 565 asm( 566 ".align 4, 0x90\n\t" 567 ".globl entry_sysenter\n\t" 568 "entry_sysenter:\n\t" 569 SAVE_GPR 570 " and $0xf, %rax\n\t" 571 " mov %rax, %rdi\n\t" 572 " call syscall_handler\n\t" 573 LOAD_GPR 574 " vmresume\n\t" 575 ); 576 577 static void __attribute__((__used__)) syscall_handler(u64 syscall_no) 578 { 579 if (current->syscall_handler) 580 current->syscall_handler(syscall_no); 581 } 582 583 static const char * const exit_reason_descriptions[] = { 584 [VMX_EXC_NMI] = "VMX_EXC_NMI", 585 [VMX_EXTINT] = "VMX_EXTINT", 586 [VMX_TRIPLE_FAULT] = "VMX_TRIPLE_FAULT", 587 [VMX_INIT] = "VMX_INIT", 588 [VMX_SIPI] = "VMX_SIPI", 589 [VMX_SMI_IO] = "VMX_SMI_IO", 590 [VMX_SMI_OTHER] = "VMX_SMI_OTHER", 591 [VMX_INTR_WINDOW] = "VMX_INTR_WINDOW", 592 [VMX_NMI_WINDOW] = "VMX_NMI_WINDOW", 593 [VMX_TASK_SWITCH] = "VMX_TASK_SWITCH", 594 [VMX_CPUID] = "VMX_CPUID", 595 [VMX_GETSEC] = "VMX_GETSEC", 596 [VMX_HLT] = "VMX_HLT", 597 [VMX_INVD] = "VMX_INVD", 598 [VMX_INVLPG] = "VMX_INVLPG", 599 [VMX_RDPMC] = "VMX_RDPMC", 600 [VMX_RDTSC] = "VMX_RDTSC", 601 [VMX_RSM] = "VMX_RSM", 602 [VMX_VMCALL] = "VMX_VMCALL", 603 [VMX_VMCLEAR] = "VMX_VMCLEAR", 604 [VMX_VMLAUNCH] = "VMX_VMLAUNCH", 605 [VMX_VMPTRLD] = "VMX_VMPTRLD", 606 [VMX_VMPTRST] = "VMX_VMPTRST", 607 [VMX_VMREAD] = "VMX_VMREAD", 608 [VMX_VMRESUME] = "VMX_VMRESUME", 609 [VMX_VMWRITE] = "VMX_VMWRITE", 610 [VMX_VMXOFF] = "VMX_VMXOFF", 611 [VMX_VMXON] = "VMX_VMXON", 612 [VMX_CR] = "VMX_CR", 613 [VMX_DR] = "VMX_DR", 614 [VMX_IO] = "VMX_IO", 615 [VMX_RDMSR] = "VMX_RDMSR", 616 [VMX_WRMSR] = "VMX_WRMSR", 617 [VMX_FAIL_STATE] = "VMX_FAIL_STATE", 618 [VMX_FAIL_MSR] = "VMX_FAIL_MSR", 619 [VMX_MWAIT] = "VMX_MWAIT", 620 [VMX_MTF] = "VMX_MTF", 621 [VMX_MONITOR] = "VMX_MONITOR", 622 [VMX_PAUSE] = "VMX_PAUSE", 623 [VMX_FAIL_MCHECK] = "VMX_FAIL_MCHECK", 624 [VMX_TPR_THRESHOLD] = "VMX_TPR_THRESHOLD", 625 [VMX_APIC_ACCESS] = "VMX_APIC_ACCESS", 626 [VMX_EOI_INDUCED] = "VMX_EOI_INDUCED", 627 [VMX_GDTR_IDTR] = "VMX_GDTR_IDTR", 628 [VMX_LDTR_TR] = "VMX_LDTR_TR", 629 [VMX_EPT_VIOLATION] = "VMX_EPT_VIOLATION", 630 [VMX_EPT_MISCONFIG] = "VMX_EPT_MISCONFIG", 631 [VMX_INVEPT] = "VMX_INVEPT", 632 [VMX_PREEMPT] = "VMX_PREEMPT", 633 [VMX_INVVPID] = "VMX_INVVPID", 634 [VMX_WBINVD] = "VMX_WBINVD", 635 [VMX_XSETBV] = "VMX_XSETBV", 636 [VMX_APIC_WRITE] = "VMX_APIC_WRITE", 637 [VMX_RDRAND] = "VMX_RDRAND", 638 [VMX_INVPCID] = "VMX_INVPCID", 639 [VMX_VMFUNC] = "VMX_VMFUNC", 640 [VMX_RDSEED] = "VMX_RDSEED", 641 [VMX_PML_FULL] = "VMX_PML_FULL", 642 [VMX_XSAVES] = "VMX_XSAVES", 643 [VMX_XRSTORS] = "VMX_XRSTORS", 644 }; 645 646 const char *exit_reason_description(u64 reason) 647 { 648 if (reason >= ARRAY_SIZE(exit_reason_descriptions)) 649 return "(unknown)"; 650 return exit_reason_descriptions[reason] ? : "(unused)"; 651 } 652 653 void print_vmexit_info(union exit_reason exit_reason) 654 { 655 u64 guest_rip, guest_rsp; 656 ulong exit_qual = vmcs_read(EXI_QUALIFICATION); 657 guest_rip = vmcs_read(GUEST_RIP); 658 guest_rsp = vmcs_read(GUEST_RSP); 659 printf("VMEXIT info:\n"); 660 printf("\tvmexit reason = %u\n", exit_reason.basic); 661 printf("\tfailed vmentry = %u\n", !!exit_reason.failed_vmentry); 662 printf("\texit qualification = %#lx\n", exit_qual); 663 printf("\tguest_rip = %#lx\n", guest_rip); 664 printf("\tRAX=%#lx RBX=%#lx RCX=%#lx RDX=%#lx\n", 665 regs.rax, regs.rbx, regs.rcx, regs.rdx); 666 printf("\tRSP=%#lx RBP=%#lx RSI=%#lx RDI=%#lx\n", 667 guest_rsp, regs.rbp, regs.rsi, regs.rdi); 668 printf("\tR8 =%#lx R9 =%#lx R10=%#lx R11=%#lx\n", 669 regs.r8, regs.r9, regs.r10, regs.r11); 670 printf("\tR12=%#lx R13=%#lx R14=%#lx R15=%#lx\n", 671 regs.r12, regs.r13, regs.r14, regs.r15); 672 } 673 674 void print_vmentry_failure_info(struct vmentry_result *result) 675 { 676 if (result->entered) 677 return; 678 679 if (result->vm_fail) { 680 printf("VM-Fail on %s: ", result->instr); 681 switch (result->flags & VMX_ENTRY_FLAGS) { 682 case X86_EFLAGS_CF: 683 printf("current-VMCS pointer is not valid.\n"); 684 break; 685 case X86_EFLAGS_ZF: 686 printf("error number is %ld. See Intel 30.4.\n", 687 vmcs_read(VMX_INST_ERROR)); 688 break; 689 default: 690 printf("unexpected flags %lx!\n", result->flags); 691 } 692 } else { 693 u64 qual = vmcs_read(EXI_QUALIFICATION); 694 695 printf("VM-Exit failure on %s (reason=%#x, qual=%#lx): ", 696 result->instr, result->exit_reason.full, qual); 697 698 switch (result->exit_reason.basic) { 699 case VMX_FAIL_STATE: 700 printf("invalid guest state\n"); 701 break; 702 case VMX_FAIL_MSR: 703 printf("MSR loading\n"); 704 break; 705 case VMX_FAIL_MCHECK: 706 printf("machine-check event\n"); 707 break; 708 default: 709 printf("unexpected basic exit reason %u\n", 710 result->exit_reason.basic); 711 } 712 713 if (!result->exit_reason.failed_vmentry) 714 printf("\tVMX_ENTRY_FAILURE BIT NOT SET!\n"); 715 716 if (result->exit_reason.full & 0x7fff0000) 717 printf("\tRESERVED BITS SET!\n"); 718 } 719 } 720 721 /* 722 * VMCLEAR should ensures all VMCS state is flushed to the VMCS 723 * region in memory. 724 */ 725 static void test_vmclear_flushing(void) 726 { 727 struct vmcs *vmcs[3] = {}; 728 int i; 729 730 for (i = 0; i < ARRAY_SIZE(vmcs); i++) { 731 vmcs[i] = alloc_page(); 732 } 733 734 vmcs[0]->hdr.revision_id = basic_msr.revision; 735 assert(!vmcs_clear(vmcs[0])); 736 assert(!make_vmcs_current(vmcs[0])); 737 set_all_vmcs_fields(0x86); 738 739 assert(!vmcs_clear(vmcs[0])); 740 memcpy(vmcs[1], vmcs[0], basic_msr.size); 741 assert(!make_vmcs_current(vmcs[1])); 742 report(check_all_vmcs_fields(0x86), 743 "test vmclear flush (current VMCS)"); 744 745 set_all_vmcs_fields(0x87); 746 assert(!make_vmcs_current(vmcs[0])); 747 assert(!vmcs_clear(vmcs[1])); 748 memcpy(vmcs[2], vmcs[1], basic_msr.size); 749 assert(!make_vmcs_current(vmcs[2])); 750 report(check_all_vmcs_fields(0x87), 751 "test vmclear flush (!current VMCS)"); 752 753 for (i = 0; i < ARRAY_SIZE(vmcs); i++) { 754 assert(!vmcs_clear(vmcs[i])); 755 free_page(vmcs[i]); 756 } 757 } 758 759 static void test_vmclear(void) 760 { 761 struct vmcs *tmp_root; 762 int width = cpuid_maxphyaddr(); 763 764 /* 765 * Note- The tests below do not necessarily have a 766 * valid VMCS, but that's ok since the invalid vmcs 767 * is only used for a specific test and is discarded 768 * without touching its contents 769 */ 770 771 /* Unaligned page access */ 772 tmp_root = (struct vmcs *)((intptr_t)vmcs_root + 1); 773 report(vmcs_clear(tmp_root) == 1, "test vmclear with unaligned vmcs"); 774 775 /* gpa bits beyond physical address width are set*/ 776 tmp_root = (struct vmcs *)((intptr_t)vmcs_root | 777 ((u64)1 << (width+1))); 778 report(vmcs_clear(tmp_root) == 1, 779 "test vmclear with vmcs address bits set beyond physical address width"); 780 781 /* Pass VMXON region */ 782 tmp_root = (struct vmcs *)bsp_vmxon_region; 783 report(vmcs_clear(tmp_root) == 1, "test vmclear with vmxon region"); 784 785 /* Valid VMCS */ 786 report(vmcs_clear(vmcs_root) == 0, 787 "test vmclear with valid vmcs region"); 788 789 test_vmclear_flushing(); 790 } 791 792 static void __attribute__((__used__)) guest_main(void) 793 { 794 if (current->v2) 795 v2_guest_main(); 796 else 797 current->guest_main(); 798 } 799 800 /* guest_entry */ 801 asm( 802 ".align 4, 0x90\n\t" 803 ".globl entry_guest\n\t" 804 "guest_entry:\n\t" 805 " call guest_main\n\t" 806 " mov $1, %edi\n\t" 807 " call hypercall\n\t" 808 ); 809 810 /* EPT paging structure related functions */ 811 /* split_large_ept_entry: Split a 2M/1G large page into 512 smaller PTEs. 812 @ptep : large page table entry to split 813 @level : level of ptep (2 or 3) 814 */ 815 static void split_large_ept_entry(unsigned long *ptep, int level) 816 { 817 unsigned long *new_pt; 818 unsigned long gpa; 819 unsigned long pte; 820 unsigned long prototype; 821 int i; 822 823 pte = *ptep; 824 assert(pte & EPT_PRESENT); 825 assert(pte & EPT_LARGE_PAGE); 826 assert(level == 2 || level == 3); 827 828 new_pt = alloc_page(); 829 assert(new_pt); 830 831 prototype = pte & ~EPT_ADDR_MASK; 832 if (level == 2) 833 prototype &= ~EPT_LARGE_PAGE; 834 835 gpa = pte & EPT_ADDR_MASK; 836 for (i = 0; i < EPT_PGDIR_ENTRIES; i++) { 837 new_pt[i] = prototype | gpa; 838 gpa += 1ul << EPT_LEVEL_SHIFT(level - 1); 839 } 840 841 pte &= ~EPT_LARGE_PAGE; 842 pte &= ~EPT_ADDR_MASK; 843 pte |= virt_to_phys(new_pt); 844 845 *ptep = pte; 846 } 847 848 /* install_ept_entry : Install a page to a given level in EPT 849 @pml4 : addr of pml4 table 850 @pte_level : level of PTE to set 851 @guest_addr : physical address of guest 852 @pte : pte value to set 853 @pt_page : address of page table, NULL for a new page 854 */ 855 void install_ept_entry(unsigned long *pml4, 856 int pte_level, 857 unsigned long guest_addr, 858 unsigned long pte, 859 unsigned long *pt_page) 860 { 861 int level; 862 unsigned long *pt = pml4; 863 unsigned offset; 864 865 /* EPT only uses 48 bits of GPA. */ 866 assert(guest_addr < (1ul << 48)); 867 868 for (level = EPT_PAGE_LEVEL; level > pte_level; --level) { 869 offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) 870 & EPT_PGDIR_MASK; 871 if (!(pt[offset] & (EPT_PRESENT))) { 872 unsigned long *new_pt = pt_page; 873 if (!new_pt) 874 new_pt = alloc_page(); 875 else 876 pt_page = 0; 877 memset(new_pt, 0, PAGE_SIZE); 878 pt[offset] = virt_to_phys(new_pt) 879 | EPT_RA | EPT_WA | EPT_EA; 880 } else if (pt[offset] & EPT_LARGE_PAGE) 881 split_large_ept_entry(&pt[offset], level); 882 pt = phys_to_virt(pt[offset] & EPT_ADDR_MASK); 883 } 884 offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) & EPT_PGDIR_MASK; 885 pt[offset] = pte; 886 } 887 888 /* Map a page, @perm is the permission of the page */ 889 void install_ept(unsigned long *pml4, 890 unsigned long phys, 891 unsigned long guest_addr, 892 u64 perm) 893 { 894 install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0); 895 } 896 897 /* Map a 1G-size page */ 898 void install_1g_ept(unsigned long *pml4, 899 unsigned long phys, 900 unsigned long guest_addr, 901 u64 perm) 902 { 903 install_ept_entry(pml4, 3, guest_addr, 904 (phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0); 905 } 906 907 /* Map a 2M-size page */ 908 void install_2m_ept(unsigned long *pml4, 909 unsigned long phys, 910 unsigned long guest_addr, 911 u64 perm) 912 { 913 install_ept_entry(pml4, 2, guest_addr, 914 (phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0); 915 } 916 917 /* setup_ept_range : Setup a range of 1:1 mapped page to EPT paging structure. 918 @start : start address of guest page 919 @len : length of address to be mapped 920 @map_1g : whether 1G page map is used 921 @map_2m : whether 2M page map is used 922 @perm : permission for every page 923 */ 924 void setup_ept_range(unsigned long *pml4, unsigned long start, 925 unsigned long len, int map_1g, int map_2m, u64 perm) 926 { 927 u64 phys = start; 928 u64 max = (u64)len + (u64)start; 929 930 if (map_1g) { 931 while (phys + PAGE_SIZE_1G <= max) { 932 install_1g_ept(pml4, phys, phys, perm); 933 phys += PAGE_SIZE_1G; 934 } 935 } 936 if (map_2m) { 937 while (phys + PAGE_SIZE_2M <= max) { 938 install_2m_ept(pml4, phys, phys, perm); 939 phys += PAGE_SIZE_2M; 940 } 941 } 942 while (phys + PAGE_SIZE <= max) { 943 install_ept(pml4, phys, phys, perm); 944 phys += PAGE_SIZE; 945 } 946 } 947 948 /* get_ept_pte : Get the PTE of a given level in EPT, 949 @level == 1 means get the latest level*/ 950 bool get_ept_pte(unsigned long *pml4, unsigned long guest_addr, int level, 951 unsigned long *pte) 952 { 953 int l; 954 unsigned long *pt = pml4, iter_pte; 955 unsigned offset; 956 957 assert(level >= 1 && level <= 4); 958 959 for (l = EPT_PAGE_LEVEL; ; --l) { 960 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 961 iter_pte = pt[offset]; 962 if (l == level) 963 break; 964 if (l < 4 && (iter_pte & EPT_LARGE_PAGE)) 965 return false; 966 if (!(iter_pte & (EPT_PRESENT))) 967 return false; 968 pt = (unsigned long *)(iter_pte & EPT_ADDR_MASK); 969 } 970 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 971 if (pte) 972 *pte = pt[offset]; 973 return true; 974 } 975 976 static void clear_ept_ad_pte(unsigned long *pml4, unsigned long guest_addr) 977 { 978 int l; 979 unsigned long *pt = pml4; 980 u64 pte; 981 unsigned offset; 982 983 for (l = EPT_PAGE_LEVEL; ; --l) { 984 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 985 pt[offset] &= ~(EPT_ACCESS_FLAG|EPT_DIRTY_FLAG); 986 pte = pt[offset]; 987 if (l == 1 || (l < 4 && (pte & EPT_LARGE_PAGE))) 988 break; 989 pt = (unsigned long *)(pte & EPT_ADDR_MASK); 990 } 991 } 992 993 /* clear_ept_ad : Clear EPT A/D bits for the page table walk and the 994 final GPA of a guest address. */ 995 void clear_ept_ad(unsigned long *pml4, u64 guest_cr3, 996 unsigned long guest_addr) 997 { 998 int l; 999 unsigned long *pt = (unsigned long *)guest_cr3, gpa; 1000 u64 pte, offset_in_page; 1001 unsigned offset; 1002 1003 for (l = EPT_PAGE_LEVEL; ; --l) { 1004 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 1005 1006 clear_ept_ad_pte(pml4, (u64) &pt[offset]); 1007 pte = pt[offset]; 1008 if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK))) 1009 break; 1010 if (!(pte & PT_PRESENT_MASK)) 1011 return; 1012 pt = (unsigned long *)(pte & PT_ADDR_MASK); 1013 } 1014 1015 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 1016 offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1); 1017 gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page); 1018 clear_ept_ad_pte(pml4, gpa); 1019 } 1020 1021 /* check_ept_ad : Check the content of EPT A/D bits for the page table 1022 walk and the final GPA of a guest address. */ 1023 void check_ept_ad(unsigned long *pml4, u64 guest_cr3, 1024 unsigned long guest_addr, int expected_gpa_ad, 1025 int expected_pt_ad) 1026 { 1027 int l; 1028 unsigned long *pt = (unsigned long *)guest_cr3, gpa; 1029 u64 ept_pte, pte, offset_in_page; 1030 unsigned offset; 1031 bool bad_pt_ad = false; 1032 1033 for (l = EPT_PAGE_LEVEL; ; --l) { 1034 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 1035 1036 if (!get_ept_pte(pml4, (u64) &pt[offset], 1, &ept_pte)) { 1037 printf("EPT - guest level %d page table is not mapped.\n", l); 1038 return; 1039 } 1040 1041 if (!bad_pt_ad) { 1042 bad_pt_ad |= (ept_pte & (EPT_ACCESS_FLAG|EPT_DIRTY_FLAG)) != expected_pt_ad; 1043 if (bad_pt_ad) 1044 report_fail("EPT - guest level %d page table A=%d/D=%d", 1045 l, 1046 !!(expected_pt_ad & EPT_ACCESS_FLAG), 1047 !!(expected_pt_ad & EPT_DIRTY_FLAG)); 1048 } 1049 1050 pte = pt[offset]; 1051 if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK))) 1052 break; 1053 if (!(pte & PT_PRESENT_MASK)) 1054 return; 1055 pt = (unsigned long *)(pte & PT_ADDR_MASK); 1056 } 1057 1058 if (!bad_pt_ad) 1059 report_pass("EPT - guest page table structures A=%d/D=%d", 1060 !!(expected_pt_ad & EPT_ACCESS_FLAG), 1061 !!(expected_pt_ad & EPT_DIRTY_FLAG)); 1062 1063 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 1064 offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1); 1065 gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page); 1066 1067 if (!get_ept_pte(pml4, gpa, 1, &ept_pte)) { 1068 report_fail("EPT - guest physical address is not mapped"); 1069 return; 1070 } 1071 report((ept_pte & (EPT_ACCESS_FLAG | EPT_DIRTY_FLAG)) == expected_gpa_ad, 1072 "EPT - guest physical address A=%d/D=%d", 1073 !!(expected_gpa_ad & EPT_ACCESS_FLAG), 1074 !!(expected_gpa_ad & EPT_DIRTY_FLAG)); 1075 } 1076 1077 void set_ept_pte(unsigned long *pml4, unsigned long guest_addr, 1078 int level, u64 pte_val) 1079 { 1080 int l; 1081 unsigned long *pt = pml4; 1082 unsigned offset; 1083 1084 assert(level >= 1 && level <= 4); 1085 1086 for (l = EPT_PAGE_LEVEL; ; --l) { 1087 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 1088 if (l == level) 1089 break; 1090 assert(pt[offset] & EPT_PRESENT); 1091 pt = (unsigned long *)(pt[offset] & EPT_ADDR_MASK); 1092 } 1093 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 1094 pt[offset] = pte_val; 1095 } 1096 1097 static void init_vmcs_ctrl(void) 1098 { 1099 /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ 1100 /* 26.2.1.1 */ 1101 vmcs_write(PIN_CONTROLS, ctrl_pin); 1102 /* Disable VMEXIT of IO instruction */ 1103 vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]); 1104 if (ctrl_cpu_rev[0].set & CPU_SECONDARY) { 1105 ctrl_cpu[1] = (ctrl_cpu[1] | ctrl_cpu_rev[1].set) & 1106 ctrl_cpu_rev[1].clr; 1107 vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]); 1108 } 1109 vmcs_write(CR3_TARGET_COUNT, 0); 1110 vmcs_write(VPID, ++vpid_cnt); 1111 } 1112 1113 static void init_vmcs_host(void) 1114 { 1115 /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ 1116 /* 26.2.1.2 */ 1117 vmcs_write(HOST_EFER, rdmsr(MSR_EFER)); 1118 1119 /* 26.2.1.3 */ 1120 vmcs_write(ENT_CONTROLS, ctrl_enter); 1121 vmcs_write(EXI_CONTROLS, ctrl_exit); 1122 1123 /* 26.2.2 */ 1124 vmcs_write(HOST_CR0, read_cr0()); 1125 vmcs_write(HOST_CR3, read_cr3()); 1126 vmcs_write(HOST_CR4, read_cr4()); 1127 vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter)); 1128 vmcs_write(HOST_SYSENTER_CS, KERNEL_CS); 1129 if (ctrl_exit_rev.clr & EXI_LOAD_PAT) 1130 vmcs_write(HOST_PAT, rdmsr(MSR_IA32_CR_PAT)); 1131 1132 /* 26.2.3 */ 1133 vmcs_write(HOST_SEL_CS, KERNEL_CS); 1134 vmcs_write(HOST_SEL_SS, KERNEL_DS); 1135 vmcs_write(HOST_SEL_DS, KERNEL_DS); 1136 vmcs_write(HOST_SEL_ES, KERNEL_DS); 1137 vmcs_write(HOST_SEL_FS, KERNEL_DS); 1138 vmcs_write(HOST_SEL_GS, KERNEL_DS); 1139 vmcs_write(HOST_SEL_TR, TSS_MAIN); 1140 vmcs_write(HOST_BASE_TR, get_gdt_entry_base(get_tss_descr())); 1141 vmcs_write(HOST_BASE_GDTR, gdt_descr.base); 1142 vmcs_write(HOST_BASE_IDTR, idt_descr.base); 1143 vmcs_write(HOST_BASE_FS, 0); 1144 vmcs_write(HOST_BASE_GS, rdmsr(MSR_GS_BASE)); 1145 1146 /* Set other vmcs area */ 1147 vmcs_write(PF_ERROR_MASK, 0); 1148 vmcs_write(PF_ERROR_MATCH, 0); 1149 vmcs_write(VMCS_LINK_PTR, ~0ul); 1150 vmcs_write(VMCS_LINK_PTR_HI, ~0ul); 1151 vmcs_write(HOST_RIP, (u64)(&vmx_return)); 1152 } 1153 1154 static void init_vmcs_guest(void) 1155 { 1156 gdt_entry_t *tss_descr = get_tss_descr(); 1157 1158 /* 26.3 CHECKING AND LOADING GUEST STATE */ 1159 ulong guest_cr0, guest_cr4, guest_cr3; 1160 /* 26.3.1.1 */ 1161 guest_cr0 = read_cr0(); 1162 guest_cr4 = read_cr4(); 1163 guest_cr3 = read_cr3(); 1164 if (ctrl_enter & ENT_GUEST_64) { 1165 guest_cr0 |= X86_CR0_PG; 1166 guest_cr4 |= X86_CR4_PAE; 1167 } 1168 if ((ctrl_enter & ENT_GUEST_64) == 0) 1169 guest_cr4 &= (~X86_CR4_PCIDE); 1170 if (guest_cr0 & X86_CR0_PG) 1171 guest_cr0 |= X86_CR0_PE; 1172 vmcs_write(GUEST_CR0, guest_cr0); 1173 vmcs_write(GUEST_CR3, guest_cr3); 1174 vmcs_write(GUEST_CR4, guest_cr4); 1175 vmcs_write(GUEST_SYSENTER_CS, KERNEL_CS); 1176 vmcs_write(GUEST_SYSENTER_ESP, guest_syscall_stack_top); 1177 vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter)); 1178 vmcs_write(GUEST_DR7, 0); 1179 vmcs_write(GUEST_EFER, rdmsr(MSR_EFER)); 1180 1181 /* 26.3.1.2 */ 1182 vmcs_write(GUEST_SEL_CS, KERNEL_CS); 1183 vmcs_write(GUEST_SEL_SS, KERNEL_DS); 1184 vmcs_write(GUEST_SEL_DS, KERNEL_DS); 1185 vmcs_write(GUEST_SEL_ES, KERNEL_DS); 1186 vmcs_write(GUEST_SEL_FS, KERNEL_DS); 1187 vmcs_write(GUEST_SEL_GS, KERNEL_DS); 1188 vmcs_write(GUEST_SEL_TR, TSS_MAIN); 1189 vmcs_write(GUEST_SEL_LDTR, 0); 1190 1191 vmcs_write(GUEST_BASE_CS, 0); 1192 vmcs_write(GUEST_BASE_ES, 0); 1193 vmcs_write(GUEST_BASE_SS, 0); 1194 vmcs_write(GUEST_BASE_DS, 0); 1195 vmcs_write(GUEST_BASE_FS, 0); 1196 vmcs_write(GUEST_BASE_GS, rdmsr(MSR_GS_BASE)); 1197 vmcs_write(GUEST_BASE_TR, get_gdt_entry_base(tss_descr)); 1198 vmcs_write(GUEST_BASE_LDTR, 0); 1199 1200 vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF); 1201 vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF); 1202 vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF); 1203 vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF); 1204 vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF); 1205 vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF); 1206 vmcs_write(GUEST_LIMIT_LDTR, 0xffff); 1207 vmcs_write(GUEST_LIMIT_TR, get_gdt_entry_limit(tss_descr)); 1208 1209 vmcs_write(GUEST_AR_CS, 0xa09b); 1210 vmcs_write(GUEST_AR_DS, 0xc093); 1211 vmcs_write(GUEST_AR_ES, 0xc093); 1212 vmcs_write(GUEST_AR_FS, 0xc093); 1213 vmcs_write(GUEST_AR_GS, 0xc093); 1214 vmcs_write(GUEST_AR_SS, 0xc093); 1215 vmcs_write(GUEST_AR_LDTR, 0x82); 1216 vmcs_write(GUEST_AR_TR, 0x8b); 1217 1218 /* 26.3.1.3 */ 1219 vmcs_write(GUEST_BASE_GDTR, gdt_descr.base); 1220 vmcs_write(GUEST_BASE_IDTR, idt_descr.base); 1221 vmcs_write(GUEST_LIMIT_GDTR, gdt_descr.limit); 1222 vmcs_write(GUEST_LIMIT_IDTR, idt_descr.limit); 1223 1224 /* 26.3.1.4 */ 1225 vmcs_write(GUEST_RIP, (u64)(&guest_entry)); 1226 vmcs_write(GUEST_RSP, guest_stack_top); 1227 vmcs_write(GUEST_RFLAGS, X86_EFLAGS_FIXED); 1228 1229 /* 26.3.1.5 */ 1230 vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE); 1231 vmcs_write(GUEST_INTR_STATE, 0); 1232 } 1233 1234 int init_vmcs(struct vmcs **vmcs) 1235 { 1236 *vmcs = alloc_page(); 1237 (*vmcs)->hdr.revision_id = basic_msr.revision; 1238 /* vmclear first to init vmcs */ 1239 if (vmcs_clear(*vmcs)) { 1240 printf("%s : vmcs_clear error\n", __func__); 1241 return 1; 1242 } 1243 1244 if (make_vmcs_current(*vmcs)) { 1245 printf("%s : make_vmcs_current error\n", __func__); 1246 return 1; 1247 } 1248 1249 /* All settings to pin/exit/enter/cpu 1250 control fields should be placed here */ 1251 ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI; 1252 ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64 | EXI_LOAD_PAT; 1253 ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64); 1254 /* DIsable IO instruction VMEXIT now */ 1255 ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP)); 1256 ctrl_cpu[1] = 0; 1257 1258 ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr; 1259 ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr; 1260 ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr; 1261 ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr; 1262 1263 init_vmcs_ctrl(); 1264 init_vmcs_host(); 1265 init_vmcs_guest(); 1266 return 0; 1267 } 1268 1269 void enable_vmx(void) 1270 { 1271 bool vmx_enabled = 1272 rdmsr(MSR_IA32_FEATURE_CONTROL) & 1273 FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; 1274 1275 if (!vmx_enabled) { 1276 wrmsr(MSR_IA32_FEATURE_CONTROL, 1277 FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX | 1278 FEATURE_CONTROL_LOCKED); 1279 } 1280 } 1281 1282 static void init_vmx_caps(void) 1283 { 1284 basic_msr.val = rdmsr(MSR_IA32_VMX_BASIC); 1285 ctrl_pin_rev.val = rdmsr(basic_msr.ctrl ? MSR_IA32_VMX_TRUE_PIN 1286 : MSR_IA32_VMX_PINBASED_CTLS); 1287 ctrl_exit_rev.val = rdmsr(basic_msr.ctrl ? MSR_IA32_VMX_TRUE_EXIT 1288 : MSR_IA32_VMX_EXIT_CTLS); 1289 ctrl_enter_rev.val = rdmsr(basic_msr.ctrl ? MSR_IA32_VMX_TRUE_ENTRY 1290 : MSR_IA32_VMX_ENTRY_CTLS); 1291 ctrl_cpu_rev[0].val = rdmsr(basic_msr.ctrl ? MSR_IA32_VMX_TRUE_PROC 1292 : MSR_IA32_VMX_PROCBASED_CTLS); 1293 if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0) 1294 ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2); 1295 else 1296 ctrl_cpu_rev[1].val = 0; 1297 if ((ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID)) != 0) 1298 ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); 1299 else 1300 ept_vpid.val = 0; 1301 } 1302 1303 void init_vmx(u64 *vmxon_region) 1304 { 1305 ulong fix_cr0_set, fix_cr0_clr; 1306 ulong fix_cr4_set, fix_cr4_clr; 1307 1308 fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0); 1309 fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1); 1310 fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0); 1311 fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1); 1312 1313 write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set); 1314 write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE); 1315 1316 *vmxon_region = basic_msr.revision; 1317 } 1318 1319 static void alloc_bsp_vmx_pages(void) 1320 { 1321 bsp_vmxon_region = alloc_page(); 1322 guest_stack_top = (uintptr_t)alloc_page() + PAGE_SIZE; 1323 guest_syscall_stack_top = (uintptr_t)alloc_page() + PAGE_SIZE; 1324 vmcs_root = alloc_page(); 1325 } 1326 1327 static void init_bsp_vmx(void) 1328 { 1329 init_vmx_caps(); 1330 alloc_bsp_vmx_pages(); 1331 init_vmx(bsp_vmxon_region); 1332 } 1333 1334 static void do_vmxon_off(void *data) 1335 { 1336 TEST_ASSERT(!vmx_on()); 1337 TEST_ASSERT(!vmx_off()); 1338 } 1339 1340 static void do_write_feature_control(void *data) 1341 { 1342 wrmsr(MSR_IA32_FEATURE_CONTROL, 0); 1343 } 1344 1345 static int test_vmx_feature_control(void) 1346 { 1347 u64 ia32_feature_control; 1348 bool vmx_enabled; 1349 bool feature_control_locked; 1350 1351 ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); 1352 vmx_enabled = 1353 ia32_feature_control & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; 1354 feature_control_locked = 1355 ia32_feature_control & FEATURE_CONTROL_LOCKED; 1356 1357 if (vmx_enabled && feature_control_locked) { 1358 printf("VMX enabled and locked by BIOS\n"); 1359 return 0; 1360 } else if (feature_control_locked) { 1361 printf("ERROR: VMX locked out by BIOS!?\n"); 1362 return 1; 1363 } 1364 1365 wrmsr(MSR_IA32_FEATURE_CONTROL, 0); 1366 report(test_for_exception(GP_VECTOR, &do_vmxon_off, NULL), 1367 "test vmxon with FEATURE_CONTROL cleared"); 1368 1369 wrmsr(MSR_IA32_FEATURE_CONTROL, FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX); 1370 report(test_for_exception(GP_VECTOR, &do_vmxon_off, NULL), 1371 "test vmxon without FEATURE_CONTROL lock"); 1372 1373 wrmsr(MSR_IA32_FEATURE_CONTROL, 1374 FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX | 1375 FEATURE_CONTROL_LOCKED); 1376 1377 ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); 1378 vmx_enabled = 1379 ia32_feature_control & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; 1380 report(vmx_enabled, "test enable VMX in FEATURE_CONTROL"); 1381 1382 report(test_for_exception(GP_VECTOR, &do_write_feature_control, NULL), 1383 "test FEATURE_CONTROL lock bit"); 1384 1385 return !vmx_enabled; 1386 } 1387 1388 1389 static void write_cr(int cr_number, unsigned long val) 1390 { 1391 if (!cr_number) 1392 write_cr0(val); 1393 else 1394 write_cr4(val); 1395 } 1396 1397 static int write_cr_safe(int cr_number, unsigned long val) 1398 { 1399 if (!cr_number) 1400 return write_cr0_safe(val); 1401 else 1402 return write_cr4_safe(val); 1403 } 1404 1405 static int test_vmxon_bad_cr(int cr_number, unsigned long orig_cr, 1406 unsigned long *flexible_bits) 1407 { 1408 unsigned long required1, disallowed1, val, bit; 1409 int ret, i, expected; 1410 1411 if (!cr_number) { 1412 required1 = rdmsr(MSR_IA32_VMX_CR0_FIXED0); 1413 disallowed1 = ~rdmsr(MSR_IA32_VMX_CR0_FIXED1); 1414 } else { 1415 required1 = rdmsr(MSR_IA32_VMX_CR4_FIXED0); 1416 disallowed1 = ~rdmsr(MSR_IA32_VMX_CR4_FIXED1); 1417 } 1418 1419 *flexible_bits = 0; 1420 1421 for (i = 0; i < BITS_PER_LONG; i++) { 1422 bit = BIT(i); 1423 1424 /* 1425 * Don't touch bits that will affect the current paging mode, 1426 * toggling them will send the test into the weeds before it 1427 * gets to VMXON. nVMX tests are 64-bit only, so CR4.PAE is 1428 * guaranteed to be '1', i.e. PSE is fair game. PKU/PKS are 1429 * also fair game as KVM doesn't configure any keys. SMAP and 1430 * SMEP are off limits because the page tables have the USER 1431 * bit set at all levels. 1432 */ 1433 if ((cr_number == 0 && (bit == X86_CR0_PE || bit == X86_CR0_PG)) || 1434 (cr_number == 4 && (bit == X86_CR4_PAE || bit == X86_CR4_SMAP || 1435 bit == X86_CR4_SMEP || bit == X86_CR4_CET))) 1436 continue; 1437 1438 if (!(bit & required1) && !(bit & disallowed1)) { 1439 if (!write_cr_safe(cr_number, orig_cr ^ bit)) { 1440 *flexible_bits |= bit; 1441 write_cr(cr_number, orig_cr); 1442 } 1443 continue; 1444 } 1445 1446 assert(!(required1 & disallowed1)); 1447 1448 if (required1 & bit) 1449 val = orig_cr & ~bit; 1450 else 1451 val = orig_cr | bit; 1452 1453 if (write_cr_safe(cr_number, val)) 1454 continue; 1455 1456 /* 1457 * CR0.PE==0 and CR4.VMXE==0 result in #UD, all other invalid 1458 * CR0/CR4 bits result in #GP. Include CR0.PE even though it's 1459 * dead code (see above) for completeness. 1460 */ 1461 if ((cr_number == 0 && bit == X86_CR0_PE) || 1462 (cr_number == 4 && bit == X86_CR4_VMXE)) 1463 expected = UD_VECTOR; 1464 else 1465 expected = GP_VECTOR; 1466 1467 ret = vmx_on(); 1468 report(ret == expected, 1469 "VMXON with CR%d bit %d %s should %s, got '%d'", 1470 cr_number, i, (required1 & bit) ? "cleared" : "set", 1471 expected == UD_VECTOR ? "UD" : "#GP", ret); 1472 1473 write_cr(cr_number, orig_cr); 1474 1475 if (ret <= 0) 1476 return 1; 1477 } 1478 return 0; 1479 } 1480 1481 static int test_vmxon(void) 1482 { 1483 unsigned long orig_cr0, flexible_cr0, orig_cr4, flexible_cr4; 1484 int width = cpuid_maxphyaddr(); 1485 u64 *vmxon_region; 1486 int ret; 1487 1488 orig_cr0 = read_cr0(); 1489 if (test_vmxon_bad_cr(0, orig_cr0, &flexible_cr0)) 1490 return 1; 1491 1492 orig_cr4 = read_cr4(); 1493 if (test_vmxon_bad_cr(4, orig_cr4, &flexible_cr4)) 1494 return 1; 1495 1496 /* Unaligned page access */ 1497 vmxon_region = (u64 *)((intptr_t)bsp_vmxon_region + 1); 1498 ret = __vmxon_safe(vmxon_region); 1499 report(ret < 0, "test vmxon with unaligned vmxon region"); 1500 if (ret >= 0) 1501 return 1; 1502 1503 /* gpa bits beyond physical address width are set*/ 1504 vmxon_region = (u64 *)((intptr_t)bsp_vmxon_region | ((u64)1 << (width+1))); 1505 ret = __vmxon_safe(vmxon_region); 1506 report(ret < 0, "test vmxon with bits set beyond physical address width"); 1507 if (ret >= 0) 1508 return 1; 1509 1510 /* invalid revision identifier */ 1511 *bsp_vmxon_region = 0xba9da9; 1512 ret = vmxon_safe(); 1513 report(ret < 0, "test vmxon with invalid revision identifier"); 1514 if (ret >= 0) 1515 return 1; 1516 1517 /* and finally a valid region, with valid-but-tweaked cr0/cr4 */ 1518 write_cr0(orig_cr0 ^ flexible_cr0); 1519 write_cr4(orig_cr4 ^ flexible_cr4); 1520 *bsp_vmxon_region = basic_msr.revision; 1521 ret = vmxon_safe(); 1522 report(!ret, "test vmxon with valid vmxon region"); 1523 write_cr0(orig_cr0); 1524 write_cr4(orig_cr4); 1525 return ret; 1526 } 1527 1528 static void test_vmptrld(void) 1529 { 1530 struct vmcs *vmcs, *tmp_root; 1531 int width = cpuid_maxphyaddr(); 1532 1533 vmcs = alloc_page(); 1534 vmcs->hdr.revision_id = basic_msr.revision; 1535 1536 /* Unaligned page access */ 1537 tmp_root = (struct vmcs *)((intptr_t)vmcs + 1); 1538 report(make_vmcs_current(tmp_root) == 1, 1539 "test vmptrld with unaligned vmcs"); 1540 1541 /* gpa bits beyond physical address width are set*/ 1542 tmp_root = (struct vmcs *)((intptr_t)vmcs | 1543 ((u64)1 << (width+1))); 1544 report(make_vmcs_current(tmp_root) == 1, 1545 "test vmptrld with vmcs address bits set beyond physical address width"); 1546 1547 /* Pass VMXON region */ 1548 assert(!vmcs_clear(vmcs)); 1549 assert(!make_vmcs_current(vmcs)); 1550 tmp_root = (struct vmcs *)bsp_vmxon_region; 1551 report(make_vmcs_current(tmp_root) == 1, 1552 "test vmptrld with vmxon region"); 1553 report(vmcs_read(VMX_INST_ERROR) == VMXERR_VMPTRLD_VMXON_POINTER, 1554 "test vmptrld with vmxon region vm-instruction error"); 1555 1556 report(make_vmcs_current(vmcs) == 0, 1557 "test vmptrld with valid vmcs region"); 1558 } 1559 1560 static void test_vmptrst(void) 1561 { 1562 int ret; 1563 struct vmcs *vmcs1, *vmcs2; 1564 1565 vmcs1 = alloc_page(); 1566 init_vmcs(&vmcs1); 1567 ret = vmcs_save(&vmcs2); 1568 report((!ret) && (vmcs1 == vmcs2), "test vmptrst"); 1569 } 1570 1571 struct vmx_ctl_msr { 1572 const char *name; 1573 u32 index, true_index; 1574 u32 default1; 1575 } vmx_ctl_msr[] = { 1576 { "MSR_IA32_VMX_PINBASED_CTLS", MSR_IA32_VMX_PINBASED_CTLS, 1577 MSR_IA32_VMX_TRUE_PIN, 0x16 }, 1578 { "MSR_IA32_VMX_PROCBASED_CTLS", MSR_IA32_VMX_PROCBASED_CTLS, 1579 MSR_IA32_VMX_TRUE_PROC, 0x401e172 }, 1580 { "MSR_IA32_VMX_PROCBASED_CTLS2", MSR_IA32_VMX_PROCBASED_CTLS2, 1581 MSR_IA32_VMX_PROCBASED_CTLS2, 0 }, 1582 { "MSR_IA32_VMX_EXIT_CTLS", MSR_IA32_VMX_EXIT_CTLS, 1583 MSR_IA32_VMX_TRUE_EXIT, 0x36dff }, 1584 { "MSR_IA32_VMX_ENTRY_CTLS", MSR_IA32_VMX_ENTRY_CTLS, 1585 MSR_IA32_VMX_TRUE_ENTRY, 0x11ff }, 1586 }; 1587 1588 static void test_vmx_caps(void) 1589 { 1590 u64 val, default1, fixed0, fixed1; 1591 union vmx_ctrl_msr ctrl, true_ctrl; 1592 unsigned int n; 1593 bool ok; 1594 1595 printf("\nTest suite: VMX capability reporting\n"); 1596 1597 report((basic_msr.revision & (1ul << 31)) == 0 && 1598 basic_msr.size > 0 && basic_msr.size <= 4096 && 1599 (basic_msr.type == 0 || basic_msr.type == 6) && 1600 basic_msr.reserved1 == 0 && basic_msr.reserved2 == 0, 1601 "MSR_IA32_VMX_BASIC"); 1602 1603 val = rdmsr(MSR_IA32_VMX_MISC); 1604 report((!(ctrl_cpu_rev[1].clr & CPU_URG) || val & (1ul << 5)) && 1605 ((val >> 16) & 0x1ff) <= 256 && 1606 (val & 0x80007e00) == 0, 1607 "MSR_IA32_VMX_MISC"); 1608 1609 for (n = 0; n < ARRAY_SIZE(vmx_ctl_msr); n++) { 1610 ctrl.val = rdmsr(vmx_ctl_msr[n].index); 1611 default1 = vmx_ctl_msr[n].default1; 1612 ok = (ctrl.set & default1) == default1; 1613 ok = ok && (ctrl.set & ~ctrl.clr) == 0; 1614 if (ok && basic_msr.ctrl) { 1615 true_ctrl.val = rdmsr(vmx_ctl_msr[n].true_index); 1616 ok = ctrl.clr == true_ctrl.clr; 1617 ok = ok && ctrl.set == (true_ctrl.set | default1); 1618 } 1619 report(ok, "%s", vmx_ctl_msr[n].name); 1620 } 1621 1622 fixed0 = rdmsr(MSR_IA32_VMX_CR0_FIXED0); 1623 fixed1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1); 1624 report(((fixed0 ^ fixed1) & ~fixed1) == 0, 1625 "MSR_IA32_VMX_IA32_VMX_CR0_FIXED0/1"); 1626 1627 fixed0 = rdmsr(MSR_IA32_VMX_CR4_FIXED0); 1628 fixed1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1); 1629 report(((fixed0 ^ fixed1) & ~fixed1) == 0, 1630 "MSR_IA32_VMX_IA32_VMX_CR4_FIXED0/1"); 1631 1632 val = rdmsr(MSR_IA32_VMX_VMCS_ENUM); 1633 report((val & VMCS_FIELD_INDEX_MASK) >= 0x2a && 1634 (val & 0xfffffffffffffc01Ull) == 0, 1635 "MSR_IA32_VMX_VMCS_ENUM"); 1636 1637 fixed0 = -1ull; 1638 fixed0 &= ~(EPT_CAP_EXEC_ONLY | 1639 EPT_CAP_PWL4 | 1640 EPT_CAP_PWL5 | 1641 EPT_CAP_UC | 1642 EPT_CAP_WB | 1643 EPT_CAP_2M_PAGE | 1644 EPT_CAP_1G_PAGE | 1645 EPT_CAP_INVEPT | 1646 EPT_CAP_AD_FLAG | 1647 EPT_CAP_ADV_EPT_INFO | 1648 EPT_CAP_INVEPT_SINGLE | 1649 EPT_CAP_INVEPT_ALL | 1650 VPID_CAP_INVVPID | 1651 VPID_CAP_INVVPID_ADDR | 1652 VPID_CAP_INVVPID_CXTGLB | 1653 VPID_CAP_INVVPID_ALL | 1654 VPID_CAP_INVVPID_CXTLOC); 1655 1656 val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); 1657 report((val & fixed0) == 0, 1658 "MSR_IA32_VMX_EPT_VPID_CAP"); 1659 } 1660 1661 /* This function can only be called in guest */ 1662 void __attribute__((__used__)) hypercall(u32 hypercall_no) 1663 { 1664 u64 val = 0; 1665 val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT; 1666 hypercall_field = val; 1667 asm volatile("vmcall\n\t"); 1668 } 1669 1670 static bool is_hypercall(union exit_reason exit_reason) 1671 { 1672 return exit_reason.basic == VMX_VMCALL && 1673 (hypercall_field & HYPERCALL_BIT); 1674 } 1675 1676 static int handle_hypercall(void) 1677 { 1678 ulong hypercall_no; 1679 1680 hypercall_no = hypercall_field & HYPERCALL_MASK; 1681 hypercall_field = 0; 1682 switch (hypercall_no) { 1683 case HYPERCALL_VMEXIT: 1684 return VMX_TEST_VMEXIT; 1685 case HYPERCALL_VMABORT: 1686 return VMX_TEST_VMABORT; 1687 case HYPERCALL_VMSKIP: 1688 return VMX_TEST_VMSKIP; 1689 default: 1690 printf("ERROR : Invalid hypercall number : %ld\n", hypercall_no); 1691 } 1692 return VMX_TEST_EXIT; 1693 } 1694 1695 static void continue_abort(void) 1696 { 1697 assert(!in_guest); 1698 printf("Host was here when guest aborted:\n"); 1699 dump_stack(); 1700 longjmp(abort_target, 1); 1701 abort(); 1702 } 1703 1704 void __abort_test(void) 1705 { 1706 if (in_guest) 1707 hypercall(HYPERCALL_VMABORT); 1708 else 1709 longjmp(abort_target, 1); 1710 abort(); 1711 } 1712 1713 static void continue_skip(void) 1714 { 1715 assert(!in_guest); 1716 longjmp(abort_target, 1); 1717 abort(); 1718 } 1719 1720 void test_skip(const char *msg) 1721 { 1722 printf("%s skipping test: %s\n", in_guest ? "Guest" : "Host", msg); 1723 if (in_guest) 1724 hypercall(HYPERCALL_VMABORT); 1725 else 1726 longjmp(abort_target, 1); 1727 abort(); 1728 } 1729 1730 static int exit_handler(union exit_reason exit_reason) 1731 { 1732 int ret; 1733 1734 current->exits++; 1735 regs.rflags = vmcs_read(GUEST_RFLAGS); 1736 if (is_hypercall(exit_reason)) 1737 ret = handle_hypercall(); 1738 else 1739 ret = current->exit_handler(exit_reason); 1740 vmcs_write(GUEST_RFLAGS, regs.rflags); 1741 1742 return ret; 1743 } 1744 1745 /* 1746 * Tries to enter the guest, populates @result with VM-Fail, VM-Exit, entered, 1747 * etc... 1748 */ 1749 static noinline void vmx_enter_guest(struct vmentry_result *result) 1750 { 1751 memset(result, 0, sizeof(*result)); 1752 1753 in_guest = 1; 1754 asm volatile ( 1755 "mov %[HOST_RSP], %%rdi\n\t" 1756 "vmwrite %%rsp, %%rdi\n\t" 1757 LOAD_GPR_C 1758 "cmpb $0, %[launched]\n\t" 1759 "jne 1f\n\t" 1760 "vmlaunch\n\t" 1761 "jmp 2f\n\t" 1762 "1: " 1763 "vmresume\n\t" 1764 "2: " 1765 SAVE_GPR_C 1766 "pushf\n\t" 1767 "pop %%rdi\n\t" 1768 "mov %%rdi, %[vm_fail_flags]\n\t" 1769 "movl $1, %[vm_fail]\n\t" 1770 "jmp 3f\n\t" 1771 "vmx_return:\n\t" 1772 SAVE_GPR_C 1773 "3: \n\t" 1774 : [vm_fail]"+m"(result->vm_fail), 1775 [vm_fail_flags]"=m"(result->flags) 1776 : [launched]"m"(launched), [HOST_RSP]"i"(HOST_RSP) 1777 : "rdi", "memory", "cc" 1778 ); 1779 in_guest = 0; 1780 1781 result->vmlaunch = !launched; 1782 result->instr = launched ? "vmresume" : "vmlaunch"; 1783 result->exit_reason.full = result->vm_fail ? 0xdead : 1784 vmcs_read(EXI_REASON); 1785 result->entered = !result->vm_fail && 1786 !result->exit_reason.failed_vmentry; 1787 } 1788 1789 static int vmx_run(void) 1790 { 1791 struct vmentry_result result; 1792 u32 ret; 1793 1794 while (1) { 1795 vmx_enter_guest(&result); 1796 if (result.entered) { 1797 /* 1798 * VMCS isn't in "launched" state if there's been any 1799 * entry failure (early or otherwise). 1800 */ 1801 launched = 1; 1802 ret = exit_handler(result.exit_reason); 1803 } else if (current->entry_failure_handler) { 1804 ret = current->entry_failure_handler(&result); 1805 } else { 1806 ret = VMX_TEST_EXIT; 1807 } 1808 1809 switch (ret) { 1810 case VMX_TEST_RESUME: 1811 continue; 1812 case VMX_TEST_VMEXIT: 1813 guest_finished = 1; 1814 return 0; 1815 case VMX_TEST_EXIT: 1816 break; 1817 default: 1818 printf("ERROR : Invalid %s_handler return val %d.\n", 1819 result.entered ? "exit" : "entry_failure", 1820 ret); 1821 break; 1822 } 1823 1824 if (result.entered) 1825 print_vmexit_info(result.exit_reason); 1826 else 1827 print_vmentry_failure_info(&result); 1828 abort(); 1829 } 1830 } 1831 1832 static void run_teardown_step(struct test_teardown_step *step) 1833 { 1834 step->func(step->data); 1835 } 1836 1837 static int test_run(struct vmx_test *test) 1838 { 1839 int r; 1840 1841 /* Validate V2 interface. */ 1842 if (test->v2) { 1843 int ret = 0; 1844 if (test->init || test->guest_main || test->exit_handler || 1845 test->syscall_handler) { 1846 report_fail("V2 test cannot specify V1 callbacks."); 1847 ret = 1; 1848 } 1849 if (ret) 1850 return ret; 1851 } 1852 1853 if (test->name == NULL) 1854 test->name = "(no name)"; 1855 if (vmx_on()) { 1856 printf("%s : vmxon failed.\n", __func__); 1857 return 1; 1858 } 1859 1860 init_vmcs(&(test->vmcs)); 1861 /* Directly call test->init is ok here, init_vmcs has done 1862 vmcs init, vmclear and vmptrld*/ 1863 if (test->init && test->init(test->vmcs) != VMX_TEST_START) 1864 goto out; 1865 teardown_count = 0; 1866 v2_guest_main = NULL; 1867 test->exits = 0; 1868 current = test; 1869 regs = test->guest_regs; 1870 vmcs_write(GUEST_RFLAGS, regs.rflags | X86_EFLAGS_FIXED); 1871 launched = 0; 1872 guest_finished = 0; 1873 printf("\nTest suite: %s\n", test->name); 1874 1875 r = setjmp(abort_target); 1876 if (r) { 1877 assert(!in_guest); 1878 goto out; 1879 } 1880 1881 1882 if (test->v2) 1883 test->v2(); 1884 else 1885 vmx_run(); 1886 1887 while (teardown_count > 0) 1888 run_teardown_step(&teardown_steps[--teardown_count]); 1889 1890 if (launched && !guest_finished) 1891 report_fail("Guest didn't run to completion."); 1892 1893 out: 1894 if (vmx_off()) { 1895 printf("%s : vmxoff failed.\n", __func__); 1896 return 1; 1897 } 1898 return 0; 1899 } 1900 1901 /* 1902 * Add a teardown step. Executed after the test's main function returns. 1903 * Teardown steps executed in reverse order. 1904 */ 1905 void test_add_teardown(test_teardown_func func, void *data) 1906 { 1907 struct test_teardown_step *step; 1908 1909 TEST_ASSERT_MSG(teardown_count < MAX_TEST_TEARDOWN_STEPS, 1910 "There are already %d teardown steps.", 1911 teardown_count); 1912 step = &teardown_steps[teardown_count++]; 1913 step->func = func; 1914 step->data = data; 1915 } 1916 1917 static void __test_set_guest(test_guest_func func) 1918 { 1919 assert(current->v2); 1920 v2_guest_main = func; 1921 } 1922 1923 /* 1924 * Set the target of the first enter_guest call. Can only be called once per 1925 * test. Must be called before first enter_guest call. 1926 */ 1927 void test_set_guest(test_guest_func func) 1928 { 1929 TEST_ASSERT_MSG(!v2_guest_main, "Already set guest func."); 1930 __test_set_guest(func); 1931 } 1932 1933 /* 1934 * Set the target of the enter_guest call and reset the RIP so 'func' will 1935 * start from the beginning. This can be called multiple times per test. 1936 */ 1937 void test_override_guest(test_guest_func func) 1938 { 1939 __test_set_guest(func); 1940 init_vmcs_guest(); 1941 } 1942 1943 void test_set_guest_finished(void) 1944 { 1945 guest_finished = 1; 1946 } 1947 1948 static void check_for_guest_termination(union exit_reason exit_reason) 1949 { 1950 if (is_hypercall(exit_reason)) { 1951 int ret; 1952 1953 ret = handle_hypercall(); 1954 switch (ret) { 1955 case VMX_TEST_VMEXIT: 1956 guest_finished = 1; 1957 break; 1958 case VMX_TEST_VMABORT: 1959 continue_abort(); 1960 break; 1961 case VMX_TEST_VMSKIP: 1962 continue_skip(); 1963 break; 1964 default: 1965 printf("ERROR : Invalid handle_hypercall return %d.\n", 1966 ret); 1967 abort(); 1968 } 1969 } 1970 } 1971 1972 /* 1973 * Enters the guest (or launches it for the first time). Error to call once the 1974 * guest has returned (i.e., run past the end of its guest() function). 1975 */ 1976 void __enter_guest(u8 abort_flag, struct vmentry_result *result) 1977 { 1978 TEST_ASSERT_MSG(v2_guest_main, 1979 "Never called test_set_guest_func!"); 1980 1981 TEST_ASSERT_MSG(!guest_finished, 1982 "Called enter_guest() after guest returned."); 1983 1984 vmx_enter_guest(result); 1985 1986 if (result->vm_fail) { 1987 if (abort_flag & ABORT_ON_EARLY_VMENTRY_FAIL) 1988 goto do_abort; 1989 return; 1990 } 1991 if (result->exit_reason.failed_vmentry) { 1992 if ((abort_flag & ABORT_ON_INVALID_GUEST_STATE) || 1993 result->exit_reason.basic != VMX_FAIL_STATE) 1994 goto do_abort; 1995 return; 1996 } 1997 1998 launched = 1; 1999 check_for_guest_termination(result->exit_reason); 2000 return; 2001 2002 do_abort: 2003 print_vmentry_failure_info(result); 2004 abort(); 2005 } 2006 2007 void enter_guest_with_bad_controls(void) 2008 { 2009 struct vmentry_result result; 2010 2011 TEST_ASSERT_MSG(v2_guest_main, 2012 "Never called test_set_guest_func!"); 2013 2014 TEST_ASSERT_MSG(!guest_finished, 2015 "Called enter_guest() after guest returned."); 2016 2017 __enter_guest(ABORT_ON_INVALID_GUEST_STATE, &result); 2018 report(result.vm_fail, "VM-Fail occurred as expected"); 2019 report((result.flags & VMX_ENTRY_FLAGS) == X86_EFLAGS_ZF, 2020 "FLAGS set correctly on VM-Fail"); 2021 report(vmcs_read(VMX_INST_ERROR) == VMXERR_ENTRY_INVALID_CONTROL_FIELD, 2022 "VM-Inst Error # is %d (VM entry with invalid control field(s))", 2023 VMXERR_ENTRY_INVALID_CONTROL_FIELD); 2024 } 2025 2026 void enter_guest(void) 2027 { 2028 struct vmentry_result result; 2029 2030 __enter_guest(ABORT_ON_EARLY_VMENTRY_FAIL | 2031 ABORT_ON_INVALID_GUEST_STATE, &result); 2032 } 2033 2034 extern struct vmx_test vmx_tests[]; 2035 2036 static bool 2037 test_wanted(const char *name, const char *filters[], int filter_count) 2038 { 2039 int i; 2040 bool positive = false; 2041 bool match = false; 2042 char clean_name[strlen(name) + 1]; 2043 char *c; 2044 const char *n; 2045 2046 printf("filter = %s, test = %s\n", filters[0], name); 2047 2048 /* Replace spaces with underscores. */ 2049 n = name; 2050 c = &clean_name[0]; 2051 do *c++ = (*n == ' ') ? '_' : *n; 2052 while (*n++); 2053 2054 for (i = 0; i < filter_count; i++) { 2055 const char *filter = filters[i]; 2056 2057 if (filter[0] == '-') { 2058 if (simple_glob(clean_name, filter + 1)) 2059 return false; 2060 } else { 2061 positive = true; 2062 match |= simple_glob(clean_name, filter); 2063 } 2064 } 2065 2066 if (!positive || match) { 2067 matched++; 2068 return true; 2069 } else { 2070 return false; 2071 } 2072 } 2073 2074 int main(int argc, const char *argv[]) 2075 { 2076 int i = 0; 2077 2078 setup_vm(); 2079 hypercall_field = 0; 2080 2081 /* We want xAPIC mode to test MMIO passthrough from L1 (us) to L2. */ 2082 smp_reset_apic(); 2083 2084 argv++; 2085 argc--; 2086 2087 if (!this_cpu_has(X86_FEATURE_VMX)) { 2088 printf("WARNING: vmx not supported, add '-cpu host'\n"); 2089 goto exit; 2090 } 2091 init_bsp_vmx(); 2092 if (test_wanted("test_vmx_feature_control", argv, argc)) { 2093 /* Sets MSR_IA32_FEATURE_CONTROL to 0x5 */ 2094 if (test_vmx_feature_control() != 0) 2095 goto exit; 2096 } else { 2097 enable_vmx(); 2098 } 2099 2100 if (test_wanted("test_vmxon", argv, argc)) { 2101 /* Enables VMX */ 2102 if (test_vmxon() != 0) 2103 goto exit; 2104 } else { 2105 if (vmx_on()) { 2106 report_fail("vmxon"); 2107 goto exit; 2108 } 2109 } 2110 2111 if (test_wanted("test_vmptrld", argv, argc)) 2112 test_vmptrld(); 2113 if (test_wanted("test_vmclear", argv, argc)) 2114 test_vmclear(); 2115 if (test_wanted("test_vmptrst", argv, argc)) 2116 test_vmptrst(); 2117 if (test_wanted("test_vmwrite_vmread", argv, argc)) 2118 test_vmwrite_vmread(); 2119 if (test_wanted("test_vmcs_high", argv, argc)) 2120 test_vmcs_high(); 2121 if (test_wanted("test_vmcs_lifecycle", argv, argc)) 2122 test_vmcs_lifecycle(); 2123 if (test_wanted("test_vmx_caps", argv, argc)) 2124 test_vmx_caps(); 2125 if (test_wanted("test_vmread_flags_touch", argv, argc)) 2126 test_vmread_flags_touch(); 2127 if (test_wanted("test_vmwrite_flags_touch", argv, argc)) 2128 test_vmwrite_flags_touch(); 2129 2130 /* Balance vmxon from test_vmxon. */ 2131 vmx_off(); 2132 2133 for (; vmx_tests[i].name != NULL; i++) { 2134 if (!test_wanted(vmx_tests[i].name, argv, argc)) 2135 continue; 2136 if (test_run(&vmx_tests[i])) 2137 goto exit; 2138 } 2139 2140 if (!matched) 2141 report(matched, "command line didn't match any tests!"); 2142 2143 exit: 2144 return report_summary(); 2145 } 2146