/*
 * x86/vmx.c : Framework for testing nested virtualization
 *	This is a framework to test nested VMX for KVM, which
 * started as a project of GSoC 2013. All test cases should
 * be located in x86/vmx_tests.c and framework related
 * functions should be in this file.
 *
 * How to write test cases?
 *	Add callbacks of test suite in variable "vmx_tests". You can
 * write:
 *	1. init function used for initializing test suite
 *	2. main function for codes running in L2 guest,
 *	3. exit_handler to handle vmexit of L2 to L1
 *	4. syscall handler to handle L2 syscall vmexit
 *	5. vmenter fail handler to handle direct failure of vmenter
 *	6. guest_regs is loaded when vmenter and saved when
 *		vmexit, you can read and set it in exit_handler
 *	If no special function is needed for a test suite, use
 * corresponding basic_* functions as callback. More handlers
 * can be added to "vmx_tests", see details of "struct vmx_test"
 * and function test_run().
 *
 * Currently, vmx test framework only sets up one VCPU and one
 * concurrent guest test environment with same paging for L2 and
 * L1. For usage of EPT, only 1:1 mapped paging is used from VFN
 * to PFN.
27 * 28 * Author : Arthur Chunqi Li <yzt356@gmail.com> 29 */ 30 31 #include "libcflat.h" 32 #include "processor.h" 33 #include "alloc_page.h" 34 #include "vm.h" 35 #include "desc.h" 36 #include "vmx.h" 37 #include "msr.h" 38 #include "smp.h" 39 #include "apic.h" 40 41 u64 *bsp_vmxon_region; 42 struct vmcs *vmcs_root; 43 u32 vpid_cnt; 44 void *guest_stack, *guest_syscall_stack; 45 u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2]; 46 struct regs regs; 47 48 struct vmx_test *current; 49 50 #define MAX_TEST_TEARDOWN_STEPS 10 51 52 struct test_teardown_step { 53 test_teardown_func func; 54 void *data; 55 }; 56 57 static int teardown_count; 58 static struct test_teardown_step teardown_steps[MAX_TEST_TEARDOWN_STEPS]; 59 60 static test_guest_func v2_guest_main; 61 62 u64 hypercall_field; 63 bool launched; 64 static int matched; 65 static int guest_finished; 66 static int in_guest; 67 68 union vmx_basic basic; 69 union vmx_ctrl_msr ctrl_pin_rev; 70 union vmx_ctrl_msr ctrl_cpu_rev[2]; 71 union vmx_ctrl_msr ctrl_exit_rev; 72 union vmx_ctrl_msr ctrl_enter_rev; 73 union vmx_ept_vpid ept_vpid; 74 75 extern struct descriptor_table_ptr gdt64_desc; 76 extern struct descriptor_table_ptr idt_descr; 77 extern struct descriptor_table_ptr tss_descr; 78 extern void *vmx_return; 79 extern void *entry_sysenter; 80 extern void *guest_entry; 81 82 static volatile u32 stage; 83 84 static jmp_buf abort_target; 85 86 struct vmcs_field { 87 u64 mask; 88 u64 encoding; 89 }; 90 91 #define MASK(_bits) GENMASK_ULL((_bits) - 1, 0) 92 #define MASK_NATURAL MASK(sizeof(unsigned long) * 8) 93 94 static struct vmcs_field vmcs_fields[] = { 95 { MASK(16), VPID }, 96 { MASK(16), PINV }, 97 { MASK(16), EPTP_IDX }, 98 99 { MASK(16), GUEST_SEL_ES }, 100 { MASK(16), GUEST_SEL_CS }, 101 { MASK(16), GUEST_SEL_SS }, 102 { MASK(16), GUEST_SEL_DS }, 103 { MASK(16), GUEST_SEL_FS }, 104 { MASK(16), GUEST_SEL_GS }, 105 { MASK(16), GUEST_SEL_LDTR }, 106 { MASK(16), GUEST_SEL_TR }, 107 { MASK(16), GUEST_INT_STATUS }, 108 
109 { MASK(16), HOST_SEL_ES }, 110 { MASK(16), HOST_SEL_CS }, 111 { MASK(16), HOST_SEL_SS }, 112 { MASK(16), HOST_SEL_DS }, 113 { MASK(16), HOST_SEL_FS }, 114 { MASK(16), HOST_SEL_GS }, 115 { MASK(16), HOST_SEL_TR }, 116 117 { MASK(64), IO_BITMAP_A }, 118 { MASK(64), IO_BITMAP_B }, 119 { MASK(64), MSR_BITMAP }, 120 { MASK(64), EXIT_MSR_ST_ADDR }, 121 { MASK(64), EXIT_MSR_LD_ADDR }, 122 { MASK(64), ENTER_MSR_LD_ADDR }, 123 { MASK(64), VMCS_EXEC_PTR }, 124 { MASK(64), TSC_OFFSET }, 125 { MASK(64), APIC_VIRT_ADDR }, 126 { MASK(64), APIC_ACCS_ADDR }, 127 { MASK(64), EPTP }, 128 129 { MASK(64), INFO_PHYS_ADDR }, 130 131 { MASK(64), VMCS_LINK_PTR }, 132 { MASK(64), GUEST_DEBUGCTL }, 133 { MASK(64), GUEST_EFER }, 134 { MASK(64), GUEST_PAT }, 135 { MASK(64), GUEST_PERF_GLOBAL_CTRL }, 136 { MASK(64), GUEST_PDPTE }, 137 138 { MASK(64), HOST_PAT }, 139 { MASK(64), HOST_EFER }, 140 { MASK(64), HOST_PERF_GLOBAL_CTRL }, 141 142 { MASK(32), PIN_CONTROLS }, 143 { MASK(32), CPU_EXEC_CTRL0 }, 144 { MASK(32), EXC_BITMAP }, 145 { MASK(32), PF_ERROR_MASK }, 146 { MASK(32), PF_ERROR_MATCH }, 147 { MASK(32), CR3_TARGET_COUNT }, 148 { MASK(32), EXI_CONTROLS }, 149 { MASK(32), EXI_MSR_ST_CNT }, 150 { MASK(32), EXI_MSR_LD_CNT }, 151 { MASK(32), ENT_CONTROLS }, 152 { MASK(32), ENT_MSR_LD_CNT }, 153 { MASK(32), ENT_INTR_INFO }, 154 { MASK(32), ENT_INTR_ERROR }, 155 { MASK(32), ENT_INST_LEN }, 156 { MASK(32), TPR_THRESHOLD }, 157 { MASK(32), CPU_EXEC_CTRL1 }, 158 159 { MASK(32), VMX_INST_ERROR }, 160 { MASK(32), EXI_REASON }, 161 { MASK(32), EXI_INTR_INFO }, 162 { MASK(32), EXI_INTR_ERROR }, 163 { MASK(32), IDT_VECT_INFO }, 164 { MASK(32), IDT_VECT_ERROR }, 165 { MASK(32), EXI_INST_LEN }, 166 { MASK(32), EXI_INST_INFO }, 167 168 { MASK(32), GUEST_LIMIT_ES }, 169 { MASK(32), GUEST_LIMIT_CS }, 170 { MASK(32), GUEST_LIMIT_SS }, 171 { MASK(32), GUEST_LIMIT_DS }, 172 { MASK(32), GUEST_LIMIT_FS }, 173 { MASK(32), GUEST_LIMIT_GS }, 174 { MASK(32), GUEST_LIMIT_LDTR }, 175 { MASK(32), GUEST_LIMIT_TR }, 
176 { MASK(32), GUEST_LIMIT_GDTR }, 177 { MASK(32), GUEST_LIMIT_IDTR }, 178 { 0x1d0ff, GUEST_AR_ES }, 179 { 0x1f0ff, GUEST_AR_CS }, 180 { 0x1d0ff, GUEST_AR_SS }, 181 { 0x1d0ff, GUEST_AR_DS }, 182 { 0x1d0ff, GUEST_AR_FS }, 183 { 0x1d0ff, GUEST_AR_GS }, 184 { 0x1d0ff, GUEST_AR_LDTR }, 185 { 0x1d0ff, GUEST_AR_TR }, 186 { MASK(32), GUEST_INTR_STATE }, 187 { MASK(32), GUEST_ACTV_STATE }, 188 { MASK(32), GUEST_SMBASE }, 189 { MASK(32), GUEST_SYSENTER_CS }, 190 { MASK(32), PREEMPT_TIMER_VALUE }, 191 192 { MASK(32), HOST_SYSENTER_CS }, 193 194 { MASK_NATURAL, CR0_MASK }, 195 { MASK_NATURAL, CR4_MASK }, 196 { MASK_NATURAL, CR0_READ_SHADOW }, 197 { MASK_NATURAL, CR4_READ_SHADOW }, 198 { MASK_NATURAL, CR3_TARGET_0 }, 199 { MASK_NATURAL, CR3_TARGET_1 }, 200 { MASK_NATURAL, CR3_TARGET_2 }, 201 { MASK_NATURAL, CR3_TARGET_3 }, 202 203 { MASK_NATURAL, EXI_QUALIFICATION }, 204 { MASK_NATURAL, IO_RCX }, 205 { MASK_NATURAL, IO_RSI }, 206 { MASK_NATURAL, IO_RDI }, 207 { MASK_NATURAL, IO_RIP }, 208 { MASK_NATURAL, GUEST_LINEAR_ADDRESS }, 209 210 { MASK_NATURAL, GUEST_CR0 }, 211 { MASK_NATURAL, GUEST_CR3 }, 212 { MASK_NATURAL, GUEST_CR4 }, 213 { MASK_NATURAL, GUEST_BASE_ES }, 214 { MASK_NATURAL, GUEST_BASE_CS }, 215 { MASK_NATURAL, GUEST_BASE_SS }, 216 { MASK_NATURAL, GUEST_BASE_DS }, 217 { MASK_NATURAL, GUEST_BASE_FS }, 218 { MASK_NATURAL, GUEST_BASE_GS }, 219 { MASK_NATURAL, GUEST_BASE_LDTR }, 220 { MASK_NATURAL, GUEST_BASE_TR }, 221 { MASK_NATURAL, GUEST_BASE_GDTR }, 222 { MASK_NATURAL, GUEST_BASE_IDTR }, 223 { MASK_NATURAL, GUEST_DR7 }, 224 { MASK_NATURAL, GUEST_RSP }, 225 { MASK_NATURAL, GUEST_RIP }, 226 { MASK_NATURAL, GUEST_RFLAGS }, 227 { MASK_NATURAL, GUEST_PENDING_DEBUG }, 228 { MASK_NATURAL, GUEST_SYSENTER_ESP }, 229 { MASK_NATURAL, GUEST_SYSENTER_EIP }, 230 231 { MASK_NATURAL, HOST_CR0 }, 232 { MASK_NATURAL, HOST_CR3 }, 233 { MASK_NATURAL, HOST_CR4 }, 234 { MASK_NATURAL, HOST_BASE_FS }, 235 { MASK_NATURAL, HOST_BASE_GS }, 236 { MASK_NATURAL, HOST_BASE_TR }, 237 { 
MASK_NATURAL, HOST_BASE_GDTR }, 238 { MASK_NATURAL, HOST_BASE_IDTR }, 239 { MASK_NATURAL, HOST_SYSENTER_ESP }, 240 { MASK_NATURAL, HOST_SYSENTER_EIP }, 241 { MASK_NATURAL, HOST_RSP }, 242 { MASK_NATURAL, HOST_RIP }, 243 }; 244 245 enum vmcs_field_type { 246 VMCS_FIELD_TYPE_CONTROL = 0, 247 VMCS_FIELD_TYPE_READ_ONLY_DATA = 1, 248 VMCS_FIELD_TYPE_GUEST = 2, 249 VMCS_FIELD_TYPE_HOST = 3, 250 VMCS_FIELD_TYPES, 251 }; 252 253 static inline int vmcs_field_type(struct vmcs_field *f) 254 { 255 return (f->encoding >> VMCS_FIELD_TYPE_SHIFT) & 0x3; 256 } 257 258 static int vmcs_field_readonly(struct vmcs_field *f) 259 { 260 u64 ia32_vmx_misc; 261 262 ia32_vmx_misc = rdmsr(MSR_IA32_VMX_MISC); 263 return !(ia32_vmx_misc & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS) && 264 (vmcs_field_type(f) == VMCS_FIELD_TYPE_READ_ONLY_DATA); 265 } 266 267 static inline u64 vmcs_field_value(struct vmcs_field *f, u8 cookie) 268 { 269 u64 value; 270 271 /* Incorporate the cookie and the field encoding into the value. 
*/ 272 value = cookie; 273 value |= (f->encoding << 8); 274 value |= 0xdeadbeefull << 32; 275 276 return value & f->mask; 277 } 278 279 static void set_vmcs_field(struct vmcs_field *f, u8 cookie) 280 { 281 vmcs_write(f->encoding, vmcs_field_value(f, cookie)); 282 } 283 284 static bool check_vmcs_field(struct vmcs_field *f, u8 cookie) 285 { 286 u64 expected; 287 u64 actual; 288 int ret; 289 290 if (f->encoding == VMX_INST_ERROR) { 291 printf("Skipping volatile field %lx\n", f->encoding); 292 return true; 293 } 294 295 ret = vmcs_read_checking(f->encoding, &actual); 296 assert(!(ret & X86_EFLAGS_CF)); 297 /* Skip VMCS fields that aren't recognized by the CPU */ 298 if (ret & X86_EFLAGS_ZF) 299 return true; 300 301 if (vmcs_field_readonly(f)) { 302 printf("Skipping read-only field %lx\n", f->encoding); 303 return true; 304 } 305 306 expected = vmcs_field_value(f, cookie); 307 actual &= f->mask; 308 309 if (expected == actual) 310 return true; 311 312 printf("FAIL: VMWRITE/VMREAD %lx (expected: %lx, actual: %lx)\n", 313 f->encoding, (unsigned long) expected, (unsigned long) actual); 314 315 return false; 316 } 317 318 static void set_all_vmcs_fields(u8 cookie) 319 { 320 int i; 321 322 for (i = 0; i < ARRAY_SIZE(vmcs_fields); i++) 323 set_vmcs_field(&vmcs_fields[i], cookie); 324 } 325 326 static bool check_all_vmcs_fields(u8 cookie) 327 { 328 bool pass = true; 329 int i; 330 331 for (i = 0; i < ARRAY_SIZE(vmcs_fields); i++) { 332 if (!check_vmcs_field(&vmcs_fields[i], cookie)) 333 pass = false; 334 } 335 336 return pass; 337 } 338 339 static u32 find_vmcs_max_index(void) 340 { 341 u32 idx, width, type, enc; 342 u64 actual; 343 int ret; 344 345 /* scan backwards and stop when found */ 346 for (idx = (1 << 9) - 1; idx >= 0; idx--) { 347 348 /* try all combinations of width and type */ 349 for (type = 0; type < (1 << 2); type++) { 350 for (width = 0; width < (1 << 2) ; width++) { 351 enc = (idx << VMCS_FIELD_INDEX_SHIFT) | 352 (type << VMCS_FIELD_TYPE_SHIFT) | 353 (width 
<< VMCS_FIELD_WIDTH_SHIFT); 354 355 ret = vmcs_read_checking(enc, &actual); 356 assert(!(ret & X86_EFLAGS_CF)); 357 if (!(ret & X86_EFLAGS_ZF)) 358 return idx; 359 } 360 } 361 } 362 /* some VMCS fields should exist */ 363 assert(0); 364 return 0; 365 } 366 367 static void test_vmwrite_vmread(void) 368 { 369 struct vmcs *vmcs = alloc_page(); 370 u32 vmcs_enum_max, max_index = 0; 371 372 vmcs->hdr.revision_id = basic.revision; 373 assert(!vmcs_clear(vmcs)); 374 assert(!make_vmcs_current(vmcs)); 375 376 set_all_vmcs_fields(0x42); 377 report(check_all_vmcs_fields(0x42), "VMWRITE/VMREAD"); 378 379 vmcs_enum_max = (rdmsr(MSR_IA32_VMX_VMCS_ENUM) & VMCS_FIELD_INDEX_MASK) 380 >> VMCS_FIELD_INDEX_SHIFT; 381 max_index = find_vmcs_max_index(); 382 report(vmcs_enum_max == max_index, 383 "VMX_VMCS_ENUM.MAX_INDEX expected: %x, actual: %x", 384 max_index, vmcs_enum_max); 385 386 assert(!vmcs_clear(vmcs)); 387 free_page(vmcs); 388 } 389 390 static void test_vmcs_high(void) 391 { 392 struct vmcs *vmcs = alloc_page(); 393 394 vmcs->hdr.revision_id = basic.revision; 395 assert(!vmcs_clear(vmcs)); 396 assert(!make_vmcs_current(vmcs)); 397 398 vmcs_write(TSC_OFFSET, 0x0123456789ABCDEFull); 399 report(vmcs_read(TSC_OFFSET) == 0x0123456789ABCDEFull, 400 "VMREAD TSC_OFFSET after VMWRITE TSC_OFFSET"); 401 report(vmcs_read(TSC_OFFSET_HI) == 0x01234567ull, 402 "VMREAD TSC_OFFSET_HI after VMWRITE TSC_OFFSET"); 403 vmcs_write(TSC_OFFSET_HI, 0x76543210ul); 404 report(vmcs_read(TSC_OFFSET_HI) == 0x76543210ul, 405 "VMREAD TSC_OFFSET_HI after VMWRITE TSC_OFFSET_HI"); 406 report(vmcs_read(TSC_OFFSET) == 0x7654321089ABCDEFull, 407 "VMREAD TSC_OFFSET after VMWRITE TSC_OFFSET_HI"); 408 409 assert(!vmcs_clear(vmcs)); 410 free_page(vmcs); 411 } 412 413 static void test_vmcs_lifecycle(void) 414 { 415 struct vmcs *vmcs[2] = {}; 416 int i; 417 418 for (i = 0; i < ARRAY_SIZE(vmcs); i++) { 419 vmcs[i] = alloc_page(); 420 vmcs[i]->hdr.revision_id = basic.revision; 421 } 422 423 #define VMPTRLD(_i) do { \ 424 
assert(_i < ARRAY_SIZE(vmcs)); \ 425 assert(!make_vmcs_current(vmcs[_i])); \ 426 printf("VMPTRLD VMCS%d\n", (_i)); \ 427 } while (0) 428 429 #define VMCLEAR(_i) do { \ 430 assert(_i < ARRAY_SIZE(vmcs)); \ 431 assert(!vmcs_clear(vmcs[_i])); \ 432 printf("VMCLEAR VMCS%d\n", (_i)); \ 433 } while (0) 434 435 VMCLEAR(0); 436 VMPTRLD(0); 437 set_all_vmcs_fields(0); 438 report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]"); 439 440 VMCLEAR(0); 441 VMPTRLD(0); 442 report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]"); 443 444 VMCLEAR(1); 445 report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]"); 446 447 VMPTRLD(1); 448 set_all_vmcs_fields(1); 449 report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VCMS1]"); 450 451 VMPTRLD(0); 452 report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0,VCMS1]"); 453 VMPTRLD(1); 454 report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VCMS1]"); 455 VMPTRLD(1); 456 report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VCMS1]"); 457 458 VMCLEAR(0); 459 report(check_all_vmcs_fields(1), "current:VMCS1 active:[VCMS1]"); 460 461 /* VMPTRLD should not erase VMWRITEs to the current VMCS */ 462 set_all_vmcs_fields(2); 463 VMPTRLD(1); 464 report(check_all_vmcs_fields(2), "current:VMCS1 active:[VCMS1]"); 465 466 for (i = 0; i < ARRAY_SIZE(vmcs); i++) { 467 VMCLEAR(i); 468 free_page(vmcs[i]); 469 } 470 471 #undef VMPTRLD 472 #undef VMCLEAR 473 } 474 475 void vmx_set_test_stage(u32 s) 476 { 477 barrier(); 478 stage = s; 479 barrier(); 480 } 481 482 u32 vmx_get_test_stage(void) 483 { 484 u32 s; 485 486 barrier(); 487 s = stage; 488 barrier(); 489 return s; 490 } 491 492 void vmx_inc_test_stage(void) 493 { 494 barrier(); 495 stage++; 496 barrier(); 497 } 498 499 /* entry_sysenter */ 500 asm( 501 ".align 4, 0x90\n\t" 502 ".globl entry_sysenter\n\t" 503 "entry_sysenter:\n\t" 504 SAVE_GPR 505 " and $0xf, %rax\n\t" 506 " mov %rax, %rdi\n\t" 507 " call syscall_handler\n\t" 508 LOAD_GPR 509 " 
vmresume\n\t" 510 ); 511 512 static void __attribute__((__used__)) syscall_handler(u64 syscall_no) 513 { 514 if (current->syscall_handler) 515 current->syscall_handler(syscall_no); 516 } 517 518 static const char * const exit_reason_descriptions[] = { 519 [VMX_EXC_NMI] = "VMX_EXC_NMI", 520 [VMX_EXTINT] = "VMX_EXTINT", 521 [VMX_TRIPLE_FAULT] = "VMX_TRIPLE_FAULT", 522 [VMX_INIT] = "VMX_INIT", 523 [VMX_SIPI] = "VMX_SIPI", 524 [VMX_SMI_IO] = "VMX_SMI_IO", 525 [VMX_SMI_OTHER] = "VMX_SMI_OTHER", 526 [VMX_INTR_WINDOW] = "VMX_INTR_WINDOW", 527 [VMX_NMI_WINDOW] = "VMX_NMI_WINDOW", 528 [VMX_TASK_SWITCH] = "VMX_TASK_SWITCH", 529 [VMX_CPUID] = "VMX_CPUID", 530 [VMX_GETSEC] = "VMX_GETSEC", 531 [VMX_HLT] = "VMX_HLT", 532 [VMX_INVD] = "VMX_INVD", 533 [VMX_INVLPG] = "VMX_INVLPG", 534 [VMX_RDPMC] = "VMX_RDPMC", 535 [VMX_RDTSC] = "VMX_RDTSC", 536 [VMX_RSM] = "VMX_RSM", 537 [VMX_VMCALL] = "VMX_VMCALL", 538 [VMX_VMCLEAR] = "VMX_VMCLEAR", 539 [VMX_VMLAUNCH] = "VMX_VMLAUNCH", 540 [VMX_VMPTRLD] = "VMX_VMPTRLD", 541 [VMX_VMPTRST] = "VMX_VMPTRST", 542 [VMX_VMREAD] = "VMX_VMREAD", 543 [VMX_VMRESUME] = "VMX_VMRESUME", 544 [VMX_VMWRITE] = "VMX_VMWRITE", 545 [VMX_VMXOFF] = "VMX_VMXOFF", 546 [VMX_VMXON] = "VMX_VMXON", 547 [VMX_CR] = "VMX_CR", 548 [VMX_DR] = "VMX_DR", 549 [VMX_IO] = "VMX_IO", 550 [VMX_RDMSR] = "VMX_RDMSR", 551 [VMX_WRMSR] = "VMX_WRMSR", 552 [VMX_FAIL_STATE] = "VMX_FAIL_STATE", 553 [VMX_FAIL_MSR] = "VMX_FAIL_MSR", 554 [VMX_MWAIT] = "VMX_MWAIT", 555 [VMX_MTF] = "VMX_MTF", 556 [VMX_MONITOR] = "VMX_MONITOR", 557 [VMX_PAUSE] = "VMX_PAUSE", 558 [VMX_FAIL_MCHECK] = "VMX_FAIL_MCHECK", 559 [VMX_TPR_THRESHOLD] = "VMX_TPR_THRESHOLD", 560 [VMX_APIC_ACCESS] = "VMX_APIC_ACCESS", 561 [VMX_EOI_INDUCED] = "VMX_EOI_INDUCED", 562 [VMX_GDTR_IDTR] = "VMX_GDTR_IDTR", 563 [VMX_LDTR_TR] = "VMX_LDTR_TR", 564 [VMX_EPT_VIOLATION] = "VMX_EPT_VIOLATION", 565 [VMX_EPT_MISCONFIG] = "VMX_EPT_MISCONFIG", 566 [VMX_INVEPT] = "VMX_INVEPT", 567 [VMX_PREEMPT] = "VMX_PREEMPT", 568 [VMX_INVVPID] = "VMX_INVVPID", 569 
[VMX_WBINVD] = "VMX_WBINVD", 570 [VMX_XSETBV] = "VMX_XSETBV", 571 [VMX_APIC_WRITE] = "VMX_APIC_WRITE", 572 [VMX_RDRAND] = "VMX_RDRAND", 573 [VMX_INVPCID] = "VMX_INVPCID", 574 [VMX_VMFUNC] = "VMX_VMFUNC", 575 [VMX_RDSEED] = "VMX_RDSEED", 576 [VMX_PML_FULL] = "VMX_PML_FULL", 577 [VMX_XSAVES] = "VMX_XSAVES", 578 [VMX_XRSTORS] = "VMX_XRSTORS", 579 }; 580 581 const char *exit_reason_description(u64 reason) 582 { 583 if (reason >= ARRAY_SIZE(exit_reason_descriptions)) 584 return "(unknown)"; 585 return exit_reason_descriptions[reason] ? : "(unused)"; 586 } 587 588 void print_vmexit_info(union exit_reason exit_reason) 589 { 590 u64 guest_rip, guest_rsp; 591 ulong exit_qual = vmcs_read(EXI_QUALIFICATION); 592 guest_rip = vmcs_read(GUEST_RIP); 593 guest_rsp = vmcs_read(GUEST_RSP); 594 printf("VMEXIT info:\n"); 595 printf("\tvmexit reason = %u\n", exit_reason.basic); 596 printf("\tfailed vmentry = %u\n", !!exit_reason.failed_vmentry); 597 printf("\texit qualification = %#lx\n", exit_qual); 598 printf("\tguest_rip = %#lx\n", guest_rip); 599 printf("\tRAX=%#lx RBX=%#lx RCX=%#lx RDX=%#lx\n", 600 regs.rax, regs.rbx, regs.rcx, regs.rdx); 601 printf("\tRSP=%#lx RBP=%#lx RSI=%#lx RDI=%#lx\n", 602 guest_rsp, regs.rbp, regs.rsi, regs.rdi); 603 printf("\tR8 =%#lx R9 =%#lx R10=%#lx R11=%#lx\n", 604 regs.r8, regs.r9, regs.r10, regs.r11); 605 printf("\tR12=%#lx R13=%#lx R14=%#lx R15=%#lx\n", 606 regs.r12, regs.r13, regs.r14, regs.r15); 607 } 608 609 void print_vmentry_failure_info(struct vmentry_result *result) 610 { 611 if (result->entered) 612 return; 613 614 if (result->vm_fail) { 615 printf("VM-Fail on %s: ", result->instr); 616 switch (result->flags & VMX_ENTRY_FLAGS) { 617 case X86_EFLAGS_CF: 618 printf("current-VMCS pointer is not valid.\n"); 619 break; 620 case X86_EFLAGS_ZF: 621 printf("error number is %ld. 
See Intel 30.4.\n", 622 vmcs_read(VMX_INST_ERROR)); 623 break; 624 default: 625 printf("unexpected flags %lx!\n", result->flags); 626 } 627 } else { 628 u64 qual = vmcs_read(EXI_QUALIFICATION); 629 630 printf("VM-Exit failure on %s (reason=%#x, qual=%#lx): ", 631 result->instr, result->exit_reason.full, qual); 632 633 switch (result->exit_reason.basic) { 634 case VMX_FAIL_STATE: 635 printf("invalid guest state\n"); 636 break; 637 case VMX_FAIL_MSR: 638 printf("MSR loading\n"); 639 break; 640 case VMX_FAIL_MCHECK: 641 printf("machine-check event\n"); 642 break; 643 default: 644 printf("unexpected basic exit reason %u\n", 645 result->exit_reason.basic); 646 } 647 648 if (!result->exit_reason.failed_vmentry) 649 printf("\tVMX_ENTRY_FAILURE BIT NOT SET!\n"); 650 651 if (result->exit_reason.full & 0x7fff0000) 652 printf("\tRESERVED BITS SET!\n"); 653 } 654 } 655 656 /* 657 * VMCLEAR should ensures all VMCS state is flushed to the VMCS 658 * region in memory. 659 */ 660 static void test_vmclear_flushing(void) 661 { 662 struct vmcs *vmcs[3] = {}; 663 int i; 664 665 for (i = 0; i < ARRAY_SIZE(vmcs); i++) { 666 vmcs[i] = alloc_page(); 667 } 668 669 vmcs[0]->hdr.revision_id = basic.revision; 670 assert(!vmcs_clear(vmcs[0])); 671 assert(!make_vmcs_current(vmcs[0])); 672 set_all_vmcs_fields(0x86); 673 674 assert(!vmcs_clear(vmcs[0])); 675 memcpy(vmcs[1], vmcs[0], basic.size); 676 assert(!make_vmcs_current(vmcs[1])); 677 report(check_all_vmcs_fields(0x86), 678 "test vmclear flush (current VMCS)"); 679 680 set_all_vmcs_fields(0x87); 681 assert(!make_vmcs_current(vmcs[0])); 682 assert(!vmcs_clear(vmcs[1])); 683 memcpy(vmcs[2], vmcs[1], basic.size); 684 assert(!make_vmcs_current(vmcs[2])); 685 report(check_all_vmcs_fields(0x87), 686 "test vmclear flush (!current VMCS)"); 687 688 for (i = 0; i < ARRAY_SIZE(vmcs); i++) { 689 assert(!vmcs_clear(vmcs[i])); 690 free_page(vmcs[i]); 691 } 692 } 693 694 static void test_vmclear(void) 695 { 696 struct vmcs *tmp_root; 697 int width = 
cpuid_maxphyaddr(); 698 699 /* 700 * Note- The tests below do not necessarily have a 701 * valid VMCS, but that's ok since the invalid vmcs 702 * is only used for a specific test and is discarded 703 * without touching its contents 704 */ 705 706 /* Unaligned page access */ 707 tmp_root = (struct vmcs *)((intptr_t)vmcs_root + 1); 708 report(vmcs_clear(tmp_root) == 1, "test vmclear with unaligned vmcs"); 709 710 /* gpa bits beyond physical address width are set*/ 711 tmp_root = (struct vmcs *)((intptr_t)vmcs_root | 712 ((u64)1 << (width+1))); 713 report(vmcs_clear(tmp_root) == 1, 714 "test vmclear with vmcs address bits set beyond physical address width"); 715 716 /* Pass VMXON region */ 717 tmp_root = (struct vmcs *)bsp_vmxon_region; 718 report(vmcs_clear(tmp_root) == 1, "test vmclear with vmxon region"); 719 720 /* Valid VMCS */ 721 report(vmcs_clear(vmcs_root) == 0, 722 "test vmclear with valid vmcs region"); 723 724 test_vmclear_flushing(); 725 } 726 727 static void __attribute__((__used__)) guest_main(void) 728 { 729 if (current->v2) 730 v2_guest_main(); 731 else 732 current->guest_main(); 733 } 734 735 /* guest_entry */ 736 asm( 737 ".align 4, 0x90\n\t" 738 ".globl entry_guest\n\t" 739 "guest_entry:\n\t" 740 " call guest_main\n\t" 741 " mov $1, %edi\n\t" 742 " call hypercall\n\t" 743 ); 744 745 /* EPT paging structure related functions */ 746 /* split_large_ept_entry: Split a 2M/1G large page into 512 smaller PTEs. 
747 @ptep : large page table entry to split 748 @level : level of ptep (2 or 3) 749 */ 750 static void split_large_ept_entry(unsigned long *ptep, int level) 751 { 752 unsigned long *new_pt; 753 unsigned long gpa; 754 unsigned long pte; 755 unsigned long prototype; 756 int i; 757 758 pte = *ptep; 759 assert(pte & EPT_PRESENT); 760 assert(pte & EPT_LARGE_PAGE); 761 assert(level == 2 || level == 3); 762 763 new_pt = alloc_page(); 764 assert(new_pt); 765 766 prototype = pte & ~EPT_ADDR_MASK; 767 if (level == 2) 768 prototype &= ~EPT_LARGE_PAGE; 769 770 gpa = pte & EPT_ADDR_MASK; 771 for (i = 0; i < EPT_PGDIR_ENTRIES; i++) { 772 new_pt[i] = prototype | gpa; 773 gpa += 1ul << EPT_LEVEL_SHIFT(level - 1); 774 } 775 776 pte &= ~EPT_LARGE_PAGE; 777 pte &= ~EPT_ADDR_MASK; 778 pte |= virt_to_phys(new_pt); 779 780 *ptep = pte; 781 } 782 783 /* install_ept_entry : Install a page to a given level in EPT 784 @pml4 : addr of pml4 table 785 @pte_level : level of PTE to set 786 @guest_addr : physical address of guest 787 @pte : pte value to set 788 @pt_page : address of page table, NULL for a new page 789 */ 790 void install_ept_entry(unsigned long *pml4, 791 int pte_level, 792 unsigned long guest_addr, 793 unsigned long pte, 794 unsigned long *pt_page) 795 { 796 int level; 797 unsigned long *pt = pml4; 798 unsigned offset; 799 800 /* EPT only uses 48 bits of GPA. 
*/ 801 assert(guest_addr < (1ul << 48)); 802 803 for (level = EPT_PAGE_LEVEL; level > pte_level; --level) { 804 offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) 805 & EPT_PGDIR_MASK; 806 if (!(pt[offset] & (EPT_PRESENT))) { 807 unsigned long *new_pt = pt_page; 808 if (!new_pt) 809 new_pt = alloc_page(); 810 else 811 pt_page = 0; 812 memset(new_pt, 0, PAGE_SIZE); 813 pt[offset] = virt_to_phys(new_pt) 814 | EPT_RA | EPT_WA | EPT_EA; 815 } else if (pt[offset] & EPT_LARGE_PAGE) 816 split_large_ept_entry(&pt[offset], level); 817 pt = phys_to_virt(pt[offset] & EPT_ADDR_MASK); 818 } 819 offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) & EPT_PGDIR_MASK; 820 pt[offset] = pte; 821 } 822 823 /* Map a page, @perm is the permission of the page */ 824 void install_ept(unsigned long *pml4, 825 unsigned long phys, 826 unsigned long guest_addr, 827 u64 perm) 828 { 829 install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0); 830 } 831 832 /* Map a 1G-size page */ 833 void install_1g_ept(unsigned long *pml4, 834 unsigned long phys, 835 unsigned long guest_addr, 836 u64 perm) 837 { 838 install_ept_entry(pml4, 3, guest_addr, 839 (phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0); 840 } 841 842 /* Map a 2M-size page */ 843 void install_2m_ept(unsigned long *pml4, 844 unsigned long phys, 845 unsigned long guest_addr, 846 u64 perm) 847 { 848 install_ept_entry(pml4, 2, guest_addr, 849 (phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0); 850 } 851 852 /* setup_ept_range : Setup a range of 1:1 mapped page to EPT paging structure. 
853 @start : start address of guest page 854 @len : length of address to be mapped 855 @map_1g : whether 1G page map is used 856 @map_2m : whether 2M page map is used 857 @perm : permission for every page 858 */ 859 void setup_ept_range(unsigned long *pml4, unsigned long start, 860 unsigned long len, int map_1g, int map_2m, u64 perm) 861 { 862 u64 phys = start; 863 u64 max = (u64)len + (u64)start; 864 865 if (map_1g) { 866 while (phys + PAGE_SIZE_1G <= max) { 867 install_1g_ept(pml4, phys, phys, perm); 868 phys += PAGE_SIZE_1G; 869 } 870 } 871 if (map_2m) { 872 while (phys + PAGE_SIZE_2M <= max) { 873 install_2m_ept(pml4, phys, phys, perm); 874 phys += PAGE_SIZE_2M; 875 } 876 } 877 while (phys + PAGE_SIZE <= max) { 878 install_ept(pml4, phys, phys, perm); 879 phys += PAGE_SIZE; 880 } 881 } 882 883 /* get_ept_pte : Get the PTE of a given level in EPT, 884 @level == 1 means get the latest level*/ 885 bool get_ept_pte(unsigned long *pml4, unsigned long guest_addr, int level, 886 unsigned long *pte) 887 { 888 int l; 889 unsigned long *pt = pml4, iter_pte; 890 unsigned offset; 891 892 assert(level >= 1 && level <= 4); 893 894 for (l = EPT_PAGE_LEVEL; ; --l) { 895 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 896 iter_pte = pt[offset]; 897 if (l == level) 898 break; 899 if (l < 4 && (iter_pte & EPT_LARGE_PAGE)) 900 return false; 901 if (!(iter_pte & (EPT_PRESENT))) 902 return false; 903 pt = (unsigned long *)(iter_pte & EPT_ADDR_MASK); 904 } 905 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 906 if (pte) 907 *pte = pt[offset]; 908 return true; 909 } 910 911 static void clear_ept_ad_pte(unsigned long *pml4, unsigned long guest_addr) 912 { 913 int l; 914 unsigned long *pt = pml4; 915 u64 pte; 916 unsigned offset; 917 918 for (l = EPT_PAGE_LEVEL; ; --l) { 919 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 920 pt[offset] &= ~(EPT_ACCESS_FLAG|EPT_DIRTY_FLAG); 921 pte = pt[offset]; 922 if (l == 1 || (l < 4 && (pte & 
EPT_LARGE_PAGE))) 923 break; 924 pt = (unsigned long *)(pte & EPT_ADDR_MASK); 925 } 926 } 927 928 /* clear_ept_ad : Clear EPT A/D bits for the page table walk and the 929 final GPA of a guest address. */ 930 void clear_ept_ad(unsigned long *pml4, u64 guest_cr3, 931 unsigned long guest_addr) 932 { 933 int l; 934 unsigned long *pt = (unsigned long *)guest_cr3, gpa; 935 u64 pte, offset_in_page; 936 unsigned offset; 937 938 for (l = EPT_PAGE_LEVEL; ; --l) { 939 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 940 941 clear_ept_ad_pte(pml4, (u64) &pt[offset]); 942 pte = pt[offset]; 943 if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK))) 944 break; 945 if (!(pte & PT_PRESENT_MASK)) 946 return; 947 pt = (unsigned long *)(pte & PT_ADDR_MASK); 948 } 949 950 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 951 offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1); 952 gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page); 953 clear_ept_ad_pte(pml4, gpa); 954 } 955 956 /* check_ept_ad : Check the content of EPT A/D bits for the page table 957 walk and the final GPA of a guest address. 
*/ 958 void check_ept_ad(unsigned long *pml4, u64 guest_cr3, 959 unsigned long guest_addr, int expected_gpa_ad, 960 int expected_pt_ad) 961 { 962 int l; 963 unsigned long *pt = (unsigned long *)guest_cr3, gpa; 964 u64 ept_pte, pte, offset_in_page; 965 unsigned offset; 966 bool bad_pt_ad = false; 967 968 for (l = EPT_PAGE_LEVEL; ; --l) { 969 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 970 971 if (!get_ept_pte(pml4, (u64) &pt[offset], 1, &ept_pte)) { 972 printf("EPT - guest level %d page table is not mapped.\n", l); 973 return; 974 } 975 976 if (!bad_pt_ad) { 977 bad_pt_ad |= (ept_pte & (EPT_ACCESS_FLAG|EPT_DIRTY_FLAG)) != expected_pt_ad; 978 if (bad_pt_ad) 979 report(false, 980 "EPT - guest level %d page table A=%d/D=%d", 981 l, 982 !!(expected_pt_ad & EPT_ACCESS_FLAG), 983 !!(expected_pt_ad & EPT_DIRTY_FLAG)); 984 } 985 986 pte = pt[offset]; 987 if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK))) 988 break; 989 if (!(pte & PT_PRESENT_MASK)) 990 return; 991 pt = (unsigned long *)(pte & PT_ADDR_MASK); 992 } 993 994 if (!bad_pt_ad) 995 report(true, "EPT - guest page table structures A=%d/D=%d", 996 !!(expected_pt_ad & EPT_ACCESS_FLAG), 997 !!(expected_pt_ad & EPT_DIRTY_FLAG)); 998 999 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 1000 offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1); 1001 gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page); 1002 1003 if (!get_ept_pte(pml4, gpa, 1, &ept_pte)) { 1004 report(false, "EPT - guest physical address is not mapped"); 1005 return; 1006 } 1007 report((ept_pte & (EPT_ACCESS_FLAG | EPT_DIRTY_FLAG)) == expected_gpa_ad, 1008 "EPT - guest physical address A=%d/D=%d", 1009 !!(expected_gpa_ad & EPT_ACCESS_FLAG), 1010 !!(expected_gpa_ad & EPT_DIRTY_FLAG)); 1011 } 1012 1013 1014 void ept_sync(int type, u64 eptp) 1015 { 1016 switch (type) { 1017 case INVEPT_SINGLE: 1018 if (ept_vpid.val & EPT_CAP_INVEPT_SINGLE) { 1019 invept(INVEPT_SINGLE, eptp); 1020 break; 1021 } 1022 /* else 
fall through */ 1023 case INVEPT_GLOBAL: 1024 if (ept_vpid.val & EPT_CAP_INVEPT_ALL) { 1025 invept(INVEPT_GLOBAL, eptp); 1026 break; 1027 } 1028 /* else fall through */ 1029 default: 1030 printf("WARNING: invept is not supported!\n"); 1031 } 1032 } 1033 1034 void set_ept_pte(unsigned long *pml4, unsigned long guest_addr, 1035 int level, u64 pte_val) 1036 { 1037 int l; 1038 unsigned long *pt = pml4; 1039 unsigned offset; 1040 1041 assert(level >= 1 && level <= 4); 1042 1043 for (l = EPT_PAGE_LEVEL; ; --l) { 1044 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 1045 if (l == level) 1046 break; 1047 assert(pt[offset] & EPT_PRESENT); 1048 pt = (unsigned long *)(pt[offset] & EPT_ADDR_MASK); 1049 } 1050 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 1051 pt[offset] = pte_val; 1052 } 1053 1054 bool ept_2m_supported(void) 1055 { 1056 return ept_vpid.val & EPT_CAP_2M_PAGE; 1057 } 1058 1059 bool ept_1g_supported(void) 1060 { 1061 return ept_vpid.val & EPT_CAP_1G_PAGE; 1062 } 1063 1064 bool ept_huge_pages_supported(int level) 1065 { 1066 if (level == 2) 1067 return ept_2m_supported(); 1068 else if (level == 3) 1069 return ept_1g_supported(); 1070 else 1071 return false; 1072 } 1073 1074 bool ept_execute_only_supported(void) 1075 { 1076 return ept_vpid.val & EPT_CAP_WT; 1077 } 1078 1079 bool ept_ad_bits_supported(void) 1080 { 1081 return ept_vpid.val & EPT_CAP_AD_FLAG; 1082 } 1083 1084 void vpid_sync(int type, u16 vpid) 1085 { 1086 switch(type) { 1087 case INVVPID_CONTEXT_GLOBAL: 1088 if (ept_vpid.val & VPID_CAP_INVVPID_CXTGLB) { 1089 invvpid(INVVPID_CONTEXT_GLOBAL, vpid, 0); 1090 break; 1091 } 1092 case INVVPID_ALL: 1093 if (ept_vpid.val & VPID_CAP_INVVPID_ALL) { 1094 invvpid(INVVPID_ALL, vpid, 0); 1095 break; 1096 } 1097 default: 1098 printf("WARNING: invvpid is not supported\n"); 1099 } 1100 } 1101 1102 static void init_vmcs_ctrl(void) 1103 { 1104 /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ 1105 /* 26.2.1.1 */ 1106 
vmcs_write(PIN_CONTROLS, ctrl_pin); 1107 /* Disable VMEXIT of IO instruction */ 1108 vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]); 1109 if (ctrl_cpu_rev[0].set & CPU_SECONDARY) { 1110 ctrl_cpu[1] = (ctrl_cpu[1] | ctrl_cpu_rev[1].set) & 1111 ctrl_cpu_rev[1].clr; 1112 vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]); 1113 } 1114 vmcs_write(CR3_TARGET_COUNT, 0); 1115 vmcs_write(VPID, ++vpid_cnt); 1116 } 1117 1118 static void init_vmcs_host(void) 1119 { 1120 /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ 1121 /* 26.2.1.2 */ 1122 vmcs_write(HOST_EFER, rdmsr(MSR_EFER)); 1123 1124 /* 26.2.1.3 */ 1125 vmcs_write(ENT_CONTROLS, ctrl_enter); 1126 vmcs_write(EXI_CONTROLS, ctrl_exit); 1127 1128 /* 26.2.2 */ 1129 vmcs_write(HOST_CR0, read_cr0()); 1130 vmcs_write(HOST_CR3, read_cr3()); 1131 vmcs_write(HOST_CR4, read_cr4()); 1132 vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter)); 1133 vmcs_write(HOST_SYSENTER_CS, KERNEL_CS); 1134 1135 /* 26.2.3 */ 1136 vmcs_write(HOST_SEL_CS, KERNEL_CS); 1137 vmcs_write(HOST_SEL_SS, KERNEL_DS); 1138 vmcs_write(HOST_SEL_DS, KERNEL_DS); 1139 vmcs_write(HOST_SEL_ES, KERNEL_DS); 1140 vmcs_write(HOST_SEL_FS, KERNEL_DS); 1141 vmcs_write(HOST_SEL_GS, KERNEL_DS); 1142 vmcs_write(HOST_SEL_TR, TSS_MAIN); 1143 vmcs_write(HOST_BASE_TR, tss_descr.base); 1144 vmcs_write(HOST_BASE_GDTR, gdt64_desc.base); 1145 vmcs_write(HOST_BASE_IDTR, idt_descr.base); 1146 vmcs_write(HOST_BASE_FS, 0); 1147 vmcs_write(HOST_BASE_GS, 0); 1148 1149 /* Set other vmcs area */ 1150 vmcs_write(PF_ERROR_MASK, 0); 1151 vmcs_write(PF_ERROR_MATCH, 0); 1152 vmcs_write(VMCS_LINK_PTR, ~0ul); 1153 vmcs_write(VMCS_LINK_PTR_HI, ~0ul); 1154 vmcs_write(HOST_RIP, (u64)(&vmx_return)); 1155 } 1156 1157 static void init_vmcs_guest(void) 1158 { 1159 /* 26.3 CHECKING AND LOADING GUEST STATE */ 1160 ulong guest_cr0, guest_cr4, guest_cr3; 1161 /* 26.3.1.1 */ 1162 guest_cr0 = read_cr0(); 1163 guest_cr4 = read_cr4(); 1164 guest_cr3 = read_cr3(); 1165 if (ctrl_enter & ENT_GUEST_64) { 1166 guest_cr0 |= 
X86_CR0_PG; 1167 guest_cr4 |= X86_CR4_PAE; 1168 } 1169 if ((ctrl_enter & ENT_GUEST_64) == 0) 1170 guest_cr4 &= (~X86_CR4_PCIDE); 1171 if (guest_cr0 & X86_CR0_PG) 1172 guest_cr0 |= X86_CR0_PE; 1173 vmcs_write(GUEST_CR0, guest_cr0); 1174 vmcs_write(GUEST_CR3, guest_cr3); 1175 vmcs_write(GUEST_CR4, guest_cr4); 1176 vmcs_write(GUEST_SYSENTER_CS, KERNEL_CS); 1177 vmcs_write(GUEST_SYSENTER_ESP, 1178 (u64)(guest_syscall_stack + PAGE_SIZE - 1)); 1179 vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter)); 1180 vmcs_write(GUEST_DR7, 0); 1181 vmcs_write(GUEST_EFER, rdmsr(MSR_EFER)); 1182 1183 /* 26.3.1.2 */ 1184 vmcs_write(GUEST_SEL_CS, KERNEL_CS); 1185 vmcs_write(GUEST_SEL_SS, KERNEL_DS); 1186 vmcs_write(GUEST_SEL_DS, KERNEL_DS); 1187 vmcs_write(GUEST_SEL_ES, KERNEL_DS); 1188 vmcs_write(GUEST_SEL_FS, KERNEL_DS); 1189 vmcs_write(GUEST_SEL_GS, KERNEL_DS); 1190 vmcs_write(GUEST_SEL_TR, TSS_MAIN); 1191 vmcs_write(GUEST_SEL_LDTR, 0); 1192 1193 vmcs_write(GUEST_BASE_CS, 0); 1194 vmcs_write(GUEST_BASE_ES, 0); 1195 vmcs_write(GUEST_BASE_SS, 0); 1196 vmcs_write(GUEST_BASE_DS, 0); 1197 vmcs_write(GUEST_BASE_FS, 0); 1198 vmcs_write(GUEST_BASE_GS, 0); 1199 vmcs_write(GUEST_BASE_TR, tss_descr.base); 1200 vmcs_write(GUEST_BASE_LDTR, 0); 1201 1202 vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF); 1203 vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF); 1204 vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF); 1205 vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF); 1206 vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF); 1207 vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF); 1208 vmcs_write(GUEST_LIMIT_LDTR, 0xffff); 1209 vmcs_write(GUEST_LIMIT_TR, tss_descr.limit); 1210 1211 vmcs_write(GUEST_AR_CS, 0xa09b); 1212 vmcs_write(GUEST_AR_DS, 0xc093); 1213 vmcs_write(GUEST_AR_ES, 0xc093); 1214 vmcs_write(GUEST_AR_FS, 0xc093); 1215 vmcs_write(GUEST_AR_GS, 0xc093); 1216 vmcs_write(GUEST_AR_SS, 0xc093); 1217 vmcs_write(GUEST_AR_LDTR, 0x82); 1218 vmcs_write(GUEST_AR_TR, 0x8b); 1219 1220 /* 26.3.1.3 */ 1221 vmcs_write(GUEST_BASE_GDTR, gdt64_desc.base); 1222 
vmcs_write(GUEST_BASE_IDTR, idt_descr.base); 1223 vmcs_write(GUEST_LIMIT_GDTR, gdt64_desc.limit); 1224 vmcs_write(GUEST_LIMIT_IDTR, idt_descr.limit); 1225 1226 /* 26.3.1.4 */ 1227 vmcs_write(GUEST_RIP, (u64)(&guest_entry)); 1228 vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1)); 1229 vmcs_write(GUEST_RFLAGS, X86_EFLAGS_FIXED); 1230 1231 /* 26.3.1.5 */ 1232 vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE); 1233 vmcs_write(GUEST_INTR_STATE, 0); 1234 } 1235 1236 static int init_vmcs(struct vmcs **vmcs) 1237 { 1238 *vmcs = alloc_page(); 1239 (*vmcs)->hdr.revision_id = basic.revision; 1240 /* vmclear first to init vmcs */ 1241 if (vmcs_clear(*vmcs)) { 1242 printf("%s : vmcs_clear error\n", __func__); 1243 return 1; 1244 } 1245 1246 if (make_vmcs_current(*vmcs)) { 1247 printf("%s : make_vmcs_current error\n", __func__); 1248 return 1; 1249 } 1250 1251 /* All settings to pin/exit/enter/cpu 1252 control fields should be placed here */ 1253 ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI; 1254 ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64; 1255 ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64); 1256 /* DIsable IO instruction VMEXIT now */ 1257 ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP)); 1258 ctrl_cpu[1] = 0; 1259 1260 ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr; 1261 ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr; 1262 ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr; 1263 ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr; 1264 1265 init_vmcs_ctrl(); 1266 init_vmcs_host(); 1267 init_vmcs_guest(); 1268 return 0; 1269 } 1270 1271 void enable_vmx(void) 1272 { 1273 bool vmx_enabled = 1274 rdmsr(MSR_IA32_FEATURE_CONTROL) & 1275 FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; 1276 1277 if (!vmx_enabled) { 1278 wrmsr(MSR_IA32_FEATURE_CONTROL, 1279 FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX | 1280 FEATURE_CONTROL_LOCKED); 1281 } 1282 } 1283 1284 static void init_vmx_caps(void) 1285 { 1286 basic.val = 
rdmsr(MSR_IA32_VMX_BASIC); 1287 ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN 1288 : MSR_IA32_VMX_PINBASED_CTLS); 1289 ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT 1290 : MSR_IA32_VMX_EXIT_CTLS); 1291 ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY 1292 : MSR_IA32_VMX_ENTRY_CTLS); 1293 ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC 1294 : MSR_IA32_VMX_PROCBASED_CTLS); 1295 if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0) 1296 ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2); 1297 else 1298 ctrl_cpu_rev[1].val = 0; 1299 if ((ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID)) != 0) 1300 ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); 1301 else 1302 ept_vpid.val = 0; 1303 } 1304 1305 void init_vmx(u64 *vmxon_region) 1306 { 1307 ulong fix_cr0_set, fix_cr0_clr; 1308 ulong fix_cr4_set, fix_cr4_clr; 1309 1310 fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0); 1311 fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1); 1312 fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0); 1313 fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1); 1314 1315 write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set); 1316 write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE); 1317 1318 *vmxon_region = basic.revision; 1319 } 1320 1321 static void alloc_bsp_vmx_pages(void) 1322 { 1323 bsp_vmxon_region = alloc_page(); 1324 guest_stack = alloc_page(); 1325 guest_syscall_stack = alloc_page(); 1326 vmcs_root = alloc_page(); 1327 } 1328 1329 static void init_bsp_vmx(void) 1330 { 1331 init_vmx_caps(); 1332 alloc_bsp_vmx_pages(); 1333 init_vmx(bsp_vmxon_region); 1334 } 1335 1336 static void do_vmxon_off(void *data) 1337 { 1338 vmx_on(); 1339 vmx_off(); 1340 } 1341 1342 static void do_write_feature_control(void *data) 1343 { 1344 wrmsr(MSR_IA32_FEATURE_CONTROL, 0); 1345 } 1346 1347 static int test_vmx_feature_control(void) 1348 { 1349 u64 ia32_feature_control; 1350 bool vmx_enabled; 1351 bool feature_control_locked; 1352 1353 ia32_feature_control = 
rdmsr(MSR_IA32_FEATURE_CONTROL); 1354 vmx_enabled = 1355 ia32_feature_control & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; 1356 feature_control_locked = 1357 ia32_feature_control & FEATURE_CONTROL_LOCKED; 1358 1359 if (vmx_enabled && feature_control_locked) { 1360 printf("VMX enabled and locked by BIOS\n"); 1361 return 0; 1362 } else if (feature_control_locked) { 1363 printf("ERROR: VMX locked out by BIOS!?\n"); 1364 return 1; 1365 } 1366 1367 wrmsr(MSR_IA32_FEATURE_CONTROL, 0); 1368 report(test_for_exception(GP_VECTOR, &do_vmxon_off, NULL), 1369 "test vmxon with FEATURE_CONTROL cleared"); 1370 1371 wrmsr(MSR_IA32_FEATURE_CONTROL, FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX); 1372 report(test_for_exception(GP_VECTOR, &do_vmxon_off, NULL), 1373 "test vmxon without FEATURE_CONTROL lock"); 1374 1375 wrmsr(MSR_IA32_FEATURE_CONTROL, 1376 FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX | 1377 FEATURE_CONTROL_LOCKED); 1378 1379 ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); 1380 vmx_enabled = 1381 ia32_feature_control & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; 1382 report(vmx_enabled, "test enable VMX in FEATURE_CONTROL"); 1383 1384 report(test_for_exception(GP_VECTOR, &do_write_feature_control, NULL), 1385 "test FEATURE_CONTROL lock bit"); 1386 1387 return !vmx_enabled; 1388 } 1389 1390 static int test_vmxon(void) 1391 { 1392 int ret, ret1; 1393 u64 *vmxon_region; 1394 int width = cpuid_maxphyaddr(); 1395 1396 /* Unaligned page access */ 1397 vmxon_region = (u64 *)((intptr_t)bsp_vmxon_region + 1); 1398 ret1 = _vmx_on(vmxon_region); 1399 report(ret1, "test vmxon with unaligned vmxon region"); 1400 if (!ret1) { 1401 ret = 1; 1402 goto out; 1403 } 1404 1405 /* gpa bits beyond physical address width are set*/ 1406 vmxon_region = (u64 *)((intptr_t)bsp_vmxon_region | ((u64)1 << (width+1))); 1407 ret1 = _vmx_on(vmxon_region); 1408 report(ret1, "test vmxon with bits set beyond physical address width"); 1409 if (!ret1) { 1410 ret = 1; 1411 goto out; 1412 } 1413 1414 /* invalid 
revision indentifier */ 1415 *bsp_vmxon_region = 0xba9da9; 1416 ret1 = vmx_on(); 1417 report(ret1, "test vmxon with invalid revision identifier"); 1418 if (!ret1) { 1419 ret = 1; 1420 goto out; 1421 } 1422 1423 /* and finally a valid region */ 1424 *bsp_vmxon_region = basic.revision; 1425 ret = vmx_on(); 1426 report(!ret, "test vmxon with valid vmxon region"); 1427 1428 out: 1429 return ret; 1430 } 1431 1432 static void test_vmptrld(void) 1433 { 1434 struct vmcs *vmcs, *tmp_root; 1435 int width = cpuid_maxphyaddr(); 1436 1437 vmcs = alloc_page(); 1438 vmcs->hdr.revision_id = basic.revision; 1439 1440 /* Unaligned page access */ 1441 tmp_root = (struct vmcs *)((intptr_t)vmcs + 1); 1442 report(make_vmcs_current(tmp_root) == 1, 1443 "test vmptrld with unaligned vmcs"); 1444 1445 /* gpa bits beyond physical address width are set*/ 1446 tmp_root = (struct vmcs *)((intptr_t)vmcs | 1447 ((u64)1 << (width+1))); 1448 report(make_vmcs_current(tmp_root) == 1, 1449 "test vmptrld with vmcs address bits set beyond physical address width"); 1450 1451 /* Pass VMXON region */ 1452 assert(!vmcs_clear(vmcs)); 1453 assert(!make_vmcs_current(vmcs)); 1454 tmp_root = (struct vmcs *)bsp_vmxon_region; 1455 report(make_vmcs_current(tmp_root) == 1, 1456 "test vmptrld with vmxon region"); 1457 report(vmcs_read(VMX_INST_ERROR) == VMXERR_VMPTRLD_VMXON_POINTER, 1458 "test vmptrld with vmxon region vm-instruction error"); 1459 1460 report(make_vmcs_current(vmcs) == 0, 1461 "test vmptrld with valid vmcs region"); 1462 } 1463 1464 static void test_vmptrst(void) 1465 { 1466 int ret; 1467 struct vmcs *vmcs1, *vmcs2; 1468 1469 vmcs1 = alloc_page(); 1470 init_vmcs(&vmcs1); 1471 ret = vmcs_save(&vmcs2); 1472 report((!ret) && (vmcs1 == vmcs2), "test vmptrst"); 1473 } 1474 1475 struct vmx_ctl_msr { 1476 const char *name; 1477 u32 index, true_index; 1478 u32 default1; 1479 } vmx_ctl_msr[] = { 1480 { "MSR_IA32_VMX_PINBASED_CTLS", MSR_IA32_VMX_PINBASED_CTLS, 1481 MSR_IA32_VMX_TRUE_PIN, 0x16 }, 1482 { 
"MSR_IA32_VMX_PROCBASED_CTLS", MSR_IA32_VMX_PROCBASED_CTLS, 1483 MSR_IA32_VMX_TRUE_PROC, 0x401e172 }, 1484 { "MSR_IA32_VMX_PROCBASED_CTLS2", MSR_IA32_VMX_PROCBASED_CTLS2, 1485 MSR_IA32_VMX_PROCBASED_CTLS2, 0 }, 1486 { "MSR_IA32_VMX_EXIT_CTLS", MSR_IA32_VMX_EXIT_CTLS, 1487 MSR_IA32_VMX_TRUE_EXIT, 0x36dff }, 1488 { "MSR_IA32_VMX_ENTRY_CTLS", MSR_IA32_VMX_ENTRY_CTLS, 1489 MSR_IA32_VMX_TRUE_ENTRY, 0x11ff }, 1490 }; 1491 1492 static void test_vmx_caps(void) 1493 { 1494 u64 val, default1, fixed0, fixed1; 1495 union vmx_ctrl_msr ctrl, true_ctrl; 1496 unsigned int n; 1497 bool ok; 1498 1499 printf("\nTest suite: VMX capability reporting\n"); 1500 1501 report((basic.revision & (1ul << 31)) == 0 && 1502 basic.size > 0 && basic.size <= 4096 && 1503 (basic.type == 0 || basic.type == 6) && 1504 basic.reserved1 == 0 && basic.reserved2 == 0, 1505 "MSR_IA32_VMX_BASIC"); 1506 1507 val = rdmsr(MSR_IA32_VMX_MISC); 1508 report((!(ctrl_cpu_rev[1].clr & CPU_URG) || val & (1ul << 5)) && 1509 ((val >> 16) & 0x1ff) <= 256 && 1510 (val & 0x80007e00) == 0, 1511 "MSR_IA32_VMX_MISC"); 1512 1513 for (n = 0; n < ARRAY_SIZE(vmx_ctl_msr); n++) { 1514 ctrl.val = rdmsr(vmx_ctl_msr[n].index); 1515 default1 = vmx_ctl_msr[n].default1; 1516 ok = (ctrl.set & default1) == default1; 1517 ok = ok && (ctrl.set & ~ctrl.clr) == 0; 1518 if (ok && basic.ctrl) { 1519 true_ctrl.val = rdmsr(vmx_ctl_msr[n].true_index); 1520 ok = ctrl.clr == true_ctrl.clr; 1521 ok = ok && ctrl.set == (true_ctrl.set | default1); 1522 } 1523 report(ok, "%s", vmx_ctl_msr[n].name); 1524 } 1525 1526 fixed0 = rdmsr(MSR_IA32_VMX_CR0_FIXED0); 1527 fixed1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1); 1528 report(((fixed0 ^ fixed1) & ~fixed1) == 0, 1529 "MSR_IA32_VMX_IA32_VMX_CR0_FIXED0/1"); 1530 1531 fixed0 = rdmsr(MSR_IA32_VMX_CR4_FIXED0); 1532 fixed1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1); 1533 report(((fixed0 ^ fixed1) & ~fixed1) == 0, 1534 "MSR_IA32_VMX_IA32_VMX_CR4_FIXED0/1"); 1535 1536 val = rdmsr(MSR_IA32_VMX_VMCS_ENUM); 1537 report((val & 
VMCS_FIELD_INDEX_MASK) >= 0x2a && 1538 (val & 0xfffffffffffffc01Ull) == 0, 1539 "MSR_IA32_VMX_VMCS_ENUM"); 1540 1541 val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); 1542 report((val & 0xfffff07ef98cbebeUll) == 0, 1543 "MSR_IA32_VMX_EPT_VPID_CAP"); 1544 } 1545 1546 /* This function can only be called in guest */ 1547 static void __attribute__((__used__)) hypercall(u32 hypercall_no) 1548 { 1549 u64 val = 0; 1550 val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT; 1551 hypercall_field = val; 1552 asm volatile("vmcall\n\t"); 1553 } 1554 1555 static bool is_hypercall(union exit_reason exit_reason) 1556 { 1557 return exit_reason.basic == VMX_VMCALL && 1558 (hypercall_field & HYPERCALL_BIT); 1559 } 1560 1561 static int handle_hypercall(void) 1562 { 1563 ulong hypercall_no; 1564 1565 hypercall_no = hypercall_field & HYPERCALL_MASK; 1566 hypercall_field = 0; 1567 switch (hypercall_no) { 1568 case HYPERCALL_VMEXIT: 1569 return VMX_TEST_VMEXIT; 1570 case HYPERCALL_VMABORT: 1571 return VMX_TEST_VMABORT; 1572 case HYPERCALL_VMSKIP: 1573 return VMX_TEST_VMSKIP; 1574 default: 1575 printf("ERROR : Invalid hypercall number : %ld\n", hypercall_no); 1576 } 1577 return VMX_TEST_EXIT; 1578 } 1579 1580 static void continue_abort(void) 1581 { 1582 assert(!in_guest); 1583 printf("Host was here when guest aborted:\n"); 1584 dump_stack(); 1585 longjmp(abort_target, 1); 1586 abort(); 1587 } 1588 1589 void __abort_test(void) 1590 { 1591 if (in_guest) 1592 hypercall(HYPERCALL_VMABORT); 1593 else 1594 longjmp(abort_target, 1); 1595 abort(); 1596 } 1597 1598 static void continue_skip(void) 1599 { 1600 assert(!in_guest); 1601 longjmp(abort_target, 1); 1602 abort(); 1603 } 1604 1605 void test_skip(const char *msg) 1606 { 1607 printf("%s skipping test: %s\n", in_guest ? 
"Guest" : "Host", msg); 1608 if (in_guest) 1609 hypercall(HYPERCALL_VMABORT); 1610 else 1611 longjmp(abort_target, 1); 1612 abort(); 1613 } 1614 1615 static int exit_handler(union exit_reason exit_reason) 1616 { 1617 int ret; 1618 1619 current->exits++; 1620 regs.rflags = vmcs_read(GUEST_RFLAGS); 1621 if (is_hypercall(exit_reason)) 1622 ret = handle_hypercall(); 1623 else 1624 ret = current->exit_handler(exit_reason); 1625 vmcs_write(GUEST_RFLAGS, regs.rflags); 1626 1627 return ret; 1628 } 1629 1630 /* 1631 * Tries to enter the guest, populates @result with VM-Fail, VM-Exit, entered, 1632 * etc... 1633 */ 1634 static void vmx_enter_guest(struct vmentry_result *result) 1635 { 1636 memset(result, 0, sizeof(*result)); 1637 1638 in_guest = 1; 1639 asm volatile ( 1640 "mov %[HOST_RSP], %%rdi\n\t" 1641 "vmwrite %%rsp, %%rdi\n\t" 1642 LOAD_GPR_C 1643 "cmpb $0, %[launched]\n\t" 1644 "jne 1f\n\t" 1645 "vmlaunch\n\t" 1646 "jmp 2f\n\t" 1647 "1: " 1648 "vmresume\n\t" 1649 "2: " 1650 SAVE_GPR_C 1651 "pushf\n\t" 1652 "pop %%rdi\n\t" 1653 "mov %%rdi, %[vm_fail_flags]\n\t" 1654 "movl $1, %[vm_fail]\n\t" 1655 "jmp 3f\n\t" 1656 "vmx_return:\n\t" 1657 SAVE_GPR_C 1658 "3: \n\t" 1659 : [vm_fail]"+m"(result->vm_fail), 1660 [vm_fail_flags]"=m"(result->flags) 1661 : [launched]"m"(launched), [HOST_RSP]"i"(HOST_RSP) 1662 : "rdi", "memory", "cc" 1663 ); 1664 in_guest = 0; 1665 1666 result->vmlaunch = !launched; 1667 result->instr = launched ? "vmresume" : "vmlaunch"; 1668 result->exit_reason.full = result->vm_fail ? 0xdead : 1669 vmcs_read(EXI_REASON); 1670 result->entered = !result->vm_fail && 1671 !result->exit_reason.failed_vmentry; 1672 } 1673 1674 static int vmx_run(void) 1675 { 1676 struct vmentry_result result; 1677 u32 ret; 1678 1679 while (1) { 1680 vmx_enter_guest(&result); 1681 if (result.entered) { 1682 /* 1683 * VMCS isn't in "launched" state if there's been any 1684 * entry failure (early or otherwise). 
1685 */ 1686 launched = 1; 1687 ret = exit_handler(result.exit_reason); 1688 } else if (current->entry_failure_handler) { 1689 ret = current->entry_failure_handler(&result); 1690 } else { 1691 ret = VMX_TEST_EXIT; 1692 } 1693 1694 switch (ret) { 1695 case VMX_TEST_RESUME: 1696 continue; 1697 case VMX_TEST_VMEXIT: 1698 guest_finished = 1; 1699 return 0; 1700 case VMX_TEST_EXIT: 1701 break; 1702 default: 1703 printf("ERROR : Invalid %s_handler return val %d.\n", 1704 result.entered ? "exit" : "entry_failure", 1705 ret); 1706 break; 1707 } 1708 1709 if (result.entered) 1710 print_vmexit_info(result.exit_reason); 1711 else 1712 print_vmentry_failure_info(&result); 1713 abort(); 1714 } 1715 } 1716 1717 static void run_teardown_step(struct test_teardown_step *step) 1718 { 1719 step->func(step->data); 1720 } 1721 1722 static int test_run(struct vmx_test *test) 1723 { 1724 int r; 1725 1726 /* Validate V2 interface. */ 1727 if (test->v2) { 1728 int ret = 0; 1729 if (test->init || test->guest_main || test->exit_handler || 1730 test->syscall_handler) { 1731 report(0, "V2 test cannot specify V1 callbacks."); 1732 ret = 1; 1733 } 1734 if (ret) 1735 return ret; 1736 } 1737 1738 if (test->name == NULL) 1739 test->name = "(no name)"; 1740 if (vmx_on()) { 1741 printf("%s : vmxon failed.\n", __func__); 1742 return 1; 1743 } 1744 1745 init_vmcs(&(test->vmcs)); 1746 /* Directly call test->init is ok here, init_vmcs has done 1747 vmcs init, vmclear and vmptrld*/ 1748 if (test->init && test->init(test->vmcs) != VMX_TEST_START) 1749 goto out; 1750 teardown_count = 0; 1751 v2_guest_main = NULL; 1752 test->exits = 0; 1753 current = test; 1754 regs = test->guest_regs; 1755 vmcs_write(GUEST_RFLAGS, regs.rflags | X86_EFLAGS_FIXED); 1756 launched = 0; 1757 guest_finished = 0; 1758 printf("\nTest suite: %s\n", test->name); 1759 1760 r = setjmp(abort_target); 1761 if (r) { 1762 assert(!in_guest); 1763 goto out; 1764 } 1765 1766 1767 if (test->v2) 1768 test->v2(); 1769 else 1770 vmx_run(); 1771 
1772 while (teardown_count > 0) 1773 run_teardown_step(&teardown_steps[--teardown_count]); 1774 1775 if (launched && !guest_finished) 1776 report(0, "Guest didn't run to completion."); 1777 1778 out: 1779 if (vmx_off()) { 1780 printf("%s : vmxoff failed.\n", __func__); 1781 return 1; 1782 } 1783 return 0; 1784 } 1785 1786 /* 1787 * Add a teardown step. Executed after the test's main function returns. 1788 * Teardown steps executed in reverse order. 1789 */ 1790 void test_add_teardown(test_teardown_func func, void *data) 1791 { 1792 struct test_teardown_step *step; 1793 1794 TEST_ASSERT_MSG(teardown_count < MAX_TEST_TEARDOWN_STEPS, 1795 "There are already %d teardown steps.", 1796 teardown_count); 1797 step = &teardown_steps[teardown_count++]; 1798 step->func = func; 1799 step->data = data; 1800 } 1801 1802 /* 1803 * Set the target of the first enter_guest call. Can only be called once per 1804 * test. Must be called before first enter_guest call. 1805 */ 1806 void test_set_guest(test_guest_func func) 1807 { 1808 assert(current->v2); 1809 TEST_ASSERT_MSG(!v2_guest_main, "Already set guest func."); 1810 v2_guest_main = func; 1811 } 1812 1813 static void check_for_guest_termination(union exit_reason exit_reason) 1814 { 1815 if (is_hypercall(exit_reason)) { 1816 int ret; 1817 1818 ret = handle_hypercall(); 1819 switch (ret) { 1820 case VMX_TEST_VMEXIT: 1821 guest_finished = 1; 1822 break; 1823 case VMX_TEST_VMABORT: 1824 continue_abort(); 1825 break; 1826 case VMX_TEST_VMSKIP: 1827 continue_skip(); 1828 break; 1829 default: 1830 printf("ERROR : Invalid handle_hypercall return %d.\n", 1831 ret); 1832 abort(); 1833 } 1834 } 1835 } 1836 1837 /* 1838 * Enters the guest (or launches it for the first time). Error to call once the 1839 * guest has returned (i.e., run past the end of its guest() function). 
1840 */ 1841 void __enter_guest(u8 abort_flag, struct vmentry_result *result) 1842 { 1843 TEST_ASSERT_MSG(v2_guest_main, 1844 "Never called test_set_guest_func!"); 1845 1846 TEST_ASSERT_MSG(!guest_finished, 1847 "Called enter_guest() after guest returned."); 1848 1849 vmx_enter_guest(result); 1850 1851 if (result->vm_fail) { 1852 if (abort_flag & ABORT_ON_EARLY_VMENTRY_FAIL) 1853 goto do_abort; 1854 return; 1855 } 1856 if (result->exit_reason.failed_vmentry) { 1857 if ((abort_flag & ABORT_ON_INVALID_GUEST_STATE) || 1858 result->exit_reason.basic != VMX_FAIL_STATE) 1859 goto do_abort; 1860 return; 1861 } 1862 1863 launched = 1; 1864 check_for_guest_termination(result->exit_reason); 1865 return; 1866 1867 do_abort: 1868 print_vmentry_failure_info(result); 1869 abort(); 1870 } 1871 1872 void enter_guest_with_bad_controls(void) 1873 { 1874 struct vmentry_result result; 1875 1876 TEST_ASSERT_MSG(v2_guest_main, 1877 "Never called test_set_guest_func!"); 1878 1879 TEST_ASSERT_MSG(!guest_finished, 1880 "Called enter_guest() after guest returned."); 1881 1882 __enter_guest(ABORT_ON_INVALID_GUEST_STATE, &result); 1883 report(result.vm_fail, "VM-Fail occurred as expected"); 1884 report((result.flags & VMX_ENTRY_FLAGS) == X86_EFLAGS_ZF, 1885 "FLAGS set correctly on VM-Fail"); 1886 report(vmcs_read(VMX_INST_ERROR) == VMXERR_ENTRY_INVALID_CONTROL_FIELD, 1887 "VM-Inst Error # is %d (VM entry with invalid control field(s))", 1888 VMXERR_ENTRY_INVALID_CONTROL_FIELD); 1889 } 1890 1891 void enter_guest(void) 1892 { 1893 struct vmentry_result result; 1894 1895 __enter_guest(ABORT_ON_EARLY_VMENTRY_FAIL | 1896 ABORT_ON_INVALID_GUEST_STATE, &result); 1897 } 1898 1899 extern struct vmx_test vmx_tests[]; 1900 1901 static bool 1902 test_wanted(const char *name, const char *filters[], int filter_count) 1903 { 1904 int i; 1905 bool positive = false; 1906 bool match = false; 1907 char clean_name[strlen(name) + 1]; 1908 char *c; 1909 const char *n; 1910 1911 printf("filter = %s, test = %s\n", 
filters[0], name); 1912 1913 /* Replace spaces with underscores. */ 1914 n = name; 1915 c = &clean_name[0]; 1916 do *c++ = (*n == ' ') ? '_' : *n; 1917 while (*n++); 1918 1919 for (i = 0; i < filter_count; i++) { 1920 const char *filter = filters[i]; 1921 1922 if (filter[0] == '-') { 1923 if (simple_glob(clean_name, filter + 1)) 1924 return false; 1925 } else { 1926 positive = true; 1927 match |= simple_glob(clean_name, filter); 1928 } 1929 } 1930 1931 if (!positive || match) { 1932 matched++; 1933 return true; 1934 } else { 1935 return false; 1936 } 1937 } 1938 1939 int main(int argc, const char *argv[]) 1940 { 1941 int i = 0; 1942 1943 setup_vm(); 1944 smp_init(); 1945 hypercall_field = 0; 1946 1947 /* We want xAPIC mode to test MMIO passthrough from L1 (us) to L2. */ 1948 reset_apic(); 1949 1950 argv++; 1951 argc--; 1952 1953 if (!this_cpu_has(X86_FEATURE_VMX)) { 1954 printf("WARNING: vmx not supported, add '-cpu host'\n"); 1955 goto exit; 1956 } 1957 init_bsp_vmx(); 1958 if (test_wanted("test_vmx_feature_control", argv, argc)) { 1959 /* Sets MSR_IA32_FEATURE_CONTROL to 0x5 */ 1960 if (test_vmx_feature_control() != 0) 1961 goto exit; 1962 } else { 1963 enable_vmx(); 1964 } 1965 1966 if (test_wanted("test_vmxon", argv, argc)) { 1967 /* Enables VMX */ 1968 if (test_vmxon() != 0) 1969 goto exit; 1970 } else { 1971 if (vmx_on()) { 1972 report(0, "vmxon"); 1973 goto exit; 1974 } 1975 } 1976 1977 if (test_wanted("test_vmptrld", argv, argc)) 1978 test_vmptrld(); 1979 if (test_wanted("test_vmclear", argv, argc)) 1980 test_vmclear(); 1981 if (test_wanted("test_vmptrst", argv, argc)) 1982 test_vmptrst(); 1983 if (test_wanted("test_vmwrite_vmread", argv, argc)) 1984 test_vmwrite_vmread(); 1985 if (test_wanted("test_vmcs_high", argv, argc)) 1986 test_vmcs_high(); 1987 if (test_wanted("test_vmcs_lifecycle", argv, argc)) 1988 test_vmcs_lifecycle(); 1989 if (test_wanted("test_vmx_caps", argv, argc)) 1990 test_vmx_caps(); 1991 1992 /* Balance vmxon from test_vmxon. 
*/ 1993 vmx_off(); 1994 1995 for (; vmx_tests[i].name != NULL; i++) { 1996 if (!test_wanted(vmx_tests[i].name, argv, argc)) 1997 continue; 1998 if (test_run(&vmx_tests[i])) 1999 goto exit; 2000 } 2001 2002 if (!matched) 2003 report(matched, "command line didn't match any tests!"); 2004 2005 exit: 2006 return report_summary(); 2007 } 2008