1 /* 2 * x86/vmx.c : Framework for testing nested virtualization 3 * This is a framework to test nested VMX for KVM, which 4 * started as a project of GSoC 2013. All test cases should 5 * be located in x86/vmx_tests.c and framework related 6 * functions should be in this file. 7 * 8 * How to write test cases? 9 * Add callbacks of test suite in variant "vmx_tests". You can 10 * write: 11 * 1. init function used for initializing test suite 12 * 2. main function for codes running in L2 guest, 13 * 3. exit_handler to handle vmexit of L2 to L1 14 * 4. syscall handler to handle L2 syscall vmexit 15 * 5. vmenter fail handler to handle direct failure of vmenter 16 * 6. guest_regs is loaded when vmenter and saved when 17 * vmexit, you can read and set it in exit_handler 18 * If no special function is needed for a test suite, use 19 * coressponding basic_* functions as callback. More handlers 20 * can be added to "vmx_tests", see details of "struct vmx_test" 21 * and function test_run(). 22 * 23 * Currently, vmx test framework only set up one VCPU and one 24 * concurrent guest test environment with same paging for L2 and 25 * L1. For usage of EPT, only 1:1 mapped paging is used from VFN 26 * to PFN. 27 * 28 * Author : Arthur Chunqi Li <yzt356@gmail.com> 29 */ 30 31 #include "libcflat.h" 32 #include "processor.h" 33 #include "vm.h" 34 #include "desc.h" 35 #include "vmx.h" 36 #include "msr.h" 37 #include "smp.h" 38 39 u64 *vmxon_region; 40 struct vmcs *vmcs_root; 41 u32 vpid_cnt; 42 void *guest_stack, *guest_syscall_stack; 43 u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2]; 44 struct regs regs; 45 struct vmx_test *current; 46 u64 hypercall_field; 47 bool launched; 48 49 union vmx_basic basic; 50 union vmx_ctrl_msr ctrl_pin_rev; 51 union vmx_ctrl_msr ctrl_cpu_rev[2]; 52 union vmx_ctrl_msr ctrl_exit_rev; 53 union vmx_ctrl_msr ctrl_enter_rev; 54 union vmx_ept_vpid ept_vpid; 55 56 extern struct descriptor_table_ptr gdt64_desc; 57 extern struct descriptor_table_ptr idt_descr; 58 extern struct descriptor_table_ptr tss_descr; 59 extern void *vmx_return; 60 extern void *entry_sysenter; 61 extern void *guest_entry; 62 63 static volatile u32 stage; 64 65 void vmx_set_test_stage(u32 s) 66 { 67 barrier(); 68 stage = s; 69 barrier(); 70 } 71 72 u32 vmx_get_test_stage(void) 73 { 74 u32 s; 75 76 barrier(); 77 s = stage; 78 barrier(); 79 return s; 80 } 81 82 void vmx_inc_test_stage(void) 83 { 84 barrier(); 85 stage++; 86 barrier(); 87 } 88 89 static int make_vmcs_current(struct vmcs *vmcs) 90 { 91 bool ret; 92 u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; 93 94 asm volatile ("push %1; popf; vmptrld %2; setbe %0" 95 : "=q" (ret) : "q" (rflags), "m" (vmcs) : "cc"); 96 return ret; 97 } 98 99 /* entry_sysenter */ 100 asm( 101 ".align 4, 0x90\n\t" 102 ".globl entry_sysenter\n\t" 103 "entry_sysenter:\n\t" 104 SAVE_GPR 105 " and $0xf, %rax\n\t" 106 " mov %rax, %rdi\n\t" 107 " call syscall_handler\n\t" 108 LOAD_GPR 109 " vmresume\n\t" 110 ); 111 112 static void __attribute__((__used__)) syscall_handler(u64 syscall_no) 113 { 114 if (current->syscall_handler) 115 current->syscall_handler(syscall_no); 116 } 117 118 static inline int vmx_on() 119 { 120 bool ret; 121 u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; 122 asm volatile ("push %1; popf; vmxon %2; setbe %0\n\t" 123 : "=q" (ret) : "q" (rflags), "m" (vmxon_region) : "cc"); 124 return ret; 125 } 126 127 static inline int vmx_off() 128 { 129 bool ret; 130 u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; 131 132 asm volatile("push %1; popf; vmxoff; setbe %0\n\t" 133 : "=q"(ret) : "q" (rflags) : "cc"); 134 return ret; 135 } 136 137 void print_vmexit_info() 138 { 139 u64 guest_rip, guest_rsp; 140 ulong reason = vmcs_read(EXI_REASON) & 0xff; 141 ulong exit_qual = vmcs_read(EXI_QUALIFICATION); 142 guest_rip = vmcs_read(GUEST_RIP); 143 guest_rsp = vmcs_read(GUEST_RSP); 144 printf("VMEXIT info:\n"); 145 printf("\tvmexit reason = %ld\n", reason); 146 printf("\texit qualification = 0x%lx\n", exit_qual); 147 printf("\tBit 31 of reason = %lx\n", (vmcs_read(EXI_REASON) >> 31) & 1); 148 printf("\tguest_rip = 0x%lx\n", guest_rip); 149 printf("\tRAX=0x%lx RBX=0x%lx RCX=0x%lx RDX=0x%lx\n", 150 regs.rax, regs.rbx, regs.rcx, regs.rdx); 151 printf("\tRSP=0x%lx RBP=0x%lx RSI=0x%lx RDI=0x%lx\n", 152 guest_rsp, regs.rbp, regs.rsi, regs.rdi); 153 printf("\tR8 =0x%lx R9 =0x%lx R10=0x%lx R11=0x%lx\n", 154 regs.r8, regs.r9, regs.r10, regs.r11); 155 printf("\tR12=0x%lx R13=0x%lx R14=0x%lx R15=0x%lx\n", 156 regs.r12, regs.r13, regs.r14, regs.r15); 157 } 158 159 void 160 print_vmentry_failure_info(struct vmentry_failure *failure) { 161 if (failure->early) { 162 printf("Early %s failure: ", failure->instr); 163 switch (failure->flags & VMX_ENTRY_FLAGS) { 164 case X86_EFLAGS_CF: 165 printf("current-VMCS pointer is not valid.\n"); 166 break; 167 case X86_EFLAGS_ZF: 168 printf("error number is %ld. See Intel 30.4.\n", 169 vmcs_read(VMX_INST_ERROR)); 170 break; 171 default: 172 printf("unexpected flags %lx!\n", failure->flags); 173 } 174 } else { 175 u64 reason = vmcs_read(EXI_REASON); 176 u64 qual = vmcs_read(EXI_QUALIFICATION); 177 178 printf("Non-early %s failure (reason=0x%lx, qual=0x%lx): ", 179 failure->instr, reason, qual); 180 181 switch (reason & 0xff) { 182 case VMX_FAIL_STATE: 183 printf("invalid guest state\n"); 184 break; 185 case VMX_FAIL_MSR: 186 printf("MSR loading\n"); 187 break; 188 case VMX_FAIL_MCHECK: 189 printf("machine-check event\n"); 190 break; 191 default: 192 printf("unexpected basic exit reason %ld\n", 193 reason & 0xff); 194 } 195 196 if (!(reason & VMX_ENTRY_FAILURE)) 197 printf("\tVMX_ENTRY_FAILURE BIT NOT SET!\n"); 198 199 if (reason & 0x7fff0000) 200 printf("\tRESERVED BITS SET!\n"); 201 } 202 } 203 204 205 static void test_vmclear(void) 206 { 207 struct vmcs *tmp_root; 208 int width = cpuid_maxphyaddr(); 209 210 /* 211 * Note- The tests below do not necessarily have a 212 * valid VMCS, but that's ok since the invalid vmcs 213 * is only used for a specific test and is discarded 214 * without touching its contents 215 */ 216 217 /* Unaligned page access */ 218 tmp_root = (struct vmcs *)((intptr_t)vmcs_root + 1); 219 report("test vmclear with unaligned vmcs", 220 vmcs_clear(tmp_root) == 1); 221 222 /* gpa bits beyond physical address width are set*/ 223 tmp_root = (struct vmcs *)((intptr_t)vmcs_root | 224 ((u64)1 << (width+1))); 225 report("test vmclear with vmcs address bits set beyond physical address width", 226 vmcs_clear(tmp_root) == 1); 227 228 /* Pass VMXON region */ 229 tmp_root = (struct vmcs *)vmxon_region; 230 report("test vmclear with vmxon region", 231 vmcs_clear(tmp_root) == 1); 232 233 /* Valid VMCS */ 234 report("test vmclear with valid vmcs region", vmcs_clear(vmcs_root) == 0); 235 236 } 237 238 static void test_vmxoff(void) 239 { 240 int ret; 241 242 ret = vmx_off(); 243 report("test vmxoff", !ret); 244 } 245 246 static void __attribute__((__used__)) guest_main(void) 247 { 248 current->guest_main(); 249 } 250 251 /* guest_entry */ 252 asm( 253 ".align 4, 0x90\n\t" 254 ".globl entry_guest\n\t" 255 "guest_entry:\n\t" 256 " call guest_main\n\t" 257 " mov $1, %edi\n\t" 258 " call hypercall\n\t" 259 ); 260 261 /* EPT paging structure related functions */ 262 /* split_large_ept_entry: Split a 2M/1G large page into 512 smaller PTEs. 263 @ptep : large page table entry to split 264 @level : level of ptep (2 or 3) 265 */ 266 static void split_large_ept_entry(unsigned long *ptep, int level) 267 { 268 unsigned long *new_pt; 269 unsigned long gpa; 270 unsigned long pte; 271 unsigned long prototype; 272 int i; 273 274 pte = *ptep; 275 assert(pte & EPT_PRESENT); 276 assert(pte & EPT_LARGE_PAGE); 277 assert(level == 2 || level == 3); 278 279 new_pt = alloc_page(); 280 assert(new_pt); 281 memset(new_pt, 0, PAGE_SIZE); 282 283 prototype = pte & ~EPT_ADDR_MASK; 284 if (level == 2) 285 prototype &= ~EPT_LARGE_PAGE; 286 287 gpa = pte & EPT_ADDR_MASK; 288 for (i = 0; i < EPT_PGDIR_ENTRIES; i++) { 289 new_pt[i] = prototype | gpa; 290 gpa += 1ul << EPT_LEVEL_SHIFT(level - 1); 291 } 292 293 pte &= ~EPT_LARGE_PAGE; 294 pte &= ~EPT_ADDR_MASK; 295 pte |= virt_to_phys(new_pt); 296 297 *ptep = pte; 298 } 299 300 /* install_ept_entry : Install a page to a given level in EPT 301 @pml4 : addr of pml4 table 302 @pte_level : level of PTE to set 303 @guest_addr : physical address of guest 304 @pte : pte value to set 305 @pt_page : address of page table, NULL for a new page 306 */ 307 void install_ept_entry(unsigned long *pml4, 308 int pte_level, 309 unsigned long guest_addr, 310 unsigned long pte, 311 unsigned long *pt_page) 312 { 313 int level; 314 unsigned long *pt = pml4; 315 unsigned offset; 316 317 for (level = EPT_PAGE_LEVEL; level > pte_level; --level) { 318 offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) 319 & EPT_PGDIR_MASK; 320 if (!(pt[offset] & (EPT_PRESENT))) { 321 unsigned long *new_pt = pt_page; 322 if (!new_pt) 323 new_pt = alloc_page(); 324 else 325 pt_page = 0; 326 memset(new_pt, 0, PAGE_SIZE); 327 pt[offset] = virt_to_phys(new_pt) 328 | EPT_RA | EPT_WA | EPT_EA; 329 } else if (pt[offset] & EPT_LARGE_PAGE) 330 split_large_ept_entry(&pt[offset], level); 331 pt = phys_to_virt(pt[offset] & EPT_ADDR_MASK); 332 } 333 offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) & EPT_PGDIR_MASK; 334 pt[offset] = pte; 335 } 336 337 /* Map a page, @perm is the permission of the page */ 338 void install_ept(unsigned long *pml4, 339 unsigned long phys, 340 unsigned long guest_addr, 341 u64 perm) 342 { 343 install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0); 344 } 345 346 /* Map a 1G-size page */ 347 void install_1g_ept(unsigned long *pml4, 348 unsigned long phys, 349 unsigned long guest_addr, 350 u64 perm) 351 { 352 install_ept_entry(pml4, 3, guest_addr, 353 (phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0); 354 } 355 356 /* Map a 2M-size page */ 357 void install_2m_ept(unsigned long *pml4, 358 unsigned long phys, 359 unsigned long guest_addr, 360 u64 perm) 361 { 362 install_ept_entry(pml4, 2, guest_addr, 363 (phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0); 364 } 365 366 /* setup_ept_range : Setup a range of 1:1 mapped page to EPT paging structure. 367 @start : start address of guest page 368 @len : length of address to be mapped 369 @map_1g : whether 1G page map is used 370 @map_2m : whether 2M page map is used 371 @perm : permission for every page 372 */ 373 void setup_ept_range(unsigned long *pml4, unsigned long start, 374 unsigned long len, int map_1g, int map_2m, u64 perm) 375 { 376 u64 phys = start; 377 u64 max = (u64)len + (u64)start; 378 379 if (map_1g) { 380 while (phys + PAGE_SIZE_1G <= max) { 381 install_1g_ept(pml4, phys, phys, perm); 382 phys += PAGE_SIZE_1G; 383 } 384 } 385 if (map_2m) { 386 while (phys + PAGE_SIZE_2M <= max) { 387 install_2m_ept(pml4, phys, phys, perm); 388 phys += PAGE_SIZE_2M; 389 } 390 } 391 while (phys + PAGE_SIZE <= max) { 392 install_ept(pml4, phys, phys, perm); 393 phys += PAGE_SIZE; 394 } 395 } 396 397 /* get_ept_pte : Get the PTE of a given level in EPT, 398 @level == 1 means get the latest level*/ 399 unsigned long get_ept_pte(unsigned long *pml4, 400 unsigned long guest_addr, int level) 401 { 402 int l; 403 unsigned long *pt = pml4, pte; 404 unsigned offset; 405 406 if (level < 1 || level > 3) 407 return -1; 408 for (l = EPT_PAGE_LEVEL; ; --l) { 409 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 410 pte = pt[offset]; 411 if (!(pte & (EPT_PRESENT))) 412 return 0; 413 if (l == level) 414 break; 415 if (l < 4 && (pte & EPT_LARGE_PAGE)) 416 return pte; 417 pt = (unsigned long *)(pte & EPT_ADDR_MASK); 418 } 419 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 420 pte = pt[offset]; 421 return pte; 422 } 423 424 static void clear_ept_ad_pte(unsigned long *pml4, unsigned long guest_addr) 425 { 426 int l; 427 unsigned long *pt = pml4; 428 u64 pte; 429 unsigned offset; 430 431 for (l = EPT_PAGE_LEVEL; ; --l) { 432 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 433 pt[offset] &= ~(EPT_ACCESS_FLAG|EPT_DIRTY_FLAG); 434 pte = pt[offset]; 435 if (l == 1 || (l < 4 && (pte & EPT_LARGE_PAGE))) 436 break; 437 pt = (unsigned long *)(pte & EPT_ADDR_MASK); 438 } 439 } 440 441 /* clear_ept_ad : Clear EPT A/D bits for the page table walk and the 442 final GPA of a guest address. */ 443 void clear_ept_ad(unsigned long *pml4, u64 guest_cr3, 444 unsigned long guest_addr) 445 { 446 int l; 447 unsigned long *pt = (unsigned long *)guest_cr3, gpa; 448 u64 pte, offset_in_page; 449 unsigned offset; 450 451 for (l = EPT_PAGE_LEVEL; ; --l) { 452 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 453 454 clear_ept_ad_pte(pml4, (u64) &pt[offset]); 455 pte = pt[offset]; 456 if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK))) 457 break; 458 if (!(pte & PT_PRESENT_MASK)) 459 return; 460 pt = (unsigned long *)(pte & PT_ADDR_MASK); 461 } 462 463 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 464 offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1); 465 gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page); 466 clear_ept_ad_pte(pml4, gpa); 467 } 468 469 /* check_ept_ad : Check the content of EPT A/D bits for the page table 470 walk and the final GPA of a guest address. */ 471 void check_ept_ad(unsigned long *pml4, u64 guest_cr3, 472 unsigned long guest_addr, int expected_gpa_ad, 473 int expected_pt_ad) 474 { 475 int l; 476 unsigned long *pt = (unsigned long *)guest_cr3, gpa; 477 u64 ept_pte, pte, offset_in_page; 478 unsigned offset; 479 bool bad_pt_ad = false; 480 481 for (l = EPT_PAGE_LEVEL; ; --l) { 482 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 483 484 ept_pte = get_ept_pte(pml4, (u64) &pt[offset], 1); 485 if (ept_pte == 0) 486 return; 487 488 if (!bad_pt_ad) { 489 bad_pt_ad |= (ept_pte & (EPT_ACCESS_FLAG|EPT_DIRTY_FLAG)) != expected_pt_ad; 490 if (bad_pt_ad) 491 report("EPT - guest level %d page table A=%d/D=%d", 492 false, l, 493 !!(expected_pt_ad & EPT_ACCESS_FLAG), 494 !!(expected_pt_ad & EPT_DIRTY_FLAG)); 495 } 496 497 pte = pt[offset]; 498 if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK))) 499 break; 500 if (!(pte & PT_PRESENT_MASK)) 501 return; 502 pt = (unsigned long *)(pte & PT_ADDR_MASK); 503 } 504 505 if (!bad_pt_ad) 506 report("EPT - guest page table structures A=%d/D=%d", 507 true, 508 !!(expected_pt_ad & EPT_ACCESS_FLAG), 509 !!(expected_pt_ad & EPT_DIRTY_FLAG)); 510 511 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 512 offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1); 513 gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page); 514 515 ept_pte = get_ept_pte(pml4, gpa, 1); 516 report("EPT - guest physical address A=%d/D=%d", 517 (ept_pte & (EPT_ACCESS_FLAG|EPT_DIRTY_FLAG)) == expected_gpa_ad, 518 !!(expected_gpa_ad & EPT_ACCESS_FLAG), 519 !!(expected_gpa_ad & EPT_DIRTY_FLAG)); 520 } 521 522 523 void ept_sync(int type, u64 eptp) 524 { 525 switch (type) { 526 case INVEPT_SINGLE: 527 if (ept_vpid.val & EPT_CAP_INVEPT_SINGLE) { 528 invept(INVEPT_SINGLE, eptp); 529 break; 530 } 531 /* else fall through */ 532 case INVEPT_GLOBAL: 533 if (ept_vpid.val & EPT_CAP_INVEPT_ALL) { 534 invept(INVEPT_GLOBAL, eptp); 535 break; 536 } 537 /* else fall through */ 538 default: 539 printf("WARNING: invept is not supported!\n"); 540 } 541 } 542 543 int set_ept_pte(unsigned long *pml4, unsigned long guest_addr, 544 int level, u64 pte_val) 545 { 546 int l; 547 unsigned long *pt = pml4; 548 unsigned offset; 549 550 if (level < 1 || level > 3) 551 return -1; 552 for (l = EPT_PAGE_LEVEL; ; --l) { 553 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 554 if (l == level) 555 break; 556 if (!(pt[offset] & (EPT_PRESENT))) 557 return -1; 558 pt = (unsigned long *)(pt[offset] & EPT_ADDR_MASK); 559 } 560 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 561 pt[offset] = pte_val; 562 return 0; 563 } 564 565 void vpid_sync(int type, u16 vpid) 566 { 567 switch(type) { 568 case INVVPID_SINGLE: 569 if (ept_vpid.val & VPID_CAP_INVVPID_SINGLE) { 570 invvpid(INVVPID_SINGLE, vpid, 0); 571 break; 572 } 573 case INVVPID_ALL: 574 if (ept_vpid.val & VPID_CAP_INVVPID_ALL) { 575 invvpid(INVVPID_ALL, vpid, 0); 576 break; 577 } 578 default: 579 printf("WARNING: invvpid is not supported\n"); 580 } 581 } 582 583 static void init_vmcs_ctrl(void) 584 { 585 /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ 586 /* 26.2.1.1 */ 587 vmcs_write(PIN_CONTROLS, ctrl_pin); 588 /* Disable VMEXIT of IO instruction */ 589 vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]); 590 if (ctrl_cpu_rev[0].set & CPU_SECONDARY) { 591 ctrl_cpu[1] = (ctrl_cpu[1] | ctrl_cpu_rev[1].set) & 592 ctrl_cpu_rev[1].clr; 593 vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]); 594 } 595 vmcs_write(CR3_TARGET_COUNT, 0); 596 vmcs_write(VPID, ++vpid_cnt); 597 } 598 599 static void init_vmcs_host(void) 600 { 601 /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ 602 /* 26.2.1.2 */ 603 vmcs_write(HOST_EFER, rdmsr(MSR_EFER)); 604 605 /* 26.2.1.3 */ 606 vmcs_write(ENT_CONTROLS, ctrl_enter); 607 vmcs_write(EXI_CONTROLS, ctrl_exit); 608 609 /* 26.2.2 */ 610 vmcs_write(HOST_CR0, read_cr0()); 611 vmcs_write(HOST_CR3, read_cr3()); 612 vmcs_write(HOST_CR4, read_cr4()); 613 vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter)); 614 vmcs_write(HOST_SYSENTER_CS, KERNEL_CS); 615 616 /* 26.2.3 */ 617 vmcs_write(HOST_SEL_CS, KERNEL_CS); 618 vmcs_write(HOST_SEL_SS, KERNEL_DS); 619 vmcs_write(HOST_SEL_DS, KERNEL_DS); 620 vmcs_write(HOST_SEL_ES, KERNEL_DS); 621 vmcs_write(HOST_SEL_FS, KERNEL_DS); 622 vmcs_write(HOST_SEL_GS, KERNEL_DS); 623 vmcs_write(HOST_SEL_TR, TSS_MAIN); 624 vmcs_write(HOST_BASE_TR, tss_descr.base); 625 vmcs_write(HOST_BASE_GDTR, gdt64_desc.base); 626 vmcs_write(HOST_BASE_IDTR, idt_descr.base); 627 vmcs_write(HOST_BASE_FS, 0); 628 vmcs_write(HOST_BASE_GS, 0); 629 630 /* Set other vmcs area */ 631 vmcs_write(PF_ERROR_MASK, 0); 632 vmcs_write(PF_ERROR_MATCH, 0); 633 vmcs_write(VMCS_LINK_PTR, ~0ul); 634 vmcs_write(VMCS_LINK_PTR_HI, ~0ul); 635 vmcs_write(HOST_RIP, (u64)(&vmx_return)); 636 } 637 638 static void init_vmcs_guest(void) 639 { 640 /* 26.3 CHECKING AND LOADING GUEST STATE */ 641 ulong guest_cr0, guest_cr4, guest_cr3; 642 /* 26.3.1.1 */ 643 guest_cr0 = read_cr0(); 644 guest_cr4 = read_cr4(); 645 guest_cr3 = read_cr3(); 646 if (ctrl_enter & ENT_GUEST_64) { 647 guest_cr0 |= X86_CR0_PG; 648 guest_cr4 |= X86_CR4_PAE; 649 } 650 if ((ctrl_enter & ENT_GUEST_64) == 0) 651 guest_cr4 &= (~X86_CR4_PCIDE); 652 if (guest_cr0 & X86_CR0_PG) 653 guest_cr0 |= X86_CR0_PE; 654 vmcs_write(GUEST_CR0, guest_cr0); 655 vmcs_write(GUEST_CR3, guest_cr3); 656 vmcs_write(GUEST_CR4, guest_cr4); 657 vmcs_write(GUEST_SYSENTER_CS, KERNEL_CS); 658 vmcs_write(GUEST_SYSENTER_ESP, 659 (u64)(guest_syscall_stack + PAGE_SIZE - 1)); 660 vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter)); 661 vmcs_write(GUEST_DR7, 0); 662 vmcs_write(GUEST_EFER, rdmsr(MSR_EFER)); 663 664 /* 26.3.1.2 */ 665 vmcs_write(GUEST_SEL_CS, KERNEL_CS); 666 vmcs_write(GUEST_SEL_SS, KERNEL_DS); 667 vmcs_write(GUEST_SEL_DS, KERNEL_DS); 668 vmcs_write(GUEST_SEL_ES, KERNEL_DS); 669 vmcs_write(GUEST_SEL_FS, KERNEL_DS); 670 vmcs_write(GUEST_SEL_GS, KERNEL_DS); 671 vmcs_write(GUEST_SEL_TR, TSS_MAIN); 672 vmcs_write(GUEST_SEL_LDTR, 0); 673 674 vmcs_write(GUEST_BASE_CS, 0); 675 vmcs_write(GUEST_BASE_ES, 0); 676 vmcs_write(GUEST_BASE_SS, 0); 677 vmcs_write(GUEST_BASE_DS, 0); 678 vmcs_write(GUEST_BASE_FS, 0); 679 vmcs_write(GUEST_BASE_GS, 0); 680 vmcs_write(GUEST_BASE_TR, tss_descr.base); 681 vmcs_write(GUEST_BASE_LDTR, 0); 682 683 vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF); 684 vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF); 685 vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF); 686 vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF); 687 vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF); 688 vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF); 689 vmcs_write(GUEST_LIMIT_LDTR, 0xffff); 690 vmcs_write(GUEST_LIMIT_TR, tss_descr.limit); 691 692 vmcs_write(GUEST_AR_CS, 0xa09b); 693 vmcs_write(GUEST_AR_DS, 0xc093); 694 vmcs_write(GUEST_AR_ES, 0xc093); 695 vmcs_write(GUEST_AR_FS, 0xc093); 696 vmcs_write(GUEST_AR_GS, 0xc093); 697 vmcs_write(GUEST_AR_SS, 0xc093); 698 vmcs_write(GUEST_AR_LDTR, 0x82); 699 vmcs_write(GUEST_AR_TR, 0x8b); 700 701 /* 26.3.1.3 */ 702 vmcs_write(GUEST_BASE_GDTR, gdt64_desc.base); 703 vmcs_write(GUEST_BASE_IDTR, idt_descr.base); 704 vmcs_write(GUEST_LIMIT_GDTR, gdt64_desc.limit); 705 vmcs_write(GUEST_LIMIT_IDTR, idt_descr.limit); 706 707 /* 26.3.1.4 */ 708 vmcs_write(GUEST_RIP, (u64)(&guest_entry)); 709 vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1)); 710 vmcs_write(GUEST_RFLAGS, 0x2); 711 712 /* 26.3.1.5 */ 713 vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE); 714 vmcs_write(GUEST_INTR_STATE, 0); 715 } 716 717 static int init_vmcs(struct vmcs **vmcs) 718 { 719 *vmcs = alloc_page(); 720 memset(*vmcs, 0, PAGE_SIZE); 721 (*vmcs)->revision_id = basic.revision; 722 /* vmclear first to init vmcs */ 723 if (vmcs_clear(*vmcs)) { 724 printf("%s : vmcs_clear error\n", __func__); 725 return 1; 726 } 727 728 if (make_vmcs_current(*vmcs)) { 729 printf("%s : make_vmcs_current error\n", __func__); 730 return 1; 731 } 732 733 /* All settings to pin/exit/enter/cpu 734 control fields should be placed here */ 735 ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI; 736 ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64; 737 ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64); 738 /* DIsable IO instruction VMEXIT now */ 739 ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP)); 740 ctrl_cpu[1] = 0; 741 742 ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr; 743 ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr; 744 ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr; 745 ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr; 746 747 init_vmcs_ctrl(); 748 init_vmcs_host(); 749 init_vmcs_guest(); 750 return 0; 751 } 752 753 static void init_vmx(void) 754 { 755 ulong fix_cr0_set, fix_cr0_clr; 756 ulong fix_cr4_set, fix_cr4_clr; 757 758 vmxon_region = alloc_page(); 759 memset(vmxon_region, 0, PAGE_SIZE); 760 761 fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0); 762 fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1); 763 fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0); 764 fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1); 765 basic.val = rdmsr(MSR_IA32_VMX_BASIC); 766 ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN 767 : MSR_IA32_VMX_PINBASED_CTLS); 768 ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT 769 : MSR_IA32_VMX_EXIT_CTLS); 770 ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY 771 : MSR_IA32_VMX_ENTRY_CTLS); 772 ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC 773 : MSR_IA32_VMX_PROCBASED_CTLS); 774 if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0) 775 ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2); 776 else 777 ctrl_cpu_rev[1].val = 0; 778 if ((ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID)) != 0) 779 ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); 780 else 781 ept_vpid.val = 0; 782 783 write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set); 784 write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE); 785 786 *vmxon_region = basic.revision; 787 788 guest_stack = alloc_page(); 789 memset(guest_stack, 0, PAGE_SIZE); 790 guest_syscall_stack = alloc_page(); 791 memset(guest_syscall_stack, 0, PAGE_SIZE); 792 } 793 794 static void do_vmxon_off(void *data) 795 { 796 vmx_on(); 797 vmx_off(); 798 } 799 800 static void do_write_feature_control(void *data) 801 { 802 wrmsr(MSR_IA32_FEATURE_CONTROL, 0); 803 } 804 805 static int test_vmx_feature_control(void) 806 { 807 u64 ia32_feature_control; 808 bool vmx_enabled; 809 810 ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); 811 vmx_enabled = ((ia32_feature_control & 0x5) == 0x5); 812 if ((ia32_feature_control & 0x5) == 0x5) { 813 printf("VMX enabled and locked by BIOS\n"); 814 return 0; 815 } else if (ia32_feature_control & 0x1) { 816 printf("ERROR: VMX locked out by BIOS!?\n"); 817 return 1; 818 } 819 820 wrmsr(MSR_IA32_FEATURE_CONTROL, 0); 821 report("test vmxon with FEATURE_CONTROL cleared", 822 test_for_exception(GP_VECTOR, &do_vmxon_off, NULL)); 823 824 wrmsr(MSR_IA32_FEATURE_CONTROL, 0x4); 825 report("test vmxon without FEATURE_CONTROL lock", 826 test_for_exception(GP_VECTOR, &do_vmxon_off, NULL)); 827 828 wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5); 829 vmx_enabled = ((rdmsr(MSR_IA32_FEATURE_CONTROL) & 0x5) == 0x5); 830 report("test enable VMX in FEATURE_CONTROL", vmx_enabled); 831 832 report("test FEATURE_CONTROL lock bit", 833 test_for_exception(GP_VECTOR, &do_write_feature_control, NULL)); 834 835 return !vmx_enabled; 836 } 837 838 static int test_vmxon(void) 839 { 840 int ret, ret1; 841 u64 *tmp_region = vmxon_region; 842 int width = cpuid_maxphyaddr(); 843 844 /* Unaligned page access */ 845 vmxon_region = (u64 *)((intptr_t)vmxon_region + 1); 846 ret1 = vmx_on(); 847 report("test vmxon with unaligned vmxon region", ret1); 848 if (!ret1) { 849 ret = 1; 850 goto out; 851 } 852 853 /* gpa bits beyond physical address width are set*/ 854 vmxon_region = (u64 *)((intptr_t)tmp_region | ((u64)1 << (width+1))); 855 ret1 = vmx_on(); 856 report("test vmxon with bits set beyond physical address width", ret1); 857 if (!ret1) { 858 ret = 1; 859 goto out; 860 } 861 862 /* invalid revision indentifier */ 863 vmxon_region = tmp_region; 864 *vmxon_region = 0xba9da9; 865 ret1 = vmx_on(); 866 report("test vmxon with invalid revision identifier", ret1); 867 if (!ret1) { 868 ret = 1; 869 goto out; 870 } 871 872 /* and finally a valid region */ 873 *vmxon_region = basic.revision; 874 ret = vmx_on(); 875 report("test vmxon with valid vmxon region", !ret); 876 877 out: 878 return ret; 879 } 880 881 static void test_vmptrld(void) 882 { 883 struct vmcs *vmcs, *tmp_root; 884 int width = cpuid_maxphyaddr(); 885 886 vmcs = alloc_page(); 887 vmcs->revision_id = basic.revision; 888 889 /* Unaligned page access */ 890 tmp_root = (struct vmcs *)((intptr_t)vmcs + 1); 891 report("test vmptrld with unaligned vmcs", 892 make_vmcs_current(tmp_root) == 1); 893 894 /* gpa bits beyond physical address width are set*/ 895 tmp_root = (struct vmcs *)((intptr_t)vmcs | 896 ((u64)1 << (width+1))); 897 report("test vmptrld with vmcs address bits set beyond physical address width", 898 make_vmcs_current(tmp_root) == 1); 899 900 /* Pass VMXON region */ 901 tmp_root = (struct vmcs *)vmxon_region; 902 report("test vmptrld with vmxon region", 903 make_vmcs_current(tmp_root) == 1); 904 905 report("test vmptrld with valid vmcs region", make_vmcs_current(vmcs) == 0); 906 } 907 908 static void test_vmptrst(void) 909 { 910 int ret; 911 struct vmcs *vmcs1, *vmcs2; 912 913 vmcs1 = alloc_page(); 914 memset(vmcs1, 0, PAGE_SIZE); 915 init_vmcs(&vmcs1); 916 ret = vmcs_save(&vmcs2); 917 report("test vmptrst", (!ret) && (vmcs1 == vmcs2)); 918 } 919 920 struct vmx_ctl_msr { 921 const char *name; 922 u32 index, true_index; 923 u32 default1; 924 } vmx_ctl_msr[] = { 925 { "MSR_IA32_VMX_PINBASED_CTLS", MSR_IA32_VMX_PINBASED_CTLS, 926 MSR_IA32_VMX_TRUE_PIN, 0x16 }, 927 { "MSR_IA32_VMX_PROCBASED_CTLS", MSR_IA32_VMX_PROCBASED_CTLS, 928 MSR_IA32_VMX_TRUE_PROC, 0x401e172 }, 929 { "MSR_IA32_VMX_PROCBASED_CTLS2", MSR_IA32_VMX_PROCBASED_CTLS2, 930 MSR_IA32_VMX_PROCBASED_CTLS2, 0 }, 931 { "MSR_IA32_VMX_EXIT_CTLS", MSR_IA32_VMX_EXIT_CTLS, 932 MSR_IA32_VMX_TRUE_EXIT, 0x36dff }, 933 { "MSR_IA32_VMX_ENTRY_CTLS", MSR_IA32_VMX_ENTRY_CTLS, 934 MSR_IA32_VMX_TRUE_ENTRY, 0x11ff }, 935 }; 936 937 static void test_vmx_caps(void) 938 { 939 u64 val, default1, fixed0, fixed1; 940 union vmx_ctrl_msr ctrl, true_ctrl; 941 unsigned int n; 942 bool ok; 943 944 printf("\nTest suite: VMX capability reporting\n"); 945 946 report("MSR_IA32_VMX_BASIC", 947 (basic.revision & (1ul << 31)) == 0 && 948 basic.size > 0 && basic.size <= 4096 && 949 (basic.type == 0 || basic.type == 6) && 950 basic.reserved1 == 0 && basic.reserved2 == 0); 951 952 val = rdmsr(MSR_IA32_VMX_MISC); 953 report("MSR_IA32_VMX_MISC", 954 (!(ctrl_cpu_rev[1].clr & CPU_URG) || val & (1ul << 5)) && 955 ((val >> 16) & 0x1ff) <= 256 && 956 (val & 0xc0007e00) == 0); 957 958 for (n = 0; n < ARRAY_SIZE(vmx_ctl_msr); n++) { 959 ctrl.val = rdmsr(vmx_ctl_msr[n].index); 960 default1 = vmx_ctl_msr[n].default1; 961 ok = (ctrl.set & default1) == default1; 962 ok = ok && (ctrl.set & ~ctrl.clr) == 0; 963 if (ok && basic.ctrl) { 964 true_ctrl.val = rdmsr(vmx_ctl_msr[n].true_index); 965 ok = ctrl.clr == true_ctrl.clr; 966 ok = ok && ctrl.set == (true_ctrl.set | default1); 967 } 968 report(vmx_ctl_msr[n].name, ok); 969 } 970 971 fixed0 = rdmsr(MSR_IA32_VMX_CR0_FIXED0); 972 fixed1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1); 973 report("MSR_IA32_VMX_IA32_VMX_CR0_FIXED0/1", 974 ((fixed0 ^ fixed1) & ~fixed1) == 0); 975 976 fixed0 = rdmsr(MSR_IA32_VMX_CR4_FIXED0); 977 fixed1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1); 978 report("MSR_IA32_VMX_IA32_VMX_CR4_FIXED0/1", 979 ((fixed0 ^ fixed1) & ~fixed1) == 0); 980 981 val = rdmsr(MSR_IA32_VMX_VMCS_ENUM); 982 report("MSR_IA32_VMX_VMCS_ENUM", 983 (val & 0x3e) >= 0x2a && 984 (val & 0xfffffffffffffc01Ull) == 0); 985 986 val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); 987 report("MSR_IA32_VMX_EPT_VPID_CAP", 988 (val & 0xfffff07ef98cbebeUll) == 0); 989 } 990 991 /* This function can only be called in guest */ 992 static void __attribute__((__used__)) hypercall(u32 hypercall_no) 993 { 994 u64 val = 0; 995 val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT; 996 hypercall_field = val; 997 asm volatile("vmcall\n\t"); 998 } 999 1000 static bool is_hypercall() 1001 { 1002 ulong reason, hyper_bit; 1003 1004 reason = vmcs_read(EXI_REASON) & 0xff; 1005 hyper_bit = hypercall_field & HYPERCALL_BIT; 1006 if (reason == VMX_VMCALL && hyper_bit) 1007 return true; 1008 return false; 1009 } 1010 1011 static int handle_hypercall() 1012 { 1013 ulong hypercall_no; 1014 1015 hypercall_no = hypercall_field & HYPERCALL_MASK; 1016 hypercall_field = 0; 1017 switch (hypercall_no) { 1018 case HYPERCALL_VMEXIT: 1019 return VMX_TEST_VMEXIT; 1020 default: 1021 printf("ERROR : Invalid hypercall number : %ld\n", hypercall_no); 1022 } 1023 return VMX_TEST_EXIT; 1024 } 1025 1026 static int exit_handler() 1027 { 1028 int ret; 1029 1030 current->exits++; 1031 regs.rflags = vmcs_read(GUEST_RFLAGS); 1032 if (is_hypercall()) 1033 ret = handle_hypercall(); 1034 else 1035 ret = current->exit_handler(); 1036 vmcs_write(GUEST_RFLAGS, regs.rflags); 1037 1038 return ret; 1039 } 1040 1041 /* 1042 * Called if vmlaunch or vmresume fails. 1043 * @early - failure due to "VMX controls and host-state area" (26.2) 1044 * @vmlaunch - was this a vmlaunch or vmresume 1045 * @rflags - host rflags 1046 */ 1047 static int 1048 entry_failure_handler(struct vmentry_failure *failure) 1049 { 1050 if (current->entry_failure_handler) 1051 return current->entry_failure_handler(failure); 1052 else 1053 return VMX_TEST_EXIT; 1054 } 1055 1056 static int vmx_run() 1057 { 1058 unsigned long host_rflags; 1059 1060 while (1) { 1061 u32 ret; 1062 u32 fail = 0; 1063 bool entered; 1064 struct vmentry_failure failure; 1065 1066 asm volatile ( 1067 "mov %[HOST_RSP], %%rdi\n\t" 1068 "vmwrite %%rsp, %%rdi\n\t" 1069 LOAD_GPR_C 1070 "cmpb $0, %[launched]\n\t" 1071 "jne 1f\n\t" 1072 "vmlaunch\n\t" 1073 "jmp 2f\n\t" 1074 "1: " 1075 "vmresume\n\t" 1076 "2: " 1077 SAVE_GPR_C 1078 "pushf\n\t" 1079 "pop %%rdi\n\t" 1080 "mov %%rdi, %[host_rflags]\n\t" 1081 "movl $1, %[fail]\n\t" 1082 "jmp 3f\n\t" 1083 "vmx_return:\n\t" 1084 SAVE_GPR_C 1085 "3: \n\t" 1086 : [fail]"+m"(fail), [host_rflags]"=m"(host_rflags) 1087 : [launched]"m"(launched), [HOST_RSP]"i"(HOST_RSP) 1088 : "rdi", "memory", "cc" 1089 1090 ); 1091 1092 entered = !fail && !(vmcs_read(EXI_REASON) & VMX_ENTRY_FAILURE); 1093 1094 if (entered) { 1095 /* 1096 * VMCS isn't in "launched" state if there's been any 1097 * entry failure (early or otherwise). 1098 */ 1099 launched = 1; 1100 ret = exit_handler(); 1101 } else { 1102 failure.flags = host_rflags; 1103 failure.vmlaunch = !launched; 1104 failure.instr = launched ? "vmresume" : "vmlaunch"; 1105 failure.early = fail; 1106 ret = entry_failure_handler(&failure); 1107 } 1108 1109 switch (ret) { 1110 case VMX_TEST_RESUME: 1111 continue; 1112 case VMX_TEST_VMEXIT: 1113 return 0; 1114 case VMX_TEST_EXIT: 1115 break; 1116 default: 1117 printf("ERROR : Invalid %s_handler return val %d.\n", 1118 entered ? "exit" : "entry_failure", 1119 ret); 1120 break; 1121 } 1122 1123 if (entered) 1124 print_vmexit_info(); 1125 else 1126 print_vmentry_failure_info(&failure); 1127 abort(); 1128 } 1129 } 1130 1131 static int test_run(struct vmx_test *test) 1132 { 1133 if (test->name == NULL) 1134 test->name = "(no name)"; 1135 if (vmx_on()) { 1136 printf("%s : vmxon failed.\n", __func__); 1137 return 1; 1138 } 1139 init_vmcs(&(test->vmcs)); 1140 /* Directly call test->init is ok here, init_vmcs has done 1141 vmcs init, vmclear and vmptrld*/ 1142 if (test->init && test->init(test->vmcs) != VMX_TEST_START) 1143 goto out; 1144 test->exits = 0; 1145 current = test; 1146 regs = test->guest_regs; 1147 vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2); 1148 launched = 0; 1149 printf("\nTest suite: %s\n", test->name); 1150 vmx_run(); 1151 out: 1152 if (vmx_off()) { 1153 printf("%s : vmxoff failed.\n", __func__); 1154 return 1; 1155 } 1156 return 0; 1157 } 1158 1159 extern struct vmx_test vmx_tests[]; 1160 1161 int main(void) 1162 { 1163 int i = 0; 1164 1165 setup_vm(); 1166 setup_idt(); 1167 hypercall_field = 0; 1168 1169 if (!(cpuid(1).c & (1 << 5))) { 1170 printf("WARNING: vmx not supported, add '-cpu host'\n"); 1171 goto exit; 1172 } 1173 init_vmx(); 1174 if (test_vmx_feature_control() != 0) 1175 goto exit; 1176 /* Set basic test ctxt the same as "null" */ 1177 current = &vmx_tests[0]; 1178 if (test_vmxon() != 0) 1179 goto exit; 1180 test_vmptrld(); 1181 test_vmclear(); 1182 test_vmptrst(); 1183 init_vmcs(&vmcs_root); 1184 if (vmx_run()) { 1185 report("test vmlaunch", 0); 1186 goto exit; 1187 } 1188 test_vmxoff(); 1189 test_vmx_caps(); 1190 1191 while (vmx_tests[++i].name != NULL) 1192 if (test_run(&vmx_tests[i])) 1193 goto exit; 1194 1195 exit: 1196 return report_summary(); 1197 } 1198