/*
 * x86/vmx.c : Framework for testing nested virtualization
 *	This is a framework to test nested VMX for KVM, which
 *	started as a project of GSoC 2013. All test cases should
 *	be located in x86/vmx_tests.c and framework-related
 *	functions should be in this file.
 *
 * How to write test cases?
 *	Add callbacks of the test suite to the variable "vmx_tests".
 *	You can write:
 *	1. an init function used for initializing the test suite
 *	2. a main function for code running in the L2 guest
 *	3. an exit_handler to handle vmexits from L2 to L1
 *	4. a syscall handler to handle L2 syscall vmexits
 *	5. a vmenter fail handler to handle direct failures of vmenter
 *	6. guest_regs, which is loaded on vmenter and saved on
 *	   vmexit; you can read and set it in exit_handler
 *	If no special function is needed for a test suite, use the
 *	corresponding basic_* functions as callbacks. More handlers
 *	can be added to "vmx_tests"; see the details of "struct vmx_test"
 *	and the function test_run().
 *
 *	Currently, the VMX test framework only sets up one VCPU and one
 *	concurrent guest test environment, with the same paging for L2
 *	and L1. When EPT is used, only a 1:1 mapping from guest frame
 *	to host frame is installed.
 *
 * Author : Arthur Chunqi Li <yzt356@gmail.com>
 */

#include "libcflat.h"
#include "processor.h"
#include "vm.h"
#include "desc.h"
#include "vmx.h"
#include "msr.h"
#include "smp.h"

u64 *vmxon_region;
struct vmcs *vmcs_root;
u32 vpid_cnt;
void *guest_stack, *guest_syscall_stack;
u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
struct regs regs;
struct vmx_test *current;
u64 hypercall_field;
bool launched;

union vmx_basic basic;
union vmx_ctrl_msr ctrl_pin_rev;
union vmx_ctrl_msr ctrl_cpu_rev[2];
union vmx_ctrl_msr ctrl_exit_rev;
union vmx_ctrl_msr ctrl_enter_rev;
union vmx_ept_vpid ept_vpid;

extern struct descriptor_table_ptr gdt64_desc;
extern struct descriptor_table_ptr idt_descr;
extern struct descriptor_table_ptr tss_descr;
extern void *vmx_return;
extern void *entry_sysenter;
extern void *guest_entry;

static volatile u32 stage;

void vmx_set_test_stage(u32 s)
{
	barrier();
	stage = s;
	barrier();
}

u32 vmx_get_test_stage(void)
{
	u32 s;

	barrier();
	s = stage;
	barrier();
	return s;
}

void vmx_inc_test_stage(void)
{
	barrier();
	stage++;
	barrier();
}
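/*
 * Illustrative sketch (hypothetical, not part of the framework): the
 * stage is a handshake between the L2 guest and the L1 exit handler.
 * The guest advances through numbered stages, and the exit handler
 * checks the stage to decide what to verify next:
 *
 *	static void example_guest_main(void)
 *	{
 *		vmx_set_test_stage(0);
 *		asm volatile("vmcall");		// exit to L1
 *		report("L1 advanced the stage",
 *		       vmx_get_test_stage() == 1);
 *	}
 *
 *	// ...with the matching exit_handler doing:
 *	//	if (vmx_get_test_stage() == 0)
 *	//		vmx_inc_test_stage();
 */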
static int make_vmcs_current(struct vmcs *vmcs)
{
	bool ret;
	u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;

	asm volatile ("push %1; popf; vmptrld %2; setbe %0"
		      : "=q" (ret) : "q" (rflags), "m" (vmcs) : "cc");
	return ret;
}

/* entry_sysenter */
asm(
	".align	4, 0x90\n\t"
	".globl	entry_sysenter\n\t"
	"entry_sysenter:\n\t"
	SAVE_GPR
	"	and	$0xf, %rax\n\t"
	"	mov	%rax, %rdi\n\t"
	"	call	syscall_handler\n\t"
	LOAD_GPR
	"	vmresume\n\t"
);

static void __attribute__((__used__)) syscall_handler(u64 syscall_no)
{
	if (current->syscall_handler)
		current->syscall_handler(syscall_no);
}

static inline int vmx_on(void)
{
	bool ret;
	u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;

	asm volatile ("push %1; popf; vmxon %2; setbe %0\n\t"
		      : "=q" (ret) : "q" (rflags), "m" (vmxon_region) : "cc");
	return ret;
}

static inline int vmx_off(void)
{
	bool ret;
	u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;

	asm volatile ("push %1; popf; vmxoff; setbe %0\n\t"
		      : "=q" (ret) : "q" (rflags) : "cc");
	return ret;
}
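/*
 * Note on the wrappers above: CF and ZF are deliberately set in RFLAGS
 * before each VMX instruction, so "setbe" yields 1 only if the
 * instruction left one of them set, i.e. on VMfailInvalid (CF=1) or
 * VMfailValid (ZF=1, error number in VMX_INST_ERROR). Callers therefore
 * only need a truth test:
 *
 *	if (vmx_on())
 *		printf("vmxon VMfailed\n");
 */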
"guest_entry:\n\t" 256 " call guest_main\n\t" 257 " mov $1, %edi\n\t" 258 " call hypercall\n\t" 259 ); 260 261 /* EPT paging structure related functions */ 262 /* split_large_ept_entry: Split a 2M/1G large page into 512 smaller PTEs. 263 @ptep : large page table entry to split 264 @level : level of ptep (2 or 3) 265 */ 266 static void split_large_ept_entry(unsigned long *ptep, int level) 267 { 268 unsigned long *new_pt; 269 unsigned long gpa; 270 unsigned long pte; 271 unsigned long prototype; 272 int i; 273 274 pte = *ptep; 275 assert(pte & EPT_PRESENT); 276 assert(pte & EPT_LARGE_PAGE); 277 assert(level == 2 || level == 3); 278 279 new_pt = alloc_page(); 280 assert(new_pt); 281 memset(new_pt, 0, PAGE_SIZE); 282 283 prototype = pte & ~EPT_ADDR_MASK; 284 if (level == 2) 285 prototype &= ~EPT_LARGE_PAGE; 286 287 gpa = pte & EPT_ADDR_MASK; 288 for (i = 0; i < EPT_PGDIR_ENTRIES; i++) { 289 new_pt[i] = prototype | gpa; 290 gpa += 1ul << EPT_LEVEL_SHIFT(level - 1); 291 } 292 293 pte &= ~EPT_LARGE_PAGE; 294 pte &= ~EPT_ADDR_MASK; 295 pte |= virt_to_phys(new_pt); 296 297 *ptep = pte; 298 } 299 300 /* install_ept_entry : Install a page to a given level in EPT 301 @pml4 : addr of pml4 table 302 @pte_level : level of PTE to set 303 @guest_addr : physical address of guest 304 @pte : pte value to set 305 @pt_page : address of page table, NULL for a new page 306 */ 307 void install_ept_entry(unsigned long *pml4, 308 int pte_level, 309 unsigned long guest_addr, 310 unsigned long pte, 311 unsigned long *pt_page) 312 { 313 int level; 314 unsigned long *pt = pml4; 315 unsigned offset; 316 317 for (level = EPT_PAGE_LEVEL; level > pte_level; --level) { 318 offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) 319 & EPT_PGDIR_MASK; 320 if (!(pt[offset] & (EPT_PRESENT))) { 321 unsigned long *new_pt = pt_page; 322 if (!new_pt) 323 new_pt = alloc_page(); 324 else 325 pt_page = 0; 326 memset(new_pt, 0, PAGE_SIZE); 327 pt[offset] = virt_to_phys(new_pt) 328 | EPT_RA | EPT_WA | EPT_EA; 329 } else if (pt[offset] & EPT_LARGE_PAGE) 330 split_large_ept_entry(&pt[offset], level); 331 pt = phys_to_virt(pt[offset] & EPT_ADDR_MASK); 332 } 333 offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) & EPT_PGDIR_MASK; 334 pt[offset] = pte; 335 } 336 337 /* Map a page, @perm is the permission of the page */ 338 void install_ept(unsigned long *pml4, 339 unsigned long phys, 340 unsigned long guest_addr, 341 u64 perm) 342 { 343 install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0); 344 } 345 346 /* Map a 1G-size page */ 347 void install_1g_ept(unsigned long *pml4, 348 unsigned long phys, 349 unsigned long guest_addr, 350 u64 perm) 351 { 352 install_ept_entry(pml4, 3, guest_addr, 353 (phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0); 354 } 355 356 /* Map a 2M-size page */ 357 void install_2m_ept(unsigned long *pml4, 358 unsigned long phys, 359 unsigned long guest_addr, 360 u64 perm) 361 { 362 install_ept_entry(pml4, 2, guest_addr, 363 (phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0); 364 } 365 366 /* setup_ept_range : Setup a range of 1:1 mapped page to EPT paging structure. 
/* EPT paging structure related functions */
/*
 * split_large_ept_entry: Split a 2M/1G large page into 512 smaller PTEs.
 *	@ptep : large page table entry to split
 *	@level : level of ptep (2 or 3)
 */
static void split_large_ept_entry(unsigned long *ptep, int level)
{
	unsigned long *new_pt;
	unsigned long gpa;
	unsigned long pte;
	unsigned long prototype;
	int i;

	pte = *ptep;
	assert(pte & EPT_PRESENT);
	assert(pte & EPT_LARGE_PAGE);
	assert(level == 2 || level == 3);

	new_pt = alloc_page();
	assert(new_pt);
	memset(new_pt, 0, PAGE_SIZE);

	prototype = pte & ~EPT_ADDR_MASK;
	if (level == 2)
		prototype &= ~EPT_LARGE_PAGE;

	gpa = pte & EPT_ADDR_MASK;
	for (i = 0; i < EPT_PGDIR_ENTRIES; i++) {
		new_pt[i] = prototype | gpa;
		gpa += 1ul << EPT_LEVEL_SHIFT(level - 1);
	}

	pte &= ~EPT_LARGE_PAGE;
	pte &= ~EPT_ADDR_MASK;
	pte |= virt_to_phys(new_pt);

	*ptep = pte;
}

/*
 * install_ept_entry : Install a page at a given level in EPT
 *	@pml4 : addr of pml4 table
 *	@pte_level : level of PTE to set
 *	@guest_addr : guest physical address
 *	@pte : pte value to set
 *	@pt_page : address of page table, NULL for a new page
 */
void install_ept_entry(unsigned long *pml4,
		int pte_level,
		unsigned long guest_addr,
		unsigned long pte,
		unsigned long *pt_page)
{
	int level;
	unsigned long *pt = pml4;
	unsigned offset;

	for (level = EPT_PAGE_LEVEL; level > pte_level; --level) {
		offset = (guest_addr >> EPT_LEVEL_SHIFT(level))
				& EPT_PGDIR_MASK;
		if (!(pt[offset] & (EPT_PRESENT))) {
			unsigned long *new_pt = pt_page;
			if (!new_pt)
				new_pt = alloc_page();
			else
				pt_page = NULL;
			memset(new_pt, 0, PAGE_SIZE);
			pt[offset] = virt_to_phys(new_pt)
					| EPT_RA | EPT_WA | EPT_EA;
		} else if (pt[offset] & EPT_LARGE_PAGE)
			split_large_ept_entry(&pt[offset], level);
		pt = phys_to_virt(pt[offset] & EPT_ADDR_MASK);
	}
	offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) & EPT_PGDIR_MASK;
	pt[offset] = pte;
}

/* Map a page, @perm is the permission of the page */
void install_ept(unsigned long *pml4,
		unsigned long phys,
		unsigned long guest_addr,
		u64 perm)
{
	install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, NULL);
}

/* Map a 1G-size page */
void install_1g_ept(unsigned long *pml4,
		unsigned long phys,
		unsigned long guest_addr,
		u64 perm)
{
	install_ept_entry(pml4, 3, guest_addr,
			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, NULL);
}

/* Map a 2M-size page */
void install_2m_ept(unsigned long *pml4,
		unsigned long phys,
		unsigned long guest_addr,
		u64 perm)
{
	install_ept_entry(pml4, 2, guest_addr,
			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, NULL);
}

/*
 * setup_ept_range : Set up a range of 1:1 mapped pages in the EPT
 * paging structure.
 *	@start : start address of guest page
 *	@len : length of address to be mapped
 *	@map_1g : whether 1G page map is used
 *	@map_2m : whether 2M page map is used
 *	@perm : permission for every page
 */
void setup_ept_range(unsigned long *pml4, unsigned long start,
		     unsigned long len, int map_1g, int map_2m, u64 perm)
{
	u64 phys = start;
	u64 max = (u64)len + (u64)start;

	if (map_1g) {
		while (phys + PAGE_SIZE_1G <= max) {
			install_1g_ept(pml4, phys, phys, perm);
			phys += PAGE_SIZE_1G;
		}
	}
	if (map_2m) {
		while (phys + PAGE_SIZE_2M <= max) {
			install_2m_ept(pml4, phys, phys, perm);
			phys += PAGE_SIZE_2M;
		}
	}
	while (phys + PAGE_SIZE <= max) {
		install_ept(pml4, phys, phys, perm);
		phys += PAGE_SIZE;
	}
}

/*
 * get_ept_pte : Get the PTE of a given level in EPT;
 * @level == 1 means get the leaf-level PTE.
 */
unsigned long get_ept_pte(unsigned long *pml4,
		unsigned long guest_addr, int level)
{
	int l;
	unsigned long *pt = pml4, pte;
	unsigned offset;

	if (level < 1 || level > 3)
		return -1;
	for (l = EPT_PAGE_LEVEL; ; --l) {
		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
		pte = pt[offset];
		if (!(pte & (EPT_PRESENT)))
			return 0;
		if (l == level)
			break;
		if (l < 4 && (pte & EPT_LARGE_PAGE))
			return pte;
		pt = phys_to_virt(pte & EPT_ADDR_MASK);
	}
	return pte;
}

void ept_sync(int type, u64 eptp)
{
	switch (type) {
	case INVEPT_SINGLE:
		if (ept_vpid.val & EPT_CAP_INVEPT_SINGLE) {
			invept(INVEPT_SINGLE, eptp);
			break;
		}
		/* else fall through */
	case INVEPT_GLOBAL:
		if (ept_vpid.val & EPT_CAP_INVEPT_ALL) {
			invept(INVEPT_GLOBAL, eptp);
			break;
		}
		/* else fall through */
	default:
		printf("WARNING: invept is not supported!\n");
	}
}

int set_ept_pte(unsigned long *pml4, unsigned long guest_addr,
		int level, u64 pte_val)
{
	int l;
	unsigned long *pt = pml4;
	unsigned offset;

	if (level < 1 || level > 3)
		return -1;
	for (l = EPT_PAGE_LEVEL; ; --l) {
		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
		if (l == level)
			break;
		if (!(pt[offset] & (EPT_PRESENT)))
			return -1;
		pt = phys_to_virt(pt[offset] & EPT_ADDR_MASK);
	}
	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
	pt[offset] = pte_val;
	return 0;
}

void vpid_sync(int type, u16 vpid)
{
	switch (type) {
	case INVVPID_SINGLE:
		if (ept_vpid.val & VPID_CAP_INVVPID_SINGLE) {
			invvpid(INVVPID_SINGLE, vpid, 0);
			break;
		}
		/* else fall through */
	case INVVPID_ALL:
		if (ept_vpid.val & VPID_CAP_INVVPID_ALL) {
			invvpid(INVVPID_ALL, vpid, 0);
			break;
		}
		/* else fall through */
	default:
		printf("WARNING: invvpid is not supported\n");
	}
}
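/*
 * Usage sketch (illustrative, not part of the framework): tests in
 * x86/vmx_tests.c typically build an identity map with the helpers
 * above and then point EPTP at the new PML4. The exact EPTP encoding
 * (page-walk length, memory type) is elided here; see the SDM and
 * vmx.h for the layout.
 *
 *	unsigned long *pml4 = alloc_page();
 *
 *	memset(pml4, 0, PAGE_SIZE);
 *	setup_ept_range(pml4, 0, 1ul << 30, 0, 1,
 *			EPT_RA | EPT_WA | EPT_EA);
 *	// ...write virt_to_phys(pml4) plus the EPTP control bits into
 *	// the EPTP field, enable EPT in the secondary controls, then:
 *	ept_sync(INVEPT_GLOBAL, eptp);
 */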
static void init_vmcs_ctrl(void)
{
	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
	/* 26.2.1.1 */
	vmcs_write(PIN_CONTROLS, ctrl_pin);
	/* Disable VMEXIT of IO instruction */
	vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
	if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
		ctrl_cpu[1] = (ctrl_cpu[1] | ctrl_cpu_rev[1].set) &
			      ctrl_cpu_rev[1].clr;
		vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
	}
	vmcs_write(CR3_TARGET_COUNT, 0);
	vmcs_write(VPID, ++vpid_cnt);
}

static void init_vmcs_host(void)
{
	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
	/* 26.2.1.2 */
	vmcs_write(HOST_EFER, rdmsr(MSR_EFER));

	/* 26.2.1.3 */
	vmcs_write(ENT_CONTROLS, ctrl_enter);
	vmcs_write(EXI_CONTROLS, ctrl_exit);

	/* 26.2.2 */
	vmcs_write(HOST_CR0, read_cr0());
	vmcs_write(HOST_CR3, read_cr3());
	vmcs_write(HOST_CR4, read_cr4());
	vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
	vmcs_write(HOST_SYSENTER_CS, KERNEL_CS);

	/* 26.2.3 */
	vmcs_write(HOST_SEL_CS, KERNEL_CS);
	vmcs_write(HOST_SEL_SS, KERNEL_DS);
	vmcs_write(HOST_SEL_DS, KERNEL_DS);
	vmcs_write(HOST_SEL_ES, KERNEL_DS);
	vmcs_write(HOST_SEL_FS, KERNEL_DS);
	vmcs_write(HOST_SEL_GS, KERNEL_DS);
	vmcs_write(HOST_SEL_TR, TSS_MAIN);
	vmcs_write(HOST_BASE_TR, tss_descr.base);
	vmcs_write(HOST_BASE_GDTR, gdt64_desc.base);
	vmcs_write(HOST_BASE_IDTR, idt_descr.base);
	vmcs_write(HOST_BASE_FS, 0);
	vmcs_write(HOST_BASE_GS, 0);

	/* Set other vmcs area */
	vmcs_write(PF_ERROR_MASK, 0);
	vmcs_write(PF_ERROR_MATCH, 0);
	vmcs_write(VMCS_LINK_PTR, ~0ul);
	vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
	vmcs_write(HOST_RIP, (u64)(&vmx_return));
}

static void init_vmcs_guest(void)
{
	/* 26.3 CHECKING AND LOADING GUEST STATE */
	ulong guest_cr0, guest_cr4, guest_cr3;
	/* 26.3.1.1 */
	guest_cr0 = read_cr0();
	guest_cr4 = read_cr4();
	guest_cr3 = read_cr3();
	if (ctrl_enter & ENT_GUEST_64) {
		guest_cr0 |= X86_CR0_PG;
		guest_cr4 |= X86_CR4_PAE;
	}
	if ((ctrl_enter & ENT_GUEST_64) == 0)
		guest_cr4 &= (~X86_CR4_PCIDE);
	if (guest_cr0 & X86_CR0_PG)
		guest_cr0 |= X86_CR0_PE;
	vmcs_write(GUEST_CR0, guest_cr0);
	vmcs_write(GUEST_CR3, guest_cr3);
	vmcs_write(GUEST_CR4, guest_cr4);
	vmcs_write(GUEST_SYSENTER_CS, KERNEL_CS);
	vmcs_write(GUEST_SYSENTER_ESP,
		   (u64)(guest_syscall_stack + PAGE_SIZE - 1));
	vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
	vmcs_write(GUEST_DR7, 0);
	vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));

	/* 26.3.1.2 */
	vmcs_write(GUEST_SEL_CS, KERNEL_CS);
	vmcs_write(GUEST_SEL_SS, KERNEL_DS);
	vmcs_write(GUEST_SEL_DS, KERNEL_DS);
	vmcs_write(GUEST_SEL_ES, KERNEL_DS);
	vmcs_write(GUEST_SEL_FS, KERNEL_DS);
	vmcs_write(GUEST_SEL_GS, KERNEL_DS);
	vmcs_write(GUEST_SEL_TR, TSS_MAIN);
	vmcs_write(GUEST_SEL_LDTR, 0);

	vmcs_write(GUEST_BASE_CS, 0);
	vmcs_write(GUEST_BASE_ES, 0);
	vmcs_write(GUEST_BASE_SS, 0);
	vmcs_write(GUEST_BASE_DS, 0);
	vmcs_write(GUEST_BASE_FS, 0);
	vmcs_write(GUEST_BASE_GS, 0);
	vmcs_write(GUEST_BASE_TR, tss_descr.base);
	vmcs_write(GUEST_BASE_LDTR, 0);

	vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
	vmcs_write(GUEST_LIMIT_TR, tss_descr.limit);

	vmcs_write(GUEST_AR_CS, 0xa09b);
	vmcs_write(GUEST_AR_DS, 0xc093);
	vmcs_write(GUEST_AR_ES, 0xc093);
	vmcs_write(GUEST_AR_FS, 0xc093);
	vmcs_write(GUEST_AR_GS, 0xc093);
	vmcs_write(GUEST_AR_SS, 0xc093);
	vmcs_write(GUEST_AR_LDTR, 0x82);
	vmcs_write(GUEST_AR_TR, 0x8b);

	/* 26.3.1.3 */
	vmcs_write(GUEST_BASE_GDTR, gdt64_desc.base);
	vmcs_write(GUEST_BASE_IDTR, idt_descr.base);
	vmcs_write(GUEST_LIMIT_GDTR, gdt64_desc.limit);
	vmcs_write(GUEST_LIMIT_IDTR, idt_descr.limit);

	/* 26.3.1.4 */
	vmcs_write(GUEST_RIP, (u64)(&guest_entry));
	vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1));
	vmcs_write(GUEST_RFLAGS, 0x2);

	/* 26.3.1.5 */
	vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE);
	vmcs_write(GUEST_INTR_STATE, 0);
}
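/*
 * For reference, the access-rights values written above decode per the
 * VMX segment AR layout (Intel SDM):
 *	0xa09b : type=0xb (execute/read, accessed), S=1, DPL=0, P=1,
 *		 L=1, G=1 -- a 64-bit flat code segment
 *	0xc093 : type=0x3 (read/write, accessed), S=1, DPL=0, P=1,
 *		 D/B=1, G=1 -- a flat data segment
 *	0x008b : busy TSS; 0x0082 : LDT
 */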
static int init_vmcs(struct vmcs **vmcs)
{
	*vmcs = alloc_page();
	memset(*vmcs, 0, PAGE_SIZE);
	(*vmcs)->revision_id = basic.revision;
	/* vmclear first to init vmcs */
	if (vmcs_clear(*vmcs)) {
		printf("%s : vmcs_clear error\n", __func__);
		return 1;
	}

	if (make_vmcs_current(*vmcs)) {
		printf("%s : make_vmcs_current error\n", __func__);
		return 1;
	}

	/*
	 * All settings of the pin/exit/enter/cpu
	 * control fields should be placed here.
	 */
	ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
	ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
	ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
	/* Disable IO instruction VMEXIT now */
	ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
	ctrl_cpu[1] = 0;

	ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
	ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
	ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
	ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;

	init_vmcs_ctrl();
	init_vmcs_host();
	init_vmcs_guest();
	return 0;
}

static void init_vmx(void)
{
	ulong fix_cr0_set, fix_cr0_clr;
	ulong fix_cr4_set, fix_cr4_clr;

	vmxon_region = alloc_page();
	memset(vmxon_region, 0, PAGE_SIZE);

	fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0);
	fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1);
	fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0);
	fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
			: MSR_IA32_VMX_PINBASED_CTLS);
	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
			: MSR_IA32_VMX_EXIT_CTLS);
	ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
			: MSR_IA32_VMX_ENTRY_CTLS);
	ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
			: MSR_IA32_VMX_PROCBASED_CTLS);
	if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0)
		ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
	else
		ctrl_cpu_rev[1].val = 0;
	if ((ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID)) != 0)
		ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
	else
		ept_vpid.val = 0;

	write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
	write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);

	*vmxon_region = basic.revision;

	guest_stack = alloc_page();
	memset(guest_stack, 0, PAGE_SIZE);
	guest_syscall_stack = alloc_page();
	memset(guest_syscall_stack, 0, PAGE_SIZE);
}
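/*
 * The "(ctrl | rev.set) & rev.clr" adjustments in init_vmcs() above
 * implement the SDM algorithm for VMX controls: the low 32 bits of each
 * capability MSR ("set", allowed-0) are bits that must be 1, and the
 * high 32 bits ("clr", allowed-1) are bits that may be 1. Worked
 * micro-example: with set=0x16 and clr=0xff, a requested value of 0x101
 * becomes (0x101 | 0x16) & 0xff = 0x17 -- the always-on bits are forced
 * on and the unsupported bit 8 is dropped.
 */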
static void do_vmxon_off(void *data)
{
	vmx_on();
	vmx_off();
}

static void do_write_feature_control(void *data)
{
	wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
}

static int test_vmx_feature_control(void)
{
	u64 ia32_feature_control;
	bool vmx_enabled;

	ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
	vmx_enabled = ((ia32_feature_control & 0x5) == 0x5);
	if (vmx_enabled) {
		printf("VMX enabled and locked by BIOS\n");
		return 0;
	} else if (ia32_feature_control & 0x1) {
		printf("ERROR: VMX locked out by BIOS!?\n");
		return 1;
	}

	wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
	report("test vmxon with FEATURE_CONTROL cleared",
	       test_for_exception(GP_VECTOR, &do_vmxon_off, NULL));

	wrmsr(MSR_IA32_FEATURE_CONTROL, 0x4);
	report("test vmxon without FEATURE_CONTROL lock",
	       test_for_exception(GP_VECTOR, &do_vmxon_off, NULL));

	wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5);
	vmx_enabled = ((rdmsr(MSR_IA32_FEATURE_CONTROL) & 0x5) == 0x5);
	report("test enable VMX in FEATURE_CONTROL", vmx_enabled);

	report("test FEATURE_CONTROL lock bit",
	       test_for_exception(GP_VECTOR, &do_write_feature_control, NULL));

	return !vmx_enabled;
}
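/*
 * IA32_FEATURE_CONTROL bits used above: bit 0 is the lock bit (while it
 * is clear, vmxon #GPs; once it is set, further wrmsr to the MSR #GPs
 * until reset) and bit 2 enables VMXON outside SMX. Hence 0x5 == locked
 * with VMX enabled, 0x4 == enabled but not locked (vmxon still #GPs),
 * and 0x1 == locked with VMX disabled.
 */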
report("test vmptrld with vmcs address bits set beyond physical address width", 799 make_vmcs_current(tmp_root) == 1); 800 801 /* Pass VMXON region */ 802 tmp_root = (struct vmcs *)vmxon_region; 803 report("test vmptrld with vmxon region", 804 make_vmcs_current(tmp_root) == 1); 805 806 report("test vmptrld with valid vmcs region", make_vmcs_current(vmcs) == 0); 807 } 808 809 static void test_vmptrst(void) 810 { 811 int ret; 812 struct vmcs *vmcs1, *vmcs2; 813 814 vmcs1 = alloc_page(); 815 memset(vmcs1, 0, PAGE_SIZE); 816 init_vmcs(&vmcs1); 817 ret = vmcs_save(&vmcs2); 818 report("test vmptrst", (!ret) && (vmcs1 == vmcs2)); 819 } 820 821 struct vmx_ctl_msr { 822 const char *name; 823 u32 index, true_index; 824 u32 default1; 825 } vmx_ctl_msr[] = { 826 { "MSR_IA32_VMX_PINBASED_CTLS", MSR_IA32_VMX_PINBASED_CTLS, 827 MSR_IA32_VMX_TRUE_PIN, 0x16 }, 828 { "MSR_IA32_VMX_PROCBASED_CTLS", MSR_IA32_VMX_PROCBASED_CTLS, 829 MSR_IA32_VMX_TRUE_PROC, 0x401e172 }, 830 { "MSR_IA32_VMX_PROCBASED_CTLS2", MSR_IA32_VMX_PROCBASED_CTLS2, 831 MSR_IA32_VMX_PROCBASED_CTLS2, 0 }, 832 { "MSR_IA32_VMX_EXIT_CTLS", MSR_IA32_VMX_EXIT_CTLS, 833 MSR_IA32_VMX_TRUE_EXIT, 0x36dff }, 834 { "MSR_IA32_VMX_ENTRY_CTLS", MSR_IA32_VMX_ENTRY_CTLS, 835 MSR_IA32_VMX_TRUE_ENTRY, 0x11ff }, 836 }; 837 838 static void test_vmx_caps(void) 839 { 840 u64 val, default1, fixed0, fixed1; 841 union vmx_ctrl_msr ctrl, true_ctrl; 842 unsigned int n; 843 bool ok; 844 845 printf("\nTest suite: VMX capability reporting\n"); 846 847 report("MSR_IA32_VMX_BASIC", 848 (basic.revision & (1ul << 31)) == 0 && 849 basic.size > 0 && basic.size <= 4096 && 850 (basic.type == 0 || basic.type == 6) && 851 basic.reserved1 == 0 && basic.reserved2 == 0); 852 853 val = rdmsr(MSR_IA32_VMX_MISC); 854 report("MSR_IA32_VMX_MISC", 855 (!(ctrl_cpu_rev[1].clr & CPU_URG) || val & (1ul << 5)) && 856 ((val >> 16) & 0x1ff) <= 256 && 857 (val & 0xc0007e00) == 0); 858 859 for (n = 0; n < ARRAY_SIZE(vmx_ctl_msr); n++) { 860 ctrl.val = rdmsr(vmx_ctl_msr[n].index); 861 default1 = vmx_ctl_msr[n].default1; 862 ok = (ctrl.set & default1) == default1; 863 ok = ok && (ctrl.set & ~ctrl.clr) == 0; 864 if (ok && basic.ctrl) { 865 true_ctrl.val = rdmsr(vmx_ctl_msr[n].true_index); 866 ok = ctrl.clr == true_ctrl.clr; 867 ok = ok && ctrl.set == (true_ctrl.set | default1); 868 } 869 report(vmx_ctl_msr[n].name, ok); 870 } 871 872 fixed0 = rdmsr(MSR_IA32_VMX_CR0_FIXED0); 873 fixed1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1); 874 report("MSR_IA32_VMX_IA32_VMX_CR0_FIXED0/1", 875 ((fixed0 ^ fixed1) & ~fixed1) == 0); 876 877 fixed0 = rdmsr(MSR_IA32_VMX_CR4_FIXED0); 878 fixed1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1); 879 report("MSR_IA32_VMX_IA32_VMX_CR4_FIXED0/1", 880 ((fixed0 ^ fixed1) & ~fixed1) == 0); 881 882 val = rdmsr(MSR_IA32_VMX_VMCS_ENUM); 883 report("MSR_IA32_VMX_VMCS_ENUM", 884 (val & 0x3e) >= 0x2a && 885 (val & 0xfffffffffffffc01Ull) == 0); 886 887 val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); 888 report("MSR_IA32_VMX_EPT_VPID_CAP", 889 (val & 0xfffff07ef9eebebeUll) == 0); 890 } 891 892 /* This function can only be called in guest */ 893 static void __attribute__((__used__)) hypercall(u32 hypercall_no) 894 { 895 u64 val = 0; 896 val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT; 897 hypercall_field = val; 898 asm volatile("vmcall\n\t"); 899 } 900 901 static bool is_hypercall() 902 { 903 ulong reason, hyper_bit; 904 905 reason = vmcs_read(EXI_REASON) & 0xff; 906 hyper_bit = hypercall_field & HYPERCALL_BIT; 907 if (reason == VMX_VMCALL && hyper_bit) 908 return true; 909 return false; 910 } 911 912 static int 
/* This function can only be called in guest */
static void __attribute__((__used__)) hypercall(u32 hypercall_no)
{
	u64 val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT;

	hypercall_field = val;
	asm volatile("vmcall\n\t");
}

static bool is_hypercall(void)
{
	ulong reason, hyper_bit;

	reason = vmcs_read(EXI_REASON) & 0xff;
	hyper_bit = hypercall_field & HYPERCALL_BIT;
	if (reason == VMX_VMCALL && hyper_bit)
		return true;
	return false;
}

static int handle_hypercall(void)
{
	ulong hypercall_no;

	hypercall_no = hypercall_field & HYPERCALL_MASK;
	hypercall_field = 0;
	switch (hypercall_no) {
	case HYPERCALL_VMEXIT:
		return VMX_TEST_VMEXIT;
	default:
		printf("ERROR : Invalid hypercall number : %ld\n", hypercall_no);
	}
	return VMX_TEST_EXIT;
}

static int exit_handler(void)
{
	int ret;

	current->exits++;
	regs.rflags = vmcs_read(GUEST_RFLAGS);
	if (is_hypercall())
		ret = handle_hypercall();
	else
		ret = current->exit_handler();
	vmcs_write(GUEST_RFLAGS, regs.rflags);

	return ret;
}

/*
 * Called if vmlaunch or vmresume fails. The vmentry_failure argument
 * describes the failure:
 *	@early - failure due to "VMX controls and host-state area" (26.2)
 *	@vmlaunch - was this a vmlaunch or a vmresume
 *	@flags - host rflags after the failed instruction
 */
static int
entry_failure_handler(struct vmentry_failure *failure)
{
	if (current->entry_failure_handler)
		return current->entry_failure_handler(failure);
	else
		return VMX_TEST_EXIT;
}

static int vmx_run(void)
{
	unsigned long host_rflags;

	while (1) {
		u32 ret;
		u32 fail = 0;
		bool entered;
		struct vmentry_failure failure;

		asm volatile (
			"mov %[HOST_RSP], %%rdi\n\t"
			"vmwrite %%rsp, %%rdi\n\t"
			LOAD_GPR_C
			"cmpb $0, %[launched]\n\t"
			"jne 1f\n\t"
			"vmlaunch\n\t"
			"jmp 2f\n\t"
			"1: "
			"vmresume\n\t"
			"2: "
			SAVE_GPR_C
			"pushf\n\t"
			"pop %%rdi\n\t"
			"mov %%rdi, %[host_rflags]\n\t"
			"movl $1, %[fail]\n\t"
			"jmp 3f\n\t"
			"vmx_return:\n\t"
			SAVE_GPR_C
			"3: \n\t"
			: [fail]"+m"(fail), [host_rflags]"=m"(host_rflags)
			: [launched]"m"(launched), [HOST_RSP]"i"(HOST_RSP)
			: "rdi", "memory", "cc"
		);

		entered = !fail && !(vmcs_read(EXI_REASON) & VMX_ENTRY_FAILURE);

		if (entered) {
			/*
			 * The VMCS isn't in "launched" state if there's
			 * been any entry failure (early or otherwise).
			 */
			launched = 1;
			ret = exit_handler();
		} else {
			failure.flags = host_rflags;
			failure.vmlaunch = !launched;
			failure.instr = launched ? "vmresume" : "vmlaunch";
			failure.early = fail;
			ret = entry_failure_handler(&failure);
		}

		switch (ret) {
		case VMX_TEST_RESUME:
			continue;
		case VMX_TEST_VMEXIT:
			return 0;
		case VMX_TEST_EXIT:
			break;
		default:
			printf("ERROR : Invalid %s_handler return val %d.\n",
			       entered ? "exit" : "entry_failure",
			       ret);
			break;
		}

		if (entered)
			print_vmexit_info();
		else
			print_vmentry_failure_info(&failure);
		abort();
	}
}
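/*
 * Illustrative exit_handler skeleton (hypothetical; real handlers live
 * in x86/vmx_tests.c): decode the basic exit reason, skip the exiting
 * instruction by advancing GUEST_RIP, and resume L2.
 *
 *	static int example_exit_handler(void)
 *	{
 *		ulong reason = vmcs_read(EXI_REASON) & 0xff;
 *		u64 guest_rip = vmcs_read(GUEST_RIP);
 *
 *		switch (reason) {
 *		case VMX_CPUID:	// CPUID always exits in non-root mode
 *			vmcs_write(GUEST_RIP,
 *				   guest_rip + vmcs_read(EXI_INST_LEN));
 *			return VMX_TEST_RESUME;
 *		default:
 *			print_vmexit_info();
 *			return VMX_TEST_EXIT;
 *		}
 *	}
 */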
"exit" : "entry_failure", 1020 ret); 1021 break; 1022 } 1023 1024 if (entered) 1025 print_vmexit_info(); 1026 else 1027 print_vmentry_failure_info(&failure); 1028 abort(); 1029 } 1030 } 1031 1032 static int test_run(struct vmx_test *test) 1033 { 1034 if (test->name == NULL) 1035 test->name = "(no name)"; 1036 if (vmx_on()) { 1037 printf("%s : vmxon failed.\n", __func__); 1038 return 1; 1039 } 1040 init_vmcs(&(test->vmcs)); 1041 /* Directly call test->init is ok here, init_vmcs has done 1042 vmcs init, vmclear and vmptrld*/ 1043 if (test->init && test->init(test->vmcs) != VMX_TEST_START) 1044 goto out; 1045 test->exits = 0; 1046 current = test; 1047 regs = test->guest_regs; 1048 vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2); 1049 launched = 0; 1050 printf("\nTest suite: %s\n", test->name); 1051 vmx_run(); 1052 out: 1053 if (vmx_off()) { 1054 printf("%s : vmxoff failed.\n", __func__); 1055 return 1; 1056 } 1057 return 0; 1058 } 1059 1060 extern struct vmx_test vmx_tests[]; 1061 1062 int main(void) 1063 { 1064 int i = 0; 1065 1066 setup_vm(); 1067 setup_idt(); 1068 hypercall_field = 0; 1069 1070 if (!(cpuid(1).c & (1 << 5))) { 1071 printf("WARNING: vmx not supported, add '-cpu host'\n"); 1072 goto exit; 1073 } 1074 init_vmx(); 1075 if (test_vmx_feature_control() != 0) 1076 goto exit; 1077 /* Set basic test ctxt the same as "null" */ 1078 current = &vmx_tests[0]; 1079 if (test_vmxon() != 0) 1080 goto exit; 1081 test_vmptrld(); 1082 test_vmclear(); 1083 test_vmptrst(); 1084 init_vmcs(&vmcs_root); 1085 if (vmx_run()) { 1086 report("test vmlaunch", 0); 1087 goto exit; 1088 } 1089 test_vmxoff(); 1090 test_vmx_caps(); 1091 1092 while (vmx_tests[++i].name != NULL) 1093 if (test_run(&vmx_tests[i])) 1094 goto exit; 1095 1096 exit: 1097 return report_summary(); 1098 } 1099