1 /* 2 * x86/vmx.c : Framework for testing nested virtualization 3 * This is a framework to test nested VMX for KVM, which 4 * started as a project of GSoC 2013. All test cases should 5 * be located in x86/vmx_tests.c and framework related 6 * functions should be in this file. 7 * 8 * How to write test cases? 9 * Add callbacks of test suite in variant "vmx_tests". You can 10 * write: 11 * 1. init function used for initializing test suite 12 * 2. main function for codes running in L2 guest, 13 * 3. exit_handler to handle vmexit of L2 to L1 14 * 4. syscall handler to handle L2 syscall vmexit 15 * 5. vmenter fail handler to handle direct failure of vmenter 16 * 6. guest_regs is loaded when vmenter and saved when 17 * vmexit, you can read and set it in exit_handler 18 * If no special function is needed for a test suite, use 19 * coressponding basic_* functions as callback. More handlers 20 * can be added to "vmx_tests", see details of "struct vmx_test" 21 * and function test_run(). 22 * 23 * Currently, vmx test framework only set up one VCPU and one 24 * concurrent guest test environment with same paging for L2 and 25 * L1. For usage of EPT, only 1:1 mapped paging is used from VFN 26 * to PFN. 27 * 28 * Author : Arthur Chunqi Li <yzt356@gmail.com> 29 */ 30 31 #include "libcflat.h" 32 #include "processor.h" 33 #include "vm.h" 34 #include "desc.h" 35 #include "vmx.h" 36 #include "msr.h" 37 #include "smp.h" 38 #include "io.h" 39 40 u64 *vmxon_region; 41 struct vmcs *vmcs_root; 42 u32 vpid_cnt; 43 void *guest_stack, *guest_syscall_stack; 44 u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2]; 45 struct regs regs; 46 struct vmx_test *current; 47 u64 hypercall_field; 48 bool launched; 49 50 union vmx_basic basic; 51 union vmx_ctrl_msr ctrl_pin_rev; 52 union vmx_ctrl_msr ctrl_cpu_rev[2]; 53 union vmx_ctrl_msr ctrl_exit_rev; 54 union vmx_ctrl_msr ctrl_enter_rev; 55 union vmx_ept_vpid ept_vpid; 56 57 extern struct descriptor_table_ptr gdt64_desc; 58 extern struct descriptor_table_ptr idt_descr; 59 extern struct descriptor_table_ptr tss_descr; 60 extern void *vmx_return; 61 extern void *entry_sysenter; 62 extern void *guest_entry; 63 64 static volatile u32 stage; 65 66 void vmx_set_test_stage(u32 s) 67 { 68 barrier(); 69 stage = s; 70 barrier(); 71 } 72 73 u32 vmx_get_test_stage(void) 74 { 75 u32 s; 76 77 barrier(); 78 s = stage; 79 barrier(); 80 return s; 81 } 82 83 void vmx_inc_test_stage(void) 84 { 85 barrier(); 86 stage++; 87 barrier(); 88 } 89 90 static int make_vmcs_current(struct vmcs *vmcs) 91 { 92 bool ret; 93 u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; 94 95 asm volatile ("push %1; popf; vmptrld %2; setbe %0" 96 : "=q" (ret) : "q" (rflags), "m" (vmcs) : "cc"); 97 return ret; 98 } 99 100 /* entry_sysenter */ 101 asm( 102 ".align 4, 0x90\n\t" 103 ".globl entry_sysenter\n\t" 104 "entry_sysenter:\n\t" 105 SAVE_GPR 106 " and $0xf, %rax\n\t" 107 " mov %rax, %rdi\n\t" 108 " call syscall_handler\n\t" 109 LOAD_GPR 110 " vmresume\n\t" 111 ); 112 113 static void __attribute__((__used__)) syscall_handler(u64 syscall_no) 114 { 115 if (current->syscall_handler) 116 current->syscall_handler(syscall_no); 117 } 118 119 static inline int vmx_on() 120 { 121 bool ret; 122 u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; 123 asm volatile ("push %1; popf; vmxon %2; setbe %0\n\t" 124 : "=q" (ret) : "q" (rflags), "m" (vmxon_region) : "cc"); 125 return ret; 126 } 127 128 static inline int vmx_off() 129 { 130 bool ret; 131 u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; 132 133 asm volatile("push %1; popf; vmxoff; setbe %0\n\t" 134 : "=q"(ret) : "q" (rflags) : "cc"); 135 return ret; 136 } 137 138 void print_vmexit_info() 139 { 140 u64 guest_rip, guest_rsp; 141 ulong reason = vmcs_read(EXI_REASON) & 0xff; 142 ulong exit_qual = vmcs_read(EXI_QUALIFICATION); 143 guest_rip = vmcs_read(GUEST_RIP); 144 guest_rsp = vmcs_read(GUEST_RSP); 145 printf("VMEXIT info:\n"); 146 printf("\tvmexit reason = %ld\n", reason); 147 printf("\texit qualification = 0x%lx\n", exit_qual); 148 printf("\tBit 31 of reason = %lx\n", (vmcs_read(EXI_REASON) >> 31) & 1); 149 printf("\tguest_rip = 0x%lx\n", guest_rip); 150 printf("\tRAX=0x%lx RBX=0x%lx RCX=0x%lx RDX=0x%lx\n", 151 regs.rax, regs.rbx, regs.rcx, regs.rdx); 152 printf("\tRSP=0x%lx RBP=0x%lx RSI=0x%lx RDI=0x%lx\n", 153 guest_rsp, regs.rbp, regs.rsi, regs.rdi); 154 printf("\tR8 =0x%lx R9 =0x%lx R10=0x%lx R11=0x%lx\n", 155 regs.r8, regs.r9, regs.r10, regs.r11); 156 printf("\tR12=0x%lx R13=0x%lx R14=0x%lx R15=0x%lx\n", 157 regs.r12, regs.r13, regs.r14, regs.r15); 158 } 159 160 void 161 print_vmentry_failure_info(struct vmentry_failure *failure) { 162 if (failure->early) { 163 printf("Early %s failure: ", failure->instr); 164 switch (failure->flags & VMX_ENTRY_FLAGS) { 165 case X86_EFLAGS_ZF: 166 printf("current-VMCS pointer is not valid.\n"); 167 break; 168 case X86_EFLAGS_CF: 169 printf("error number is %ld. See Intel 30.4.\n", 170 vmcs_read(VMX_INST_ERROR)); 171 break; 172 default: 173 printf("unexpected flags %lx!\n", failure->flags); 174 } 175 } else { 176 u64 reason = vmcs_read(EXI_REASON); 177 u64 qual = vmcs_read(EXI_QUALIFICATION); 178 179 printf("Non-early %s failure (reason=0x%lx, qual=0x%lx): ", 180 failure->instr, reason, qual); 181 182 switch (reason & 0xff) { 183 case VMX_FAIL_STATE: 184 printf("invalid guest state\n"); 185 break; 186 case VMX_FAIL_MSR: 187 printf("MSR loading\n"); 188 break; 189 case VMX_FAIL_MCHECK: 190 printf("machine-check event\n"); 191 break; 192 default: 193 printf("unexpected basic exit reason %ld\n", 194 reason & 0xff); 195 } 196 197 if (!(reason & VMX_ENTRY_FAILURE)) 198 printf("\tVMX_ENTRY_FAILURE BIT NOT SET!\n"); 199 200 if (reason & 0x7fff0000) 201 printf("\tRESERVED BITS SET!\n"); 202 } 203 } 204 205 206 static void test_vmclear(void) 207 { 208 struct vmcs *tmp_root; 209 int width = cpuid_maxphyaddr(); 210 211 /* 212 * Note- The tests below do not necessarily have a 213 * valid VMCS, but that's ok since the invalid vmcs 214 * is only used for a specific test and is discarded 215 * without touching its contents 216 */ 217 218 /* Unaligned page access */ 219 tmp_root = (struct vmcs *)((intptr_t)vmcs_root + 1); 220 report("test vmclear with unaligned vmcs", 221 vmcs_clear(tmp_root) == 1); 222 223 /* gpa bits beyond physical address width are set*/ 224 tmp_root = (struct vmcs *)((intptr_t)vmcs_root | 225 ((u64)1 << (width+1))); 226 report("test vmclear with vmcs address bits set beyond physical address width", 227 vmcs_clear(tmp_root) == 1); 228 229 /* Pass VMXON region */ 230 tmp_root = (struct vmcs *)vmxon_region; 231 report("test vmclear with vmxon region", 232 vmcs_clear(tmp_root) == 1); 233 234 /* Valid VMCS */ 235 report("test vmclear with valid vmcs region", vmcs_clear(vmcs_root) == 0); 236 237 } 238 239 static void test_vmxoff(void) 240 { 241 int ret; 242 243 ret = vmx_off(); 244 report("test vmxoff", !ret); 245 } 246 247 static void __attribute__((__used__)) guest_main(void) 248 { 249 current->guest_main(); 250 } 251 252 /* guest_entry */ 253 asm( 254 ".align 4, 0x90\n\t" 255 ".globl entry_guest\n\t" 256 "guest_entry:\n\t" 257 " call guest_main\n\t" 258 " mov $1, %edi\n\t" 259 " call hypercall\n\t" 260 ); 261 262 /* EPT paging structure related functions */ 263 /* split_large_ept_entry: Split a 2M/1G large page into 512 smaller PTEs. 264 @ptep : large page table entry to split 265 @level : level of ptep (2 or 3) 266 */ 267 static void split_large_ept_entry(unsigned long *ptep, int level) 268 { 269 unsigned long *new_pt; 270 unsigned long gpa; 271 unsigned long pte; 272 unsigned long prototype; 273 int i; 274 275 pte = *ptep; 276 assert(pte & EPT_PRESENT); 277 assert(pte & EPT_LARGE_PAGE); 278 assert(level == 2 || level == 3); 279 280 new_pt = alloc_page(); 281 assert(new_pt); 282 memset(new_pt, 0, PAGE_SIZE); 283 284 prototype = pte & ~EPT_ADDR_MASK; 285 if (level == 2) 286 prototype &= ~EPT_LARGE_PAGE; 287 288 gpa = pte & EPT_ADDR_MASK; 289 for (i = 0; i < EPT_PGDIR_ENTRIES; i++) { 290 new_pt[i] = prototype | gpa; 291 gpa += 1ul << EPT_LEVEL_SHIFT(level - 1); 292 } 293 294 pte &= ~EPT_LARGE_PAGE; 295 pte &= ~EPT_ADDR_MASK; 296 pte |= virt_to_phys(new_pt); 297 298 *ptep = pte; 299 } 300 301 /* install_ept_entry : Install a page to a given level in EPT 302 @pml4 : addr of pml4 table 303 @pte_level : level of PTE to set 304 @guest_addr : physical address of guest 305 @pte : pte value to set 306 @pt_page : address of page table, NULL for a new page 307 */ 308 void install_ept_entry(unsigned long *pml4, 309 int pte_level, 310 unsigned long guest_addr, 311 unsigned long pte, 312 unsigned long *pt_page) 313 { 314 int level; 315 unsigned long *pt = pml4; 316 unsigned offset; 317 318 for (level = EPT_PAGE_LEVEL; level > pte_level; --level) { 319 offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) 320 & EPT_PGDIR_MASK; 321 if (!(pt[offset] & (EPT_PRESENT))) { 322 unsigned long *new_pt = pt_page; 323 if (!new_pt) 324 new_pt = alloc_page(); 325 else 326 pt_page = 0; 327 memset(new_pt, 0, PAGE_SIZE); 328 pt[offset] = virt_to_phys(new_pt) 329 | EPT_RA | EPT_WA | EPT_EA; 330 } else if (pt[offset] & EPT_LARGE_PAGE) 331 split_large_ept_entry(&pt[offset], level); 332 pt = phys_to_virt(pt[offset] & EPT_ADDR_MASK); 333 } 334 offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) & EPT_PGDIR_MASK; 335 pt[offset] = pte; 336 } 337 338 /* Map a page, @perm is the permission of the page */ 339 void install_ept(unsigned long *pml4, 340 unsigned long phys, 341 unsigned long guest_addr, 342 u64 perm) 343 { 344 install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0); 345 } 346 347 /* Map a 1G-size page */ 348 void install_1g_ept(unsigned long *pml4, 349 unsigned long phys, 350 unsigned long guest_addr, 351 u64 perm) 352 { 353 install_ept_entry(pml4, 3, guest_addr, 354 (phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0); 355 } 356 357 /* Map a 2M-size page */ 358 void install_2m_ept(unsigned long *pml4, 359 unsigned long phys, 360 unsigned long guest_addr, 361 u64 perm) 362 { 363 install_ept_entry(pml4, 2, guest_addr, 364 (phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0); 365 } 366 367 /* setup_ept_range : Setup a range of 1:1 mapped page to EPT paging structure. 368 @start : start address of guest page 369 @len : length of address to be mapped 370 @map_1g : whether 1G page map is used 371 @map_2m : whether 2M page map is used 372 @perm : permission for every page 373 */ 374 void setup_ept_range(unsigned long *pml4, unsigned long start, 375 unsigned long len, int map_1g, int map_2m, u64 perm) 376 { 377 u64 phys = start; 378 u64 max = (u64)len + (u64)start; 379 380 if (map_1g) { 381 while (phys + PAGE_SIZE_1G <= max) { 382 install_1g_ept(pml4, phys, phys, perm); 383 phys += PAGE_SIZE_1G; 384 } 385 } 386 if (map_2m) { 387 while (phys + PAGE_SIZE_2M <= max) { 388 install_2m_ept(pml4, phys, phys, perm); 389 phys += PAGE_SIZE_2M; 390 } 391 } 392 while (phys + PAGE_SIZE <= max) { 393 install_ept(pml4, phys, phys, perm); 394 phys += PAGE_SIZE; 395 } 396 } 397 398 /* get_ept_pte : Get the PTE of a given level in EPT, 399 @level == 1 means get the latest level*/ 400 unsigned long get_ept_pte(unsigned long *pml4, 401 unsigned long guest_addr, int level) 402 { 403 int l; 404 unsigned long *pt = pml4, pte; 405 unsigned offset; 406 407 if (level < 1 || level > 3) 408 return -1; 409 for (l = EPT_PAGE_LEVEL; ; --l) { 410 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 411 pte = pt[offset]; 412 if (!(pte & (EPT_PRESENT))) 413 return 0; 414 if (l == level) 415 break; 416 if (l < 4 && (pte & EPT_LARGE_PAGE)) 417 return pte; 418 pt = (unsigned long *)(pte & EPT_ADDR_MASK); 419 } 420 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 421 pte = pt[offset]; 422 return pte; 423 } 424 425 void ept_sync(int type, u64 eptp) 426 { 427 switch (type) { 428 case INVEPT_SINGLE: 429 if (ept_vpid.val & EPT_CAP_INVEPT_SINGLE) { 430 invept(INVEPT_SINGLE, eptp); 431 break; 432 } 433 /* else fall through */ 434 case INVEPT_GLOBAL: 435 if (ept_vpid.val & EPT_CAP_INVEPT_ALL) { 436 invept(INVEPT_GLOBAL, eptp); 437 break; 438 } 439 /* else fall through */ 440 default: 441 printf("WARNING: invept is not supported!\n"); 442 } 443 } 444 445 int set_ept_pte(unsigned long *pml4, unsigned long guest_addr, 446 int level, u64 pte_val) 447 { 448 int l; 449 unsigned long *pt = pml4; 450 unsigned offset; 451 452 if (level < 1 || level > 3) 453 return -1; 454 for (l = EPT_PAGE_LEVEL; ; --l) { 455 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 456 if (l == level) 457 break; 458 if (!(pt[offset] & (EPT_PRESENT))) 459 return -1; 460 pt = (unsigned long *)(pt[offset] & EPT_ADDR_MASK); 461 } 462 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 463 pt[offset] = pte_val; 464 return 0; 465 } 466 467 void vpid_sync(int type, u16 vpid) 468 { 469 switch(type) { 470 case INVVPID_SINGLE: 471 if (ept_vpid.val & VPID_CAP_INVVPID_SINGLE) { 472 invvpid(INVVPID_SINGLE, vpid, 0); 473 break; 474 } 475 case INVVPID_ALL: 476 if (ept_vpid.val & VPID_CAP_INVVPID_ALL) { 477 invvpid(INVVPID_ALL, vpid, 0); 478 break; 479 } 480 default: 481 printf("WARNING: invvpid is not supported\n"); 482 } 483 } 484 485 static void init_vmcs_ctrl(void) 486 { 487 /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ 488 /* 26.2.1.1 */ 489 vmcs_write(PIN_CONTROLS, ctrl_pin); 490 /* Disable VMEXIT of IO instruction */ 491 vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]); 492 if (ctrl_cpu_rev[0].set & CPU_SECONDARY) { 493 ctrl_cpu[1] = (ctrl_cpu[1] | ctrl_cpu_rev[1].set) & 494 ctrl_cpu_rev[1].clr; 495 vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]); 496 } 497 vmcs_write(CR3_TARGET_COUNT, 0); 498 vmcs_write(VPID, ++vpid_cnt); 499 } 500 501 static void init_vmcs_host(void) 502 { 503 /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ 504 /* 26.2.1.2 */ 505 vmcs_write(HOST_EFER, rdmsr(MSR_EFER)); 506 507 /* 26.2.1.3 */ 508 vmcs_write(ENT_CONTROLS, ctrl_enter); 509 vmcs_write(EXI_CONTROLS, ctrl_exit); 510 511 /* 26.2.2 */ 512 vmcs_write(HOST_CR0, read_cr0()); 513 vmcs_write(HOST_CR3, read_cr3()); 514 vmcs_write(HOST_CR4, read_cr4()); 515 vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter)); 516 vmcs_write(HOST_SYSENTER_CS, KERNEL_CS); 517 518 /* 26.2.3 */ 519 vmcs_write(HOST_SEL_CS, KERNEL_CS); 520 vmcs_write(HOST_SEL_SS, KERNEL_DS); 521 vmcs_write(HOST_SEL_DS, KERNEL_DS); 522 vmcs_write(HOST_SEL_ES, KERNEL_DS); 523 vmcs_write(HOST_SEL_FS, KERNEL_DS); 524 vmcs_write(HOST_SEL_GS, KERNEL_DS); 525 vmcs_write(HOST_SEL_TR, TSS_MAIN); 526 vmcs_write(HOST_BASE_TR, tss_descr.base); 527 vmcs_write(HOST_BASE_GDTR, gdt64_desc.base); 528 vmcs_write(HOST_BASE_IDTR, idt_descr.base); 529 vmcs_write(HOST_BASE_FS, 0); 530 vmcs_write(HOST_BASE_GS, 0); 531 532 /* Set other vmcs area */ 533 vmcs_write(PF_ERROR_MASK, 0); 534 vmcs_write(PF_ERROR_MATCH, 0); 535 vmcs_write(VMCS_LINK_PTR, ~0ul); 536 vmcs_write(VMCS_LINK_PTR_HI, ~0ul); 537 vmcs_write(HOST_RIP, (u64)(&vmx_return)); 538 } 539 540 static void init_vmcs_guest(void) 541 { 542 /* 26.3 CHECKING AND LOADING GUEST STATE */ 543 ulong guest_cr0, guest_cr4, guest_cr3; 544 /* 26.3.1.1 */ 545 guest_cr0 = read_cr0(); 546 guest_cr4 = read_cr4(); 547 guest_cr3 = read_cr3(); 548 if (ctrl_enter & ENT_GUEST_64) { 549 guest_cr0 |= X86_CR0_PG; 550 guest_cr4 |= X86_CR4_PAE; 551 } 552 if ((ctrl_enter & ENT_GUEST_64) == 0) 553 guest_cr4 &= (~X86_CR4_PCIDE); 554 if (guest_cr0 & X86_CR0_PG) 555 guest_cr0 |= X86_CR0_PE; 556 vmcs_write(GUEST_CR0, guest_cr0); 557 vmcs_write(GUEST_CR3, guest_cr3); 558 vmcs_write(GUEST_CR4, guest_cr4); 559 vmcs_write(GUEST_SYSENTER_CS, KERNEL_CS); 560 vmcs_write(GUEST_SYSENTER_ESP, 561 (u64)(guest_syscall_stack + PAGE_SIZE - 1)); 562 vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter)); 563 vmcs_write(GUEST_DR7, 0); 564 vmcs_write(GUEST_EFER, rdmsr(MSR_EFER)); 565 566 /* 26.3.1.2 */ 567 vmcs_write(GUEST_SEL_CS, KERNEL_CS); 568 vmcs_write(GUEST_SEL_SS, KERNEL_DS); 569 vmcs_write(GUEST_SEL_DS, KERNEL_DS); 570 vmcs_write(GUEST_SEL_ES, KERNEL_DS); 571 vmcs_write(GUEST_SEL_FS, KERNEL_DS); 572 vmcs_write(GUEST_SEL_GS, KERNEL_DS); 573 vmcs_write(GUEST_SEL_TR, TSS_MAIN); 574 vmcs_write(GUEST_SEL_LDTR, 0); 575 576 vmcs_write(GUEST_BASE_CS, 0); 577 vmcs_write(GUEST_BASE_ES, 0); 578 vmcs_write(GUEST_BASE_SS, 0); 579 vmcs_write(GUEST_BASE_DS, 0); 580 vmcs_write(GUEST_BASE_FS, 0); 581 vmcs_write(GUEST_BASE_GS, 0); 582 vmcs_write(GUEST_BASE_TR, tss_descr.base); 583 vmcs_write(GUEST_BASE_LDTR, 0); 584 585 vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF); 586 vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF); 587 vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF); 588 vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF); 589 vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF); 590 vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF); 591 vmcs_write(GUEST_LIMIT_LDTR, 0xffff); 592 vmcs_write(GUEST_LIMIT_TR, tss_descr.limit); 593 594 vmcs_write(GUEST_AR_CS, 0xa09b); 595 vmcs_write(GUEST_AR_DS, 0xc093); 596 vmcs_write(GUEST_AR_ES, 0xc093); 597 vmcs_write(GUEST_AR_FS, 0xc093); 598 vmcs_write(GUEST_AR_GS, 0xc093); 599 vmcs_write(GUEST_AR_SS, 0xc093); 600 vmcs_write(GUEST_AR_LDTR, 0x82); 601 vmcs_write(GUEST_AR_TR, 0x8b); 602 603 /* 26.3.1.3 */ 604 vmcs_write(GUEST_BASE_GDTR, gdt64_desc.base); 605 vmcs_write(GUEST_BASE_IDTR, idt_descr.base); 606 vmcs_write(GUEST_LIMIT_GDTR, gdt64_desc.limit); 607 vmcs_write(GUEST_LIMIT_IDTR, idt_descr.limit); 608 609 /* 26.3.1.4 */ 610 vmcs_write(GUEST_RIP, (u64)(&guest_entry)); 611 vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1)); 612 vmcs_write(GUEST_RFLAGS, 0x2); 613 614 /* 26.3.1.5 */ 615 vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE); 616 vmcs_write(GUEST_INTR_STATE, 0); 617 } 618 619 static int init_vmcs(struct vmcs **vmcs) 620 { 621 *vmcs = alloc_page(); 622 memset(*vmcs, 0, PAGE_SIZE); 623 (*vmcs)->revision_id = basic.revision; 624 /* vmclear first to init vmcs */ 625 if (vmcs_clear(*vmcs)) { 626 printf("%s : vmcs_clear error\n", __func__); 627 return 1; 628 } 629 630 if (make_vmcs_current(*vmcs)) { 631 printf("%s : make_vmcs_current error\n", __func__); 632 return 1; 633 } 634 635 /* All settings to pin/exit/enter/cpu 636 control fields should be placed here */ 637 ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI; 638 ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64; 639 ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64); 640 /* DIsable IO instruction VMEXIT now */ 641 ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP)); 642 ctrl_cpu[1] = 0; 643 644 ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr; 645 ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr; 646 ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr; 647 ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr; 648 649 init_vmcs_ctrl(); 650 init_vmcs_host(); 651 init_vmcs_guest(); 652 return 0; 653 } 654 655 static void init_vmx(void) 656 { 657 ulong fix_cr0_set, fix_cr0_clr; 658 ulong fix_cr4_set, fix_cr4_clr; 659 660 vmxon_region = alloc_page(); 661 memset(vmxon_region, 0, PAGE_SIZE); 662 663 fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0); 664 fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1); 665 fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0); 666 fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1); 667 basic.val = rdmsr(MSR_IA32_VMX_BASIC); 668 ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN 669 : MSR_IA32_VMX_PINBASED_CTLS); 670 ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT 671 : MSR_IA32_VMX_EXIT_CTLS); 672 ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY 673 : MSR_IA32_VMX_ENTRY_CTLS); 674 ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC 675 : MSR_IA32_VMX_PROCBASED_CTLS); 676 if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0) 677 ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2); 678 else 679 ctrl_cpu_rev[1].val = 0; 680 if ((ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID)) != 0) 681 ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); 682 else 683 ept_vpid.val = 0; 684 685 write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set); 686 write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE); 687 688 *vmxon_region = basic.revision; 689 690 guest_stack = alloc_page(); 691 memset(guest_stack, 0, PAGE_SIZE); 692 guest_syscall_stack = alloc_page(); 693 memset(guest_syscall_stack, 0, PAGE_SIZE); 694 } 695 696 static void do_vmxon_off(void *data) 697 { 698 vmx_on(); 699 vmx_off(); 700 } 701 702 static void do_write_feature_control(void *data) 703 { 704 wrmsr(MSR_IA32_FEATURE_CONTROL, 0); 705 } 706 707 static int test_vmx_feature_control(void) 708 { 709 u64 ia32_feature_control; 710 bool vmx_enabled; 711 712 ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); 713 vmx_enabled = ((ia32_feature_control & 0x5) == 0x5); 714 if ((ia32_feature_control & 0x5) == 0x5) { 715 printf("VMX enabled and locked by BIOS\n"); 716 return 0; 717 } else if (ia32_feature_control & 0x1) { 718 printf("ERROR: VMX locked out by BIOS!?\n"); 719 return 1; 720 } 721 722 wrmsr(MSR_IA32_FEATURE_CONTROL, 0); 723 report("test vmxon with FEATURE_CONTROL cleared", 724 test_for_exception(GP_VECTOR, &do_vmxon_off, NULL)); 725 726 wrmsr(MSR_IA32_FEATURE_CONTROL, 0x4); 727 report("test vmxon without FEATURE_CONTROL lock", 728 test_for_exception(GP_VECTOR, &do_vmxon_off, NULL)); 729 730 wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5); 731 vmx_enabled = ((rdmsr(MSR_IA32_FEATURE_CONTROL) & 0x5) == 0x5); 732 report("test enable VMX in FEATURE_CONTROL", vmx_enabled); 733 734 report("test FEATURE_CONTROL lock bit", 735 test_for_exception(GP_VECTOR, &do_write_feature_control, NULL)); 736 737 return !vmx_enabled; 738 } 739 740 static int test_vmxon(void) 741 { 742 int ret, ret1; 743 u64 *tmp_region = vmxon_region; 744 int width = cpuid_maxphyaddr(); 745 746 /* Unaligned page access */ 747 vmxon_region = (u64 *)((intptr_t)vmxon_region + 1); 748 ret1 = vmx_on(); 749 report("test vmxon with unaligned vmxon region", ret1); 750 if (!ret1) { 751 ret = 1; 752 goto out; 753 } 754 755 /* gpa bits beyond physical address width are set*/ 756 vmxon_region = (u64 *)((intptr_t)tmp_region | ((u64)1 << (width+1))); 757 ret1 = vmx_on(); 758 report("test vmxon with bits set beyond physical address width", ret1); 759 if (!ret1) { 760 ret = 1; 761 goto out; 762 } 763 764 /* invalid revision indentifier */ 765 vmxon_region = tmp_region; 766 *vmxon_region = 0xba9da9; 767 ret1 = vmx_on(); 768 report("test vmxon with invalid revision identifier", ret1); 769 if (!ret1) { 770 ret = 1; 771 goto out; 772 } 773 774 /* and finally a valid region */ 775 *vmxon_region = basic.revision; 776 ret = vmx_on(); 777 report("test vmxon with valid vmxon region", !ret); 778 779 out: 780 return ret; 781 } 782 783 static void test_vmptrld(void) 784 { 785 struct vmcs *vmcs, *tmp_root; 786 int width = cpuid_maxphyaddr(); 787 788 vmcs = alloc_page(); 789 vmcs->revision_id = basic.revision; 790 791 /* Unaligned page access */ 792 tmp_root = (struct vmcs *)((intptr_t)vmcs + 1); 793 report("test vmptrld with unaligned vmcs", 794 make_vmcs_current(tmp_root) == 1); 795 796 /* gpa bits beyond physical address width are set*/ 797 tmp_root = (struct vmcs *)((intptr_t)vmcs | 798 ((u64)1 << (width+1))); 799 report("test vmptrld with vmcs address bits set beyond physical address width", 800 make_vmcs_current(tmp_root) == 1); 801 802 /* Pass VMXON region */ 803 tmp_root = (struct vmcs *)vmxon_region; 804 report("test vmptrld with vmxon region", 805 make_vmcs_current(tmp_root) == 1); 806 807 report("test vmptrld with valid vmcs region", make_vmcs_current(vmcs) == 0); 808 } 809 810 static void test_vmptrst(void) 811 { 812 int ret; 813 struct vmcs *vmcs1, *vmcs2; 814 815 vmcs1 = alloc_page(); 816 memset(vmcs1, 0, PAGE_SIZE); 817 init_vmcs(&vmcs1); 818 ret = vmcs_save(&vmcs2); 819 report("test vmptrst", (!ret) && (vmcs1 == vmcs2)); 820 } 821 822 struct vmx_ctl_msr { 823 const char *name; 824 u32 index, true_index; 825 u32 default1; 826 } vmx_ctl_msr[] = { 827 { "MSR_IA32_VMX_PINBASED_CTLS", MSR_IA32_VMX_PINBASED_CTLS, 828 MSR_IA32_VMX_TRUE_PIN, 0x16 }, 829 { "MSR_IA32_VMX_PROCBASED_CTLS", MSR_IA32_VMX_PROCBASED_CTLS, 830 MSR_IA32_VMX_TRUE_PROC, 0x401e172 }, 831 { "MSR_IA32_VMX_PROCBASED_CTLS2", MSR_IA32_VMX_PROCBASED_CTLS2, 832 MSR_IA32_VMX_PROCBASED_CTLS2, 0 }, 833 { "MSR_IA32_VMX_EXIT_CTLS", MSR_IA32_VMX_EXIT_CTLS, 834 MSR_IA32_VMX_TRUE_EXIT, 0x36dff }, 835 { "MSR_IA32_VMX_ENTRY_CTLS", MSR_IA32_VMX_ENTRY_CTLS, 836 MSR_IA32_VMX_TRUE_ENTRY, 0x11ff }, 837 }; 838 839 static void test_vmx_caps(void) 840 { 841 u64 val, default1, fixed0, fixed1; 842 union vmx_ctrl_msr ctrl, true_ctrl; 843 unsigned int n; 844 bool ok; 845 846 printf("\nTest suite: VMX capability reporting\n"); 847 848 report("MSR_IA32_VMX_BASIC", 849 (basic.revision & (1ul << 31)) == 0 && 850 basic.size > 0 && basic.size <= 4096 && 851 (basic.type == 0 || basic.type == 6) && 852 basic.reserved1 == 0 && basic.reserved2 == 0); 853 854 val = rdmsr(MSR_IA32_VMX_MISC); 855 report("MSR_IA32_VMX_MISC", 856 (!(ctrl_cpu_rev[1].clr & CPU_URG) || val & (1ul << 5)) && 857 ((val >> 16) & 0x1ff) <= 256 && 858 (val & 0xc0007e00) == 0); 859 860 for (n = 0; n < ARRAY_SIZE(vmx_ctl_msr); n++) { 861 ctrl.val = rdmsr(vmx_ctl_msr[n].index); 862 default1 = vmx_ctl_msr[n].default1; 863 ok = (ctrl.set & default1) == default1; 864 ok = ok && (ctrl.set & ~ctrl.clr) == 0; 865 if (ok && basic.ctrl) { 866 true_ctrl.val = rdmsr(vmx_ctl_msr[n].true_index); 867 ok = ctrl.clr == true_ctrl.clr; 868 ok = ok && ctrl.set == (true_ctrl.set | default1); 869 } 870 report(vmx_ctl_msr[n].name, ok); 871 } 872 873 fixed0 = rdmsr(MSR_IA32_VMX_CR0_FIXED0); 874 fixed1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1); 875 report("MSR_IA32_VMX_IA32_VMX_CR0_FIXED0/1", 876 ((fixed0 ^ fixed1) & ~fixed1) == 0); 877 878 fixed0 = rdmsr(MSR_IA32_VMX_CR4_FIXED0); 879 fixed1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1); 880 report("MSR_IA32_VMX_IA32_VMX_CR4_FIXED0/1", 881 ((fixed0 ^ fixed1) & ~fixed1) == 0); 882 883 val = rdmsr(MSR_IA32_VMX_VMCS_ENUM); 884 report("MSR_IA32_VMX_VMCS_ENUM", 885 (val & 0x3e) >= 0x2a && 886 (val & 0xfffffffffffffc01Ull) == 0); 887 888 val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); 889 report("MSR_IA32_VMX_EPT_VPID_CAP", 890 (val & 0xfffff07ef9eebebeUll) == 0); 891 } 892 893 /* This function can only be called in guest */ 894 static void __attribute__((__used__)) hypercall(u32 hypercall_no) 895 { 896 u64 val = 0; 897 val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT; 898 hypercall_field = val; 899 asm volatile("vmcall\n\t"); 900 } 901 902 static bool is_hypercall() 903 { 904 ulong reason, hyper_bit; 905 906 reason = vmcs_read(EXI_REASON) & 0xff; 907 hyper_bit = hypercall_field & HYPERCALL_BIT; 908 if (reason == VMX_VMCALL && hyper_bit) 909 return true; 910 return false; 911 } 912 913 static int handle_hypercall() 914 { 915 ulong hypercall_no; 916 917 hypercall_no = hypercall_field & HYPERCALL_MASK; 918 hypercall_field = 0; 919 switch (hypercall_no) { 920 case HYPERCALL_VMEXIT: 921 return VMX_TEST_VMEXIT; 922 default: 923 printf("ERROR : Invalid hypercall number : %ld\n", hypercall_no); 924 } 925 return VMX_TEST_EXIT; 926 } 927 928 static int exit_handler() 929 { 930 int ret; 931 932 current->exits++; 933 regs.rflags = vmcs_read(GUEST_RFLAGS); 934 if (is_hypercall()) 935 ret = handle_hypercall(); 936 else 937 ret = current->exit_handler(); 938 vmcs_write(GUEST_RFLAGS, regs.rflags); 939 940 return ret; 941 } 942 943 /* 944 * Called if vmlaunch or vmresume fails. 945 * @early - failure due to "VMX controls and host-state area" (26.2) 946 * @vmlaunch - was this a vmlaunch or vmresume 947 * @rflags - host rflags 948 */ 949 static int 950 entry_failure_handler(struct vmentry_failure *failure) 951 { 952 if (current->entry_failure_handler) 953 return current->entry_failure_handler(failure); 954 else 955 return VMX_TEST_EXIT; 956 } 957 958 static int vmx_run() 959 { 960 unsigned long host_rflags; 961 962 while (1) { 963 u32 ret; 964 u32 fail = 0; 965 bool entered; 966 struct vmentry_failure failure; 967 968 asm volatile ( 969 "mov %[HOST_RSP], %%rdi\n\t" 970 "vmwrite %%rsp, %%rdi\n\t" 971 LOAD_GPR_C 972 "cmpl $0, %[launched]\n\t" 973 "jne 1f\n\t" 974 "vmlaunch\n\t" 975 "jmp 2f\n\t" 976 "1: " 977 "vmresume\n\t" 978 "2: " 979 SAVE_GPR_C 980 "pushf\n\t" 981 "pop %%rdi\n\t" 982 "mov %%rdi, %[host_rflags]\n\t" 983 "movl $1, %[fail]\n\t" 984 "jmp 3f\n\t" 985 "vmx_return:\n\t" 986 SAVE_GPR_C 987 "3: \n\t" 988 : [fail]"+m"(fail), [host_rflags]"=m"(host_rflags) 989 : [launched]"m"(launched), [HOST_RSP]"i"(HOST_RSP) 990 : "rdi", "memory", "cc" 991 992 ); 993 994 entered = !fail && !(vmcs_read(EXI_REASON) & VMX_ENTRY_FAILURE); 995 996 if (entered) { 997 /* 998 * VMCS isn't in "launched" state if there's been any 999 * entry failure (early or otherwise). 1000 */ 1001 launched = 1; 1002 ret = exit_handler(); 1003 } else { 1004 failure.flags = host_rflags; 1005 failure.vmlaunch = !launched; 1006 failure.instr = launched ? "vmresume" : "vmlaunch"; 1007 failure.early = fail; 1008 ret = entry_failure_handler(&failure); 1009 } 1010 1011 switch (ret) { 1012 case VMX_TEST_RESUME: 1013 continue; 1014 case VMX_TEST_VMEXIT: 1015 return 0; 1016 case VMX_TEST_EXIT: 1017 break; 1018 default: 1019 printf("ERROR : Invalid %s_handler return val %d.\n", 1020 entered ? "exit" : "entry_failure", 1021 ret); 1022 break; 1023 } 1024 1025 if (entered) 1026 print_vmexit_info(); 1027 else 1028 print_vmentry_failure_info(&failure); 1029 abort(); 1030 } 1031 } 1032 1033 static int test_run(struct vmx_test *test) 1034 { 1035 if (test->name == NULL) 1036 test->name = "(no name)"; 1037 if (vmx_on()) { 1038 printf("%s : vmxon failed.\n", __func__); 1039 return 1; 1040 } 1041 init_vmcs(&(test->vmcs)); 1042 /* Directly call test->init is ok here, init_vmcs has done 1043 vmcs init, vmclear and vmptrld*/ 1044 if (test->init && test->init(test->vmcs) != VMX_TEST_START) 1045 goto out; 1046 test->exits = 0; 1047 current = test; 1048 regs = test->guest_regs; 1049 vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2); 1050 launched = 0; 1051 printf("\nTest suite: %s\n", test->name); 1052 vmx_run(); 1053 out: 1054 if (vmx_off()) { 1055 printf("%s : vmxoff failed.\n", __func__); 1056 return 1; 1057 } 1058 return 0; 1059 } 1060 1061 extern struct vmx_test vmx_tests[]; 1062 1063 int main(void) 1064 { 1065 int i = 0; 1066 1067 setup_vm(); 1068 setup_idt(); 1069 hypercall_field = 0; 1070 1071 if (!(cpuid(1).c & (1 << 5))) { 1072 printf("WARNING: vmx not supported, add '-cpu host'\n"); 1073 goto exit; 1074 } 1075 init_vmx(); 1076 if (test_vmx_feature_control() != 0) 1077 goto exit; 1078 /* Set basic test ctxt the same as "null" */ 1079 current = &vmx_tests[0]; 1080 if (test_vmxon() != 0) 1081 goto exit; 1082 test_vmptrld(); 1083 test_vmclear(); 1084 test_vmptrst(); 1085 init_vmcs(&vmcs_root); 1086 if (vmx_run()) { 1087 report("test vmlaunch", 0); 1088 goto exit; 1089 } 1090 test_vmxoff(); 1091 test_vmx_caps(); 1092 1093 while (vmx_tests[++i].name != NULL) 1094 if (test_run(&vmx_tests[i])) 1095 goto exit; 1096 1097 exit: 1098 return report_summary(); 1099 } 1100