1 /* 2 * x86/vmx.c : Framework for testing nested virtualization 3 * This is a framework to test nested VMX for KVM, which 4 * started as a project of GSoC 2013. All test cases should 5 * be located in x86/vmx_tests.c and framework related 6 * functions should be in this file. 7 * 8 * How to write test cases? 9 * Add callbacks of test suite in variant "vmx_tests". You can 10 * write: 11 * 1. init function used for initializing test suite 12 * 2. main function for codes running in L2 guest, 13 * 3. exit_handler to handle vmexit of L2 to L1 14 * 4. syscall handler to handle L2 syscall vmexit 15 * 5. vmenter fail handler to handle direct failure of vmenter 16 * 6. guest_regs is loaded when vmenter and saved when 17 * vmexit, you can read and set it in exit_handler 18 * If no special function is needed for a test suite, use 19 * coressponding basic_* functions as callback. More handlers 20 * can be added to "vmx_tests", see details of "struct vmx_test" 21 * and function test_run(). 22 * 23 * Currently, vmx test framework only set up one VCPU and one 24 * concurrent guest test environment with same paging for L2 and 25 * L1. For usage of EPT, only 1:1 mapped paging is used from VFN 26 * to PFN. 27 * 28 * Author : Arthur Chunqi Li <yzt356@gmail.com> 29 */ 30 31 #include "libcflat.h" 32 #include "processor.h" 33 #include "vm.h" 34 #include "desc.h" 35 #include "vmx.h" 36 #include "msr.h" 37 #include "smp.h" 38 #include "io.h" 39 40 u64 *vmxon_region; 41 struct vmcs *vmcs_root; 42 u32 vpid_cnt; 43 void *guest_stack, *guest_syscall_stack; 44 u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2]; 45 struct regs regs; 46 struct vmx_test *current; 47 u64 hypercall_field; 48 bool launched; 49 u64 host_rflags; 50 51 union vmx_basic basic; 52 union vmx_ctrl_msr ctrl_pin_rev; 53 union vmx_ctrl_msr ctrl_cpu_rev[2]; 54 union vmx_ctrl_msr ctrl_exit_rev; 55 union vmx_ctrl_msr ctrl_enter_rev; 56 union vmx_ept_vpid ept_vpid; 57 58 extern struct descriptor_table_ptr gdt64_desc; 59 extern struct descriptor_table_ptr idt_descr; 60 extern struct descriptor_table_ptr tss_descr; 61 extern void *vmx_return; 62 extern void *entry_sysenter; 63 extern void *guest_entry; 64 65 static volatile u32 stage; 66 67 void vmx_set_test_stage(u32 s) 68 { 69 barrier(); 70 stage = s; 71 barrier(); 72 } 73 74 u32 vmx_get_test_stage(void) 75 { 76 u32 s; 77 78 barrier(); 79 s = stage; 80 barrier(); 81 return s; 82 } 83 84 void vmx_inc_test_stage(void) 85 { 86 barrier(); 87 stage++; 88 barrier(); 89 } 90 91 static int make_vmcs_current(struct vmcs *vmcs) 92 { 93 bool ret; 94 u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; 95 96 asm volatile ("push %1; popf; vmptrld %2; setbe %0" 97 : "=q" (ret) : "q" (rflags), "m" (vmcs) : "cc"); 98 return ret; 99 } 100 101 /* entry_sysenter */ 102 asm( 103 ".align 4, 0x90\n\t" 104 ".globl entry_sysenter\n\t" 105 "entry_sysenter:\n\t" 106 SAVE_GPR 107 " and $0xf, %rax\n\t" 108 " mov %rax, %rdi\n\t" 109 " call syscall_handler\n\t" 110 LOAD_GPR 111 " vmresume\n\t" 112 ); 113 114 static void __attribute__((__used__)) syscall_handler(u64 syscall_no) 115 { 116 if (current->syscall_handler) 117 current->syscall_handler(syscall_no); 118 } 119 120 static inline int vmx_on() 121 { 122 bool ret; 123 u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; 124 asm volatile ("push %1; popf; vmxon %2; setbe %0\n\t" 125 : "=q" (ret) : "q" (rflags), "m" (vmxon_region) : "cc"); 126 return ret; 127 } 128 129 static inline int vmx_off() 130 { 131 bool ret; 132 u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; 133 134 asm volatile("push %1; popf; vmxoff; setbe %0\n\t" 135 : "=q"(ret) : "q" (rflags) : "cc"); 136 return ret; 137 } 138 139 void print_vmexit_info() 140 { 141 u64 guest_rip, guest_rsp; 142 ulong reason = vmcs_read(EXI_REASON) & 0xff; 143 ulong exit_qual = vmcs_read(EXI_QUALIFICATION); 144 guest_rip = vmcs_read(GUEST_RIP); 145 guest_rsp = vmcs_read(GUEST_RSP); 146 printf("VMEXIT info:\n"); 147 printf("\tvmexit reason = %d\n", reason); 148 printf("\texit qualification = 0x%x\n", exit_qual); 149 printf("\tBit 31 of reason = %x\n", (vmcs_read(EXI_REASON) >> 31) & 1); 150 printf("\tguest_rip = 0x%llx\n", guest_rip); 151 printf("\tRAX=0x%llx RBX=0x%llx RCX=0x%llx RDX=0x%llx\n", 152 regs.rax, regs.rbx, regs.rcx, regs.rdx); 153 printf("\tRSP=0x%llx RBP=0x%llx RSI=0x%llx RDI=0x%llx\n", 154 guest_rsp, regs.rbp, regs.rsi, regs.rdi); 155 printf("\tR8 =0x%llx R9 =0x%llx R10=0x%llx R11=0x%llx\n", 156 regs.r8, regs.r9, regs.r10, regs.r11); 157 printf("\tR12=0x%llx R13=0x%llx R14=0x%llx R15=0x%llx\n", 158 regs.r12, regs.r13, regs.r14, regs.r15); 159 } 160 161 static void test_vmclear(void) 162 { 163 struct vmcs *tmp_root; 164 int width = cpuid_maxphyaddr(); 165 166 /* 167 * Note- The tests below do not necessarily have a 168 * valid VMCS, but that's ok since the invalid vmcs 169 * is only used for a specific test and is discarded 170 * without touching its contents 171 */ 172 173 /* Unaligned page access */ 174 tmp_root = (struct vmcs *)((intptr_t)vmcs_root + 1); 175 report("test vmclear with unaligned vmcs", 176 vmcs_clear(tmp_root) == 1); 177 178 /* gpa bits beyond physical address width are set*/ 179 tmp_root = (struct vmcs *)((intptr_t)vmcs_root | 180 ((u64)1 << (width+1))); 181 report("test vmclear with vmcs address bits set beyond physical address width", 182 vmcs_clear(tmp_root) == 1); 183 184 /* Pass VMXON region */ 185 tmp_root = (struct vmcs *)vmxon_region; 186 report("test vmclear with vmxon region", 187 vmcs_clear(tmp_root) == 1); 188 189 /* Valid VMCS */ 190 report("test vmclear with valid vmcs region", vmcs_clear(vmcs_root) == 0); 191 192 } 193 194 static void test_vmxoff(void) 195 { 196 int ret; 197 198 ret = vmx_off(); 199 report("test vmxoff", !ret); 200 } 201 202 static void __attribute__((__used__)) guest_main(void) 203 { 204 current->guest_main(); 205 } 206 207 /* guest_entry */ 208 asm( 209 ".align 4, 0x90\n\t" 210 ".globl entry_guest\n\t" 211 "guest_entry:\n\t" 212 " call guest_main\n\t" 213 " mov $1, %edi\n\t" 214 " call hypercall\n\t" 215 ); 216 217 /* EPT paging structure related functions */ 218 /* split_large_ept_entry: Split a 2M/1G large page into 512 smaller PTEs. 219 @ptep : large page table entry to split 220 @level : level of ptep (2 or 3) 221 */ 222 static void split_large_ept_entry(unsigned long *ptep, int level) 223 { 224 unsigned long *new_pt; 225 unsigned long gpa; 226 unsigned long pte; 227 unsigned long prototype; 228 int i; 229 230 pte = *ptep; 231 assert(pte & EPT_PRESENT); 232 assert(pte & EPT_LARGE_PAGE); 233 assert(level == 2 || level == 3); 234 235 new_pt = alloc_page(); 236 assert(new_pt); 237 memset(new_pt, 0, PAGE_SIZE); 238 239 prototype = pte & ~EPT_ADDR_MASK; 240 if (level == 2) 241 prototype &= ~EPT_LARGE_PAGE; 242 243 gpa = pte & EPT_ADDR_MASK; 244 for (i = 0; i < EPT_PGDIR_ENTRIES; i++) { 245 new_pt[i] = prototype | gpa; 246 gpa += 1ul << EPT_LEVEL_SHIFT(level - 1); 247 } 248 249 pte &= ~EPT_LARGE_PAGE; 250 pte &= ~EPT_ADDR_MASK; 251 pte |= virt_to_phys(new_pt); 252 253 *ptep = pte; 254 } 255 256 /* install_ept_entry : Install a page to a given level in EPT 257 @pml4 : addr of pml4 table 258 @pte_level : level of PTE to set 259 @guest_addr : physical address of guest 260 @pte : pte value to set 261 @pt_page : address of page table, NULL for a new page 262 */ 263 void install_ept_entry(unsigned long *pml4, 264 int pte_level, 265 unsigned long guest_addr, 266 unsigned long pte, 267 unsigned long *pt_page) 268 { 269 int level; 270 unsigned long *pt = pml4; 271 unsigned offset; 272 273 for (level = EPT_PAGE_LEVEL; level > pte_level; --level) { 274 offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) 275 & EPT_PGDIR_MASK; 276 if (!(pt[offset] & (EPT_PRESENT))) { 277 unsigned long *new_pt = pt_page; 278 if (!new_pt) 279 new_pt = alloc_page(); 280 else 281 pt_page = 0; 282 memset(new_pt, 0, PAGE_SIZE); 283 pt[offset] = virt_to_phys(new_pt) 284 | EPT_RA | EPT_WA | EPT_EA; 285 } else if (pt[offset] & EPT_LARGE_PAGE) 286 split_large_ept_entry(&pt[offset], level); 287 pt = phys_to_virt(pt[offset] & EPT_ADDR_MASK); 288 } 289 offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) & EPT_PGDIR_MASK; 290 pt[offset] = pte; 291 } 292 293 /* Map a page, @perm is the permission of the page */ 294 void install_ept(unsigned long *pml4, 295 unsigned long phys, 296 unsigned long guest_addr, 297 u64 perm) 298 { 299 install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0); 300 } 301 302 /* Map a 1G-size page */ 303 void install_1g_ept(unsigned long *pml4, 304 unsigned long phys, 305 unsigned long guest_addr, 306 u64 perm) 307 { 308 install_ept_entry(pml4, 3, guest_addr, 309 (phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0); 310 } 311 312 /* Map a 2M-size page */ 313 void install_2m_ept(unsigned long *pml4, 314 unsigned long phys, 315 unsigned long guest_addr, 316 u64 perm) 317 { 318 install_ept_entry(pml4, 2, guest_addr, 319 (phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0); 320 } 321 322 /* setup_ept_range : Setup a range of 1:1 mapped page to EPT paging structure. 323 @start : start address of guest page 324 @len : length of address to be mapped 325 @map_1g : whether 1G page map is used 326 @map_2m : whether 2M page map is used 327 @perm : permission for every page 328 */ 329 void setup_ept_range(unsigned long *pml4, unsigned long start, 330 unsigned long len, int map_1g, int map_2m, u64 perm) 331 { 332 u64 phys = start; 333 u64 max = (u64)len + (u64)start; 334 335 if (map_1g) { 336 while (phys + PAGE_SIZE_1G <= max) { 337 install_1g_ept(pml4, phys, phys, perm); 338 phys += PAGE_SIZE_1G; 339 } 340 } 341 if (map_2m) { 342 while (phys + PAGE_SIZE_2M <= max) { 343 install_2m_ept(pml4, phys, phys, perm); 344 phys += PAGE_SIZE_2M; 345 } 346 } 347 while (phys + PAGE_SIZE <= max) { 348 install_ept(pml4, phys, phys, perm); 349 phys += PAGE_SIZE; 350 } 351 } 352 353 /* get_ept_pte : Get the PTE of a given level in EPT, 354 @level == 1 means get the latest level*/ 355 unsigned long get_ept_pte(unsigned long *pml4, 356 unsigned long guest_addr, int level) 357 { 358 int l; 359 unsigned long *pt = pml4, pte; 360 unsigned offset; 361 362 if (level < 1 || level > 3) 363 return -1; 364 for (l = EPT_PAGE_LEVEL; ; --l) { 365 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 366 pte = pt[offset]; 367 if (!(pte & (EPT_PRESENT))) 368 return 0; 369 if (l == level) 370 break; 371 if (l < 4 && (pte & EPT_LARGE_PAGE)) 372 return pte; 373 pt = (unsigned long *)(pte & EPT_ADDR_MASK); 374 } 375 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 376 pte = pt[offset]; 377 return pte; 378 } 379 380 void ept_sync(int type, u64 eptp) 381 { 382 switch (type) { 383 case INVEPT_SINGLE: 384 if (ept_vpid.val & EPT_CAP_INVEPT_SINGLE) { 385 invept(INVEPT_SINGLE, eptp); 386 break; 387 } 388 /* else fall through */ 389 case INVEPT_GLOBAL: 390 if (ept_vpid.val & EPT_CAP_INVEPT_ALL) { 391 invept(INVEPT_GLOBAL, eptp); 392 break; 393 } 394 /* else fall through */ 395 default: 396 printf("WARNING: invept is not supported!\n"); 397 } 398 } 399 400 int set_ept_pte(unsigned long *pml4, unsigned long guest_addr, 401 int level, u64 pte_val) 402 { 403 int l; 404 unsigned long *pt = pml4; 405 unsigned offset; 406 407 if (level < 1 || level > 3) 408 return -1; 409 for (l = EPT_PAGE_LEVEL; ; --l) { 410 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 411 if (l == level) 412 break; 413 if (!(pt[offset] & (EPT_PRESENT))) 414 return -1; 415 pt = (unsigned long *)(pt[offset] & EPT_ADDR_MASK); 416 } 417 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK; 418 pt[offset] = pte_val; 419 return 0; 420 } 421 422 void vpid_sync(int type, u16 vpid) 423 { 424 switch(type) { 425 case INVVPID_SINGLE: 426 if (ept_vpid.val & VPID_CAP_INVVPID_SINGLE) { 427 invvpid(INVVPID_SINGLE, vpid, 0); 428 break; 429 } 430 case INVVPID_ALL: 431 if (ept_vpid.val & VPID_CAP_INVVPID_ALL) { 432 invvpid(INVVPID_ALL, vpid, 0); 433 break; 434 } 435 default: 436 printf("WARNING: invvpid is not supported\n"); 437 } 438 } 439 440 static void init_vmcs_ctrl(void) 441 { 442 /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ 443 /* 26.2.1.1 */ 444 vmcs_write(PIN_CONTROLS, ctrl_pin); 445 /* Disable VMEXIT of IO instruction */ 446 vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]); 447 if (ctrl_cpu_rev[0].set & CPU_SECONDARY) { 448 ctrl_cpu[1] = (ctrl_cpu[1] | ctrl_cpu_rev[1].set) & 449 ctrl_cpu_rev[1].clr; 450 vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]); 451 } 452 vmcs_write(CR3_TARGET_COUNT, 0); 453 vmcs_write(VPID, ++vpid_cnt); 454 } 455 456 static void init_vmcs_host(void) 457 { 458 /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ 459 /* 26.2.1.2 */ 460 vmcs_write(HOST_EFER, rdmsr(MSR_EFER)); 461 462 /* 26.2.1.3 */ 463 vmcs_write(ENT_CONTROLS, ctrl_enter); 464 vmcs_write(EXI_CONTROLS, ctrl_exit); 465 466 /* 26.2.2 */ 467 vmcs_write(HOST_CR0, read_cr0()); 468 vmcs_write(HOST_CR3, read_cr3()); 469 vmcs_write(HOST_CR4, read_cr4()); 470 vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter)); 471 vmcs_write(HOST_SYSENTER_CS, KERNEL_CS); 472 473 /* 26.2.3 */ 474 vmcs_write(HOST_SEL_CS, KERNEL_CS); 475 vmcs_write(HOST_SEL_SS, KERNEL_DS); 476 vmcs_write(HOST_SEL_DS, KERNEL_DS); 477 vmcs_write(HOST_SEL_ES, KERNEL_DS); 478 vmcs_write(HOST_SEL_FS, KERNEL_DS); 479 vmcs_write(HOST_SEL_GS, KERNEL_DS); 480 vmcs_write(HOST_SEL_TR, TSS_MAIN); 481 vmcs_write(HOST_BASE_TR, tss_descr.base); 482 vmcs_write(HOST_BASE_GDTR, gdt64_desc.base); 483 vmcs_write(HOST_BASE_IDTR, idt_descr.base); 484 vmcs_write(HOST_BASE_FS, 0); 485 vmcs_write(HOST_BASE_GS, 0); 486 487 /* Set other vmcs area */ 488 vmcs_write(PF_ERROR_MASK, 0); 489 vmcs_write(PF_ERROR_MATCH, 0); 490 vmcs_write(VMCS_LINK_PTR, ~0ul); 491 vmcs_write(VMCS_LINK_PTR_HI, ~0ul); 492 vmcs_write(HOST_RIP, (u64)(&vmx_return)); 493 } 494 495 static void init_vmcs_guest(void) 496 { 497 /* 26.3 CHECKING AND LOADING GUEST STATE */ 498 ulong guest_cr0, guest_cr4, guest_cr3; 499 /* 26.3.1.1 */ 500 guest_cr0 = read_cr0(); 501 guest_cr4 = read_cr4(); 502 guest_cr3 = read_cr3(); 503 if (ctrl_enter & ENT_GUEST_64) { 504 guest_cr0 |= X86_CR0_PG; 505 guest_cr4 |= X86_CR4_PAE; 506 } 507 if ((ctrl_enter & ENT_GUEST_64) == 0) 508 guest_cr4 &= (~X86_CR4_PCIDE); 509 if (guest_cr0 & X86_CR0_PG) 510 guest_cr0 |= X86_CR0_PE; 511 vmcs_write(GUEST_CR0, guest_cr0); 512 vmcs_write(GUEST_CR3, guest_cr3); 513 vmcs_write(GUEST_CR4, guest_cr4); 514 vmcs_write(GUEST_SYSENTER_CS, KERNEL_CS); 515 vmcs_write(GUEST_SYSENTER_ESP, 516 (u64)(guest_syscall_stack + PAGE_SIZE - 1)); 517 vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter)); 518 vmcs_write(GUEST_DR7, 0); 519 vmcs_write(GUEST_EFER, rdmsr(MSR_EFER)); 520 521 /* 26.3.1.2 */ 522 vmcs_write(GUEST_SEL_CS, KERNEL_CS); 523 vmcs_write(GUEST_SEL_SS, KERNEL_DS); 524 vmcs_write(GUEST_SEL_DS, KERNEL_DS); 525 vmcs_write(GUEST_SEL_ES, KERNEL_DS); 526 vmcs_write(GUEST_SEL_FS, KERNEL_DS); 527 vmcs_write(GUEST_SEL_GS, KERNEL_DS); 528 vmcs_write(GUEST_SEL_TR, TSS_MAIN); 529 vmcs_write(GUEST_SEL_LDTR, 0); 530 531 vmcs_write(GUEST_BASE_CS, 0); 532 vmcs_write(GUEST_BASE_ES, 0); 533 vmcs_write(GUEST_BASE_SS, 0); 534 vmcs_write(GUEST_BASE_DS, 0); 535 vmcs_write(GUEST_BASE_FS, 0); 536 vmcs_write(GUEST_BASE_GS, 0); 537 vmcs_write(GUEST_BASE_TR, tss_descr.base); 538 vmcs_write(GUEST_BASE_LDTR, 0); 539 540 vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF); 541 vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF); 542 vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF); 543 vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF); 544 vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF); 545 vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF); 546 vmcs_write(GUEST_LIMIT_LDTR, 0xffff); 547 vmcs_write(GUEST_LIMIT_TR, tss_descr.limit); 548 549 vmcs_write(GUEST_AR_CS, 0xa09b); 550 vmcs_write(GUEST_AR_DS, 0xc093); 551 vmcs_write(GUEST_AR_ES, 0xc093); 552 vmcs_write(GUEST_AR_FS, 0xc093); 553 vmcs_write(GUEST_AR_GS, 0xc093); 554 vmcs_write(GUEST_AR_SS, 0xc093); 555 vmcs_write(GUEST_AR_LDTR, 0x82); 556 vmcs_write(GUEST_AR_TR, 0x8b); 557 558 /* 26.3.1.3 */ 559 vmcs_write(GUEST_BASE_GDTR, gdt64_desc.base); 560 vmcs_write(GUEST_BASE_IDTR, idt_descr.base); 561 vmcs_write(GUEST_LIMIT_GDTR, gdt64_desc.limit); 562 vmcs_write(GUEST_LIMIT_IDTR, idt_descr.limit); 563 564 /* 26.3.1.4 */ 565 vmcs_write(GUEST_RIP, (u64)(&guest_entry)); 566 vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1)); 567 vmcs_write(GUEST_RFLAGS, 0x2); 568 569 /* 26.3.1.5 */ 570 vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE); 571 vmcs_write(GUEST_INTR_STATE, 0); 572 } 573 574 static int init_vmcs(struct vmcs **vmcs) 575 { 576 *vmcs = alloc_page(); 577 memset(*vmcs, 0, PAGE_SIZE); 578 (*vmcs)->revision_id = basic.revision; 579 /* vmclear first to init vmcs */ 580 if (vmcs_clear(*vmcs)) { 581 printf("%s : vmcs_clear error\n", __func__); 582 return 1; 583 } 584 585 if (make_vmcs_current(*vmcs)) { 586 printf("%s : make_vmcs_current error\n", __func__); 587 return 1; 588 } 589 590 /* All settings to pin/exit/enter/cpu 591 control fields should be placed here */ 592 ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI; 593 ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64; 594 ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64); 595 /* DIsable IO instruction VMEXIT now */ 596 ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP)); 597 ctrl_cpu[1] = 0; 598 599 ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr; 600 ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr; 601 ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr; 602 ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr; 603 604 init_vmcs_ctrl(); 605 init_vmcs_host(); 606 init_vmcs_guest(); 607 return 0; 608 } 609 610 static void init_vmx(void) 611 { 612 ulong fix_cr0_set, fix_cr0_clr; 613 ulong fix_cr4_set, fix_cr4_clr; 614 615 vmxon_region = alloc_page(); 616 memset(vmxon_region, 0, PAGE_SIZE); 617 618 fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0); 619 fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1); 620 fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0); 621 fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1); 622 basic.val = rdmsr(MSR_IA32_VMX_BASIC); 623 ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN 624 : MSR_IA32_VMX_PINBASED_CTLS); 625 ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT 626 : MSR_IA32_VMX_EXIT_CTLS); 627 ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY 628 : MSR_IA32_VMX_ENTRY_CTLS); 629 ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC 630 : MSR_IA32_VMX_PROCBASED_CTLS); 631 if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0) 632 ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2); 633 else 634 ctrl_cpu_rev[1].val = 0; 635 if ((ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID)) != 0) 636 ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); 637 else 638 ept_vpid.val = 0; 639 640 write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set); 641 write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE); 642 643 *vmxon_region = basic.revision; 644 645 guest_stack = alloc_page(); 646 memset(guest_stack, 0, PAGE_SIZE); 647 guest_syscall_stack = alloc_page(); 648 memset(guest_syscall_stack, 0, PAGE_SIZE); 649 } 650 651 static void do_vmxon_off(void *data) 652 { 653 vmx_on(); 654 vmx_off(); 655 } 656 657 static void do_write_feature_control(void *data) 658 { 659 wrmsr(MSR_IA32_FEATURE_CONTROL, 0); 660 } 661 662 static int test_vmx_feature_control(void) 663 { 664 u64 ia32_feature_control; 665 bool vmx_enabled; 666 667 ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); 668 vmx_enabled = ((ia32_feature_control & 0x5) == 0x5); 669 if ((ia32_feature_control & 0x5) == 0x5) { 670 printf("VMX enabled and locked by BIOS\n"); 671 return 0; 672 } else if (ia32_feature_control & 0x1) { 673 printf("ERROR: VMX locked out by BIOS!?\n"); 674 return 1; 675 } 676 677 wrmsr(MSR_IA32_FEATURE_CONTROL, 0); 678 report("test vmxon with FEATURE_CONTROL cleared", 679 test_for_exception(GP_VECTOR, &do_vmxon_off, NULL)); 680 681 wrmsr(MSR_IA32_FEATURE_CONTROL, 0x4); 682 report("test vmxon without FEATURE_CONTROL lock", 683 test_for_exception(GP_VECTOR, &do_vmxon_off, NULL)); 684 685 wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5); 686 vmx_enabled = ((rdmsr(MSR_IA32_FEATURE_CONTROL) & 0x5) == 0x5); 687 report("test enable VMX in FEATURE_CONTROL", vmx_enabled); 688 689 report("test FEATURE_CONTROL lock bit", 690 test_for_exception(GP_VECTOR, &do_write_feature_control, NULL)); 691 692 return !vmx_enabled; 693 } 694 695 static int test_vmxon(void) 696 { 697 int ret, ret1; 698 u64 *tmp_region = vmxon_region; 699 int width = cpuid_maxphyaddr(); 700 701 /* Unaligned page access */ 702 vmxon_region = (u64 *)((intptr_t)vmxon_region + 1); 703 ret1 = vmx_on(); 704 report("test vmxon with unaligned vmxon region", ret1); 705 if (!ret1) { 706 ret = 1; 707 goto out; 708 } 709 710 /* gpa bits beyond physical address width are set*/ 711 vmxon_region = (u64 *)((intptr_t)tmp_region | ((u64)1 << (width+1))); 712 ret1 = vmx_on(); 713 report("test vmxon with bits set beyond physical address width", ret1); 714 if (!ret1) { 715 ret = 1; 716 goto out; 717 } 718 719 /* invalid revision indentifier */ 720 vmxon_region = tmp_region; 721 *vmxon_region = 0xba9da9; 722 ret1 = vmx_on(); 723 report("test vmxon with invalid revision identifier", ret1); 724 if (!ret1) { 725 ret = 1; 726 goto out; 727 } 728 729 /* and finally a valid region */ 730 *vmxon_region = basic.revision; 731 ret = vmx_on(); 732 report("test vmxon with valid vmxon region", !ret); 733 734 out: 735 return ret; 736 } 737 738 static void test_vmptrld(void) 739 { 740 struct vmcs *vmcs, *tmp_root; 741 int width = cpuid_maxphyaddr(); 742 743 vmcs = alloc_page(); 744 vmcs->revision_id = basic.revision; 745 746 /* Unaligned page access */ 747 tmp_root = (struct vmcs *)((intptr_t)vmcs + 1); 748 report("test vmptrld with unaligned vmcs", 749 make_vmcs_current(tmp_root) == 1); 750 751 /* gpa bits beyond physical address width are set*/ 752 tmp_root = (struct vmcs *)((intptr_t)vmcs | 753 ((u64)1 << (width+1))); 754 report("test vmptrld with vmcs address bits set beyond physical address width", 755 make_vmcs_current(tmp_root) == 1); 756 757 /* Pass VMXON region */ 758 tmp_root = (struct vmcs *)vmxon_region; 759 report("test vmptrld with vmxon region", 760 make_vmcs_current(tmp_root) == 1); 761 762 report("test vmptrld with valid vmcs region", make_vmcs_current(vmcs) == 0); 763 } 764 765 static void test_vmptrst(void) 766 { 767 int ret; 768 struct vmcs *vmcs1, *vmcs2; 769 770 vmcs1 = alloc_page(); 771 memset(vmcs1, 0, PAGE_SIZE); 772 init_vmcs(&vmcs1); 773 ret = vmcs_save(&vmcs2); 774 report("test vmptrst", (!ret) && (vmcs1 == vmcs2)); 775 } 776 777 struct vmx_ctl_msr { 778 const char *name; 779 u32 index, true_index; 780 u32 default1; 781 } vmx_ctl_msr[] = { 782 { "MSR_IA32_VMX_PINBASED_CTLS", MSR_IA32_VMX_PINBASED_CTLS, 783 MSR_IA32_VMX_TRUE_PIN, 0x16 }, 784 { "MSR_IA32_VMX_PROCBASED_CTLS", MSR_IA32_VMX_PROCBASED_CTLS, 785 MSR_IA32_VMX_TRUE_PROC, 0x401e172 }, 786 { "MSR_IA32_VMX_PROCBASED_CTLS2", MSR_IA32_VMX_PROCBASED_CTLS2, 787 MSR_IA32_VMX_PROCBASED_CTLS2, 0 }, 788 { "MSR_IA32_VMX_EXIT_CTLS", MSR_IA32_VMX_EXIT_CTLS, 789 MSR_IA32_VMX_TRUE_EXIT, 0x36dff }, 790 { "MSR_IA32_VMX_ENTRY_CTLS", MSR_IA32_VMX_ENTRY_CTLS, 791 MSR_IA32_VMX_TRUE_ENTRY, 0x11ff }, 792 }; 793 794 static void test_vmx_caps(void) 795 { 796 u64 val, default1, fixed0, fixed1; 797 union vmx_ctrl_msr ctrl, true_ctrl; 798 unsigned int n; 799 bool ok; 800 801 printf("\nTest suite: VMX capability reporting\n"); 802 803 report("MSR_IA32_VMX_BASIC", 804 (basic.revision & (1ul << 31)) == 0 && 805 basic.size > 0 && basic.size <= 4096 && 806 (basic.type == 0 || basic.type == 6) && 807 basic.reserved1 == 0 && basic.reserved2 == 0); 808 809 val = rdmsr(MSR_IA32_VMX_MISC); 810 report("MSR_IA32_VMX_MISC", 811 (!(ctrl_cpu_rev[1].clr & CPU_URG) || val & (1ul << 5)) && 812 ((val >> 16) & 0x1ff) <= 256 && 813 (val & 0xc0007e00) == 0); 814 815 for (n = 0; n < ARRAY_SIZE(vmx_ctl_msr); n++) { 816 ctrl.val = rdmsr(vmx_ctl_msr[n].index); 817 default1 = vmx_ctl_msr[n].default1; 818 ok = (ctrl.set & default1) == default1; 819 ok = ok && (ctrl.set & ~ctrl.clr) == 0; 820 if (ok && basic.ctrl) { 821 true_ctrl.val = rdmsr(vmx_ctl_msr[n].true_index); 822 ok = ctrl.clr == true_ctrl.clr; 823 ok = ok && ctrl.set == (true_ctrl.set | default1); 824 } 825 report(vmx_ctl_msr[n].name, ok); 826 } 827 828 fixed0 = rdmsr(MSR_IA32_VMX_CR0_FIXED0); 829 fixed1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1); 830 report("MSR_IA32_VMX_IA32_VMX_CR0_FIXED0/1", 831 ((fixed0 ^ fixed1) & ~fixed1) == 0); 832 833 fixed0 = rdmsr(MSR_IA32_VMX_CR4_FIXED0); 834 fixed1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1); 835 report("MSR_IA32_VMX_IA32_VMX_CR4_FIXED0/1", 836 ((fixed0 ^ fixed1) & ~fixed1) == 0); 837 838 val = rdmsr(MSR_IA32_VMX_VMCS_ENUM); 839 report("MSR_IA32_VMX_VMCS_ENUM", 840 (val & 0x3e) >= 0x2a && 841 (val & 0xfffffffffffffc01Ull) == 0); 842 843 val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); 844 report("MSR_IA32_VMX_EPT_VPID_CAP", 845 (val & 0xfffff07ef9eebebeUll) == 0); 846 } 847 848 /* This function can only be called in guest */ 849 static void __attribute__((__used__)) hypercall(u32 hypercall_no) 850 { 851 u64 val = 0; 852 val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT; 853 hypercall_field = val; 854 asm volatile("vmcall\n\t"); 855 } 856 857 static bool is_hypercall() 858 { 859 ulong reason, hyper_bit; 860 861 reason = vmcs_read(EXI_REASON) & 0xff; 862 hyper_bit = hypercall_field & HYPERCALL_BIT; 863 if (reason == VMX_VMCALL && hyper_bit) 864 return true; 865 return false; 866 } 867 868 static int handle_hypercall() 869 { 870 ulong hypercall_no; 871 872 hypercall_no = hypercall_field & HYPERCALL_MASK; 873 hypercall_field = 0; 874 switch (hypercall_no) { 875 case HYPERCALL_VMEXIT: 876 return VMX_TEST_VMEXIT; 877 default: 878 printf("ERROR : Invalid hypercall number : %d\n", hypercall_no); 879 } 880 return VMX_TEST_EXIT; 881 } 882 883 static int exit_handler() 884 { 885 int ret; 886 887 current->exits++; 888 regs.rflags = vmcs_read(GUEST_RFLAGS); 889 if (is_hypercall()) 890 ret = handle_hypercall(); 891 else 892 ret = current->exit_handler(); 893 vmcs_write(GUEST_RFLAGS, regs.rflags); 894 switch (ret) { 895 case VMX_TEST_VMEXIT: 896 case VMX_TEST_RESUME: 897 return ret; 898 case VMX_TEST_EXIT: 899 break; 900 default: 901 printf("ERROR : Invalid exit_handler return val %d.\n" 902 , ret); 903 } 904 print_vmexit_info(); 905 abort(); 906 return 0; 907 } 908 909 static int vmx_run() 910 { 911 u32 ret = 0, fail = 0; 912 913 while (1) { 914 asm volatile ( 915 "mov %%rsp, %%rsi\n\t" 916 "mov %2, %%rdi\n\t" 917 "vmwrite %%rsi, %%rdi\n\t" 918 919 LOAD_GPR_C 920 "cmpl $0, %1\n\t" 921 "jne 1f\n\t" 922 LOAD_RFLAGS 923 "vmlaunch\n\t" 924 "jmp 2f\n\t" 925 "1: " 926 "vmresume\n\t" 927 "2: " 928 "setbe %0\n\t" 929 "vmx_return:\n\t" 930 SAVE_GPR_C 931 SAVE_RFLAGS 932 : "=m"(fail) 933 : "m"(launched), "i"(HOST_RSP) 934 : "rdi", "rsi", "memory", "cc" 935 936 ); 937 if (fail) 938 ret = launched ? VMX_TEST_RESUME_ERR : 939 VMX_TEST_LAUNCH_ERR; 940 else { 941 launched = 1; 942 ret = exit_handler(); 943 } 944 if (ret != VMX_TEST_RESUME) 945 break; 946 } 947 launched = 0; 948 switch (ret) { 949 case VMX_TEST_VMEXIT: 950 return 0; 951 case VMX_TEST_LAUNCH_ERR: 952 printf("%s : vmlaunch failed.\n", __func__); 953 if ((!(host_rflags & X86_EFLAGS_CF) && !(host_rflags & X86_EFLAGS_ZF)) 954 || ((host_rflags & X86_EFLAGS_CF) && (host_rflags & X86_EFLAGS_ZF))) 955 printf("\tvmlaunch set wrong flags\n"); 956 report("test vmlaunch", 0); 957 break; 958 case VMX_TEST_RESUME_ERR: 959 printf("%s : vmresume failed.\n", __func__); 960 if ((!(host_rflags & X86_EFLAGS_CF) && !(host_rflags & X86_EFLAGS_ZF)) 961 || ((host_rflags & X86_EFLAGS_CF) && (host_rflags & X86_EFLAGS_ZF))) 962 printf("\tvmresume set wrong flags\n"); 963 report("test vmresume", 0); 964 break; 965 default: 966 printf("%s : unhandled ret from exit_handler, ret=%d.\n", __func__, ret); 967 break; 968 } 969 return 1; 970 } 971 972 static int test_run(struct vmx_test *test) 973 { 974 if (test->name == NULL) 975 test->name = "(no name)"; 976 if (vmx_on()) { 977 printf("%s : vmxon failed.\n", __func__); 978 return 1; 979 } 980 init_vmcs(&(test->vmcs)); 981 /* Directly call test->init is ok here, init_vmcs has done 982 vmcs init, vmclear and vmptrld*/ 983 if (test->init && test->init(test->vmcs) != VMX_TEST_START) 984 goto out; 985 test->exits = 0; 986 current = test; 987 regs = test->guest_regs; 988 vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2); 989 launched = 0; 990 printf("\nTest suite: %s\n", test->name); 991 vmx_run(); 992 out: 993 if (vmx_off()) { 994 printf("%s : vmxoff failed.\n", __func__); 995 return 1; 996 } 997 return 0; 998 } 999 1000 extern struct vmx_test vmx_tests[]; 1001 1002 int main(void) 1003 { 1004 int i = 0; 1005 1006 setup_vm(); 1007 setup_idt(); 1008 hypercall_field = 0; 1009 1010 if (!(cpuid(1).c & (1 << 5))) { 1011 printf("WARNING: vmx not supported, add '-cpu host'\n"); 1012 goto exit; 1013 } 1014 init_vmx(); 1015 if (test_vmx_feature_control() != 0) 1016 goto exit; 1017 /* Set basic test ctxt the same as "null" */ 1018 current = &vmx_tests[0]; 1019 if (test_vmxon() != 0) 1020 goto exit; 1021 test_vmptrld(); 1022 test_vmclear(); 1023 test_vmptrst(); 1024 init_vmcs(&vmcs_root); 1025 if (vmx_run()) { 1026 report("test vmlaunch", 0); 1027 goto exit; 1028 } 1029 test_vmxoff(); 1030 test_vmx_caps(); 1031 1032 while (vmx_tests[++i].name != NULL) 1033 if (test_run(&vmx_tests[i])) 1034 goto exit; 1035 1036 exit: 1037 return report_summary(); 1038 } 1039