1 /* 2 * All test cases of nested virtualization should be in this file 3 * 4 * Author : Arthur Chunqi Li <yzt356@gmail.com> 5 */ 6 7 #include <asm/debugreg.h> 8 9 #include "vmx.h" 10 #include "msr.h" 11 #include "processor.h" 12 #include "pmu.h" 13 #include "vm.h" 14 #include "pci.h" 15 #include "fwcfg.h" 16 #include "isr.h" 17 #include "desc.h" 18 #include "apic.h" 19 #include "vmalloc.h" 20 #include "alloc_page.h" 21 #include "smp.h" 22 #include "delay.h" 23 #include "access.h" 24 #include "x86/usermode.h" 25 26 /* 27 * vmcs.GUEST_PENDING_DEBUG has the same format as DR6, although some bits that 28 * are legal in DR6 are reserved in vmcs.GUEST_PENDING_DEBUG. And if any data 29 * or I/O breakpoint matches *and* was enabled, bit 12 is also set. 30 */ 31 #define PENDING_DBG_TRAP BIT(12) 32 33 #define VPID_CAP_INVVPID_TYPES_SHIFT 40 34 35 u64 ia32_pat; 36 u64 ia32_efer; 37 void *io_bitmap_a, *io_bitmap_b; 38 u16 ioport; 39 40 unsigned long *pml4; 41 u64 eptp; 42 void *data_page1, *data_page2; 43 44 phys_addr_t pci_physaddr; 45 46 void *pml_log; 47 #define PML_INDEX 512 48 49 static inline unsigned ffs(unsigned x) 50 { 51 int pos = -1; 52 53 __asm__ __volatile__("bsf %1, %%eax; cmovnz %%eax, %0" 54 : "+r"(pos) : "rm"(x) : "eax"); 55 return pos + 1; 56 } 57 58 static inline void vmcall(void) 59 { 60 asm volatile("vmcall"); 61 } 62 63 static u32 *get_vapic_page(void) 64 { 65 return (u32 *)phys_to_virt(vmcs_read(APIC_VIRT_ADDR)); 66 } 67 68 static void basic_guest_main(void) 69 { 70 report_pass("Basic VMX test"); 71 } 72 73 static int basic_exit_handler(union exit_reason exit_reason) 74 { 75 report_fail("Basic VMX test"); 76 print_vmexit_info(exit_reason); 77 return VMX_TEST_EXIT; 78 } 79 80 static void vmenter_main(void) 81 { 82 u64 rax; 83 u64 rsp, resume_rsp; 84 85 report_pass("test vmlaunch"); 86 87 asm volatile( 88 "mov %%rsp, %0\n\t" 89 "mov %3, %%rax\n\t" 90 "vmcall\n\t" 91 "mov %%rax, %1\n\t" 92 "mov %%rsp, %2\n\t" 93 : "=r"(rsp), "=r"(rax), "=r"(resume_rsp) 94 : "g"(0xABCD)); 95 report((rax == 0xFFFF) && (rsp == resume_rsp), "test vmresume"); 96 } 97 98 static int vmenter_exit_handler(union exit_reason exit_reason) 99 { 100 u64 guest_rip = vmcs_read(GUEST_RIP); 101 102 switch (exit_reason.basic) { 103 case VMX_VMCALL: 104 if (regs.rax != 0xABCD) { 105 report_fail("test vmresume"); 106 return VMX_TEST_VMEXIT; 107 } 108 regs.rax = 0xFFFF; 109 vmcs_write(GUEST_RIP, guest_rip + 3); 110 return VMX_TEST_RESUME; 111 default: 112 report_fail("test vmresume"); 113 print_vmexit_info(exit_reason); 114 } 115 return VMX_TEST_VMEXIT; 116 } 117 118 u32 preempt_scale; 119 volatile unsigned long long tsc_val; 120 volatile u32 preempt_val; 121 u64 saved_rip; 122 123 static int preemption_timer_init(struct vmcs *vmcs) 124 { 125 if (!(ctrl_pin_rev.clr & PIN_PREEMPT)) { 126 printf("\tPreemption timer is not supported\n"); 127 return VMX_TEST_EXIT; 128 } 129 vmcs_write(PIN_CONTROLS, vmcs_read(PIN_CONTROLS) | PIN_PREEMPT); 130 preempt_val = 10000000; 131 vmcs_write(PREEMPT_TIMER_VALUE, preempt_val); 132 preempt_scale = rdmsr(MSR_IA32_VMX_MISC) & 0x1F; 133 134 if (!(ctrl_exit_rev.clr & EXI_SAVE_PREEMPT)) 135 printf("\tSave preemption value is not supported\n"); 136 137 return VMX_TEST_START; 138 } 139 140 static void preemption_timer_main(void) 141 { 142 tsc_val = rdtsc(); 143 if (ctrl_exit_rev.clr & EXI_SAVE_PREEMPT) { 144 vmx_set_test_stage(0); 145 vmcall(); 146 if (vmx_get_test_stage() == 1) 147 vmcall(); 148 } 149 vmx_set_test_stage(1); 150 while (vmx_get_test_stage() == 1) { 151 if (((rdtsc() - tsc_val) 
>> preempt_scale) 152 > 10 * preempt_val) { 153 vmx_set_test_stage(2); 154 vmcall(); 155 } 156 } 157 tsc_val = rdtsc(); 158 asm volatile ("hlt"); 159 vmcall(); 160 vmx_set_test_stage(5); 161 vmcall(); 162 } 163 164 static int preemption_timer_exit_handler(union exit_reason exit_reason) 165 { 166 bool guest_halted; 167 u64 guest_rip; 168 u32 insn_len; 169 u32 ctrl_exit; 170 171 guest_rip = vmcs_read(GUEST_RIP); 172 insn_len = vmcs_read(EXI_INST_LEN); 173 switch (exit_reason.basic) { 174 case VMX_PREEMPT: 175 switch (vmx_get_test_stage()) { 176 case 1: 177 case 2: 178 report(((rdtsc() - tsc_val) >> preempt_scale) >= preempt_val, 179 "busy-wait for preemption timer"); 180 vmx_set_test_stage(3); 181 vmcs_write(PREEMPT_TIMER_VALUE, preempt_val); 182 return VMX_TEST_RESUME; 183 case 3: 184 guest_halted = 185 (vmcs_read(GUEST_ACTV_STATE) == ACTV_HLT); 186 report(((rdtsc() - tsc_val) >> preempt_scale) >= preempt_val 187 && guest_halted, 188 "preemption timer during hlt"); 189 vmx_set_test_stage(4); 190 vmcs_write(PIN_CONTROLS, 191 vmcs_read(PIN_CONTROLS) & ~PIN_PREEMPT); 192 vmcs_write(EXI_CONTROLS, 193 vmcs_read(EXI_CONTROLS) & ~EXI_SAVE_PREEMPT); 194 vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE); 195 return VMX_TEST_RESUME; 196 case 4: 197 report(saved_rip == guest_rip, 198 "preemption timer with 0 value"); 199 break; 200 default: 201 report_fail("Invalid stage."); 202 print_vmexit_info(exit_reason); 203 break; 204 } 205 break; 206 case VMX_VMCALL: 207 vmcs_write(GUEST_RIP, guest_rip + insn_len); 208 switch (vmx_get_test_stage()) { 209 case 0: 210 report(vmcs_read(PREEMPT_TIMER_VALUE) == preempt_val, 211 "Keep preemption value"); 212 vmx_set_test_stage(1); 213 vmcs_write(PREEMPT_TIMER_VALUE, preempt_val); 214 ctrl_exit = (vmcs_read(EXI_CONTROLS) | 215 EXI_SAVE_PREEMPT) & ctrl_exit_rev.clr; 216 vmcs_write(EXI_CONTROLS, ctrl_exit); 217 return VMX_TEST_RESUME; 218 case 1: 219 report(vmcs_read(PREEMPT_TIMER_VALUE) < preempt_val, 220 "Save preemption value"); 221 return VMX_TEST_RESUME; 222 case 2: 223 report_fail("busy-wait for preemption timer"); 224 vmx_set_test_stage(3); 225 vmcs_write(PREEMPT_TIMER_VALUE, preempt_val); 226 return VMX_TEST_RESUME; 227 case 3: 228 report_fail("preemption timer during hlt"); 229 vmx_set_test_stage(4); 230 /* fall through */ 231 case 4: 232 vmcs_write(PIN_CONTROLS, 233 vmcs_read(PIN_CONTROLS) | PIN_PREEMPT); 234 vmcs_write(PREEMPT_TIMER_VALUE, 0); 235 saved_rip = guest_rip + insn_len; 236 return VMX_TEST_RESUME; 237 case 5: 238 report_fail("preemption timer with 0 value (vmcall stage 5)"); 239 break; 240 default: 241 // Should not reach here 242 report_fail("unexpected stage, %d", 243 vmx_get_test_stage()); 244 print_vmexit_info(exit_reason); 245 return VMX_TEST_VMEXIT; 246 } 247 break; 248 default: 249 report_fail("Unknown exit reason, 0x%x", exit_reason.full); 250 print_vmexit_info(exit_reason); 251 } 252 vmcs_write(PIN_CONTROLS, vmcs_read(PIN_CONTROLS) & ~PIN_PREEMPT); 253 return VMX_TEST_VMEXIT; 254 } 255 256 static void msr_bmp_init(void) 257 { 258 void *msr_bitmap; 259 u32 ctrl_cpu0; 260 261 msr_bitmap = alloc_page(); 262 ctrl_cpu0 = vmcs_read(CPU_EXEC_CTRL0); 263 ctrl_cpu0 |= CPU_MSR_BITMAP; 264 vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu0); 265 vmcs_write(MSR_BITMAP, (u64)msr_bitmap); 266 } 267 268 static void *get_msr_bitmap(void) 269 { 270 void *msr_bitmap; 271 272 if (vmcs_read(CPU_EXEC_CTRL0) & CPU_MSR_BITMAP) { 273 msr_bitmap = (void *)vmcs_read(MSR_BITMAP); 274 } else { 275 msr_bitmap = alloc_page(); 276 memset(msr_bitmap, 0xff, PAGE_SIZE); 277 
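		/*
		 * A bitmap of all 1s intercepts every RDMSR/WRMSR by default;
		 * callers such as disable_intercept_for_x2apic_msrs() below
		 * then clear individual bits to let selected MSRs through.
		 */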
vmcs_write(MSR_BITMAP, (u64)msr_bitmap); 278 vmcs_set_bits(CPU_EXEC_CTRL0, CPU_MSR_BITMAP); 279 } 280 281 return msr_bitmap; 282 } 283 284 static void disable_intercept_for_x2apic_msrs(void) 285 { 286 unsigned long *msr_bitmap = (unsigned long *)get_msr_bitmap(); 287 u32 msr; 288 289 for (msr = APIC_BASE_MSR; 290 msr < (APIC_BASE_MSR+0xff); 291 msr += BITS_PER_LONG) { 292 unsigned int word = msr / BITS_PER_LONG; 293 294 msr_bitmap[word] = 0; 295 msr_bitmap[word + (0x800 / sizeof(long))] = 0; 296 } 297 } 298 299 static int test_ctrl_pat_init(struct vmcs *vmcs) 300 { 301 u64 ctrl_ent; 302 u64 ctrl_exi; 303 304 msr_bmp_init(); 305 if (!(ctrl_exit_rev.clr & EXI_SAVE_PAT) && 306 !(ctrl_exit_rev.clr & EXI_LOAD_PAT) && 307 !(ctrl_enter_rev.clr & ENT_LOAD_PAT)) { 308 printf("\tSave/load PAT is not supported\n"); 309 return 1; 310 } 311 312 ctrl_ent = vmcs_read(ENT_CONTROLS); 313 ctrl_exi = vmcs_read(EXI_CONTROLS); 314 ctrl_ent |= ctrl_enter_rev.clr & ENT_LOAD_PAT; 315 ctrl_exi |= ctrl_exit_rev.clr & (EXI_SAVE_PAT | EXI_LOAD_PAT); 316 vmcs_write(ENT_CONTROLS, ctrl_ent); 317 vmcs_write(EXI_CONTROLS, ctrl_exi); 318 ia32_pat = rdmsr(MSR_IA32_CR_PAT); 319 vmcs_write(GUEST_PAT, 0x0); 320 vmcs_write(HOST_PAT, ia32_pat); 321 return VMX_TEST_START; 322 } 323 324 static void test_ctrl_pat_main(void) 325 { 326 u64 guest_ia32_pat; 327 328 guest_ia32_pat = rdmsr(MSR_IA32_CR_PAT); 329 if (!(ctrl_enter_rev.clr & ENT_LOAD_PAT)) 330 printf("\tENT_LOAD_PAT is not supported.\n"); 331 else { 332 if (guest_ia32_pat != 0) { 333 report_fail("Entry load PAT"); 334 return; 335 } 336 } 337 wrmsr(MSR_IA32_CR_PAT, 0x6); 338 vmcall(); 339 guest_ia32_pat = rdmsr(MSR_IA32_CR_PAT); 340 if (ctrl_enter_rev.clr & ENT_LOAD_PAT) 341 report(guest_ia32_pat == ia32_pat, "Entry load PAT"); 342 } 343 344 static int test_ctrl_pat_exit_handler(union exit_reason exit_reason) 345 { 346 u64 guest_rip; 347 u64 guest_pat; 348 349 guest_rip = vmcs_read(GUEST_RIP); 350 switch (exit_reason.basic) { 351 case VMX_VMCALL: 352 guest_pat = vmcs_read(GUEST_PAT); 353 if (!(ctrl_exit_rev.clr & EXI_SAVE_PAT)) { 354 printf("\tEXI_SAVE_PAT is not supported\n"); 355 vmcs_write(GUEST_PAT, 0x6); 356 } else { 357 report(guest_pat == 0x6, "Exit save PAT"); 358 } 359 if (!(ctrl_exit_rev.clr & EXI_LOAD_PAT)) 360 printf("\tEXI_LOAD_PAT is not supported\n"); 361 else 362 report(rdmsr(MSR_IA32_CR_PAT) == ia32_pat, 363 "Exit load PAT"); 364 vmcs_write(GUEST_PAT, ia32_pat); 365 vmcs_write(GUEST_RIP, guest_rip + 3); 366 return VMX_TEST_RESUME; 367 default: 368 printf("ERROR : Unknown exit reason, 0x%x.\n", exit_reason.full); 369 break; 370 } 371 return VMX_TEST_VMEXIT; 372 } 373 374 static int test_ctrl_efer_init(struct vmcs *vmcs) 375 { 376 u64 ctrl_ent; 377 u64 ctrl_exi; 378 379 msr_bmp_init(); 380 ctrl_ent = vmcs_read(ENT_CONTROLS) | ENT_LOAD_EFER; 381 ctrl_exi = vmcs_read(EXI_CONTROLS) | EXI_SAVE_EFER | EXI_LOAD_EFER; 382 vmcs_write(ENT_CONTROLS, ctrl_ent & ctrl_enter_rev.clr); 383 vmcs_write(EXI_CONTROLS, ctrl_exi & ctrl_exit_rev.clr); 384 ia32_efer = rdmsr(MSR_EFER); 385 vmcs_write(GUEST_EFER, ia32_efer ^ EFER_NX); 386 vmcs_write(HOST_EFER, ia32_efer ^ EFER_NX); 387 return VMX_TEST_START; 388 } 389 390 static void test_ctrl_efer_main(void) 391 { 392 u64 guest_ia32_efer; 393 394 guest_ia32_efer = rdmsr(MSR_EFER); 395 if (!(ctrl_enter_rev.clr & ENT_LOAD_EFER)) 396 printf("\tENT_LOAD_EFER is not supported.\n"); 397 else { 398 if (guest_ia32_efer != (ia32_efer ^ EFER_NX)) { 399 report_fail("Entry load EFER"); 400 return; 401 } 402 } 403 wrmsr(MSR_EFER, ia32_efer); 404 
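	/*
	 * The guest has just written L1's original EFER value back; the vmcall
	 * below lets the L1 exit handler check that EXI_SAVE_EFER captured it
	 * in GUEST_EFER and that EXI_LOAD_EFER restored the NX-toggled host
	 * value on exit.
	 */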
vmcall(); 405 guest_ia32_efer = rdmsr(MSR_EFER); 406 if (ctrl_enter_rev.clr & ENT_LOAD_EFER) 407 report(guest_ia32_efer == ia32_efer, "Entry load EFER"); 408 } 409 410 static int test_ctrl_efer_exit_handler(union exit_reason exit_reason) 411 { 412 u64 guest_rip; 413 u64 guest_efer; 414 415 guest_rip = vmcs_read(GUEST_RIP); 416 switch (exit_reason.basic) { 417 case VMX_VMCALL: 418 guest_efer = vmcs_read(GUEST_EFER); 419 if (!(ctrl_exit_rev.clr & EXI_SAVE_EFER)) { 420 printf("\tEXI_SAVE_EFER is not supported\n"); 421 vmcs_write(GUEST_EFER, ia32_efer); 422 } else { 423 report(guest_efer == ia32_efer, "Exit save EFER"); 424 } 425 if (!(ctrl_exit_rev.clr & EXI_LOAD_EFER)) { 426 printf("\tEXI_LOAD_EFER is not supported\n"); 427 wrmsr(MSR_EFER, ia32_efer ^ EFER_NX); 428 } else { 429 report(rdmsr(MSR_EFER) == (ia32_efer ^ EFER_NX), 430 "Exit load EFER"); 431 } 432 vmcs_write(GUEST_PAT, ia32_efer); 433 vmcs_write(GUEST_RIP, guest_rip + 3); 434 return VMX_TEST_RESUME; 435 default: 436 printf("ERROR : Unknown exit reason, 0x%x.\n", exit_reason.full); 437 break; 438 } 439 return VMX_TEST_VMEXIT; 440 } 441 442 u32 guest_cr0, guest_cr4; 443 444 static void cr_shadowing_main(void) 445 { 446 u32 cr0, cr4, tmp; 447 448 // Test read through 449 vmx_set_test_stage(0); 450 guest_cr0 = read_cr0(); 451 if (vmx_get_test_stage() == 1) 452 report_fail("Read through CR0"); 453 else 454 vmcall(); 455 vmx_set_test_stage(1); 456 guest_cr4 = read_cr4(); 457 if (vmx_get_test_stage() == 2) 458 report_fail("Read through CR4"); 459 else 460 vmcall(); 461 // Test write through 462 guest_cr0 = guest_cr0 ^ (X86_CR0_TS | X86_CR0_MP); 463 guest_cr4 = guest_cr4 ^ (X86_CR4_TSD | X86_CR4_DE); 464 vmx_set_test_stage(2); 465 write_cr0(guest_cr0); 466 if (vmx_get_test_stage() == 3) 467 report_fail("Write through CR0"); 468 else 469 vmcall(); 470 vmx_set_test_stage(3); 471 write_cr4(guest_cr4); 472 if (vmx_get_test_stage() == 4) 473 report_fail("Write through CR4"); 474 else 475 vmcall(); 476 // Test read shadow 477 vmx_set_test_stage(4); 478 vmcall(); 479 cr0 = read_cr0(); 480 if (vmx_get_test_stage() != 5) 481 report(cr0 == guest_cr0, "Read shadowing CR0"); 482 vmx_set_test_stage(5); 483 cr4 = read_cr4(); 484 if (vmx_get_test_stage() != 6) 485 report(cr4 == guest_cr4, "Read shadowing CR4"); 486 // Test write shadow (same value with shadow) 487 vmx_set_test_stage(6); 488 write_cr0(guest_cr0); 489 if (vmx_get_test_stage() == 7) 490 report_fail("Write shadowing CR0 (same value with shadow)"); 491 else 492 vmcall(); 493 vmx_set_test_stage(7); 494 write_cr4(guest_cr4); 495 if (vmx_get_test_stage() == 8) 496 report_fail("Write shadowing CR4 (same value with shadow)"); 497 else 498 vmcall(); 499 // Test write shadow (different value) 500 vmx_set_test_stage(8); 501 tmp = guest_cr0 ^ X86_CR0_TS; 502 asm volatile("mov %0, %%rsi\n\t" 503 "mov %%rsi, %%cr0\n\t" 504 ::"m"(tmp) 505 :"rsi", "memory", "cc"); 506 report(vmx_get_test_stage() == 9, 507 "Write shadowing different X86_CR0_TS"); 508 vmx_set_test_stage(9); 509 tmp = guest_cr0 ^ X86_CR0_MP; 510 asm volatile("mov %0, %%rsi\n\t" 511 "mov %%rsi, %%cr0\n\t" 512 ::"m"(tmp) 513 :"rsi", "memory", "cc"); 514 report(vmx_get_test_stage() == 10, 515 "Write shadowing different X86_CR0_MP"); 516 vmx_set_test_stage(10); 517 tmp = guest_cr4 ^ X86_CR4_TSD; 518 asm volatile("mov %0, %%rsi\n\t" 519 "mov %%rsi, %%cr4\n\t" 520 ::"m"(tmp) 521 :"rsi", "memory", "cc"); 522 report(vmx_get_test_stage() == 11, 523 "Write shadowing different X86_CR4_TSD"); 524 vmx_set_test_stage(11); 525 tmp = guest_cr4 ^ 
X86_CR4_DE; 526 asm volatile("mov %0, %%rsi\n\t" 527 "mov %%rsi, %%cr4\n\t" 528 ::"m"(tmp) 529 :"rsi", "memory", "cc"); 530 report(vmx_get_test_stage() == 12, 531 "Write shadowing different X86_CR4_DE"); 532 } 533 534 static int cr_shadowing_exit_handler(union exit_reason exit_reason) 535 { 536 u64 guest_rip; 537 u32 insn_len; 538 u32 exit_qual; 539 540 guest_rip = vmcs_read(GUEST_RIP); 541 insn_len = vmcs_read(EXI_INST_LEN); 542 exit_qual = vmcs_read(EXI_QUALIFICATION); 543 switch (exit_reason.basic) { 544 case VMX_VMCALL: 545 switch (vmx_get_test_stage()) { 546 case 0: 547 report(guest_cr0 == vmcs_read(GUEST_CR0), 548 "Read through CR0"); 549 break; 550 case 1: 551 report(guest_cr4 == vmcs_read(GUEST_CR4), 552 "Read through CR4"); 553 break; 554 case 2: 555 report(guest_cr0 == vmcs_read(GUEST_CR0), 556 "Write through CR0"); 557 break; 558 case 3: 559 report(guest_cr4 == vmcs_read(GUEST_CR4), 560 "Write through CR4"); 561 break; 562 case 4: 563 guest_cr0 = vmcs_read(GUEST_CR0) ^ (X86_CR0_TS | X86_CR0_MP); 564 guest_cr4 = vmcs_read(GUEST_CR4) ^ (X86_CR4_TSD | X86_CR4_DE); 565 vmcs_write(CR0_MASK, X86_CR0_TS | X86_CR0_MP); 566 vmcs_write(CR0_READ_SHADOW, guest_cr0 & (X86_CR0_TS | X86_CR0_MP)); 567 vmcs_write(CR4_MASK, X86_CR4_TSD | X86_CR4_DE); 568 vmcs_write(CR4_READ_SHADOW, guest_cr4 & (X86_CR4_TSD | X86_CR4_DE)); 569 break; 570 case 6: 571 report(guest_cr0 == (vmcs_read(GUEST_CR0) ^ (X86_CR0_TS | X86_CR0_MP)), 572 "Write shadowing CR0 (same value)"); 573 break; 574 case 7: 575 report(guest_cr4 == (vmcs_read(GUEST_CR4) ^ (X86_CR4_TSD | X86_CR4_DE)), 576 "Write shadowing CR4 (same value)"); 577 break; 578 default: 579 // Should not reach here 580 report_fail("unexpected stage, %d", 581 vmx_get_test_stage()); 582 print_vmexit_info(exit_reason); 583 return VMX_TEST_VMEXIT; 584 } 585 vmcs_write(GUEST_RIP, guest_rip + insn_len); 586 return VMX_TEST_RESUME; 587 case VMX_CR: 588 switch (vmx_get_test_stage()) { 589 case 4: 590 report_fail("Read shadowing CR0"); 591 vmx_inc_test_stage(); 592 break; 593 case 5: 594 report_fail("Read shadowing CR4"); 595 vmx_inc_test_stage(); 596 break; 597 case 6: 598 report_fail("Write shadowing CR0 (same value)"); 599 vmx_inc_test_stage(); 600 break; 601 case 7: 602 report_fail("Write shadowing CR4 (same value)"); 603 vmx_inc_test_stage(); 604 break; 605 case 8: 606 case 9: 607 // 0x600 encodes "mov %esi, %cr0" 608 if (exit_qual == 0x600) 609 vmx_inc_test_stage(); 610 break; 611 case 10: 612 case 11: 613 // 0x604 encodes "mov %esi, %cr4" 614 if (exit_qual == 0x604) 615 vmx_inc_test_stage(); 616 break; 617 default: 618 // Should not reach here 619 report_fail("unexpected stage, %d", 620 vmx_get_test_stage()); 621 print_vmexit_info(exit_reason); 622 return VMX_TEST_VMEXIT; 623 } 624 vmcs_write(GUEST_RIP, guest_rip + insn_len); 625 return VMX_TEST_RESUME; 626 default: 627 report_fail("Unknown exit reason, 0x%x", exit_reason.full); 628 print_vmexit_info(exit_reason); 629 } 630 return VMX_TEST_VMEXIT; 631 } 632 633 static int iobmp_init(struct vmcs *vmcs) 634 { 635 u32 ctrl_cpu0; 636 637 io_bitmap_a = alloc_page(); 638 io_bitmap_b = alloc_page(); 639 ctrl_cpu0 = vmcs_read(CPU_EXEC_CTRL0); 640 ctrl_cpu0 |= CPU_IO_BITMAP; 641 ctrl_cpu0 &= (~CPU_IO); 642 vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu0); 643 vmcs_write(IO_BITMAP_A, (u64)io_bitmap_a); 644 vmcs_write(IO_BITMAP_B, (u64)io_bitmap_b); 645 return VMX_TEST_START; 646 } 647 648 static void iobmp_main(void) 649 { 650 // stage 0, test IO pass 651 vmx_set_test_stage(0); 652 inb(0x5000); 653 outb(0x0, 0x5000); 654 
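	/*
	 * Stage 0 expects the accesses to port 0x5000 above to pass through
	 * without a VM-exit (no bits are set in either bitmap yet); any
	 * VMX_IO exit would advance the stage and fail the report below.
	 */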
report(vmx_get_test_stage() == 0, "I/O bitmap - I/O pass"); 655 // test IO width, in/out 656 ((u8 *)io_bitmap_a)[0] = 0xFF; 657 vmx_set_test_stage(2); 658 inb(0x0); 659 report(vmx_get_test_stage() == 3, "I/O bitmap - trap in"); 660 vmx_set_test_stage(3); 661 outw(0x0, 0x0); 662 report(vmx_get_test_stage() == 4, "I/O bitmap - trap out"); 663 vmx_set_test_stage(4); 664 inl(0x0); 665 report(vmx_get_test_stage() == 5, "I/O bitmap - I/O width, long"); 666 // test low/high IO port 667 vmx_set_test_stage(5); 668 ((u8 *)io_bitmap_a)[0x5000 / 8] = (1 << (0x5000 % 8)); 669 inb(0x5000); 670 report(vmx_get_test_stage() == 6, "I/O bitmap - I/O port, low part"); 671 vmx_set_test_stage(6); 672 ((u8 *)io_bitmap_b)[0x1000 / 8] = (1 << (0x1000 % 8)); 673 inb(0x9000); 674 report(vmx_get_test_stage() == 7, "I/O bitmap - I/O port, high part"); 675 // test partial pass 676 vmx_set_test_stage(7); 677 inl(0x4FFF); 678 report(vmx_get_test_stage() == 8, "I/O bitmap - partial pass"); 679 // test overrun 680 vmx_set_test_stage(8); 681 memset(io_bitmap_a, 0x0, PAGE_SIZE); 682 memset(io_bitmap_b, 0x0, PAGE_SIZE); 683 inl(0xFFFF); 684 report(vmx_get_test_stage() == 9, "I/O bitmap - overrun"); 685 vmx_set_test_stage(9); 686 vmcall(); 687 outb(0x0, 0x0); 688 report(vmx_get_test_stage() == 9, 689 "I/O bitmap - ignore unconditional exiting"); 690 vmx_set_test_stage(10); 691 vmcall(); 692 outb(0x0, 0x0); 693 report(vmx_get_test_stage() == 11, 694 "I/O bitmap - unconditional exiting"); 695 } 696 697 static int iobmp_exit_handler(union exit_reason exit_reason) 698 { 699 u64 guest_rip; 700 ulong exit_qual; 701 u32 insn_len, ctrl_cpu0; 702 703 guest_rip = vmcs_read(GUEST_RIP); 704 exit_qual = vmcs_read(EXI_QUALIFICATION); 705 insn_len = vmcs_read(EXI_INST_LEN); 706 switch (exit_reason.basic) { 707 case VMX_IO: 708 switch (vmx_get_test_stage()) { 709 case 0: 710 case 1: 711 vmx_inc_test_stage(); 712 break; 713 case 2: 714 report((exit_qual & VMX_IO_SIZE_MASK) == _VMX_IO_BYTE, 715 "I/O bitmap - I/O width, byte"); 716 report(exit_qual & VMX_IO_IN, 717 "I/O bitmap - I/O direction, in"); 718 vmx_inc_test_stage(); 719 break; 720 case 3: 721 report((exit_qual & VMX_IO_SIZE_MASK) == _VMX_IO_WORD, 722 "I/O bitmap - I/O width, word"); 723 report(!(exit_qual & VMX_IO_IN), 724 "I/O bitmap - I/O direction, out"); 725 vmx_inc_test_stage(); 726 break; 727 case 4: 728 report((exit_qual & VMX_IO_SIZE_MASK) == _VMX_IO_LONG, 729 "I/O bitmap - I/O width, long"); 730 vmx_inc_test_stage(); 731 break; 732 case 5: 733 if (((exit_qual & VMX_IO_PORT_MASK) >> VMX_IO_PORT_SHIFT) == 0x5000) 734 vmx_inc_test_stage(); 735 break; 736 case 6: 737 if (((exit_qual & VMX_IO_PORT_MASK) >> VMX_IO_PORT_SHIFT) == 0x9000) 738 vmx_inc_test_stage(); 739 break; 740 case 7: 741 if (((exit_qual & VMX_IO_PORT_MASK) >> VMX_IO_PORT_SHIFT) == 0x4FFF) 742 vmx_inc_test_stage(); 743 break; 744 case 8: 745 if (((exit_qual & VMX_IO_PORT_MASK) >> VMX_IO_PORT_SHIFT) == 0xFFFF) 746 vmx_inc_test_stage(); 747 break; 748 case 9: 749 case 10: 750 ctrl_cpu0 = vmcs_read(CPU_EXEC_CTRL0); 751 vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu0 & ~CPU_IO); 752 vmx_inc_test_stage(); 753 break; 754 default: 755 // Should not reach here 756 report_fail("unexpected stage, %d", 757 vmx_get_test_stage()); 758 print_vmexit_info(exit_reason); 759 return VMX_TEST_VMEXIT; 760 } 761 vmcs_write(GUEST_RIP, guest_rip + insn_len); 762 return VMX_TEST_RESUME; 763 case VMX_VMCALL: 764 switch (vmx_get_test_stage()) { 765 case 9: 766 ctrl_cpu0 = vmcs_read(CPU_EXEC_CTRL0); 767 ctrl_cpu0 |= CPU_IO | CPU_IO_BITMAP; 768 
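			/*
			 * Enable unconditional I/O exiting together with the
			 * I/O bitmaps: per the SDM, CPU_IO is ignored while
			 * CPU_IO_BITMAP is set, which is what the following
			 * "ignore unconditional exiting" stage verifies.
			 */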
vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu0); 769 break; 770 case 10: 771 ctrl_cpu0 = vmcs_read(CPU_EXEC_CTRL0); 772 ctrl_cpu0 = (ctrl_cpu0 & ~CPU_IO_BITMAP) | CPU_IO; 773 vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu0); 774 break; 775 default: 776 // Should not reach here 777 report_fail("unexpected stage, %d", 778 vmx_get_test_stage()); 779 print_vmexit_info(exit_reason); 780 return VMX_TEST_VMEXIT; 781 } 782 vmcs_write(GUEST_RIP, guest_rip + insn_len); 783 return VMX_TEST_RESUME; 784 default: 785 printf("guest_rip = %#lx\n", guest_rip); 786 printf("\tERROR : Unknown exit reason, 0x%x\n", exit_reason.full); 787 break; 788 } 789 return VMX_TEST_VMEXIT; 790 } 791 792 #define INSN_CPU0 0 793 #define INSN_CPU1 1 794 #define INSN_ALWAYS_TRAP 2 795 796 #define FIELD_EXIT_QUAL (1 << 0) 797 #define FIELD_INSN_INFO (1 << 1) 798 799 asm( 800 "insn_hlt: hlt;ret\n\t" 801 "insn_invlpg: invlpg 0x12345678;ret\n\t" 802 "insn_mwait: xor %eax, %eax; xor %ecx, %ecx; mwait;ret\n\t" 803 "insn_rdpmc: xor %ecx, %ecx; rdpmc;ret\n\t" 804 "insn_rdtsc: rdtsc;ret\n\t" 805 "insn_cr3_load: mov cr3,%rax; mov %rax,%cr3;ret\n\t" 806 "insn_cr3_store: mov %cr3,%rax;ret\n\t" 807 "insn_cr8_load: xor %eax, %eax; mov %rax,%cr8;ret\n\t" 808 "insn_cr8_store: mov %cr8,%rax;ret\n\t" 809 "insn_monitor: xor %eax, %eax; xor %ecx, %ecx; xor %edx, %edx; monitor;ret\n\t" 810 "insn_pause: pause;ret\n\t" 811 "insn_wbinvd: wbinvd;ret\n\t" 812 "insn_cpuid: mov $10, %eax; cpuid;ret\n\t" 813 "insn_invd: invd;ret\n\t" 814 "insn_sgdt: sgdt gdt_descr;ret\n\t" 815 "insn_lgdt: lgdt gdt_descr;ret\n\t" 816 "insn_sidt: sidt idt_descr;ret\n\t" 817 "insn_lidt: lidt idt_descr;ret\n\t" 818 "insn_sldt: sldt %ax;ret\n\t" 819 "insn_lldt: xor %eax, %eax; lldt %ax;ret\n\t" 820 "insn_str: str %ax;ret\n\t" 821 "insn_rdrand: rdrand %rax;ret\n\t" 822 "insn_rdseed: rdseed %rax;ret\n\t" 823 ); 824 extern void insn_hlt(void); 825 extern void insn_invlpg(void); 826 extern void insn_mwait(void); 827 extern void insn_rdpmc(void); 828 extern void insn_rdtsc(void); 829 extern void insn_cr3_load(void); 830 extern void insn_cr3_store(void); 831 extern void insn_cr8_load(void); 832 extern void insn_cr8_store(void); 833 extern void insn_monitor(void); 834 extern void insn_pause(void); 835 extern void insn_wbinvd(void); 836 extern void insn_sgdt(void); 837 extern void insn_lgdt(void); 838 extern void insn_sidt(void); 839 extern void insn_lidt(void); 840 extern void insn_sldt(void); 841 extern void insn_lldt(void); 842 extern void insn_str(void); 843 extern void insn_cpuid(void); 844 extern void insn_invd(void); 845 extern void insn_rdrand(void); 846 extern void insn_rdseed(void); 847 848 u32 cur_insn; 849 u64 cr3; 850 851 typedef bool (*supported_fn)(void); 852 853 static bool this_cpu_has_mwait(void) 854 { 855 return this_cpu_has(X86_FEATURE_MWAIT); 856 } 857 858 struct insn_table { 859 const char *name; 860 u32 flag; 861 void (*insn_func)(void); 862 u32 type; 863 u32 reason; 864 ulong exit_qual; 865 u32 insn_info; 866 // Use FIELD_EXIT_QUAL and FIELD_INSN_INFO to define 867 // which field need to be tested, reason is always tested 868 u32 test_field; 869 const supported_fn supported_fn; 870 u8 disabled; 871 }; 872 873 /* 874 * Add more test cases of instruction intercept here. Elements in this 875 * table is: 876 * name/control flag/insn function/type/exit reason/exit qulification/ 877 * instruction info/field to test 878 * The last field defines which fields (exit_qual and insn_info) need to be 879 * tested in exit handler. If set to 0, only "reason" is checked. 
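 * For example, the INVLPG entry below expects basic exit reason 14 and
 * checks that the exit qualification equals the linear address 0x12345678
 * passed to the instruction.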
880 */ 881 static struct insn_table insn_table[] = { 882 // Flags for Primary Processor-Based VM-Execution Controls 883 {"HLT", CPU_HLT, insn_hlt, INSN_CPU0, 12, 0, 0, 0}, 884 {"INVLPG", CPU_INVLPG, insn_invlpg, INSN_CPU0, 14, 885 0x12345678, 0, FIELD_EXIT_QUAL}, 886 {"MWAIT", CPU_MWAIT, insn_mwait, INSN_CPU0, 36, 0, 0, 0, this_cpu_has_mwait}, 887 {"RDPMC", CPU_RDPMC, insn_rdpmc, INSN_CPU0, 15, 0, 0, 0, this_cpu_has_pmu}, 888 {"RDTSC", CPU_RDTSC, insn_rdtsc, INSN_CPU0, 16, 0, 0, 0}, 889 {"CR3 load", CPU_CR3_LOAD, insn_cr3_load, INSN_CPU0, 28, 0x3, 0, 890 FIELD_EXIT_QUAL}, 891 {"CR3 store", CPU_CR3_STORE, insn_cr3_store, INSN_CPU0, 28, 0x13, 0, 892 FIELD_EXIT_QUAL}, 893 {"CR8 load", CPU_CR8_LOAD, insn_cr8_load, INSN_CPU0, 28, 0x8, 0, 894 FIELD_EXIT_QUAL}, 895 {"CR8 store", CPU_CR8_STORE, insn_cr8_store, INSN_CPU0, 28, 0x18, 0, 896 FIELD_EXIT_QUAL}, 897 {"MONITOR", CPU_MONITOR, insn_monitor, INSN_CPU0, 39, 0, 0, 0, this_cpu_has_mwait}, 898 {"PAUSE", CPU_PAUSE, insn_pause, INSN_CPU0, 40, 0, 0, 0}, 899 // Flags for Secondary Processor-Based VM-Execution Controls 900 {"WBINVD", CPU_WBINVD, insn_wbinvd, INSN_CPU1, 54, 0, 0, 0}, 901 {"DESC_TABLE (SGDT)", CPU_DESC_TABLE, insn_sgdt, INSN_CPU1, 46, 0, 0, 0}, 902 {"DESC_TABLE (LGDT)", CPU_DESC_TABLE, insn_lgdt, INSN_CPU1, 46, 0, 0, 0}, 903 {"DESC_TABLE (SIDT)", CPU_DESC_TABLE, insn_sidt, INSN_CPU1, 46, 0, 0, 0}, 904 {"DESC_TABLE (LIDT)", CPU_DESC_TABLE, insn_lidt, INSN_CPU1, 46, 0, 0, 0}, 905 {"DESC_TABLE (SLDT)", CPU_DESC_TABLE, insn_sldt, INSN_CPU1, 47, 0, 0, 0}, 906 {"DESC_TABLE (LLDT)", CPU_DESC_TABLE, insn_lldt, INSN_CPU1, 47, 0, 0, 0}, 907 {"DESC_TABLE (STR)", CPU_DESC_TABLE, insn_str, INSN_CPU1, 47, 0, 0, 0}, 908 /* LTR causes a #GP if done with a busy selector, so it is not tested. */ 909 {"RDRAND", CPU_RDRAND, insn_rdrand, INSN_CPU1, VMX_RDRAND, 0, 0, 0}, 910 {"RDSEED", CPU_RDSEED, insn_rdseed, INSN_CPU1, VMX_RDSEED, 0, 0, 0}, 911 // Instructions always trap 912 {"CPUID", 0, insn_cpuid, INSN_ALWAYS_TRAP, 10, 0, 0, 0}, 913 {"INVD", 0, insn_invd, INSN_ALWAYS_TRAP, 13, 0, 0, 0}, 914 // Instructions never trap 915 {NULL}, 916 }; 917 918 static int insn_intercept_init(struct vmcs *vmcs) 919 { 920 u32 ctrl_cpu, cur_insn; 921 922 ctrl_cpu = ctrl_cpu_rev[0].set | CPU_SECONDARY; 923 ctrl_cpu &= ctrl_cpu_rev[0].clr; 924 vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu); 925 vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu_rev[1].set); 926 cr3 = read_cr3(); 927 928 for (cur_insn = 0; insn_table[cur_insn].name != NULL; cur_insn++) { 929 if (insn_table[cur_insn].supported_fn == NULL) 930 continue; 931 insn_table[cur_insn].disabled = !insn_table[cur_insn].supported_fn(); 932 } 933 return VMX_TEST_START; 934 } 935 936 static void insn_intercept_main(void) 937 { 938 for (cur_insn = 0; insn_table[cur_insn].name != NULL; cur_insn++) { 939 vmx_set_test_stage(cur_insn * 2); 940 if ((insn_table[cur_insn].type == INSN_CPU0 && 941 !(ctrl_cpu_rev[0].clr & insn_table[cur_insn].flag)) || 942 (insn_table[cur_insn].type == INSN_CPU1 && 943 !(ctrl_cpu_rev[1].clr & insn_table[cur_insn].flag))) { 944 printf("\tCPU_CTRL%d.CPU_%s is not supported.\n", 945 insn_table[cur_insn].type - INSN_CPU0, 946 insn_table[cur_insn].name); 947 continue; 948 } 949 950 if (insn_table[cur_insn].disabled) { 951 printf("\tFeature required for %s is not supported.\n", 952 insn_table[cur_insn].name); 953 continue; 954 } 955 956 if ((insn_table[cur_insn].type == INSN_CPU0 && 957 !(ctrl_cpu_rev[0].set & insn_table[cur_insn].flag)) || 958 (insn_table[cur_insn].type == INSN_CPU1 && 959 !(ctrl_cpu_rev[1].set & 
insn_table[cur_insn].flag))) { 960 /* skip hlt, it stalls the guest and is tested below */ 961 if (insn_table[cur_insn].insn_func != insn_hlt) 962 insn_table[cur_insn].insn_func(); 963 report(vmx_get_test_stage() == cur_insn * 2, 964 "execute %s", 965 insn_table[cur_insn].name); 966 } else if (insn_table[cur_insn].type != INSN_ALWAYS_TRAP) 967 printf("\tCPU_CTRL%d.CPU_%s always traps.\n", 968 insn_table[cur_insn].type - INSN_CPU0, 969 insn_table[cur_insn].name); 970 971 vmcall(); 972 973 insn_table[cur_insn].insn_func(); 974 report(vmx_get_test_stage() == cur_insn * 2 + 1, 975 "intercept %s", 976 insn_table[cur_insn].name); 977 978 vmx_set_test_stage(cur_insn * 2 + 1); 979 vmcall(); 980 } 981 } 982 983 static int insn_intercept_exit_handler(union exit_reason exit_reason) 984 { 985 u64 guest_rip; 986 ulong exit_qual; 987 u32 insn_len; 988 u32 insn_info; 989 bool pass; 990 991 guest_rip = vmcs_read(GUEST_RIP); 992 exit_qual = vmcs_read(EXI_QUALIFICATION); 993 insn_len = vmcs_read(EXI_INST_LEN); 994 insn_info = vmcs_read(EXI_INST_INFO); 995 996 if (exit_reason.basic == VMX_VMCALL) { 997 u32 val = 0; 998 999 if (insn_table[cur_insn].type == INSN_CPU0) 1000 val = vmcs_read(CPU_EXEC_CTRL0); 1001 else if (insn_table[cur_insn].type == INSN_CPU1) 1002 val = vmcs_read(CPU_EXEC_CTRL1); 1003 1004 if (vmx_get_test_stage() & 1) 1005 val &= ~insn_table[cur_insn].flag; 1006 else 1007 val |= insn_table[cur_insn].flag; 1008 1009 if (insn_table[cur_insn].type == INSN_CPU0) 1010 vmcs_write(CPU_EXEC_CTRL0, val | ctrl_cpu_rev[0].set); 1011 else if (insn_table[cur_insn].type == INSN_CPU1) 1012 vmcs_write(CPU_EXEC_CTRL1, val | ctrl_cpu_rev[1].set); 1013 } else { 1014 pass = (cur_insn * 2 == vmx_get_test_stage()) && 1015 insn_table[cur_insn].reason == exit_reason.full; 1016 if (insn_table[cur_insn].test_field & FIELD_EXIT_QUAL && 1017 insn_table[cur_insn].exit_qual != exit_qual) 1018 pass = false; 1019 if (insn_table[cur_insn].test_field & FIELD_INSN_INFO && 1020 insn_table[cur_insn].insn_info != insn_info) 1021 pass = false; 1022 if (pass) 1023 vmx_inc_test_stage(); 1024 } 1025 vmcs_write(GUEST_RIP, guest_rip + insn_len); 1026 return VMX_TEST_RESUME; 1027 } 1028 1029 /** 1030 * __setup_ept - Setup the VMCS fields to enable Extended Page Tables (EPT) 1031 * @hpa: Host physical address of the top-level, a.k.a. root, EPT table 1032 * @enable_ad: Whether or not to enable Access/Dirty bits for EPT entries 1033 * 1034 * Returns 0 on success, 1 on failure. 1035 * 1036 * Note that @hpa doesn't need to point at actual memory if VM-Launch is 1037 * expected to fail, e.g. setup_dummy_ept() arbitrarily passes '0' to satisfy 1038 * the various EPTP consistency checks, but doesn't ensure backing for HPA '0'. 1039 */ 1040 static int __setup_ept(u64 hpa, bool enable_ad) 1041 { 1042 if (!(ctrl_cpu_rev[0].clr & CPU_SECONDARY) || 1043 !(ctrl_cpu_rev[1].clr & CPU_EPT)) { 1044 printf("\tEPT is not supported\n"); 1045 return 1; 1046 } 1047 if (!is_ept_memtype_supported(EPT_MEM_TYPE_WB)) { 1048 printf("\tWB memtype for EPT walks not supported\n"); 1049 return 1; 1050 } 1051 1052 if (!is_4_level_ept_supported()) { 1053 /* Support for 4-level EPT is mandatory. 
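	 * The EPTP constructed below encodes this as a page-walk length
	 * field of 3 (i.e. length minus one).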
*/ 1054 report(false, "4-level EPT support check"); 1055 printf("\tPWL4 is not supported\n"); 1056 return 1; 1057 } 1058 1059 eptp = EPT_MEM_TYPE_WB; 1060 eptp |= (3 << EPTP_PG_WALK_LEN_SHIFT); 1061 eptp |= hpa; 1062 if (enable_ad) 1063 eptp |= EPTP_AD_FLAG; 1064 1065 vmcs_write(EPTP, eptp); 1066 vmcs_write(CPU_EXEC_CTRL0, vmcs_read(CPU_EXEC_CTRL0)| CPU_SECONDARY); 1067 vmcs_write(CPU_EXEC_CTRL1, vmcs_read(CPU_EXEC_CTRL1)| CPU_EPT); 1068 1069 return 0; 1070 } 1071 1072 /** 1073 * setup_ept - Enable Extended Page Tables (EPT) and setup an identity map 1074 * @enable_ad: Whether or not to enable Access/Dirty bits for EPT entries 1075 * 1076 * Returns 0 on success, 1 on failure. 1077 * 1078 * This is the "real" function for setting up EPT tables, i.e. use this for 1079 * tests that need to run code in the guest with EPT enabled. 1080 */ 1081 static int setup_ept(bool enable_ad) 1082 { 1083 unsigned long end_of_memory; 1084 1085 pml4 = alloc_page(); 1086 1087 if (__setup_ept(virt_to_phys(pml4), enable_ad)) 1088 return 1; 1089 1090 end_of_memory = fwcfg_get_u64(FW_CFG_RAM_SIZE); 1091 if (end_of_memory < (1ul << 32)) 1092 end_of_memory = (1ul << 32); 1093 /* Cannot use large EPT pages if we need to track EPT 1094 * accessed/dirty bits at 4K granularity. 1095 */ 1096 setup_ept_range(pml4, 0, end_of_memory, 0, 1097 !enable_ad && ept_2m_supported(), 1098 EPT_WA | EPT_RA | EPT_EA); 1099 return 0; 1100 } 1101 1102 /** 1103 * setup_dummy_ept - Enable Extended Page Tables (EPT) with a dummy root HPA 1104 * 1105 * Setup EPT using a semi-arbitrary dummy root HPA. This function is intended 1106 * for use by tests that need EPT enabled to verify dependent VMCS controls 1107 * but never expect to fully enter the guest, i.e. don't need setup the actual 1108 * EPT tables. 1109 */ 1110 static void setup_dummy_ept(void) 1111 { 1112 if (__setup_ept(0, false)) 1113 report_abort("EPT setup unexpectedly failed"); 1114 } 1115 1116 static int enable_unrestricted_guest(bool need_valid_ept) 1117 { 1118 if (!(ctrl_cpu_rev[0].clr & CPU_SECONDARY) || 1119 !(ctrl_cpu_rev[1].clr & CPU_URG) || 1120 !(ctrl_cpu_rev[1].clr & CPU_EPT)) 1121 return 1; 1122 1123 if (need_valid_ept) 1124 setup_ept(false); 1125 else 1126 setup_dummy_ept(); 1127 1128 vmcs_write(CPU_EXEC_CTRL0, vmcs_read(CPU_EXEC_CTRL0) | CPU_SECONDARY); 1129 vmcs_write(CPU_EXEC_CTRL1, vmcs_read(CPU_EXEC_CTRL1) | CPU_URG); 1130 1131 return 0; 1132 } 1133 1134 static void ept_enable_ad_bits(void) 1135 { 1136 eptp |= EPTP_AD_FLAG; 1137 vmcs_write(EPTP, eptp); 1138 } 1139 1140 static void ept_disable_ad_bits(void) 1141 { 1142 eptp &= ~EPTP_AD_FLAG; 1143 vmcs_write(EPTP, eptp); 1144 } 1145 1146 static int ept_ad_enabled(void) 1147 { 1148 return eptp & EPTP_AD_FLAG; 1149 } 1150 1151 static void ept_enable_ad_bits_or_skip_test(void) 1152 { 1153 if (!ept_ad_bits_supported()) 1154 test_skip("EPT AD bits not supported."); 1155 ept_enable_ad_bits(); 1156 } 1157 1158 static int apic_version; 1159 1160 static int ept_init_common(bool have_ad) 1161 { 1162 int ret; 1163 struct pci_dev pcidev; 1164 1165 /* INVEPT is required by the EPT violation handler. 
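	 * The handler invalidates mappings with INVEPT_SINGLE after each EPT
	 * change, so bail out early when single-context invalidation is not
	 * supported.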
*/ 1166 if (!is_invept_type_supported(INVEPT_SINGLE)) 1167 return VMX_TEST_EXIT; 1168 1169 if (setup_ept(have_ad)) 1170 return VMX_TEST_EXIT; 1171 1172 data_page1 = alloc_page(); 1173 data_page2 = alloc_page(); 1174 *((u32 *)data_page1) = MAGIC_VAL_1; 1175 *((u32 *)data_page2) = MAGIC_VAL_2; 1176 install_ept(pml4, (unsigned long)data_page1, (unsigned long)data_page2, 1177 EPT_RA | EPT_WA | EPT_EA); 1178 1179 apic_version = apic_read(APIC_LVR); 1180 1181 ret = pci_find_dev(PCI_VENDOR_ID_REDHAT, PCI_DEVICE_ID_REDHAT_TEST); 1182 if (ret != PCIDEVADDR_INVALID) { 1183 pci_dev_init(&pcidev, ret); 1184 pci_physaddr = pcidev.resource[PCI_TESTDEV_BAR_MEM]; 1185 } 1186 1187 return VMX_TEST_START; 1188 } 1189 1190 static int ept_init(struct vmcs *vmcs) 1191 { 1192 return ept_init_common(false); 1193 } 1194 1195 static void ept_common(void) 1196 { 1197 vmx_set_test_stage(0); 1198 if (*((u32 *)data_page2) != MAGIC_VAL_1 || 1199 *((u32 *)data_page1) != MAGIC_VAL_1) 1200 report_fail("EPT basic framework - read"); 1201 else { 1202 *((u32 *)data_page2) = MAGIC_VAL_3; 1203 vmcall(); 1204 if (vmx_get_test_stage() == 1) { 1205 if (*((u32 *)data_page1) == MAGIC_VAL_3 && 1206 *((u32 *)data_page2) == MAGIC_VAL_2) 1207 report_pass("EPT basic framework"); 1208 else 1209 report_pass("EPT basic framework - remap"); 1210 } 1211 } 1212 // Test EPT Misconfigurations 1213 vmx_set_test_stage(1); 1214 vmcall(); 1215 *((u32 *)data_page1) = MAGIC_VAL_1; 1216 if (vmx_get_test_stage() != 2) { 1217 report_fail("EPT misconfigurations"); 1218 goto t1; 1219 } 1220 vmx_set_test_stage(2); 1221 vmcall(); 1222 *((u32 *)data_page1) = MAGIC_VAL_1; 1223 report(vmx_get_test_stage() == 3, "EPT misconfigurations"); 1224 t1: 1225 // Test EPT violation 1226 vmx_set_test_stage(3); 1227 vmcall(); 1228 *((u32 *)data_page1) = MAGIC_VAL_1; 1229 report(vmx_get_test_stage() == 4, "EPT violation - page permission"); 1230 // Violation caused by EPT paging structure 1231 vmx_set_test_stage(4); 1232 vmcall(); 1233 *((u32 *)data_page1) = MAGIC_VAL_2; 1234 report(vmx_get_test_stage() == 5, "EPT violation - paging structure"); 1235 1236 // MMIO Read/Write 1237 vmx_set_test_stage(5); 1238 vmcall(); 1239 1240 *(u32 volatile *)pci_physaddr; 1241 report(vmx_get_test_stage() == 6, "MMIO EPT violation - read"); 1242 1243 *(u32 volatile *)pci_physaddr = MAGIC_VAL_1; 1244 report(vmx_get_test_stage() == 7, "MMIO EPT violation - write"); 1245 } 1246 1247 static void ept_main(void) 1248 { 1249 ept_common(); 1250 1251 // Test EPT access to L1 MMIO 1252 vmx_set_test_stage(7); 1253 report(*((u32 *)0xfee00030UL) == apic_version, "EPT - MMIO access"); 1254 1255 // Test invalid operand for INVEPT 1256 vmcall(); 1257 report(vmx_get_test_stage() == 8, "EPT - unsupported INVEPT"); 1258 } 1259 1260 static bool invept_test(int type, u64 eptp) 1261 { 1262 bool ret, supported; 1263 1264 supported = ept_vpid.val & (EPT_CAP_INVEPT_SINGLE >> INVEPT_SINGLE << type); 1265 ret = __invept(type, eptp); 1266 1267 if (ret == !supported) 1268 return false; 1269 1270 if (!supported) 1271 printf("WARNING: unsupported invept passed!\n"); 1272 else 1273 printf("WARNING: invept failed!\n"); 1274 1275 return true; 1276 } 1277 1278 static int pml_exit_handler(union exit_reason exit_reason) 1279 { 1280 u16 index, count; 1281 u64 *pmlbuf = pml_log; 1282 u64 guest_rip = vmcs_read(GUEST_RIP);; 1283 u64 guest_cr3 = vmcs_read(GUEST_CR3); 1284 u32 insn_len = vmcs_read(EXI_INST_LEN); 1285 1286 switch (exit_reason.basic) { 1287 case VMX_VMCALL: 1288 switch (vmx_get_test_stage()) { 1289 case 0: 1290 index 
= vmcs_read(GUEST_PML_INDEX);
			for (count = index + 1; count < PML_INDEX; count++) {
				if (pmlbuf[count] == (u64)data_page2) {
					vmx_inc_test_stage();
					clear_ept_ad(pml4, guest_cr3, (unsigned long)data_page2);
					break;
				}
			}
			break;
		case 1:
			index = vmcs_read(GUEST_PML_INDEX);
			/* Keep clearing the dirty bit till an overflow */
			clear_ept_ad(pml4, guest_cr3, (unsigned long)data_page2);
			break;
		default:
			report_fail("unexpected stage, %d.",
				    vmx_get_test_stage());
			print_vmexit_info(exit_reason);
			return VMX_TEST_VMEXIT;
		}
		vmcs_write(GUEST_RIP, guest_rip + insn_len);
		return VMX_TEST_RESUME;
	case VMX_PML_FULL:
		vmx_inc_test_stage();
		vmcs_write(GUEST_PML_INDEX, PML_INDEX - 1);
		return VMX_TEST_RESUME;
	default:
		report_fail("Unknown exit reason, 0x%x", exit_reason.full);
		print_vmexit_info(exit_reason);
	}
	return VMX_TEST_VMEXIT;
}

static int ept_exit_handler_common(union exit_reason exit_reason, bool have_ad)
{
	u64 guest_rip;
	u64 guest_cr3;
	u32 insn_len;
	u32 exit_qual;
	static unsigned long data_page1_pte, data_page1_pte_pte, memaddr_pte,
		guest_pte_addr;

	guest_rip = vmcs_read(GUEST_RIP);
	guest_cr3 = vmcs_read(GUEST_CR3);
	insn_len = vmcs_read(EXI_INST_LEN);
	exit_qual = vmcs_read(EXI_QUALIFICATION);
	pteval_t *ptep;
	switch (exit_reason.basic) {
	case VMX_VMCALL:
		switch (vmx_get_test_stage()) {
		case 0:
			check_ept_ad(pml4, guest_cr3,
				     (unsigned long)data_page1,
				     have_ad ? EPT_ACCESS_FLAG : 0,
				     have_ad ? EPT_ACCESS_FLAG | EPT_DIRTY_FLAG : 0);
			check_ept_ad(pml4, guest_cr3,
				     (unsigned long)data_page2,
				     have_ad ? EPT_ACCESS_FLAG | EPT_DIRTY_FLAG : 0,
				     have_ad ?
EPT_ACCESS_FLAG | EPT_DIRTY_FLAG : 0); 1349 clear_ept_ad(pml4, guest_cr3, (unsigned long)data_page1); 1350 clear_ept_ad(pml4, guest_cr3, (unsigned long)data_page2); 1351 if (have_ad) 1352 invept(INVEPT_SINGLE, eptp); 1353 if (*((u32 *)data_page1) == MAGIC_VAL_3 && 1354 *((u32 *)data_page2) == MAGIC_VAL_2) { 1355 vmx_inc_test_stage(); 1356 install_ept(pml4, (unsigned long)data_page2, 1357 (unsigned long)data_page2, 1358 EPT_RA | EPT_WA | EPT_EA); 1359 } else 1360 report_fail("EPT basic framework - write"); 1361 break; 1362 case 1: 1363 install_ept(pml4, (unsigned long)data_page1, 1364 (unsigned long)data_page1, EPT_WA); 1365 invept(INVEPT_SINGLE, eptp); 1366 break; 1367 case 2: 1368 install_ept(pml4, (unsigned long)data_page1, 1369 (unsigned long)data_page1, 1370 EPT_RA | EPT_WA | EPT_EA | 1371 (2 << EPT_MEM_TYPE_SHIFT)); 1372 invept(INVEPT_SINGLE, eptp); 1373 break; 1374 case 3: 1375 clear_ept_ad(pml4, guest_cr3, (unsigned long)data_page1); 1376 TEST_ASSERT(get_ept_pte(pml4, (unsigned long)data_page1, 1377 1, &data_page1_pte)); 1378 set_ept_pte(pml4, (unsigned long)data_page1, 1379 1, data_page1_pte & ~EPT_PRESENT); 1380 invept(INVEPT_SINGLE, eptp); 1381 break; 1382 case 4: 1383 ptep = get_pte_level((pgd_t *)guest_cr3, data_page1, /*level=*/2); 1384 guest_pte_addr = virt_to_phys(ptep) & PAGE_MASK; 1385 1386 TEST_ASSERT(get_ept_pte(pml4, guest_pte_addr, 2, &data_page1_pte_pte)); 1387 set_ept_pte(pml4, guest_pte_addr, 2, 1388 data_page1_pte_pte & ~EPT_PRESENT); 1389 invept(INVEPT_SINGLE, eptp); 1390 break; 1391 case 5: 1392 install_ept(pml4, (unsigned long)pci_physaddr, 1393 (unsigned long)pci_physaddr, 0); 1394 invept(INVEPT_SINGLE, eptp); 1395 break; 1396 case 7: 1397 if (!invept_test(0, eptp)) 1398 vmx_inc_test_stage(); 1399 break; 1400 // Should not reach here 1401 default: 1402 report_fail("ERROR - unexpected stage, %d.", 1403 vmx_get_test_stage()); 1404 print_vmexit_info(exit_reason); 1405 return VMX_TEST_VMEXIT; 1406 } 1407 vmcs_write(GUEST_RIP, guest_rip + insn_len); 1408 return VMX_TEST_RESUME; 1409 case VMX_EPT_MISCONFIG: 1410 switch (vmx_get_test_stage()) { 1411 case 1: 1412 case 2: 1413 vmx_inc_test_stage(); 1414 install_ept(pml4, (unsigned long)data_page1, 1415 (unsigned long)data_page1, 1416 EPT_RA | EPT_WA | EPT_EA); 1417 invept(INVEPT_SINGLE, eptp); 1418 break; 1419 // Should not reach here 1420 default: 1421 report_fail("ERROR - unexpected stage, %d.", 1422 vmx_get_test_stage()); 1423 print_vmexit_info(exit_reason); 1424 return VMX_TEST_VMEXIT; 1425 } 1426 return VMX_TEST_RESUME; 1427 case VMX_EPT_VIOLATION: 1428 /* 1429 * Exit-qualifications are masked not to account for advanced 1430 * VM-exit information. Once KVM supports this feature, this 1431 * masking should be removed. 1432 */ 1433 exit_qual &= ~EPT_VLT_GUEST_MASK; 1434 1435 switch(vmx_get_test_stage()) { 1436 case 3: 1437 check_ept_ad(pml4, guest_cr3, (unsigned long)data_page1, 0, 1438 have_ad ? EPT_ACCESS_FLAG | EPT_DIRTY_FLAG : 0); 1439 clear_ept_ad(pml4, guest_cr3, (unsigned long)data_page1); 1440 if (exit_qual == (EPT_VLT_WR | EPT_VLT_LADDR_VLD | 1441 EPT_VLT_PADDR)) 1442 vmx_inc_test_stage(); 1443 set_ept_pte(pml4, (unsigned long)data_page1, 1444 1, data_page1_pte | (EPT_PRESENT)); 1445 invept(INVEPT_SINGLE, eptp); 1446 break; 1447 case 4: 1448 check_ept_ad(pml4, guest_cr3, (unsigned long)data_page1, 0, 1449 have_ad ? EPT_ACCESS_FLAG | EPT_DIRTY_FLAG : 0); 1450 clear_ept_ad(pml4, guest_cr3, (unsigned long)data_page1); 1451 if (exit_qual == (EPT_VLT_RD | 1452 (have_ad ? 
EPT_VLT_WR : 0) | 1453 EPT_VLT_LADDR_VLD)) 1454 vmx_inc_test_stage(); 1455 set_ept_pte(pml4, guest_pte_addr, 2, 1456 data_page1_pte_pte | (EPT_PRESENT)); 1457 invept(INVEPT_SINGLE, eptp); 1458 break; 1459 case 5: 1460 if (exit_qual & EPT_VLT_RD) 1461 vmx_inc_test_stage(); 1462 TEST_ASSERT(get_ept_pte(pml4, (unsigned long)pci_physaddr, 1463 1, &memaddr_pte)); 1464 set_ept_pte(pml4, memaddr_pte, 1, memaddr_pte | EPT_RA); 1465 invept(INVEPT_SINGLE, eptp); 1466 break; 1467 case 6: 1468 if (exit_qual & EPT_VLT_WR) 1469 vmx_inc_test_stage(); 1470 TEST_ASSERT(get_ept_pte(pml4, (unsigned long)pci_physaddr, 1471 1, &memaddr_pte)); 1472 set_ept_pte(pml4, memaddr_pte, 1, memaddr_pte | EPT_RA | EPT_WA); 1473 invept(INVEPT_SINGLE, eptp); 1474 break; 1475 default: 1476 // Should not reach here 1477 report_fail("ERROR : unexpected stage, %d", 1478 vmx_get_test_stage()); 1479 print_vmexit_info(exit_reason); 1480 return VMX_TEST_VMEXIT; 1481 } 1482 return VMX_TEST_RESUME; 1483 default: 1484 report_fail("Unknown exit reason, 0x%x", exit_reason.full); 1485 print_vmexit_info(exit_reason); 1486 } 1487 return VMX_TEST_VMEXIT; 1488 } 1489 1490 static int ept_exit_handler(union exit_reason exit_reason) 1491 { 1492 return ept_exit_handler_common(exit_reason, false); 1493 } 1494 1495 static int eptad_init(struct vmcs *vmcs) 1496 { 1497 int r = ept_init_common(true); 1498 1499 if (r == VMX_TEST_EXIT) 1500 return r; 1501 1502 if (!ept_ad_bits_supported()) { 1503 printf("\tEPT A/D bits are not supported"); 1504 return VMX_TEST_EXIT; 1505 } 1506 1507 return r; 1508 } 1509 1510 static int pml_init(struct vmcs *vmcs) 1511 { 1512 u32 ctrl_cpu; 1513 int r = eptad_init(vmcs); 1514 1515 if (r == VMX_TEST_EXIT) 1516 return r; 1517 1518 if (!(ctrl_cpu_rev[0].clr & CPU_SECONDARY) || 1519 !(ctrl_cpu_rev[1].clr & CPU_PML)) { 1520 printf("\tPML is not supported"); 1521 return VMX_TEST_EXIT; 1522 } 1523 1524 pml_log = alloc_page(); 1525 vmcs_write(PMLADDR, (u64)pml_log); 1526 vmcs_write(GUEST_PML_INDEX, PML_INDEX - 1); 1527 1528 ctrl_cpu = vmcs_read(CPU_EXEC_CTRL1) | CPU_PML; 1529 vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu); 1530 1531 return VMX_TEST_START; 1532 } 1533 1534 static void pml_main(void) 1535 { 1536 int count = 0; 1537 1538 vmx_set_test_stage(0); 1539 *((u32 *)data_page2) = 0x1; 1540 vmcall(); 1541 report(vmx_get_test_stage() == 1, "PML - Dirty GPA Logging"); 1542 1543 while (vmx_get_test_stage() == 1) { 1544 vmcall(); 1545 *((u32 *)data_page2) = 0x1; 1546 if (count++ > PML_INDEX) 1547 break; 1548 } 1549 report(vmx_get_test_stage() == 2, "PML Full Event"); 1550 } 1551 1552 static void eptad_main(void) 1553 { 1554 ept_common(); 1555 } 1556 1557 static int eptad_exit_handler(union exit_reason exit_reason) 1558 { 1559 return ept_exit_handler_common(exit_reason, true); 1560 } 1561 1562 #define TIMER_VECTOR 222 1563 1564 static volatile bool timer_fired; 1565 1566 static void timer_isr(isr_regs_t *regs) 1567 { 1568 timer_fired = true; 1569 apic_write(APIC_EOI, 0); 1570 } 1571 1572 static int interrupt_init(struct vmcs *vmcs) 1573 { 1574 msr_bmp_init(); 1575 vmcs_write(PIN_CONTROLS, vmcs_read(PIN_CONTROLS) & ~PIN_EXTINT); 1576 handle_irq(TIMER_VECTOR, timer_isr); 1577 return VMX_TEST_START; 1578 } 1579 1580 static void interrupt_main(void) 1581 { 1582 long long start, loops; 1583 1584 vmx_set_test_stage(0); 1585 1586 apic_write(APIC_LVTT, TIMER_VECTOR); 1587 sti(); 1588 1589 apic_write(APIC_TMICT, 1); 1590 for (loops = 0; loops < 10000000 && !timer_fired; loops++) 1591 asm volatile ("nop"); 1592 report(timer_fired, "direct 
interrupt while running guest"); 1593 1594 apic_write(APIC_TMICT, 0); 1595 cli(); 1596 vmcall(); 1597 timer_fired = false; 1598 apic_write(APIC_TMICT, 1); 1599 for (loops = 0; loops < 10000000 && !timer_fired; loops++) 1600 asm volatile ("nop"); 1601 report(timer_fired, "intercepted interrupt while running guest"); 1602 1603 sti(); 1604 apic_write(APIC_TMICT, 0); 1605 cli(); 1606 vmcall(); 1607 timer_fired = false; 1608 start = rdtsc(); 1609 apic_write(APIC_TMICT, 1000000); 1610 1611 safe_halt(); 1612 1613 report(rdtsc() - start > 1000000 && timer_fired, 1614 "direct interrupt + hlt"); 1615 1616 apic_write(APIC_TMICT, 0); 1617 cli(); 1618 vmcall(); 1619 timer_fired = false; 1620 start = rdtsc(); 1621 apic_write(APIC_TMICT, 1000000); 1622 1623 safe_halt(); 1624 1625 report(rdtsc() - start > 10000 && timer_fired, 1626 "intercepted interrupt + hlt"); 1627 1628 apic_write(APIC_TMICT, 0); 1629 cli(); 1630 vmcall(); 1631 timer_fired = false; 1632 start = rdtsc(); 1633 apic_write(APIC_TMICT, 1000000); 1634 1635 sti_nop(); 1636 vmcall(); 1637 1638 report(rdtsc() - start > 10000 && timer_fired, 1639 "direct interrupt + activity state hlt"); 1640 1641 apic_write(APIC_TMICT, 0); 1642 cli(); 1643 vmcall(); 1644 timer_fired = false; 1645 start = rdtsc(); 1646 apic_write(APIC_TMICT, 1000000); 1647 1648 sti_nop(); 1649 vmcall(); 1650 1651 report(rdtsc() - start > 10000 && timer_fired, 1652 "intercepted interrupt + activity state hlt"); 1653 1654 apic_write(APIC_TMICT, 0); 1655 cli(); 1656 vmx_set_test_stage(7); 1657 vmcall(); 1658 timer_fired = false; 1659 apic_write(APIC_TMICT, 1); 1660 for (loops = 0; loops < 10000000 && !timer_fired; loops++) 1661 asm volatile ("nop"); 1662 report(timer_fired, 1663 "running a guest with interrupt acknowledgement set"); 1664 1665 apic_write(APIC_TMICT, 0); 1666 sti(); 1667 timer_fired = false; 1668 vmcall(); 1669 report(timer_fired, "Inject an event to a halted guest"); 1670 } 1671 1672 static int interrupt_exit_handler(union exit_reason exit_reason) 1673 { 1674 u64 guest_rip = vmcs_read(GUEST_RIP); 1675 u32 insn_len = vmcs_read(EXI_INST_LEN); 1676 1677 switch (exit_reason.basic) { 1678 case VMX_VMCALL: 1679 switch (vmx_get_test_stage()) { 1680 case 0: 1681 case 2: 1682 case 5: 1683 vmcs_write(PIN_CONTROLS, 1684 vmcs_read(PIN_CONTROLS) | PIN_EXTINT); 1685 break; 1686 case 7: 1687 vmcs_write(EXI_CONTROLS, vmcs_read(EXI_CONTROLS) | EXI_INTA); 1688 vmcs_write(PIN_CONTROLS, 1689 vmcs_read(PIN_CONTROLS) | PIN_EXTINT); 1690 break; 1691 case 1: 1692 case 3: 1693 vmcs_write(PIN_CONTROLS, 1694 vmcs_read(PIN_CONTROLS) & ~PIN_EXTINT); 1695 break; 1696 case 4: 1697 case 6: 1698 vmcs_write(GUEST_ACTV_STATE, ACTV_HLT); 1699 break; 1700 1701 case 8: 1702 vmcs_write(GUEST_ACTV_STATE, ACTV_HLT); 1703 vmcs_write(ENT_INTR_INFO, 1704 TIMER_VECTOR | 1705 (VMX_INTR_TYPE_EXT_INTR << INTR_INFO_INTR_TYPE_SHIFT) | 1706 INTR_INFO_VALID_MASK); 1707 break; 1708 } 1709 vmx_inc_test_stage(); 1710 vmcs_write(GUEST_RIP, guest_rip + insn_len); 1711 return VMX_TEST_RESUME; 1712 case VMX_EXTINT: 1713 if (vmcs_read(EXI_CONTROLS) & EXI_INTA) { 1714 int vector = vmcs_read(EXI_INTR_INFO) & 0xff; 1715 handle_external_interrupt(vector); 1716 } else { 1717 sti_nop_cli(); 1718 } 1719 if (vmx_get_test_stage() >= 2) 1720 vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE); 1721 return VMX_TEST_RESUME; 1722 default: 1723 report_fail("Unknown exit reason, 0x%x", exit_reason.full); 1724 print_vmexit_info(exit_reason); 1725 } 1726 1727 return VMX_TEST_VMEXIT; 1728 } 1729 1730 1731 static volatile int nmi_fired; 1732 1733 
#define NMI_DELAY 100000000ULL 1734 1735 static void nmi_isr(isr_regs_t *regs) 1736 { 1737 nmi_fired = true; 1738 } 1739 1740 static int nmi_hlt_init(struct vmcs *vmcs) 1741 { 1742 msr_bmp_init(); 1743 handle_irq(NMI_VECTOR, nmi_isr); 1744 vmcs_write(PIN_CONTROLS, 1745 vmcs_read(PIN_CONTROLS) & ~PIN_NMI); 1746 vmcs_write(PIN_CONTROLS, 1747 vmcs_read(PIN_CONTROLS) & ~PIN_VIRT_NMI); 1748 return VMX_TEST_START; 1749 } 1750 1751 static void nmi_message_thread(void *data) 1752 { 1753 while (vmx_get_test_stage() != 1) 1754 pause(); 1755 1756 delay(NMI_DELAY); 1757 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, id_map[0]); 1758 1759 while (vmx_get_test_stage() != 2) 1760 pause(); 1761 1762 delay(NMI_DELAY); 1763 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, id_map[0]); 1764 } 1765 1766 static void nmi_hlt_main(void) 1767 { 1768 long long start; 1769 1770 if (cpu_count() < 2) { 1771 report_skip("%s : CPU count < 2", __func__); 1772 vmx_set_test_stage(-1); 1773 return; 1774 } 1775 1776 vmx_set_test_stage(0); 1777 on_cpu_async(1, nmi_message_thread, NULL); 1778 start = rdtsc(); 1779 vmx_set_test_stage(1); 1780 asm volatile ("hlt"); 1781 report((rdtsc() - start > NMI_DELAY) && nmi_fired, 1782 "direct NMI + hlt"); 1783 if (!nmi_fired) 1784 vmx_set_test_stage(-1); 1785 nmi_fired = false; 1786 1787 vmcall(); 1788 1789 start = rdtsc(); 1790 vmx_set_test_stage(2); 1791 asm volatile ("hlt"); 1792 report((rdtsc() - start > NMI_DELAY) && !nmi_fired, 1793 "intercepted NMI + hlt"); 1794 if (nmi_fired) { 1795 report(!nmi_fired, "intercepted NMI was dispatched"); 1796 vmx_set_test_stage(-1); 1797 return; 1798 } 1799 vmx_set_test_stage(3); 1800 } 1801 1802 static int nmi_hlt_exit_handler(union exit_reason exit_reason) 1803 { 1804 u64 guest_rip = vmcs_read(GUEST_RIP); 1805 u32 insn_len = vmcs_read(EXI_INST_LEN); 1806 1807 switch (vmx_get_test_stage()) { 1808 case 1: 1809 if (exit_reason.basic != VMX_VMCALL) { 1810 report_fail("VMEXIT not due to vmcall. Exit reason 0x%x", 1811 exit_reason.full); 1812 print_vmexit_info(exit_reason); 1813 return VMX_TEST_VMEXIT; 1814 } 1815 1816 vmcs_write(PIN_CONTROLS, 1817 vmcs_read(PIN_CONTROLS) | PIN_NMI); 1818 vmcs_write(PIN_CONTROLS, 1819 vmcs_read(PIN_CONTROLS) | PIN_VIRT_NMI); 1820 vmcs_write(GUEST_RIP, guest_rip + insn_len); 1821 break; 1822 1823 case 2: 1824 if (exit_reason.basic != VMX_EXC_NMI) { 1825 report_fail("VMEXIT not due to NMI intercept. 
Exit reason 0x%x", 1826 exit_reason.full); 1827 print_vmexit_info(exit_reason); 1828 return VMX_TEST_VMEXIT; 1829 } 1830 report_pass("NMI intercept while running guest"); 1831 vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE); 1832 break; 1833 1834 case 3: 1835 break; 1836 1837 default: 1838 return VMX_TEST_VMEXIT; 1839 } 1840 1841 if (vmx_get_test_stage() == 3) 1842 return VMX_TEST_VMEXIT; 1843 1844 return VMX_TEST_RESUME; 1845 } 1846 1847 1848 static int dbgctls_init(struct vmcs *vmcs) 1849 { 1850 u64 dr7 = 0x402; 1851 u64 zero = 0; 1852 1853 msr_bmp_init(); 1854 asm volatile( 1855 "mov %0,%%dr0\n\t" 1856 "mov %0,%%dr1\n\t" 1857 "mov %0,%%dr2\n\t" 1858 "mov %1,%%dr7\n\t" 1859 : : "r" (zero), "r" (dr7)); 1860 wrmsr(MSR_IA32_DEBUGCTLMSR, 0x1); 1861 vmcs_write(GUEST_DR7, 0x404); 1862 vmcs_write(GUEST_DEBUGCTL, 0x2); 1863 1864 vmcs_write(ENT_CONTROLS, vmcs_read(ENT_CONTROLS) | ENT_LOAD_DBGCTLS); 1865 vmcs_write(EXI_CONTROLS, vmcs_read(EXI_CONTROLS) | EXI_SAVE_DBGCTLS); 1866 1867 return VMX_TEST_START; 1868 } 1869 1870 static void dbgctls_main(void) 1871 { 1872 u64 dr7, debugctl; 1873 1874 asm volatile("mov %%dr7,%0" : "=r" (dr7)); 1875 debugctl = rdmsr(MSR_IA32_DEBUGCTLMSR); 1876 /* Commented out: KVM does not support DEBUGCTL so far */ 1877 (void)debugctl; 1878 report(dr7 == 0x404, "Load debug controls" /* && debugctl == 0x2 */); 1879 1880 dr7 = 0x408; 1881 asm volatile("mov %0,%%dr7" : : "r" (dr7)); 1882 wrmsr(MSR_IA32_DEBUGCTLMSR, 0x3); 1883 1884 vmx_set_test_stage(0); 1885 vmcall(); 1886 report(vmx_get_test_stage() == 1, "Save debug controls"); 1887 1888 if (ctrl_enter_rev.set & ENT_LOAD_DBGCTLS || 1889 ctrl_exit_rev.set & EXI_SAVE_DBGCTLS) { 1890 printf("\tDebug controls are always loaded/saved\n"); 1891 return; 1892 } 1893 vmx_set_test_stage(2); 1894 vmcall(); 1895 1896 asm volatile("mov %%dr7,%0" : "=r" (dr7)); 1897 debugctl = rdmsr(MSR_IA32_DEBUGCTLMSR); 1898 /* Commented out: KVM does not support DEBUGCTL so far */ 1899 (void)debugctl; 1900 report(dr7 == 0x402, 1901 "Guest=host debug controls" /* && debugctl == 0x1 */); 1902 1903 dr7 = 0x408; 1904 asm volatile("mov %0,%%dr7" : : "r" (dr7)); 1905 wrmsr(MSR_IA32_DEBUGCTLMSR, 0x3); 1906 1907 vmx_set_test_stage(3); 1908 vmcall(); 1909 report(vmx_get_test_stage() == 4, "Don't save debug controls"); 1910 } 1911 1912 static int dbgctls_exit_handler(union exit_reason exit_reason) 1913 { 1914 u32 insn_len = vmcs_read(EXI_INST_LEN); 1915 u64 guest_rip = vmcs_read(GUEST_RIP); 1916 u64 dr7, debugctl; 1917 1918 asm volatile("mov %%dr7,%0" : "=r" (dr7)); 1919 debugctl = rdmsr(MSR_IA32_DEBUGCTLMSR); 1920 1921 switch (exit_reason.basic) { 1922 case VMX_VMCALL: 1923 switch (vmx_get_test_stage()) { 1924 case 0: 1925 if (dr7 == 0x400 && debugctl == 0 && 1926 vmcs_read(GUEST_DR7) == 0x408 /* && 1927 Commented out: KVM does not support DEBUGCTL so far 1928 vmcs_read(GUEST_DEBUGCTL) == 0x3 */) 1929 vmx_inc_test_stage(); 1930 break; 1931 case 2: 1932 dr7 = 0x402; 1933 asm volatile("mov %0,%%dr7" : : "r" (dr7)); 1934 wrmsr(MSR_IA32_DEBUGCTLMSR, 0x1); 1935 vmcs_write(GUEST_DR7, 0x404); 1936 vmcs_write(GUEST_DEBUGCTL, 0x2); 1937 1938 vmcs_write(ENT_CONTROLS, 1939 vmcs_read(ENT_CONTROLS) & ~ENT_LOAD_DBGCTLS); 1940 vmcs_write(EXI_CONTROLS, 1941 vmcs_read(EXI_CONTROLS) & ~EXI_SAVE_DBGCTLS); 1942 break; 1943 case 3: 1944 if (dr7 == 0x400 && debugctl == 0 && 1945 vmcs_read(GUEST_DR7) == 0x404 /* && 1946 Commented out: KVM does not support DEBUGCTL so far 1947 vmcs_read(GUEST_DEBUGCTL) == 0x2 */) 1948 vmx_inc_test_stage(); 1949 break; 1950 } 1951 vmcs_write(GUEST_RIP, 
guest_rip + insn_len); 1952 return VMX_TEST_RESUME; 1953 default: 1954 report_fail("Unknown exit reason, %d", exit_reason.full); 1955 print_vmexit_info(exit_reason); 1956 } 1957 return VMX_TEST_VMEXIT; 1958 } 1959 1960 struct vmx_msr_entry { 1961 u32 index; 1962 u32 reserved; 1963 u64 value; 1964 } __attribute__((packed)); 1965 1966 #define MSR_MAGIC 0x31415926 1967 struct vmx_msr_entry *exit_msr_store, *entry_msr_load, *exit_msr_load; 1968 1969 static int msr_switch_init(struct vmcs *vmcs) 1970 { 1971 msr_bmp_init(); 1972 exit_msr_store = alloc_page(); 1973 exit_msr_load = alloc_page(); 1974 entry_msr_load = alloc_page(); 1975 entry_msr_load[0].index = MSR_KERNEL_GS_BASE; 1976 entry_msr_load[0].value = MSR_MAGIC; 1977 1978 vmx_set_test_stage(1); 1979 vmcs_write(ENT_MSR_LD_CNT, 1); 1980 vmcs_write(ENTER_MSR_LD_ADDR, (u64)entry_msr_load); 1981 vmcs_write(EXI_MSR_ST_CNT, 1); 1982 vmcs_write(EXIT_MSR_ST_ADDR, (u64)exit_msr_store); 1983 vmcs_write(EXI_MSR_LD_CNT, 1); 1984 vmcs_write(EXIT_MSR_LD_ADDR, (u64)exit_msr_load); 1985 return VMX_TEST_START; 1986 } 1987 1988 static void msr_switch_main(void) 1989 { 1990 if (vmx_get_test_stage() == 1) { 1991 report(rdmsr(MSR_KERNEL_GS_BASE) == MSR_MAGIC, 1992 "VM entry MSR load"); 1993 vmx_set_test_stage(2); 1994 wrmsr(MSR_KERNEL_GS_BASE, MSR_MAGIC + 1); 1995 exit_msr_store[0].index = MSR_KERNEL_GS_BASE; 1996 exit_msr_load[0].index = MSR_KERNEL_GS_BASE; 1997 exit_msr_load[0].value = MSR_MAGIC + 2; 1998 } 1999 vmcall(); 2000 } 2001 2002 static int msr_switch_exit_handler(union exit_reason exit_reason) 2003 { 2004 if (exit_reason.basic == VMX_VMCALL && vmx_get_test_stage() == 2) { 2005 report(exit_msr_store[0].value == MSR_MAGIC + 1, 2006 "VM exit MSR store"); 2007 report(rdmsr(MSR_KERNEL_GS_BASE) == MSR_MAGIC + 2, 2008 "VM exit MSR load"); 2009 vmx_set_test_stage(3); 2010 entry_msr_load[0].index = MSR_FS_BASE; 2011 return VMX_TEST_RESUME; 2012 } 2013 printf("ERROR %s: unexpected stage=%u or reason=0x%x\n", 2014 __func__, vmx_get_test_stage(), exit_reason.full); 2015 return VMX_TEST_EXIT; 2016 } 2017 2018 static int msr_switch_entry_failure(struct vmentry_result *result) 2019 { 2020 if (result->vm_fail) { 2021 printf("ERROR %s: VM-Fail on %s\n", __func__, result->instr); 2022 return VMX_TEST_EXIT; 2023 } 2024 2025 if (result->exit_reason.failed_vmentry && 2026 result->exit_reason.basic == VMX_FAIL_MSR && 2027 vmx_get_test_stage() == 3) { 2028 report(vmcs_read(EXI_QUALIFICATION) == 1, 2029 "VM entry MSR load: try to load FS_BASE"); 2030 return VMX_TEST_VMEXIT; 2031 } 2032 printf("ERROR %s: unexpected stage=%u or reason=%x\n", 2033 __func__, vmx_get_test_stage(), result->exit_reason.full); 2034 return VMX_TEST_EXIT; 2035 } 2036 2037 static int vmmcall_init(struct vmcs *vmcs) 2038 { 2039 vmcs_write(EXC_BITMAP, 1 << UD_VECTOR); 2040 return VMX_TEST_START; 2041 } 2042 2043 static void vmmcall_main(void) 2044 { 2045 asm volatile( 2046 "mov $0xABCD, %%rax\n\t" 2047 "vmmcall\n\t" 2048 ::: "rax"); 2049 2050 report_fail("VMMCALL"); 2051 } 2052 2053 static int vmmcall_exit_handler(union exit_reason exit_reason) 2054 { 2055 switch (exit_reason.basic) { 2056 case VMX_VMCALL: 2057 printf("here\n"); 2058 report_fail("VMMCALL triggers #UD"); 2059 break; 2060 case VMX_EXC_NMI: 2061 report((vmcs_read(EXI_INTR_INFO) & 0xff) == UD_VECTOR, 2062 "VMMCALL triggers #UD"); 2063 break; 2064 default: 2065 report_fail("Unknown exit reason, 0x%x", exit_reason.full); 2066 print_vmexit_info(exit_reason); 2067 } 2068 2069 return VMX_TEST_VMEXIT; 2070 } 2071 2072 static int 
disable_rdtscp_init(struct vmcs *vmcs) 2073 { 2074 u32 ctrl_cpu1; 2075 2076 if (ctrl_cpu_rev[0].clr & CPU_SECONDARY) { 2077 ctrl_cpu1 = vmcs_read(CPU_EXEC_CTRL1); 2078 ctrl_cpu1 &= ~CPU_RDTSCP; 2079 vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu1); 2080 } 2081 2082 return VMX_TEST_START; 2083 } 2084 2085 static void disable_rdtscp_ud_handler(struct ex_regs *regs) 2086 { 2087 switch (vmx_get_test_stage()) { 2088 case 0: 2089 report_pass("RDTSCP triggers #UD"); 2090 vmx_inc_test_stage(); 2091 regs->rip += 3; 2092 break; 2093 case 2: 2094 report_pass("RDPID triggers #UD"); 2095 vmx_inc_test_stage(); 2096 regs->rip += 4; 2097 break; 2098 } 2099 return; 2100 2101 } 2102 2103 static void disable_rdtscp_main(void) 2104 { 2105 /* Test that #UD is properly injected in L2. */ 2106 handle_exception(UD_VECTOR, disable_rdtscp_ud_handler); 2107 2108 vmx_set_test_stage(0); 2109 asm volatile("rdtscp" : : : "eax", "ecx", "edx"); 2110 vmcall(); 2111 asm volatile(".byte 0xf3, 0x0f, 0xc7, 0xf8" : : : "eax"); 2112 2113 handle_exception(UD_VECTOR, 0); 2114 vmcall(); 2115 } 2116 2117 static int disable_rdtscp_exit_handler(union exit_reason exit_reason) 2118 { 2119 switch (exit_reason.basic) { 2120 case VMX_VMCALL: 2121 switch (vmx_get_test_stage()) { 2122 case 0: 2123 report_fail("RDTSCP triggers #UD"); 2124 vmx_inc_test_stage(); 2125 /* fallthrough */ 2126 case 1: 2127 vmx_inc_test_stage(); 2128 vmcs_write(GUEST_RIP, vmcs_read(GUEST_RIP) + 3); 2129 return VMX_TEST_RESUME; 2130 case 2: 2131 report_fail("RDPID triggers #UD"); 2132 break; 2133 } 2134 break; 2135 2136 default: 2137 report_fail("Unknown exit reason, 0x%x", exit_reason.full); 2138 print_vmexit_info(exit_reason); 2139 } 2140 return VMX_TEST_VMEXIT; 2141 } 2142 2143 static void exit_monitor_from_l2_main(void) 2144 { 2145 printf("Calling exit(0) from l2...\n"); 2146 exit(0); 2147 } 2148 2149 static int exit_monitor_from_l2_handler(union exit_reason exit_reason) 2150 { 2151 report_fail("The guest should have killed the VMM"); 2152 return VMX_TEST_EXIT; 2153 } 2154 2155 static void assert_exit_reason(u64 expected) 2156 { 2157 u64 actual = vmcs_read(EXI_REASON); 2158 2159 TEST_ASSERT_EQ_MSG(expected, actual, "Expected %s, got %s.", 2160 exit_reason_description(expected), 2161 exit_reason_description(actual)); 2162 } 2163 2164 static void skip_exit_insn(void) 2165 { 2166 u64 guest_rip = vmcs_read(GUEST_RIP); 2167 u32 insn_len = vmcs_read(EXI_INST_LEN); 2168 vmcs_write(GUEST_RIP, guest_rip + insn_len); 2169 } 2170 2171 static void skip_exit_vmcall(void) 2172 { 2173 assert_exit_reason(VMX_VMCALL); 2174 skip_exit_insn(); 2175 } 2176 2177 static void v2_null_test_guest(void) 2178 { 2179 } 2180 2181 static void v2_null_test(void) 2182 { 2183 test_set_guest(v2_null_test_guest); 2184 enter_guest(); 2185 report_pass(__func__); 2186 } 2187 2188 static void v2_multiple_entries_test_guest(void) 2189 { 2190 vmx_set_test_stage(1); 2191 vmcall(); 2192 vmx_set_test_stage(2); 2193 } 2194 2195 static void v2_multiple_entries_test(void) 2196 { 2197 test_set_guest(v2_multiple_entries_test_guest); 2198 enter_guest(); 2199 TEST_ASSERT_EQ(vmx_get_test_stage(), 1); 2200 skip_exit_vmcall(); 2201 enter_guest(); 2202 TEST_ASSERT_EQ(vmx_get_test_stage(), 2); 2203 report_pass(__func__); 2204 } 2205 2206 static int fixture_test_data = 1; 2207 2208 static void fixture_test_teardown(void *data) 2209 { 2210 *((int *) data) = 1; 2211 } 2212 2213 static void fixture_test_guest(void) 2214 { 2215 fixture_test_data++; 2216 } 2217 2218 2219 static void fixture_test_setup(void) 2220 { 2221 
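	/*
	 * fixture_test_data starts out as 1 and the registered teardown resets
	 * it to 1, so the assert below catches a teardown that never ran.
	 */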
TEST_ASSERT_EQ_MSG(1, fixture_test_data, 2222 "fixture_test_teardown didn't run?!"); 2223 fixture_test_data = 2; 2224 test_add_teardown(fixture_test_teardown, &fixture_test_data); 2225 test_set_guest(fixture_test_guest); 2226 } 2227 2228 static void fixture_test_case1(void) 2229 { 2230 fixture_test_setup(); 2231 TEST_ASSERT_EQ(2, fixture_test_data); 2232 enter_guest(); 2233 TEST_ASSERT_EQ(3, fixture_test_data); 2234 report_pass(__func__); 2235 } 2236 2237 static void fixture_test_case2(void) 2238 { 2239 fixture_test_setup(); 2240 TEST_ASSERT_EQ(2, fixture_test_data); 2241 enter_guest(); 2242 TEST_ASSERT_EQ(3, fixture_test_data); 2243 report_pass(__func__); 2244 } 2245 2246 enum ept_access_op { 2247 OP_READ, 2248 OP_WRITE, 2249 OP_EXEC, 2250 OP_FLUSH_TLB, 2251 OP_EXIT, 2252 }; 2253 2254 static struct ept_access_test_data { 2255 unsigned long gpa; 2256 unsigned long *gva; 2257 unsigned long hpa; 2258 unsigned long *hva; 2259 enum ept_access_op op; 2260 } ept_access_test_data; 2261 2262 extern unsigned char ret42_start; 2263 extern unsigned char ret42_end; 2264 2265 /* Returns 42. */ 2266 asm( 2267 ".align 64\n" 2268 "ret42_start:\n" 2269 "mov $42, %eax\n" 2270 "ret\n" 2271 "ret42_end:\n" 2272 ); 2273 2274 static void 2275 diagnose_ept_violation_qual(u64 expected, u64 actual) 2276 { 2277 2278 #define DIAGNOSE(flag) \ 2279 do { \ 2280 if ((expected & flag) != (actual & flag)) \ 2281 printf(#flag " %sexpected\n", \ 2282 (expected & flag) ? "" : "un"); \ 2283 } while (0) 2284 2285 DIAGNOSE(EPT_VLT_RD); 2286 DIAGNOSE(EPT_VLT_WR); 2287 DIAGNOSE(EPT_VLT_FETCH); 2288 DIAGNOSE(EPT_VLT_PERM_RD); 2289 DIAGNOSE(EPT_VLT_PERM_WR); 2290 DIAGNOSE(EPT_VLT_PERM_EX); 2291 DIAGNOSE(EPT_VLT_LADDR_VLD); 2292 DIAGNOSE(EPT_VLT_PADDR); 2293 2294 #undef DIAGNOSE 2295 } 2296 2297 static void do_ept_access_op(enum ept_access_op op) 2298 { 2299 ept_access_test_data.op = op; 2300 enter_guest(); 2301 } 2302 2303 /* 2304 * Force the guest to flush its TLB (i.e., flush gva -> gpa mappings). Only 2305 * needed by tests that modify guest PTEs. 2306 */ 2307 static void ept_access_test_guest_flush_tlb(void) 2308 { 2309 do_ept_access_op(OP_FLUSH_TLB); 2310 skip_exit_vmcall(); 2311 } 2312 2313 /* 2314 * Modifies the EPT entry at @level in the mapping of @gpa. First clears the 2315 * bits in @clear then sets the bits in @set. @mkhuge transforms the entry into 2316 * a huge page. 2317 */ 2318 static unsigned long ept_twiddle(unsigned long gpa, bool mkhuge, int level, 2319 unsigned long clear, unsigned long set) 2320 { 2321 struct ept_access_test_data *data = &ept_access_test_data; 2322 unsigned long orig_pte; 2323 unsigned long pte; 2324 2325 /* Screw with the mapping at the requested level. */ 2326 TEST_ASSERT(get_ept_pte(pml4, gpa, level, &orig_pte)); 2327 pte = orig_pte; 2328 if (mkhuge) 2329 pte = (orig_pte & ~EPT_ADDR_MASK) | data->hpa | EPT_LARGE_PAGE; 2330 else 2331 pte = orig_pte; 2332 pte = (pte & ~clear) | set; 2333 set_ept_pte(pml4, gpa, level, pte); 2334 invept(INVEPT_SINGLE, eptp); 2335 2336 return orig_pte; 2337 } 2338 2339 static void ept_untwiddle(unsigned long gpa, int level, unsigned long orig_pte) 2340 { 2341 set_ept_pte(pml4, gpa, level, orig_pte); 2342 invept(INVEPT_SINGLE, eptp); 2343 } 2344 2345 static void do_ept_violation(bool leaf, enum ept_access_op op, 2346 u64 expected_qual, u64 expected_paddr) 2347 { 2348 u64 qual; 2349 2350 /* Try the access and observe the violation. 
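The access should exit to L1 with VMX_EPT_VIOLATION; the exit qualification and guest-physical address are then compared against the caller's expectations.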
*/ 2351 do_ept_access_op(op); 2352 2353 assert_exit_reason(VMX_EPT_VIOLATION); 2354 2355 qual = vmcs_read(EXI_QUALIFICATION); 2356 2357 /* Mask undefined bits (which may later be defined in certain cases). */ 2358 qual &= ~(EPT_VLT_GUEST_USER | EPT_VLT_GUEST_RW | EPT_VLT_GUEST_EX | 2359 EPT_VLT_PERM_USER_EX); 2360 2361 diagnose_ept_violation_qual(expected_qual, qual); 2362 TEST_EXPECT_EQ(expected_qual, qual); 2363 2364 #if 0 2365 /* Disable for now otherwise every test will fail */ 2366 TEST_EXPECT_EQ(vmcs_read(GUEST_LINEAR_ADDRESS), 2367 (unsigned long) ( 2368 op == OP_EXEC ? data->gva + 1 : data->gva)); 2369 #endif 2370 /* 2371 * TODO: tests that probe expected_paddr in pages other than the one at 2372 * the beginning of the 1g region. 2373 */ 2374 TEST_EXPECT_EQ(vmcs_read(INFO_PHYS_ADDR), expected_paddr); 2375 } 2376 2377 static void 2378 ept_violation_at_level_mkhuge(bool mkhuge, int level, unsigned long clear, 2379 unsigned long set, enum ept_access_op op, 2380 u64 expected_qual) 2381 { 2382 struct ept_access_test_data *data = &ept_access_test_data; 2383 unsigned long orig_pte; 2384 2385 orig_pte = ept_twiddle(data->gpa, mkhuge, level, clear, set); 2386 2387 do_ept_violation(level == 1 || mkhuge, op, expected_qual, 2388 op == OP_EXEC ? data->gpa + sizeof(unsigned long) : 2389 data->gpa); 2390 2391 /* Fix the violation and resume the op loop. */ 2392 ept_untwiddle(data->gpa, level, orig_pte); 2393 enter_guest(); 2394 skip_exit_vmcall(); 2395 } 2396 2397 static void 2398 ept_violation_at_level(int level, unsigned long clear, unsigned long set, 2399 enum ept_access_op op, u64 expected_qual) 2400 { 2401 ept_violation_at_level_mkhuge(false, level, clear, set, op, 2402 expected_qual); 2403 if (ept_huge_pages_supported(level)) 2404 ept_violation_at_level_mkhuge(true, level, clear, set, op, 2405 expected_qual); 2406 } 2407 2408 static void ept_violation(unsigned long clear, unsigned long set, 2409 enum ept_access_op op, u64 expected_qual) 2410 { 2411 ept_violation_at_level(1, clear, set, op, expected_qual); 2412 ept_violation_at_level(2, clear, set, op, expected_qual); 2413 ept_violation_at_level(3, clear, set, op, expected_qual); 2414 ept_violation_at_level(4, clear, set, op, expected_qual); 2415 } 2416 2417 static void ept_access_violation(unsigned long access, enum ept_access_op op, 2418 u64 expected_qual) 2419 { 2420 ept_violation(EPT_PRESENT, access, op, 2421 expected_qual | EPT_VLT_LADDR_VLD | EPT_VLT_PADDR); 2422 } 2423 2424 /* 2425 * For translations that don't involve a GVA, that is physical address (paddr) 2426 * accesses, EPT violations don't set the flag EPT_VLT_PADDR. For a typical 2427 * guest memory access, the hardware does GVA -> GPA -> HPA. However, certain 2428 * translations don't involve GVAs, such as when the hardware does the guest 2429 * page table walk. For example, in translating GVA_1 -> GPA_1, the guest MMU 2430 * might try to set an A bit on a guest PTE. If the GPA_2 that the PTE resides 2431 * on isn't present in the EPT, then the EPT violation will be for GPA_2 and 2432 * the EPT_VLT_PADDR bit will be clear in the exit qualification. 2433 * 2434 * Note that paddr violations can also be triggered by loading PAE page tables 2435 * with wonky addresses. We don't test that yet. 2436 * 2437 * This function modifies the EPT entry that maps the GPA that the guest page 2438 * table entry mapping ept_access_test_data.gva resides on. 2439 * 2440 * @ept_access EPT permissions to set. Other permissions are cleared. 
2441 * 2442 * @pte_ad Set the A/D bits on the guest PTE accordingly. 2443 * 2444 * @op Guest operation to perform with 2445 * ept_access_test_data.gva. 2446 * 2447 * @expect_violation 2448 * Is a violation expected during the paddr access? 2449 * 2450 * @expected_qual Expected qualification for the EPT violation. 2451 * EPT_VLT_PADDR should be clear. 2452 */ 2453 static void ept_access_paddr(unsigned long ept_access, unsigned long pte_ad, 2454 enum ept_access_op op, bool expect_violation, 2455 u64 expected_qual) 2456 { 2457 struct ept_access_test_data *data = &ept_access_test_data; 2458 unsigned long *ptep; 2459 unsigned long gpa; 2460 unsigned long orig_epte; 2461 unsigned long epte; 2462 int i; 2463 2464 /* Modify the guest PTE mapping data->gva according to @pte_ad. */ 2465 ptep = get_pte_level(current_page_table(), data->gva, /*level=*/1); 2466 TEST_ASSERT(ptep); 2467 TEST_ASSERT_EQ(*ptep & PT_ADDR_MASK, data->gpa); 2468 *ptep = (*ptep & ~PT_AD_MASK) | pte_ad; 2469 ept_access_test_guest_flush_tlb(); 2470 2471 /* 2472 * Now modify the access bits on the EPT entry for the GPA that the 2473 * guest PTE resides on. Note that by modifying a single EPT entry, 2474 * we're potentially affecting 512 guest PTEs. However, we've carefully 2475 * constructed our test such that those other 511 PTEs aren't used by 2476 * the guest: data->gva is at the beginning of a 1G huge page, thus the 2477 * PTE we're modifying is at the beginning of a 4K page and the 2478 * following 511 entries are also under our control (and not touched by 2479 * the guest). 2480 */ 2481 gpa = virt_to_phys(ptep); 2482 TEST_ASSERT_EQ(gpa & ~PAGE_MASK, 0); 2483 /* 2484 * Make sure the guest page table page is mapped with a 4K EPT entry, 2485 * otherwise our level=1 twiddling below will fail. We use the 2486 * identity map (gpa = gpa) since page tables are shared with the host. 
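 * For example, with ept_access == EPT_RA the page holding the guest PTE
 * becomes read-only in the EPT, so a walk that needs to write that PTE takes
 * a paddr EPT violation.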
2487 */ 2488 install_ept(pml4, gpa, gpa, EPT_PRESENT); 2489 orig_epte = ept_twiddle(gpa, /*mkhuge=*/0, /*level=*/1, 2490 /*clear=*/EPT_PRESENT, /*set=*/ept_access); 2491 2492 if (expect_violation) { 2493 do_ept_violation(/*leaf=*/true, op, 2494 expected_qual | EPT_VLT_LADDR_VLD, gpa); 2495 ept_untwiddle(gpa, /*level=*/1, orig_epte); 2496 do_ept_access_op(op); 2497 } else { 2498 do_ept_access_op(op); 2499 if (ept_ad_enabled()) { 2500 for (i = EPT_PAGE_LEVEL; i > 0; i--) { 2501 TEST_ASSERT(get_ept_pte(pml4, gpa, i, &epte)); 2502 TEST_ASSERT(epte & EPT_ACCESS_FLAG); 2503 if (i == 1) 2504 TEST_ASSERT(epte & EPT_DIRTY_FLAG); 2505 else 2506 TEST_ASSERT_EQ(epte & EPT_DIRTY_FLAG, 0); 2507 } 2508 } 2509 2510 ept_untwiddle(gpa, /*level=*/1, orig_epte); 2511 } 2512 2513 TEST_ASSERT(*ptep & PT_ACCESSED_MASK); 2514 if ((pte_ad & PT_DIRTY_MASK) || op == OP_WRITE) 2515 TEST_ASSERT(*ptep & PT_DIRTY_MASK); 2516 2517 skip_exit_vmcall(); 2518 } 2519 2520 static void ept_access_allowed_paddr(unsigned long ept_access, 2521 unsigned long pte_ad, 2522 enum ept_access_op op) 2523 { 2524 ept_access_paddr(ept_access, pte_ad, op, /*expect_violation=*/false, 2525 /*expected_qual=*/-1); 2526 } 2527 2528 static void ept_access_violation_paddr(unsigned long ept_access, 2529 unsigned long pte_ad, 2530 enum ept_access_op op, 2531 u64 expected_qual) 2532 { 2533 ept_access_paddr(ept_access, pte_ad, op, /*expect_violation=*/true, 2534 expected_qual); 2535 } 2536 2537 2538 static void ept_allowed_at_level_mkhuge(bool mkhuge, int level, 2539 unsigned long clear, 2540 unsigned long set, 2541 enum ept_access_op op) 2542 { 2543 struct ept_access_test_data *data = &ept_access_test_data; 2544 unsigned long orig_pte; 2545 2546 orig_pte = ept_twiddle(data->gpa, mkhuge, level, clear, set); 2547 2548 /* No violation. Should proceed to vmcall. */ 2549 do_ept_access_op(op); 2550 skip_exit_vmcall(); 2551 2552 ept_untwiddle(data->gpa, level, orig_pte); 2553 } 2554 2555 static void ept_allowed_at_level(int level, unsigned long clear, 2556 unsigned long set, enum ept_access_op op) 2557 { 2558 ept_allowed_at_level_mkhuge(false, level, clear, set, op); 2559 if (ept_huge_pages_supported(level)) 2560 ept_allowed_at_level_mkhuge(true, level, clear, set, op); 2561 } 2562 2563 static void ept_allowed(unsigned long clear, unsigned long set, 2564 enum ept_access_op op) 2565 { 2566 ept_allowed_at_level(1, clear, set, op); 2567 ept_allowed_at_level(2, clear, set, op); 2568 ept_allowed_at_level(3, clear, set, op); 2569 ept_allowed_at_level(4, clear, set, op); 2570 } 2571 2572 static void ept_ignored_bit(int bit) 2573 { 2574 /* Set the bit. */ 2575 ept_allowed(0, 1ul << bit, OP_READ); 2576 ept_allowed(0, 1ul << bit, OP_WRITE); 2577 ept_allowed(0, 1ul << bit, OP_EXEC); 2578 2579 /* Clear the bit. */ 2580 ept_allowed(1ul << bit, 0, OP_READ); 2581 ept_allowed(1ul << bit, 0, OP_WRITE); 2582 ept_allowed(1ul << bit, 0, OP_EXEC); 2583 } 2584 2585 static void ept_access_allowed(unsigned long access, enum ept_access_op op) 2586 { 2587 ept_allowed(EPT_PRESENT, access, op); 2588 } 2589 2590 2591 static void ept_misconfig_at_level_mkhuge_op(bool mkhuge, int level, 2592 unsigned long clear, 2593 unsigned long set, 2594 enum ept_access_op op) 2595 { 2596 struct ept_access_test_data *data = &ept_access_test_data; 2597 unsigned long orig_pte; 2598 2599 orig_pte = ept_twiddle(data->gpa, mkhuge, level, clear, set); 2600 2601 do_ept_access_op(op); 2602 assert_exit_reason(VMX_EPT_MISCONFIG); 2603 2604 /* Intel 27.2.1, "For all other VM exits, this field is cleared." 
*/ 2605 #if 0 2606 /* broken: */ 2607 TEST_EXPECT_EQ_MSG(vmcs_read(EXI_QUALIFICATION), 0); 2608 #endif 2609 #if 0 2610 /* 2611 * broken: 2612 * According to description of exit qual for EPT violation, 2613 * EPT_VLT_LADDR_VLD indicates if GUEST_LINEAR_ADDRESS is valid. 2614 * However, I can't find anything that says GUEST_LINEAR_ADDRESS ought 2615 * to be set for msiconfig. 2616 */ 2617 TEST_EXPECT_EQ(vmcs_read(GUEST_LINEAR_ADDRESS), 2618 (unsigned long) ( 2619 op == OP_EXEC ? data->gva + 1 : data->gva)); 2620 #endif 2621 2622 /* Fix the violation and resume the op loop. */ 2623 ept_untwiddle(data->gpa, level, orig_pte); 2624 enter_guest(); 2625 skip_exit_vmcall(); 2626 } 2627 2628 static void ept_misconfig_at_level_mkhuge(bool mkhuge, int level, 2629 unsigned long clear, 2630 unsigned long set) 2631 { 2632 /* The op shouldn't matter (read, write, exec), so try them all! */ 2633 ept_misconfig_at_level_mkhuge_op(mkhuge, level, clear, set, OP_READ); 2634 ept_misconfig_at_level_mkhuge_op(mkhuge, level, clear, set, OP_WRITE); 2635 ept_misconfig_at_level_mkhuge_op(mkhuge, level, clear, set, OP_EXEC); 2636 } 2637 2638 static void ept_misconfig_at_level(int level, unsigned long clear, 2639 unsigned long set) 2640 { 2641 ept_misconfig_at_level_mkhuge(false, level, clear, set); 2642 if (ept_huge_pages_supported(level)) 2643 ept_misconfig_at_level_mkhuge(true, level, clear, set); 2644 } 2645 2646 static void ept_misconfig(unsigned long clear, unsigned long set) 2647 { 2648 ept_misconfig_at_level(1, clear, set); 2649 ept_misconfig_at_level(2, clear, set); 2650 ept_misconfig_at_level(3, clear, set); 2651 ept_misconfig_at_level(4, clear, set); 2652 } 2653 2654 static void ept_access_misconfig(unsigned long access) 2655 { 2656 ept_misconfig(EPT_PRESENT, access); 2657 } 2658 2659 static void ept_reserved_bit_at_level_nohuge(int level, int bit) 2660 { 2661 /* Setting the bit causes a misconfig. */ 2662 ept_misconfig_at_level_mkhuge(false, level, 0, 1ul << bit); 2663 2664 /* Making the entry non-present turns reserved bits into ignored. */ 2665 ept_violation_at_level(level, EPT_PRESENT, 1ul << bit, OP_READ, 2666 EPT_VLT_RD | EPT_VLT_LADDR_VLD | EPT_VLT_PADDR); 2667 } 2668 2669 static void ept_reserved_bit_at_level_huge(int level, int bit) 2670 { 2671 /* Setting the bit causes a misconfig. */ 2672 ept_misconfig_at_level_mkhuge(true, level, 0, 1ul << bit); 2673 2674 /* Making the entry non-present turns reserved bits into ignored. */ 2675 ept_violation_at_level(level, EPT_PRESENT, 1ul << bit, OP_READ, 2676 EPT_VLT_RD | EPT_VLT_LADDR_VLD | EPT_VLT_PADDR); 2677 } 2678 2679 static void ept_reserved_bit_at_level(int level, int bit) 2680 { 2681 /* Setting the bit causes a misconfig. */ 2682 ept_misconfig_at_level(level, 0, 1ul << bit); 2683 2684 /* Making the entry non-present turns reserved bits into ignored. */ 2685 ept_violation_at_level(level, EPT_PRESENT, 1ul << bit, OP_READ, 2686 EPT_VLT_RD | EPT_VLT_LADDR_VLD | EPT_VLT_PADDR); 2687 } 2688 2689 static void ept_reserved_bit(int bit) 2690 { 2691 ept_reserved_bit_at_level(1, bit); 2692 ept_reserved_bit_at_level(2, bit); 2693 ept_reserved_bit_at_level(3, bit); 2694 ept_reserved_bit_at_level(4, bit); 2695 } 2696 2697 #define PAGE_2M_ORDER 9 2698 #define PAGE_1G_ORDER 18 2699 2700 static void *get_1g_page(void) 2701 { 2702 static void *alloc; 2703 2704 if (!alloc) 2705 alloc = alloc_pages(PAGE_1G_ORDER); 2706 return alloc; 2707 } 2708 2709 static void ept_access_test_teardown(void *unused) 2710 { 2711 /* Exit the guest cleanly. 
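OP_EXIT makes ept_access_test_guest() return instead of looping back to vmcall().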
*/ 2712 do_ept_access_op(OP_EXIT); 2713 } 2714 2715 static void ept_access_test_guest(void) 2716 { 2717 struct ept_access_test_data *data = &ept_access_test_data; 2718 int (*code)(void) = (int (*)(void)) &data->gva[1]; 2719 2720 while (true) { 2721 switch (data->op) { 2722 case OP_READ: 2723 TEST_ASSERT_EQ(*data->gva, MAGIC_VAL_1); 2724 break; 2725 case OP_WRITE: 2726 *data->gva = MAGIC_VAL_2; 2727 TEST_ASSERT_EQ(*data->gva, MAGIC_VAL_2); 2728 *data->gva = MAGIC_VAL_1; 2729 break; 2730 case OP_EXEC: 2731 TEST_ASSERT_EQ(42, code()); 2732 break; 2733 case OP_FLUSH_TLB: 2734 write_cr3(read_cr3()); 2735 break; 2736 case OP_EXIT: 2737 return; 2738 default: 2739 TEST_ASSERT_MSG(false, "Unknown op %d", data->op); 2740 } 2741 vmcall(); 2742 } 2743 } 2744 2745 static void ept_access_test_setup(void) 2746 { 2747 struct ept_access_test_data *data = &ept_access_test_data; 2748 unsigned long npages = 1ul << PAGE_1G_ORDER; 2749 unsigned long size = npages * PAGE_SIZE; 2750 unsigned long *page_table = current_page_table(); 2751 unsigned long pte; 2752 2753 if (setup_ept(false)) 2754 test_skip("EPT not supported"); 2755 2756 /* We use data->gpa = 1 << 39 so that test data has a separate pml4 entry */ 2757 if (cpuid_maxphyaddr() < 40) 2758 test_skip("Test needs MAXPHYADDR >= 40"); 2759 2760 test_set_guest(ept_access_test_guest); 2761 test_add_teardown(ept_access_test_teardown, NULL); 2762 2763 data->hva = get_1g_page(); 2764 TEST_ASSERT(data->hva); 2765 data->hpa = virt_to_phys(data->hva); 2766 2767 data->gpa = 1ul << 39; 2768 data->gva = (void *) ALIGN((unsigned long) alloc_vpages(npages * 2), 2769 size); 2770 TEST_ASSERT(!any_present_pages(page_table, data->gva, size)); 2771 install_pages(page_table, data->gpa, size, data->gva); 2772 2773 /* 2774 * Make sure nothing's mapped here so the tests that screw with the 2775 * pml4 entry don't inadvertently break something. 
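 * Both ends of the 1G region are checked at level 4 below before
 * install_ept() puts the test mapping in place.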
2776 */ 2777 TEST_ASSERT(get_ept_pte(pml4, data->gpa, 4, &pte) && pte == 0); 2778 TEST_ASSERT(get_ept_pte(pml4, data->gpa + size - 1, 4, &pte) && pte == 0); 2779 install_ept(pml4, data->hpa, data->gpa, EPT_PRESENT); 2780 2781 data->hva[0] = MAGIC_VAL_1; 2782 memcpy(&data->hva[1], &ret42_start, &ret42_end - &ret42_start); 2783 } 2784 2785 static void ept_access_test_not_present(void) 2786 { 2787 ept_access_test_setup(); 2788 /* --- */ 2789 ept_access_violation(0, OP_READ, EPT_VLT_RD); 2790 ept_access_violation(0, OP_WRITE, EPT_VLT_WR); 2791 ept_access_violation(0, OP_EXEC, EPT_VLT_FETCH); 2792 } 2793 2794 static void ept_access_test_read_only(void) 2795 { 2796 ept_access_test_setup(); 2797 2798 /* r-- */ 2799 ept_access_allowed(EPT_RA, OP_READ); 2800 ept_access_violation(EPT_RA, OP_WRITE, EPT_VLT_WR | EPT_VLT_PERM_RD); 2801 ept_access_violation(EPT_RA, OP_EXEC, EPT_VLT_FETCH | EPT_VLT_PERM_RD); 2802 } 2803 2804 static void ept_access_test_write_only(void) 2805 { 2806 ept_access_test_setup(); 2807 /* -w- */ 2808 ept_access_misconfig(EPT_WA); 2809 } 2810 2811 static void ept_access_test_read_write(void) 2812 { 2813 ept_access_test_setup(); 2814 /* rw- */ 2815 ept_access_allowed(EPT_RA | EPT_WA, OP_READ); 2816 ept_access_allowed(EPT_RA | EPT_WA, OP_WRITE); 2817 ept_access_violation(EPT_RA | EPT_WA, OP_EXEC, 2818 EPT_VLT_FETCH | EPT_VLT_PERM_RD | EPT_VLT_PERM_WR); 2819 } 2820 2821 2822 static void ept_access_test_execute_only(void) 2823 { 2824 ept_access_test_setup(); 2825 /* --x */ 2826 if (ept_execute_only_supported()) { 2827 ept_access_violation(EPT_EA, OP_READ, 2828 EPT_VLT_RD | EPT_VLT_PERM_EX); 2829 ept_access_violation(EPT_EA, OP_WRITE, 2830 EPT_VLT_WR | EPT_VLT_PERM_EX); 2831 ept_access_allowed(EPT_EA, OP_EXEC); 2832 } else { 2833 ept_access_misconfig(EPT_EA); 2834 } 2835 } 2836 2837 static void ept_access_test_read_execute(void) 2838 { 2839 ept_access_test_setup(); 2840 /* r-x */ 2841 ept_access_allowed(EPT_RA | EPT_EA, OP_READ); 2842 ept_access_violation(EPT_RA | EPT_EA, OP_WRITE, 2843 EPT_VLT_WR | EPT_VLT_PERM_RD | EPT_VLT_PERM_EX); 2844 ept_access_allowed(EPT_RA | EPT_EA, OP_EXEC); 2845 } 2846 2847 static void ept_access_test_write_execute(void) 2848 { 2849 ept_access_test_setup(); 2850 /* -wx */ 2851 ept_access_misconfig(EPT_WA | EPT_EA); 2852 } 2853 2854 static void ept_access_test_read_write_execute(void) 2855 { 2856 ept_access_test_setup(); 2857 /* rwx */ 2858 ept_access_allowed(EPT_RA | EPT_WA | EPT_EA, OP_READ); 2859 ept_access_allowed(EPT_RA | EPT_WA | EPT_EA, OP_WRITE); 2860 ept_access_allowed(EPT_RA | EPT_WA | EPT_EA, OP_EXEC); 2861 } 2862 2863 static void ept_access_test_reserved_bits(void) 2864 { 2865 int i; 2866 int maxphyaddr; 2867 2868 ept_access_test_setup(); 2869 2870 /* Reserved bits above maxphyaddr. */ 2871 maxphyaddr = cpuid_maxphyaddr(); 2872 for (i = maxphyaddr; i <= 51; i++) { 2873 report_prefix_pushf("reserved_bit=%d", i); 2874 ept_reserved_bit(i); 2875 report_prefix_pop(); 2876 } 2877 2878 /* Level-specific reserved bits. */ 2879 ept_reserved_bit_at_level_nohuge(2, 3); 2880 ept_reserved_bit_at_level_nohuge(2, 4); 2881 ept_reserved_bit_at_level_nohuge(2, 5); 2882 ept_reserved_bit_at_level_nohuge(2, 6); 2883 /* 2M alignment. 
*/ 2884 for (i = 12; i < 20; i++) { 2885 report_prefix_pushf("reserved_bit=%d", i); 2886 ept_reserved_bit_at_level_huge(2, i); 2887 report_prefix_pop(); 2888 } 2889 ept_reserved_bit_at_level_nohuge(3, 3); 2890 ept_reserved_bit_at_level_nohuge(3, 4); 2891 ept_reserved_bit_at_level_nohuge(3, 5); 2892 ept_reserved_bit_at_level_nohuge(3, 6); 2893 /* 1G alignment. */ 2894 for (i = 12; i < 29; i++) { 2895 report_prefix_pushf("reserved_bit=%d", i); 2896 ept_reserved_bit_at_level_huge(3, i); 2897 report_prefix_pop(); 2898 } 2899 ept_reserved_bit_at_level(4, 3); 2900 ept_reserved_bit_at_level(4, 4); 2901 ept_reserved_bit_at_level(4, 5); 2902 ept_reserved_bit_at_level(4, 6); 2903 ept_reserved_bit_at_level(4, 7); 2904 } 2905 2906 static void ept_access_test_ignored_bits(void) 2907 { 2908 ept_access_test_setup(); 2909 /* 2910 * Bits ignored at every level. Bits 8 and 9 (A and D) are ignored as 2911 * far as translation is concerned even if AD bits are enabled in the 2912 * EPTP. Bit 63 is ignored because "EPT-violation #VE" VM-execution 2913 * control is 0. 2914 */ 2915 ept_ignored_bit(8); 2916 ept_ignored_bit(9); 2917 ept_ignored_bit(10); 2918 ept_ignored_bit(11); 2919 ept_ignored_bit(52); 2920 ept_ignored_bit(53); 2921 ept_ignored_bit(54); 2922 ept_ignored_bit(55); 2923 ept_ignored_bit(56); 2924 ept_ignored_bit(57); 2925 ept_ignored_bit(58); 2926 ept_ignored_bit(59); 2927 ept_ignored_bit(60); 2928 ept_ignored_bit(61); 2929 ept_ignored_bit(62); 2930 ept_ignored_bit(63); 2931 } 2932 2933 static void ept_access_test_paddr_not_present_ad_disabled(void) 2934 { 2935 ept_access_test_setup(); 2936 ept_disable_ad_bits(); 2937 2938 ept_access_violation_paddr(0, PT_AD_MASK, OP_READ, EPT_VLT_RD); 2939 ept_access_violation_paddr(0, PT_AD_MASK, OP_WRITE, EPT_VLT_RD); 2940 ept_access_violation_paddr(0, PT_AD_MASK, OP_EXEC, EPT_VLT_RD); 2941 } 2942 2943 static void ept_access_test_paddr_not_present_ad_enabled(void) 2944 { 2945 u64 qual = EPT_VLT_RD | EPT_VLT_WR; 2946 2947 ept_access_test_setup(); 2948 ept_enable_ad_bits_or_skip_test(); 2949 2950 ept_access_violation_paddr(0, PT_AD_MASK, OP_READ, qual); 2951 ept_access_violation_paddr(0, PT_AD_MASK, OP_WRITE, qual); 2952 ept_access_violation_paddr(0, PT_AD_MASK, OP_EXEC, qual); 2953 } 2954 2955 static void ept_access_test_paddr_read_only_ad_disabled(void) 2956 { 2957 /* 2958 * When EPT AD bits are disabled, all accesses to guest paging 2959 * structures are reported separately as a read and (after 2960 * translation of the GPA to host physical address) a read+write 2961 * if the A/D bits have to be set. 2962 */ 2963 u64 qual = EPT_VLT_WR | EPT_VLT_RD | EPT_VLT_PERM_RD; 2964 2965 ept_access_test_setup(); 2966 ept_disable_ad_bits(); 2967 2968 /* Can't update A bit, so all accesses fail. */ 2969 ept_access_violation_paddr(EPT_RA, 0, OP_READ, qual); 2970 ept_access_violation_paddr(EPT_RA, 0, OP_WRITE, qual); 2971 ept_access_violation_paddr(EPT_RA, 0, OP_EXEC, qual); 2972 /* AD bits disabled, so only writes try to update the D bit. */ 2973 ept_access_allowed_paddr(EPT_RA, PT_ACCESSED_MASK, OP_READ); 2974 ept_access_violation_paddr(EPT_RA, PT_ACCESSED_MASK, OP_WRITE, qual); 2975 ept_access_allowed_paddr(EPT_RA, PT_ACCESSED_MASK, OP_EXEC); 2976 /* Both A and D already set, so read-only is OK. 
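No A/D update is needed here: OP_WRITE dirties the data page, not the paging structure, so a read-only EPT mapping of the PTE page is enough.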
*/ 2977 ept_access_allowed_paddr(EPT_RA, PT_AD_MASK, OP_READ); 2978 ept_access_allowed_paddr(EPT_RA, PT_AD_MASK, OP_WRITE); 2979 ept_access_allowed_paddr(EPT_RA, PT_AD_MASK, OP_EXEC); 2980 } 2981 2982 static void ept_access_test_paddr_read_only_ad_enabled(void) 2983 { 2984 /* 2985 * When EPT AD bits are enabled, all accesses to guest paging 2986 * structures are considered writes as far as EPT translation 2987 * is concerned. 2988 */ 2989 u64 qual = EPT_VLT_WR | EPT_VLT_RD | EPT_VLT_PERM_RD; 2990 2991 ept_access_test_setup(); 2992 ept_enable_ad_bits_or_skip_test(); 2993 2994 ept_access_violation_paddr(EPT_RA, 0, OP_READ, qual); 2995 ept_access_violation_paddr(EPT_RA, 0, OP_WRITE, qual); 2996 ept_access_violation_paddr(EPT_RA, 0, OP_EXEC, qual); 2997 ept_access_violation_paddr(EPT_RA, PT_ACCESSED_MASK, OP_READ, qual); 2998 ept_access_violation_paddr(EPT_RA, PT_ACCESSED_MASK, OP_WRITE, qual); 2999 ept_access_violation_paddr(EPT_RA, PT_ACCESSED_MASK, OP_EXEC, qual); 3000 ept_access_violation_paddr(EPT_RA, PT_AD_MASK, OP_READ, qual); 3001 ept_access_violation_paddr(EPT_RA, PT_AD_MASK, OP_WRITE, qual); 3002 ept_access_violation_paddr(EPT_RA, PT_AD_MASK, OP_EXEC, qual); 3003 } 3004 3005 static void ept_access_test_paddr_read_write(void) 3006 { 3007 ept_access_test_setup(); 3008 /* Read-write access to paging structure. */ 3009 ept_access_allowed_paddr(EPT_RA | EPT_WA, 0, OP_READ); 3010 ept_access_allowed_paddr(EPT_RA | EPT_WA, 0, OP_WRITE); 3011 ept_access_allowed_paddr(EPT_RA | EPT_WA, 0, OP_EXEC); 3012 } 3013 3014 static void ept_access_test_paddr_read_write_execute(void) 3015 { 3016 ept_access_test_setup(); 3017 /* RWX access to paging structure. */ 3018 ept_access_allowed_paddr(EPT_PRESENT, 0, OP_READ); 3019 ept_access_allowed_paddr(EPT_PRESENT, 0, OP_WRITE); 3020 ept_access_allowed_paddr(EPT_PRESENT, 0, OP_EXEC); 3021 } 3022 3023 static void ept_access_test_paddr_read_execute_ad_disabled(void) 3024 { 3025 /* 3026 * When EPT AD bits are disabled, all accesses to guest paging 3027 * structures are reported separately as a read and (after 3028 * translation of the GPA to host physical address) a read+write 3029 * if the A/D bits have to be set. 3030 */ 3031 u64 qual = EPT_VLT_WR | EPT_VLT_RD | EPT_VLT_PERM_RD | EPT_VLT_PERM_EX; 3032 3033 ept_access_test_setup(); 3034 ept_disable_ad_bits(); 3035 3036 /* Can't update A bit, so all accesses fail. */ 3037 ept_access_violation_paddr(EPT_RA | EPT_EA, 0, OP_READ, qual); 3038 ept_access_violation_paddr(EPT_RA | EPT_EA, 0, OP_WRITE, qual); 3039 ept_access_violation_paddr(EPT_RA | EPT_EA, 0, OP_EXEC, qual); 3040 /* AD bits disabled, so only writes try to update the D bit. */ 3041 ept_access_allowed_paddr(EPT_RA | EPT_EA, PT_ACCESSED_MASK, OP_READ); 3042 ept_access_violation_paddr(EPT_RA | EPT_EA, PT_ACCESSED_MASK, OP_WRITE, qual); 3043 ept_access_allowed_paddr(EPT_RA | EPT_EA, PT_ACCESSED_MASK, OP_EXEC); 3044 /* Both A and D already set, so read-only is OK. */ 3045 ept_access_allowed_paddr(EPT_RA | EPT_EA, PT_AD_MASK, OP_READ); 3046 ept_access_allowed_paddr(EPT_RA | EPT_EA, PT_AD_MASK, OP_WRITE); 3047 ept_access_allowed_paddr(EPT_RA | EPT_EA, PT_AD_MASK, OP_EXEC); 3048 } 3049 3050 static void ept_access_test_paddr_read_execute_ad_enabled(void) 3051 { 3052 /* 3053 * When EPT AD bits are enabled, all accesses to guest paging 3054 * structures are considered writes as far as EPT translation 3055 * is concerned. 
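 * That is why every expected qualification below includes EPT_VLT_WR, even
 * when the guest operation itself is only a read or an instruction fetch.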
3056 */ 3057 u64 qual = EPT_VLT_WR | EPT_VLT_RD | EPT_VLT_PERM_RD | EPT_VLT_PERM_EX; 3058 3059 ept_access_test_setup(); 3060 ept_enable_ad_bits_or_skip_test(); 3061 3062 ept_access_violation_paddr(EPT_RA | EPT_EA, 0, OP_READ, qual); 3063 ept_access_violation_paddr(EPT_RA | EPT_EA, 0, OP_WRITE, qual); 3064 ept_access_violation_paddr(EPT_RA | EPT_EA, 0, OP_EXEC, qual); 3065 ept_access_violation_paddr(EPT_RA | EPT_EA, PT_ACCESSED_MASK, OP_READ, qual); 3066 ept_access_violation_paddr(EPT_RA | EPT_EA, PT_ACCESSED_MASK, OP_WRITE, qual); 3067 ept_access_violation_paddr(EPT_RA | EPT_EA, PT_ACCESSED_MASK, OP_EXEC, qual); 3068 ept_access_violation_paddr(EPT_RA | EPT_EA, PT_AD_MASK, OP_READ, qual); 3069 ept_access_violation_paddr(EPT_RA | EPT_EA, PT_AD_MASK, OP_WRITE, qual); 3070 ept_access_violation_paddr(EPT_RA | EPT_EA, PT_AD_MASK, OP_EXEC, qual); 3071 } 3072 3073 static void ept_access_test_paddr_not_present_page_fault(void) 3074 { 3075 ept_access_test_setup(); 3076 /* 3077 * TODO: test no EPT violation as long as guest PF occurs. e.g., GPA is 3078 * page is read-only in EPT but GVA is also mapped read only in PT. 3079 * Thus guest page fault before host takes EPT violation for trying to 3080 * update A bit. 3081 */ 3082 } 3083 3084 static void ept_access_test_force_2m_page(void) 3085 { 3086 ept_access_test_setup(); 3087 3088 TEST_ASSERT_EQ(ept_2m_supported(), true); 3089 ept_allowed_at_level_mkhuge(true, 2, 0, 0, OP_READ); 3090 ept_violation_at_level_mkhuge(true, 2, EPT_PRESENT, EPT_RA, OP_WRITE, 3091 EPT_VLT_WR | EPT_VLT_PERM_RD | 3092 EPT_VLT_LADDR_VLD | EPT_VLT_PADDR); 3093 ept_misconfig_at_level_mkhuge(true, 2, EPT_PRESENT, EPT_WA); 3094 } 3095 3096 static bool invvpid_valid(u64 type, u64 vpid, u64 gla) 3097 { 3098 if (!is_invvpid_type_supported(type)) 3099 return false; 3100 3101 if (vpid >> 16) 3102 return false; 3103 3104 if (type != INVVPID_ALL && !vpid) 3105 return false; 3106 3107 if (type == INVVPID_ADDR && !is_canonical(gla)) 3108 return false; 3109 3110 return true; 3111 } 3112 3113 static void try_invvpid(u64 type, u64 vpid, u64 gla) 3114 { 3115 int rc; 3116 bool valid = invvpid_valid(type, vpid, gla); 3117 u64 expected = valid ? VMXERR_UNSUPPORTED_VMCS_COMPONENT 3118 : VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID; 3119 /* 3120 * Set VMX_INST_ERROR to VMXERR_UNVALID_VMCS_COMPONENT, so 3121 * that we can tell if it is updated by INVVPID. 3122 */ 3123 vmcs_read(~0); 3124 rc = __invvpid(type, vpid, gla); 3125 report(!rc == valid, "INVVPID type %ld VPID %lx GLA %lx %s", type, 3126 vpid, gla, 3127 valid ? "passes" : "fails"); 3128 report(vmcs_read(VMX_INST_ERROR) == expected, 3129 "After %s INVVPID, VMX_INST_ERR is %ld (actual %ld)", 3130 rc ? "failed" : "successful", 3131 expected, vmcs_read(VMX_INST_ERROR)); 3132 } 3133 3134 static inline unsigned long get_first_supported_invvpid_type(void) 3135 { 3136 u64 type = ffs(ept_vpid.val >> VPID_CAP_INVVPID_TYPES_SHIFT) - 1; 3137 3138 __TEST_ASSERT(type >= INVVPID_ADDR && type <= INVVPID_CONTEXT_LOCAL); 3139 return type; 3140 } 3141 3142 static void ds_invvpid(void *data) 3143 { 3144 asm volatile("invvpid %0, %1" 3145 : 3146 : "m"(*(struct invvpid_operand *)data), 3147 "r"(get_first_supported_invvpid_type())); 3148 } 3149 3150 /* 3151 * The SS override is ignored in 64-bit mode, so we use an addressing 3152 * mode with %rsp as the base register to generate an implicit SS 3153 * reference. 
3154 */ 3155 static void ss_invvpid(void *data) 3156 { 3157 asm volatile("sub %%rsp,%0; invvpid (%%rsp,%0,1), %1" 3158 : "+r"(data) 3159 : "r"(get_first_supported_invvpid_type())); 3160 } 3161 3162 static void invvpid_test_gp(void) 3163 { 3164 bool fault; 3165 3166 fault = test_for_exception(GP_VECTOR, &ds_invvpid, 3167 (void *)NONCANONICAL); 3168 report(fault, "INVVPID with non-canonical DS operand raises #GP"); 3169 } 3170 3171 static void invvpid_test_ss(void) 3172 { 3173 bool fault; 3174 3175 fault = test_for_exception(SS_VECTOR, &ss_invvpid, 3176 (void *)NONCANONICAL); 3177 report(fault, "INVVPID with non-canonical SS operand raises #SS"); 3178 } 3179 3180 static void invvpid_test_pf(void) 3181 { 3182 void *vpage = alloc_vpage(); 3183 bool fault; 3184 3185 fault = test_for_exception(PF_VECTOR, &ds_invvpid, vpage); 3186 report(fault, "INVVPID with unmapped operand raises #PF"); 3187 } 3188 3189 static void try_compat_invvpid(void *unused) 3190 { 3191 struct far_pointer32 fp = { 3192 .offset = (uintptr_t)&&invvpid, 3193 .selector = KERNEL_CS32, 3194 }; 3195 uintptr_t rsp; 3196 3197 asm volatile ("mov %%rsp, %0" : "=r"(rsp)); 3198 3199 TEST_ASSERT_MSG(fp.offset == (uintptr_t)&&invvpid, 3200 "Code address too high."); 3201 TEST_ASSERT_MSG(rsp == (u32)rsp, "Stack address too high."); 3202 3203 asm goto ("lcall *%0" : : "m" (fp) : "rax" : invvpid); 3204 return; 3205 invvpid: 3206 asm volatile (".code32;" 3207 "invvpid (%eax), %eax;" 3208 "lret;" 3209 ".code64"); 3210 __builtin_unreachable(); 3211 } 3212 3213 static void invvpid_test_compatibility_mode(void) 3214 { 3215 bool fault; 3216 3217 fault = test_for_exception(UD_VECTOR, &try_compat_invvpid, NULL); 3218 report(fault, "Compatibility mode INVVPID raises #UD"); 3219 } 3220 3221 static void invvpid_test_not_in_vmx_operation(void) 3222 { 3223 bool fault; 3224 3225 TEST_ASSERT(!vmx_off()); 3226 fault = test_for_exception(UD_VECTOR, &ds_invvpid, NULL); 3227 report(fault, "INVVPID outside of VMX operation raises #UD"); 3228 TEST_ASSERT(!vmx_on()); 3229 } 3230 3231 /* 3232 * This does not test real-address mode, virtual-8086 mode, protected mode, 3233 * or CPL > 0. 3234 */ 3235 static void invvpid_test(void) 3236 { 3237 int i; 3238 unsigned types = 0; 3239 unsigned type; 3240 3241 if (!is_vpid_supported()) 3242 test_skip("VPID not supported"); 3243 3244 if (!is_invvpid_supported()) 3245 test_skip("INVVPID not supported.\n"); 3246 3247 if (is_invvpid_type_supported(INVVPID_ADDR)) 3248 types |= 1u << INVVPID_ADDR; 3249 if (is_invvpid_type_supported(INVVPID_CONTEXT_GLOBAL)) 3250 types |= 1u << INVVPID_CONTEXT_GLOBAL; 3251 if (is_invvpid_type_supported(INVVPID_ALL)) 3252 types |= 1u << INVVPID_ALL; 3253 if (is_invvpid_type_supported(INVVPID_CONTEXT_LOCAL)) 3254 types |= 1u << INVVPID_CONTEXT_LOCAL; 3255 3256 if (!types) 3257 test_skip("No INVVPID types supported.\n"); 3258 3259 for (i = -127; i < 128; i++) 3260 try_invvpid(i, 0xffff, 0); 3261 3262 /* 3263 * VPID must not be more than 16 bits. 3264 */ 3265 for (i = 0; i < 64; i++) 3266 for (type = 0; type < 4; type++) 3267 if (types & (1u << type)) 3268 try_invvpid(type, 1ul << i, 0); 3269 3270 /* 3271 * VPID must not be zero, except for "all contexts." 3272 */ 3273 for (type = 0; type < 4; type++) 3274 if (types & (1u << type)) 3275 try_invvpid(type, 0, 0); 3276 3277 /* 3278 * The gla operand is only validated for single-address INVVPID. 
3279 */ 3280 if (types & (1u << INVVPID_ADDR)) 3281 try_invvpid(INVVPID_ADDR, 0xffff, NONCANONICAL); 3282 3283 invvpid_test_gp(); 3284 invvpid_test_ss(); 3285 invvpid_test_pf(); 3286 invvpid_test_compatibility_mode(); 3287 invvpid_test_not_in_vmx_operation(); 3288 } 3289 3290 static void test_assert_vmlaunch_inst_error(u32 expected_error) 3291 { 3292 u32 vmx_inst_err = vmcs_read(VMX_INST_ERROR); 3293 3294 report(vmx_inst_err == expected_error, 3295 "VMX inst error is %d (actual %d)", expected_error, vmx_inst_err); 3296 } 3297 3298 /* 3299 * This version is wildly unsafe and should _only_ be used to test VM-Fail 3300 * scenarios involving HOST_RIP. 3301 */ 3302 static void test_vmx_vmlaunch_must_fail(u32 expected_error) 3303 { 3304 /* Read the function name. */ 3305 TEST_ASSERT(expected_error); 3306 3307 /* 3308 * Don't bother with any prep work, if VMLAUNCH passes the VM-Fail 3309 * consistency checks and generates a VM-Exit, then the test is doomed 3310 * no matter what as it will jump to a garbage RIP. 3311 */ 3312 __asm__ __volatile__ ("vmlaunch"); 3313 test_assert_vmlaunch_inst_error(expected_error); 3314 } 3315 3316 /* 3317 * Test for early VMLAUNCH failure. Returns true if VMLAUNCH makes it 3318 * at least as far as the guest-state checks. Returns false if the 3319 * VMLAUNCH fails early and execution falls through to the next 3320 * instruction. 3321 */ 3322 static bool vmlaunch(void) 3323 { 3324 u32 exit_reason; 3325 3326 /* 3327 * Indirectly set VMX_INST_ERR to 12 ("VMREAD/VMWRITE from/to 3328 * unsupported VMCS component"). The caller can then check 3329 * to see if a failed VM-entry sets VMX_INST_ERR as expected. 3330 */ 3331 vmcs_write(~0u, 0); 3332 3333 vmcs_write(HOST_RIP, (uintptr_t)&&success); 3334 __asm__ __volatile__ goto ("vmwrite %%rsp, %0; vmlaunch" 3335 : 3336 : "r" ((u64)HOST_RSP) 3337 : "cc", "memory" 3338 : success); 3339 return false; 3340 success: 3341 exit_reason = vmcs_read(EXI_REASON); 3342 TEST_ASSERT(exit_reason == (VMX_FAIL_STATE | VMX_ENTRY_FAILURE) || 3343 exit_reason == (VMX_FAIL_MSR | VMX_ENTRY_FAILURE)); 3344 return true; 3345 } 3346 3347 /* 3348 * Try to launch the current VMCS. 3349 */ 3350 static void test_vmx_vmlaunch(u32 xerror) 3351 { 3352 bool success = vmlaunch(); 3353 3354 report(success == !xerror, "vmlaunch %s", 3355 !xerror ? "succeeds" : "fails"); 3356 if (!success && xerror) 3357 test_assert_vmlaunch_inst_error(xerror); 3358 } 3359 3360 /* 3361 * Try to launch the current VMCS, and expect one of two possible 3362 * errors (or success) codes. 3363 */ 3364 static void test_vmx_vmlaunch2(u32 xerror1, u32 xerror2) 3365 { 3366 bool success = vmlaunch(); 3367 u32 vmx_inst_err; 3368 3369 if (!xerror1 == !xerror2) 3370 report(success == !xerror1, "vmlaunch %s", 3371 !xerror1 ? "succeeds" : "fails"); 3372 3373 if (!success && (xerror1 || xerror2)) { 3374 vmx_inst_err = vmcs_read(VMX_INST_ERROR); 3375 report(vmx_inst_err == xerror1 || vmx_inst_err == xerror2, 3376 "VMX inst error is %d or %d (actual %d)", xerror1, 3377 xerror2, vmx_inst_err); 3378 } 3379 } 3380 3381 static void test_vmx_invalid_controls(void) 3382 { 3383 test_vmx_vmlaunch(VMXERR_ENTRY_INVALID_CONTROL_FIELD); 3384 } 3385 3386 static void test_vmx_valid_controls(void) 3387 { 3388 test_vmx_vmlaunch(0); 3389 } 3390 3391 /* 3392 * Test a particular value of a VM-execution control bit, if the value 3393 * is required or if the value is zero. 
3394 */ 3395 static void test_rsvd_ctl_bit_value(const char *name, union vmx_ctrl_msr msr, 3396 enum Encoding encoding, unsigned bit, 3397 unsigned val) 3398 { 3399 u32 mask = 1u << bit; 3400 bool expected; 3401 u32 controls; 3402 3403 if (msr.set & mask) 3404 TEST_ASSERT(msr.clr & mask); 3405 3406 /* 3407 * We can't arbitrarily turn on a control bit, because it may 3408 * introduce dependencies on other VMCS fields. So, we only 3409 * test turning on bits that have a required setting. 3410 */ 3411 if (val && (msr.clr & mask) && !(msr.set & mask)) 3412 return; 3413 3414 report_prefix_pushf("%s %s bit %d", 3415 val ? "Set" : "Clear", name, bit); 3416 3417 controls = vmcs_read(encoding); 3418 if (val) { 3419 vmcs_write(encoding, msr.set | mask); 3420 expected = (msr.clr & mask); 3421 } else { 3422 vmcs_write(encoding, msr.set & ~mask); 3423 expected = !(msr.set & mask); 3424 } 3425 if (expected) 3426 test_vmx_valid_controls(); 3427 else 3428 test_vmx_invalid_controls(); 3429 vmcs_write(encoding, controls); 3430 report_prefix_pop(); 3431 } 3432 3433 /* 3434 * Test reserved values of a VM-execution control bit, based on the 3435 * allowed bit settings from the corresponding VMX capability MSR. 3436 */ 3437 static void test_rsvd_ctl_bit(const char *name, union vmx_ctrl_msr msr, 3438 enum Encoding encoding, unsigned bit) 3439 { 3440 test_rsvd_ctl_bit_value(name, msr, encoding, bit, 0); 3441 test_rsvd_ctl_bit_value(name, msr, encoding, bit, 1); 3442 } 3443 3444 /* 3445 * Reserved bits in the pin-based VM-execution controls must be set 3446 * properly. Software may consult the VMX capability MSRs to determine 3447 * the proper settings. 3448 * [Intel SDM] 3449 */ 3450 static void test_pin_based_ctls(void) 3451 { 3452 unsigned bit; 3453 3454 printf("%s: %lx\n", basic_msr.ctrl ? "MSR_IA32_VMX_TRUE_PIN" : 3455 "MSR_IA32_VMX_PINBASED_CTLS", ctrl_pin_rev.val); 3456 for (bit = 0; bit < 32; bit++) 3457 test_rsvd_ctl_bit("pin-based controls", 3458 ctrl_pin_rev, PIN_CONTROLS, bit); 3459 } 3460 3461 /* 3462 * Reserved bits in the primary processor-based VM-execution controls 3463 * must be set properly. Software may consult the VMX capability MSRs 3464 * to determine the proper settings. 3465 * [Intel SDM] 3466 */ 3467 static void test_primary_processor_based_ctls(void) 3468 { 3469 unsigned bit; 3470 3471 printf("\n%s: %lx\n", basic_msr.ctrl ? "MSR_IA32_VMX_TRUE_PROC" : 3472 "MSR_IA32_VMX_PROCBASED_CTLS", ctrl_cpu_rev[0].val); 3473 for (bit = 0; bit < 32; bit++) 3474 test_rsvd_ctl_bit("primary processor-based controls", 3475 ctrl_cpu_rev[0], CPU_EXEC_CTRL0, bit); 3476 } 3477 3478 /* 3479 * If the "activate secondary controls" primary processor-based 3480 * VM-execution control is 1, reserved bits in the secondary 3481 * processor-based VM-execution controls must be cleared. Software may 3482 * consult the VMX capability MSRs to determine which bits are 3483 * reserved. 3484 * If the "activate secondary controls" primary processor-based 3485 * VM-execution control is 0 (or if the processor does not support the 3486 * 1-setting of that control), no checks are performed on the 3487 * secondary processor-based VM-execution controls. 
3488 * [Intel SDM] 3489 */ 3490 static void test_secondary_processor_based_ctls(void) 3491 { 3492 u32 primary; 3493 u32 secondary; 3494 unsigned bit; 3495 3496 if (!(ctrl_cpu_rev[0].clr & CPU_SECONDARY)) 3497 return; 3498 3499 primary = vmcs_read(CPU_EXEC_CTRL0); 3500 secondary = vmcs_read(CPU_EXEC_CTRL1); 3501 3502 vmcs_write(CPU_EXEC_CTRL0, primary | CPU_SECONDARY); 3503 printf("\nMSR_IA32_VMX_PROCBASED_CTLS2: %lx\n", ctrl_cpu_rev[1].val); 3504 for (bit = 0; bit < 32; bit++) 3505 test_rsvd_ctl_bit("secondary processor-based controls", 3506 ctrl_cpu_rev[1], CPU_EXEC_CTRL1, bit); 3507 3508 /* 3509 * When the "activate secondary controls" VM-execution control 3510 * is clear, there are no checks on the secondary controls. 3511 */ 3512 vmcs_write(CPU_EXEC_CTRL0, primary & ~CPU_SECONDARY); 3513 vmcs_write(CPU_EXEC_CTRL1, ~0); 3514 report(vmlaunch(), 3515 "Secondary processor-based controls ignored"); 3516 vmcs_write(CPU_EXEC_CTRL1, secondary); 3517 vmcs_write(CPU_EXEC_CTRL0, primary); 3518 } 3519 3520 static void try_cr3_target_count(unsigned i, unsigned max) 3521 { 3522 report_prefix_pushf("CR3 target count 0x%x", i); 3523 vmcs_write(CR3_TARGET_COUNT, i); 3524 if (i <= max) 3525 test_vmx_valid_controls(); 3526 else 3527 test_vmx_invalid_controls(); 3528 report_prefix_pop(); 3529 } 3530 3531 /* 3532 * The CR3-target count must not be greater than 4. Future processors 3533 * may support a different number of CR3-target values. Software 3534 * should read the VMX capability MSR IA32_VMX_MISC to determine the 3535 * number of values supported. 3536 * [Intel SDM] 3537 */ 3538 static void test_cr3_targets(void) 3539 { 3540 unsigned supported_targets = (rdmsr(MSR_IA32_VMX_MISC) >> 16) & 0x1ff; 3541 u32 cr3_targets = vmcs_read(CR3_TARGET_COUNT); 3542 unsigned i; 3543 3544 printf("\nSupported CR3 targets: %d\n", supported_targets); 3545 TEST_ASSERT(supported_targets <= 256); 3546 3547 try_cr3_target_count(-1u, supported_targets); 3548 try_cr3_target_count(0x80000000, supported_targets); 3549 try_cr3_target_count(0x7fffffff, supported_targets); 3550 for (i = 0; i <= supported_targets + 1; i++) 3551 try_cr3_target_count(i, supported_targets); 3552 vmcs_write(CR3_TARGET_COUNT, cr3_targets); 3553 3554 /* VMWRITE to nonexistent target fields should fail. 
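vmcs_write() is expected to report VM-fail with a non-zero return here, so the assert below wants each of these unsupported CR3-target fields to be rejected.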
*/ 3555 for (i = supported_targets; i < 256; i++) 3556 TEST_ASSERT(vmcs_write(CR3_TARGET_0 + i*2, 0)); 3557 } 3558 3559 /* 3560 * Test a particular address setting in the VMCS 3561 */ 3562 static void test_vmcs_addr(const char *name, 3563 enum Encoding encoding, 3564 u64 align, 3565 bool ignored, 3566 bool skip_beyond_mapped_ram, 3567 u64 addr) 3568 { 3569 report_prefix_pushf("%s = %lx", name, addr); 3570 vmcs_write(encoding, addr); 3571 if (skip_beyond_mapped_ram && 3572 addr > fwcfg_get_u64(FW_CFG_RAM_SIZE) - align && 3573 addr < (1ul << cpuid_maxphyaddr())) 3574 printf("Skipping physical address beyond mapped RAM\n"); 3575 else if (ignored || (IS_ALIGNED(addr, align) && 3576 addr < (1ul << cpuid_maxphyaddr()))) 3577 test_vmx_valid_controls(); 3578 else 3579 test_vmx_invalid_controls(); 3580 report_prefix_pop(); 3581 } 3582 3583 /* 3584 * Test interesting values for a VMCS address 3585 */ 3586 static void test_vmcs_addr_values(const char *name, 3587 enum Encoding encoding, 3588 u64 align, 3589 bool ignored, 3590 bool skip_beyond_mapped_ram, 3591 u32 bit_start, u32 bit_end) 3592 { 3593 unsigned i; 3594 u64 orig_val = vmcs_read(encoding); 3595 3596 for (i = bit_start; i <= bit_end; i++) 3597 test_vmcs_addr(name, encoding, align, ignored, 3598 skip_beyond_mapped_ram, 1ul << i); 3599 3600 test_vmcs_addr(name, encoding, align, ignored, 3601 skip_beyond_mapped_ram, PAGE_SIZE - 1); 3602 test_vmcs_addr(name, encoding, align, ignored, 3603 skip_beyond_mapped_ram, PAGE_SIZE); 3604 test_vmcs_addr(name, encoding, align, ignored, 3605 skip_beyond_mapped_ram, 3606 (1ul << cpuid_maxphyaddr()) - PAGE_SIZE); 3607 test_vmcs_addr(name, encoding, align, ignored, 3608 skip_beyond_mapped_ram, -1ul); 3609 3610 vmcs_write(encoding, orig_val); 3611 } 3612 3613 /* 3614 * Test a physical address reference in the VMCS, when the corresponding 3615 * feature is enabled and when the corresponding feature is disabled. 3616 */ 3617 static void test_vmcs_addr_reference(u32 control_bit, enum Encoding field, 3618 const char *field_name, 3619 const char *control_name, u64 align, 3620 bool skip_beyond_mapped_ram, 3621 bool control_primary) 3622 { 3623 u32 primary = vmcs_read(CPU_EXEC_CTRL0); 3624 u32 secondary = vmcs_read(CPU_EXEC_CTRL1); 3625 u64 page_addr; 3626 3627 if (control_primary) { 3628 if (!(ctrl_cpu_rev[0].clr & control_bit)) 3629 return; 3630 } else { 3631 if (!(ctrl_cpu_rev[1].clr & control_bit)) 3632 return; 3633 } 3634 3635 page_addr = vmcs_read(field); 3636 3637 report_prefix_pushf("%s enabled", control_name); 3638 if (control_primary) { 3639 vmcs_write(CPU_EXEC_CTRL0, primary | control_bit); 3640 } else { 3641 vmcs_write(CPU_EXEC_CTRL0, primary | CPU_SECONDARY); 3642 vmcs_write(CPU_EXEC_CTRL1, secondary | control_bit); 3643 } 3644 3645 test_vmcs_addr_values(field_name, field, align, false, 3646 skip_beyond_mapped_ram, 0, 63); 3647 report_prefix_pop(); 3648 3649 report_prefix_pushf("%s disabled", control_name); 3650 if (control_primary) { 3651 vmcs_write(CPU_EXEC_CTRL0, primary & ~control_bit); 3652 } else { 3653 vmcs_write(CPU_EXEC_CTRL0, primary & ~CPU_SECONDARY); 3654 vmcs_write(CPU_EXEC_CTRL1, secondary & ~control_bit); 3655 } 3656 3657 test_vmcs_addr_values(field_name, field, align, true, false, 0, 63); 3658 report_prefix_pop(); 3659 3660 vmcs_write(field, page_addr); 3661 vmcs_write(CPU_EXEC_CTRL0, primary); 3662 vmcs_write(CPU_EXEC_CTRL1, secondary); 3663 } 3664 3665 /* 3666 * If the "use I/O bitmaps" VM-execution control is 1, bits 11:0 of 3667 * each I/O-bitmap address must be 0. 
Neither address should set any 3668 * bits beyond the processor's physical-address width. 3669 * [Intel SDM] 3670 */ 3671 static void test_io_bitmaps(void) 3672 { 3673 test_vmcs_addr_reference(CPU_IO_BITMAP, IO_BITMAP_A, 3674 "I/O bitmap A", "Use I/O bitmaps", 3675 PAGE_SIZE, false, true); 3676 test_vmcs_addr_reference(CPU_IO_BITMAP, IO_BITMAP_B, 3677 "I/O bitmap B", "Use I/O bitmaps", 3678 PAGE_SIZE, false, true); 3679 } 3680 3681 /* 3682 * If the "use MSR bitmaps" VM-execution control is 1, bits 11:0 of 3683 * the MSR-bitmap address must be 0. The address should not set any 3684 * bits beyond the processor's physical-address width. 3685 * [Intel SDM] 3686 */ 3687 static void test_msr_bitmap(void) 3688 { 3689 test_vmcs_addr_reference(CPU_MSR_BITMAP, MSR_BITMAP, 3690 "MSR bitmap", "Use MSR bitmaps", 3691 PAGE_SIZE, false, true); 3692 } 3693 3694 /* 3695 * If the "use TPR shadow" VM-execution control is 1, the virtual-APIC 3696 * address must satisfy the following checks: 3697 * - Bits 11:0 of the address must be 0. 3698 * - The address should not set any bits beyond the processor's 3699 * physical-address width. 3700 * [Intel SDM] 3701 */ 3702 static void test_apic_virt_addr(void) 3703 { 3704 /* 3705 * Ensure the processor will never use the virtual-APIC page, since 3706 * we will point it to invalid RAM. Otherwise KVM is puzzled about 3707 * what we're trying to achieve and fails vmentry. 3708 */ 3709 u32 cpu_ctrls0 = vmcs_read(CPU_EXEC_CTRL0); 3710 vmcs_write(CPU_EXEC_CTRL0, cpu_ctrls0 | CPU_CR8_LOAD | CPU_CR8_STORE); 3711 test_vmcs_addr_reference(CPU_TPR_SHADOW, APIC_VIRT_ADDR, 3712 "virtual-APIC address", "Use TPR shadow", 3713 PAGE_SIZE, false, true); 3714 vmcs_write(CPU_EXEC_CTRL0, cpu_ctrls0); 3715 } 3716 3717 /* 3718 * If the "virtualize APIC-accesses" VM-execution control is 1, the 3719 * APIC-access address must satisfy the following checks: 3720 * - Bits 11:0 of the address must be 0. 3721 * - The address should not set any bits beyond the processor's 3722 * physical-address width. 3723 * [Intel SDM] 3724 */ 3725 static void test_apic_access_addr(void) 3726 { 3727 void *apic_access_page = alloc_page(); 3728 3729 vmcs_write(APIC_ACCS_ADDR, virt_to_phys(apic_access_page)); 3730 3731 test_vmcs_addr_reference(CPU_VIRT_APIC_ACCESSES, APIC_ACCS_ADDR, 3732 "APIC-access address", 3733 "virtualize APIC-accesses", PAGE_SIZE, 3734 true, false); 3735 } 3736 3737 static bool set_bit_pattern(u8 mask, u32 *secondary) 3738 { 3739 u8 i; 3740 bool flag = false; 3741 u32 test_bits[3] = { 3742 CPU_VIRT_X2APIC, 3743 CPU_APIC_REG_VIRT, 3744 CPU_VINTD 3745 }; 3746 3747 for (i = 0; i < ARRAY_SIZE(test_bits); i++) { 3748 if ((mask & (1u << i)) && 3749 (ctrl_cpu_rev[1].clr & test_bits[i])) { 3750 *secondary |= test_bits[i]; 3751 flag = true; 3752 } 3753 } 3754 3755 return (flag); 3756 } 3757 3758 /* 3759 * If the "use TPR shadow" VM-execution control is 0, the following 3760 * VM-execution controls must also be 0: 3761 * - virtualize x2APIC mode 3762 * - APIC-register virtualization 3763 * - virtual-interrupt delivery 3764 * [Intel SDM] 3765 * 3766 * 2. If the "virtualize x2APIC mode" VM-execution control is 1, the 3767 * "virtualize APIC accesses" VM-execution control must be 0. 
3768 * [Intel SDM] 3769 */ 3770 static void test_apic_virtual_ctls(void) 3771 { 3772 u32 saved_primary = vmcs_read(CPU_EXEC_CTRL0); 3773 u32 saved_secondary = vmcs_read(CPU_EXEC_CTRL1); 3774 u32 primary = saved_primary; 3775 u32 secondary = saved_secondary; 3776 bool is_ctrl_valid = false; 3777 char str[10] = "disabled"; 3778 u8 i = 0, j; 3779 3780 /* 3781 * First test 3782 */ 3783 if (!((ctrl_cpu_rev[0].clr & (CPU_SECONDARY | CPU_TPR_SHADOW)) == 3784 (CPU_SECONDARY | CPU_TPR_SHADOW))) 3785 return; 3786 3787 primary |= CPU_SECONDARY; 3788 primary &= ~CPU_TPR_SHADOW; 3789 vmcs_write(CPU_EXEC_CTRL0, primary); 3790 3791 while (1) { 3792 for (j = 1; j < 8; j++) { 3793 secondary &= ~(CPU_VIRT_X2APIC | CPU_APIC_REG_VIRT | CPU_VINTD); 3794 if (primary & CPU_TPR_SHADOW) { 3795 is_ctrl_valid = true; 3796 } else { 3797 if (! set_bit_pattern(j, &secondary)) 3798 is_ctrl_valid = true; 3799 else 3800 is_ctrl_valid = false; 3801 } 3802 3803 vmcs_write(CPU_EXEC_CTRL1, secondary); 3804 report_prefix_pushf("Use TPR shadow %s, virtualize x2APIC mode %s, APIC-register virtualization %s, virtual-interrupt delivery %s", 3805 str, (secondary & CPU_VIRT_X2APIC) ? "enabled" : "disabled", (secondary & CPU_APIC_REG_VIRT) ? "enabled" : "disabled", (secondary & CPU_VINTD) ? "enabled" : "disabled"); 3806 if (is_ctrl_valid) 3807 test_vmx_valid_controls(); 3808 else 3809 test_vmx_invalid_controls(); 3810 report_prefix_pop(); 3811 } 3812 3813 if (i == 1) 3814 break; 3815 i++; 3816 3817 primary |= CPU_TPR_SHADOW; 3818 vmcs_write(CPU_EXEC_CTRL0, primary); 3819 strcpy(str, "enabled"); 3820 } 3821 3822 /* 3823 * Second test 3824 */ 3825 u32 apic_virt_ctls = (CPU_VIRT_X2APIC | CPU_VIRT_APIC_ACCESSES); 3826 3827 primary = saved_primary; 3828 secondary = saved_secondary; 3829 if (!((ctrl_cpu_rev[1].clr & apic_virt_ctls) == apic_virt_ctls)) 3830 return; 3831 3832 vmcs_write(CPU_EXEC_CTRL0, primary | CPU_SECONDARY); 3833 secondary &= ~CPU_VIRT_APIC_ACCESSES; 3834 vmcs_write(CPU_EXEC_CTRL1, secondary & ~CPU_VIRT_X2APIC); 3835 report_prefix_pushf("Virtualize x2APIC mode disabled; virtualize APIC access disabled"); 3836 test_vmx_valid_controls(); 3837 report_prefix_pop(); 3838 3839 vmcs_write(CPU_EXEC_CTRL1, secondary | CPU_VIRT_APIC_ACCESSES); 3840 report_prefix_pushf("Virtualize x2APIC mode disabled; virtualize APIC access enabled"); 3841 test_vmx_valid_controls(); 3842 report_prefix_pop(); 3843 3844 vmcs_write(CPU_EXEC_CTRL1, secondary | CPU_VIRT_X2APIC); 3845 report_prefix_pushf("Virtualize x2APIC mode enabled; virtualize APIC access enabled"); 3846 test_vmx_invalid_controls(); 3847 report_prefix_pop(); 3848 3849 vmcs_write(CPU_EXEC_CTRL1, secondary & ~CPU_VIRT_APIC_ACCESSES); 3850 report_prefix_pushf("Virtualize x2APIC mode enabled; virtualize APIC access disabled"); 3851 test_vmx_valid_controls(); 3852 report_prefix_pop(); 3853 3854 vmcs_write(CPU_EXEC_CTRL0, saved_primary); 3855 vmcs_write(CPU_EXEC_CTRL1, saved_secondary); 3856 } 3857 3858 /* 3859 * If the "virtual-interrupt delivery" VM-execution control is 1, the 3860 * "external-interrupt exiting" VM-execution control must be 1. 
3861 * [Intel SDM] 3862 */ 3863 static void test_virtual_intr_ctls(void) 3864 { 3865 u32 saved_primary = vmcs_read(CPU_EXEC_CTRL0); 3866 u32 saved_secondary = vmcs_read(CPU_EXEC_CTRL1); 3867 u32 saved_pin = vmcs_read(PIN_CONTROLS); 3868 u32 primary = saved_primary; 3869 u32 secondary = saved_secondary; 3870 u32 pin = saved_pin; 3871 3872 if (!((ctrl_cpu_rev[1].clr & CPU_VINTD) && 3873 (ctrl_pin_rev.clr & PIN_EXTINT))) 3874 return; 3875 3876 vmcs_write(CPU_EXEC_CTRL0, primary | CPU_SECONDARY | CPU_TPR_SHADOW); 3877 vmcs_write(CPU_EXEC_CTRL1, secondary & ~CPU_VINTD); 3878 vmcs_write(PIN_CONTROLS, pin & ~PIN_EXTINT); 3879 report_prefix_pushf("Virtualize interrupt-delivery disabled; external-interrupt exiting disabled"); 3880 test_vmx_valid_controls(); 3881 report_prefix_pop(); 3882 3883 vmcs_write(CPU_EXEC_CTRL1, secondary | CPU_VINTD); 3884 report_prefix_pushf("Virtualize interrupt-delivery enabled; external-interrupt exiting disabled"); 3885 test_vmx_invalid_controls(); 3886 report_prefix_pop(); 3887 3888 vmcs_write(PIN_CONTROLS, pin | PIN_EXTINT); 3889 report_prefix_pushf("Virtualize interrupt-delivery enabled; external-interrupt exiting enabled"); 3890 test_vmx_valid_controls(); 3891 report_prefix_pop(); 3892 3893 vmcs_write(PIN_CONTROLS, pin & ~PIN_EXTINT); 3894 report_prefix_pushf("Virtualize interrupt-delivery enabled; external-interrupt exiting disabled"); 3895 test_vmx_invalid_controls(); 3896 report_prefix_pop(); 3897 3898 vmcs_write(CPU_EXEC_CTRL0, saved_primary); 3899 vmcs_write(CPU_EXEC_CTRL1, saved_secondary); 3900 vmcs_write(PIN_CONTROLS, saved_pin); 3901 } 3902 3903 static void test_pi_desc_addr(u64 addr, bool is_ctrl_valid) 3904 { 3905 vmcs_write(POSTED_INTR_DESC_ADDR, addr); 3906 report_prefix_pushf("Process-posted-interrupts enabled; posted-interrupt-descriptor-address 0x%lx", addr); 3907 if (is_ctrl_valid) 3908 test_vmx_valid_controls(); 3909 else 3910 test_vmx_invalid_controls(); 3911 report_prefix_pop(); 3912 } 3913 3914 /* 3915 * If the "process posted interrupts" VM-execution control is 1, the 3916 * following must be true: 3917 * 3918 * - The "virtual-interrupt delivery" VM-execution control is 1. 3919 * - The "acknowledge interrupt on exit" VM-exit control is 1. 3920 * - The posted-interrupt notification vector has a value in the 3921 * - range 0 - 255 (bits 15:8 are all 0). 3922 * - Bits 5:0 of the posted-interrupt descriptor address are all 0. 3923 * - The posted-interrupt descriptor address does not set any bits 3924 * beyond the processor's physical-address width. 
3925 * [Intel SDM] 3926 */ 3927 static void test_posted_intr(void) 3928 { 3929 u32 saved_primary = vmcs_read(CPU_EXEC_CTRL0); 3930 u32 saved_secondary = vmcs_read(CPU_EXEC_CTRL1); 3931 u32 saved_pin = vmcs_read(PIN_CONTROLS); 3932 u32 exit_ctl_saved = vmcs_read(EXI_CONTROLS); 3933 u32 primary = saved_primary; 3934 u32 secondary = saved_secondary; 3935 u32 pin = saved_pin; 3936 u32 exit_ctl = exit_ctl_saved; 3937 u16 vec; 3938 int i; 3939 3940 if (!((ctrl_pin_rev.clr & PIN_POST_INTR) && 3941 (ctrl_cpu_rev[1].clr & CPU_VINTD) && 3942 (ctrl_exit_rev.clr & EXI_INTA))) 3943 return; 3944 3945 vmcs_write(CPU_EXEC_CTRL0, primary | CPU_SECONDARY | CPU_TPR_SHADOW); 3946 3947 /* 3948 * Test virtual-interrupt-delivery and acknowledge-interrupt-on-exit 3949 */ 3950 pin |= PIN_POST_INTR; 3951 vmcs_write(PIN_CONTROLS, pin); 3952 secondary &= ~CPU_VINTD; 3953 vmcs_write(CPU_EXEC_CTRL1, secondary); 3954 report_prefix_pushf("Process-posted-interrupts enabled; virtual-interrupt-delivery disabled"); 3955 test_vmx_invalid_controls(); 3956 report_prefix_pop(); 3957 3958 secondary |= CPU_VINTD; 3959 vmcs_write(CPU_EXEC_CTRL1, secondary); 3960 report_prefix_pushf("Process-posted-interrupts enabled; virtual-interrupt-delivery enabled"); 3961 test_vmx_invalid_controls(); 3962 report_prefix_pop(); 3963 3964 exit_ctl &= ~EXI_INTA; 3965 vmcs_write(EXI_CONTROLS, exit_ctl); 3966 report_prefix_pushf("Process-posted-interrupts enabled; virtual-interrupt-delivery enabled; acknowledge-interrupt-on-exit disabled"); 3967 test_vmx_invalid_controls(); 3968 report_prefix_pop(); 3969 3970 exit_ctl |= EXI_INTA; 3971 vmcs_write(EXI_CONTROLS, exit_ctl); 3972 report_prefix_pushf("Process-posted-interrupts enabled; virtual-interrupt-delivery enabled; acknowledge-interrupt-on-exit enabled"); 3973 test_vmx_valid_controls(); 3974 report_prefix_pop(); 3975 3976 secondary &= ~CPU_VINTD; 3977 vmcs_write(CPU_EXEC_CTRL1, secondary); 3978 report_prefix_pushf("Process-posted-interrupts enabled; virtual-interrupt-delivery disabled; acknowledge-interrupt-on-exit enabled"); 3979 test_vmx_invalid_controls(); 3980 report_prefix_pop(); 3981 3982 secondary |= CPU_VINTD; 3983 vmcs_write(CPU_EXEC_CTRL1, secondary); 3984 report_prefix_pushf("Process-posted-interrupts enabled; virtual-interrupt-delivery enabled; acknowledge-interrupt-on-exit enabled"); 3985 test_vmx_valid_controls(); 3986 report_prefix_pop(); 3987 3988 /* 3989 * Test posted-interrupt notification vector 3990 */ 3991 for (i = 0; i < 8; i++) { 3992 vec = (1ul << i); 3993 vmcs_write(PINV, vec); 3994 report_prefix_pushf("Process-posted-interrupts enabled; posted-interrupt-notification-vector %u", vec); 3995 test_vmx_valid_controls(); 3996 report_prefix_pop(); 3997 } 3998 for (i = 8; i < 16; i++) { 3999 vec = (1ul << i); 4000 vmcs_write(PINV, vec); 4001 report_prefix_pushf("Process-posted-interrupts enabled; posted-interrupt-notification-vector %u", vec); 4002 test_vmx_invalid_controls(); 4003 report_prefix_pop(); 4004 } 4005 4006 vec &= ~(0xff << 8); 4007 vmcs_write(PINV, vec); 4008 report_prefix_pushf("Process-posted-interrupts enabled; posted-interrupt-notification-vector %u", vec); 4009 test_vmx_valid_controls(); 4010 report_prefix_pop(); 4011 4012 /* 4013 * Test posted-interrupt descriptor address 4014 */ 4015 for (i = 0; i < 6; i++) { 4016 test_pi_desc_addr(1ul << i, false); 4017 } 4018 4019 test_pi_desc_addr(0xf0, false); 4020 test_pi_desc_addr(0xff, false); 4021 test_pi_desc_addr(0x0f, false); 4022 test_pi_desc_addr(0x8000, true); 4023 test_pi_desc_addr(0x00, true); 4024 
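	/*
	 * 64-byte-aligned addresses within the physical-address width are
	 * expected to be valid; test_vmcs_addr_values() below walks the
	 * remaining interesting bit positions of the descriptor address.
	 */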
test_pi_desc_addr(0xc000, true); 4025 4026 test_vmcs_addr_values("process-posted interrupts", 4027 POSTED_INTR_DESC_ADDR, 64, 4028 false, false, 0, 63); 4029 4030 vmcs_write(CPU_EXEC_CTRL0, saved_primary); 4031 vmcs_write(CPU_EXEC_CTRL1, saved_secondary); 4032 vmcs_write(PIN_CONTROLS, saved_pin); 4033 } 4034 4035 static void test_apic_ctls(void) 4036 { 4037 test_apic_virt_addr(); 4038 test_apic_access_addr(); 4039 test_apic_virtual_ctls(); 4040 test_virtual_intr_ctls(); 4041 test_posted_intr(); 4042 } 4043 4044 /* 4045 * If the "enable VPID" VM-execution control is 1, the value of the 4046 * of the VPID VM-execution control field must not be 0000H. 4047 * [Intel SDM] 4048 */ 4049 static void test_vpid(void) 4050 { 4051 u32 saved_primary = vmcs_read(CPU_EXEC_CTRL0); 4052 u32 saved_secondary = vmcs_read(CPU_EXEC_CTRL1); 4053 u16 vpid = 0x0000; 4054 int i; 4055 4056 if (!is_vpid_supported()) { 4057 report_skip("%s : Secondary controls and/or VPID not supported", __func__); 4058 return; 4059 } 4060 4061 vmcs_write(CPU_EXEC_CTRL0, saved_primary | CPU_SECONDARY); 4062 vmcs_write(CPU_EXEC_CTRL1, saved_secondary & ~CPU_VPID); 4063 vmcs_write(VPID, vpid); 4064 report_prefix_pushf("VPID disabled; VPID value %x", vpid); 4065 test_vmx_valid_controls(); 4066 report_prefix_pop(); 4067 4068 vmcs_write(CPU_EXEC_CTRL1, saved_secondary | CPU_VPID); 4069 report_prefix_pushf("VPID enabled; VPID value %x", vpid); 4070 test_vmx_invalid_controls(); 4071 report_prefix_pop(); 4072 4073 for (i = 0; i < 16; i++) { 4074 vpid = (short)1 << i;; 4075 vmcs_write(VPID, vpid); 4076 report_prefix_pushf("VPID enabled; VPID value %x", vpid); 4077 test_vmx_valid_controls(); 4078 report_prefix_pop(); 4079 } 4080 4081 vmcs_write(CPU_EXEC_CTRL0, saved_primary); 4082 vmcs_write(CPU_EXEC_CTRL1, saved_secondary); 4083 } 4084 4085 static void set_vtpr(unsigned vtpr) 4086 { 4087 *(u32 *)phys_to_virt(vmcs_read(APIC_VIRT_ADDR) + APIC_TASKPRI) = vtpr; 4088 } 4089 4090 static void try_tpr_threshold_and_vtpr(unsigned threshold, unsigned vtpr) 4091 { 4092 bool valid = true; 4093 u32 primary = vmcs_read(CPU_EXEC_CTRL0); 4094 u32 secondary = vmcs_read(CPU_EXEC_CTRL1); 4095 4096 if ((primary & CPU_TPR_SHADOW) && 4097 (!(primary & CPU_SECONDARY) || 4098 !(secondary & (CPU_VINTD | CPU_VIRT_APIC_ACCESSES)))) 4099 valid = (threshold & 0xf) <= ((vtpr >> 4) & 0xf); 4100 4101 set_vtpr(vtpr); 4102 report_prefix_pushf("TPR threshold 0x%x, VTPR.class 0x%x", 4103 threshold, (vtpr >> 4) & 0xf); 4104 if (valid) 4105 test_vmx_valid_controls(); 4106 else 4107 test_vmx_invalid_controls(); 4108 report_prefix_pop(); 4109 } 4110 4111 static void test_invalid_event_injection(void) 4112 { 4113 u32 ent_intr_info_save = vmcs_read(ENT_INTR_INFO); 4114 u32 ent_intr_error_save = vmcs_read(ENT_INTR_ERROR); 4115 u32 ent_inst_len_save = vmcs_read(ENT_INST_LEN); 4116 u32 primary_save = vmcs_read(CPU_EXEC_CTRL0); 4117 u32 secondary_save = vmcs_read(CPU_EXEC_CTRL1); 4118 u64 guest_cr0_save = vmcs_read(GUEST_CR0); 4119 u32 ent_intr_info_base = INTR_INFO_VALID_MASK; 4120 u32 ent_intr_info, ent_intr_err, ent_intr_len; 4121 u32 cnt; 4122 4123 /* Setup */ 4124 report_prefix_push("invalid event injection"); 4125 vmcs_write(ENT_INTR_ERROR, 0x00000000); 4126 vmcs_write(ENT_INST_LEN, 0x00000001); 4127 4128 /* The field's interruption type is not set to a reserved value. 
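 * The negative case below injects the reserved interruption type, while
 * the positive case uses an external interrupt with the same vector.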
*/ 4129 ent_intr_info = ent_intr_info_base | INTR_TYPE_RESERVED | DE_VECTOR; 4130 report_prefix_pushf("%s, VM-entry intr info=0x%x", 4131 "RESERVED interruption type invalid [-]", 4132 ent_intr_info); 4133 vmcs_write(ENT_INTR_INFO, ent_intr_info); 4134 test_vmx_invalid_controls(); 4135 report_prefix_pop(); 4136 4137 ent_intr_info = ent_intr_info_base | INTR_TYPE_EXT_INTR | 4138 DE_VECTOR; 4139 report_prefix_pushf("%s, VM-entry intr info=0x%x", 4140 "RESERVED interruption type invalid [+]", 4141 ent_intr_info); 4142 vmcs_write(ENT_INTR_INFO, ent_intr_info); 4143 test_vmx_valid_controls(); 4144 report_prefix_pop(); 4145 4146 /* If the interruption type is other event, the vector is 0. */ 4147 ent_intr_info = ent_intr_info_base | INTR_TYPE_OTHER_EVENT | DB_VECTOR; 4148 report_prefix_pushf("%s, VM-entry intr info=0x%x", 4149 "(OTHER EVENT && vector != 0) invalid [-]", 4150 ent_intr_info); 4151 vmcs_write(ENT_INTR_INFO, ent_intr_info); 4152 test_vmx_invalid_controls(); 4153 report_prefix_pop(); 4154 4155 /* If the interruption type is NMI, the vector is 2 (negative case). */ 4156 ent_intr_info = ent_intr_info_base | INTR_TYPE_NMI_INTR | DE_VECTOR; 4157 report_prefix_pushf("%s, VM-entry intr info=0x%x", 4158 "(NMI && vector != 2) invalid [-]", ent_intr_info); 4159 vmcs_write(ENT_INTR_INFO, ent_intr_info); 4160 test_vmx_invalid_controls(); 4161 report_prefix_pop(); 4162 4163 /* If the interruption type is NMI, the vector is 2 (positive case). */ 4164 ent_intr_info = ent_intr_info_base | INTR_TYPE_NMI_INTR | NMI_VECTOR; 4165 report_prefix_pushf("%s, VM-entry intr info=0x%x", 4166 "(NMI && vector == 2) valid [+]", ent_intr_info); 4167 vmcs_write(ENT_INTR_INFO, ent_intr_info); 4168 test_vmx_valid_controls(); 4169 report_prefix_pop(); 4170 4171 /* 4172 * If the interruption type 4173 * is HW exception, the vector is at most 31. 4174 */ 4175 ent_intr_info = ent_intr_info_base | INTR_TYPE_HARD_EXCEPTION | 0x20; 4176 report_prefix_pushf("%s, VM-entry intr info=0x%x", 4177 "(HW exception && vector > 31) invalid [-]", 4178 ent_intr_info); 4179 vmcs_write(ENT_INTR_INFO, ent_intr_info); 4180 test_vmx_invalid_controls(); 4181 report_prefix_pop(); 4182 4183 /* 4184 * deliver-error-code is 1 iff either 4185 * (a) the "unrestricted guest" VM-execution control is 0 4186 * (b) CR0.PE is set. 
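 * Note that CPUs which set basic_msr.no_hw_errcode_cc relax the
 * error-code consistency checks for hardware exception injection, which
 * is why several of the negative cases below are expected to pass on
 * such processors.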
4187 */ 4188 4189 /* Assert that unrestricted guest is disabled or unsupported */ 4190 assert(!(ctrl_cpu_rev[0].clr & CPU_SECONDARY) || 4191 !(secondary_save & CPU_URG)); 4192 4193 ent_intr_info = ent_intr_info_base | INTR_TYPE_HARD_EXCEPTION | 4194 GP_VECTOR; 4195 report_prefix_pushf("%s, VM-entry intr info=0x%x", 4196 "error code <-> (!URG || prot_mode) [-]", 4197 ent_intr_info); 4198 vmcs_write(GUEST_CR0, guest_cr0_save & ~X86_CR0_PE & ~X86_CR0_PG); 4199 vmcs_write(ENT_INTR_INFO, ent_intr_info); 4200 if (basic_msr.no_hw_errcode_cc) 4201 test_vmx_valid_controls(); 4202 else 4203 test_vmx_invalid_controls(); 4204 report_prefix_pop(); 4205 4206 ent_intr_info = ent_intr_info_base | INTR_INFO_DELIVER_CODE_MASK | 4207 INTR_TYPE_HARD_EXCEPTION | GP_VECTOR; 4208 report_prefix_pushf("%s, VM-entry intr info=0x%x", 4209 "error code <-> (!URG || prot_mode) [+]", 4210 ent_intr_info); 4211 vmcs_write(GUEST_CR0, guest_cr0_save & ~X86_CR0_PE & ~X86_CR0_PG); 4212 vmcs_write(ENT_INTR_INFO, ent_intr_info); 4213 test_vmx_valid_controls(); 4214 report_prefix_pop(); 4215 4216 if (enable_unrestricted_guest(false)) 4217 goto skip_unrestricted_guest; 4218 4219 ent_intr_info = ent_intr_info_base | INTR_INFO_DELIVER_CODE_MASK | 4220 INTR_TYPE_HARD_EXCEPTION | GP_VECTOR; 4221 report_prefix_pushf("%s, VM-entry intr info=0x%x", 4222 "error code <-> (!URG || prot_mode) [-]", 4223 ent_intr_info); 4224 vmcs_write(GUEST_CR0, guest_cr0_save & ~X86_CR0_PE & ~X86_CR0_PG); 4225 vmcs_write(ENT_INTR_INFO, ent_intr_info); 4226 test_vmx_invalid_controls(); 4227 report_prefix_pop(); 4228 4229 ent_intr_info = ent_intr_info_base | INTR_TYPE_HARD_EXCEPTION | 4230 GP_VECTOR; 4231 report_prefix_pushf("%s, VM-entry intr info=0x%x", 4232 "error code <-> (!URG || prot_mode) [-]", 4233 ent_intr_info); 4234 vmcs_write(GUEST_CR0, guest_cr0_save | X86_CR0_PE); 4235 vmcs_write(ENT_INTR_INFO, ent_intr_info); 4236 if (basic_msr.no_hw_errcode_cc) 4237 test_vmx_valid_controls(); 4238 else 4239 test_vmx_invalid_controls(); 4240 report_prefix_pop(); 4241 4242 vmcs_write(CPU_EXEC_CTRL1, secondary_save); 4243 vmcs_write(CPU_EXEC_CTRL0, primary_save); 4244 4245 skip_unrestricted_guest: 4246 vmcs_write(GUEST_CR0, guest_cr0_save); 4247 4248 /* deliver-error-code is 1 iff the interruption type is HW exception */ 4249 report_prefix_push("error code <-> HW exception"); 4250 for (cnt = 0; cnt < 8; cnt++) { 4251 u32 exception_type_mask = cnt << 8; 4252 u32 deliver_error_code_mask = 4253 exception_type_mask != INTR_TYPE_HARD_EXCEPTION ? 
4254 INTR_INFO_DELIVER_CODE_MASK : 0; 4255 4256 ent_intr_info = ent_intr_info_base | deliver_error_code_mask | 4257 exception_type_mask | GP_VECTOR; 4258 report_prefix_pushf("VM-entry intr info=0x%x [-]", 4259 ent_intr_info); 4260 vmcs_write(ENT_INTR_INFO, ent_intr_info); 4261 if (exception_type_mask == INTR_TYPE_HARD_EXCEPTION && 4262 basic_msr.no_hw_errcode_cc) 4263 test_vmx_valid_controls(); 4264 else 4265 test_vmx_invalid_controls(); 4266 report_prefix_pop(); 4267 } 4268 report_prefix_pop(); 4269 4270 /* 4271 * deliver-error-code is 1 iff the vector 4272 * indicates an exception that would normally deliver an error code 4273 */ 4274 report_prefix_push("error code <-> vector delivers error code"); 4275 for (cnt = 0; cnt < 32; cnt++) { 4276 bool has_error_code = false; 4277 u32 deliver_error_code_mask; 4278 4279 switch (cnt) { 4280 case DF_VECTOR: 4281 case TS_VECTOR: 4282 case NP_VECTOR: 4283 case SS_VECTOR: 4284 case GP_VECTOR: 4285 case PF_VECTOR: 4286 case AC_VECTOR: 4287 has_error_code = true; break; 4288 case CP_VECTOR: 4289 /* Some CPUs have error code and some do not, skip */ 4290 continue; 4291 } 4292 4293 /* Negative case */ 4294 deliver_error_code_mask = has_error_code ? 4295 0 : 4296 INTR_INFO_DELIVER_CODE_MASK; 4297 ent_intr_info = ent_intr_info_base | deliver_error_code_mask | 4298 INTR_TYPE_HARD_EXCEPTION | cnt; 4299 report_prefix_pushf("VM-entry intr info=0x%x [-]", 4300 ent_intr_info); 4301 vmcs_write(ENT_INTR_INFO, ent_intr_info); 4302 if (basic_msr.no_hw_errcode_cc) 4303 test_vmx_valid_controls(); 4304 else 4305 test_vmx_invalid_controls(); 4306 report_prefix_pop(); 4307 4308 /* Positive case */ 4309 deliver_error_code_mask = has_error_code ? 4310 INTR_INFO_DELIVER_CODE_MASK : 4311 0; 4312 ent_intr_info = ent_intr_info_base | deliver_error_code_mask | 4313 INTR_TYPE_HARD_EXCEPTION | cnt; 4314 report_prefix_pushf("VM-entry intr info=0x%x [+]", 4315 ent_intr_info); 4316 vmcs_write(ENT_INTR_INFO, ent_intr_info); 4317 test_vmx_valid_controls(); 4318 report_prefix_pop(); 4319 } 4320 report_prefix_pop(); 4321 4322 /* Reserved bits in the field (30:12) are 0. */ 4323 report_prefix_push("reserved bits clear"); 4324 for (cnt = 12; cnt <= 30; cnt++) { 4325 ent_intr_info = ent_intr_info_base | 4326 INTR_INFO_DELIVER_CODE_MASK | 4327 INTR_TYPE_HARD_EXCEPTION | GP_VECTOR | 4328 (1U << cnt); 4329 report_prefix_pushf("VM-entry intr info=0x%x [-]", 4330 ent_intr_info); 4331 vmcs_write(ENT_INTR_INFO, ent_intr_info); 4332 test_vmx_invalid_controls(); 4333 report_prefix_pop(); 4334 } 4335 report_prefix_pop(); 4336 4337 /* 4338 * If deliver-error-code is 1, 4339 * bits 31:16 of the VM-entry exception error-code field are 0. 4340 */ 4341 ent_intr_info = ent_intr_info_base | INTR_INFO_DELIVER_CODE_MASK | 4342 INTR_TYPE_HARD_EXCEPTION | GP_VECTOR; 4343 report_prefix_pushf("%s, VM-entry intr info=0x%x", 4344 "VM-entry exception error code[31:16] clear", 4345 ent_intr_info); 4346 vmcs_write(ENT_INTR_INFO, ent_intr_info); 4347 for (cnt = 16; cnt <= 31; cnt++) { 4348 ent_intr_err = 1U << cnt; 4349 report_prefix_pushf("VM-entry intr error=0x%x [-]", 4350 ent_intr_err); 4351 vmcs_write(ENT_INTR_ERROR, ent_intr_err); 4352 test_vmx_invalid_controls(); 4353 report_prefix_pop(); 4354 } 4355 vmcs_write(ENT_INTR_ERROR, 0x00000000); 4356 report_prefix_pop(); 4357 4358 /* 4359 * If the interruption type is software interrupt, software exception, 4360 * or privileged software exception, the VM-entry instruction-length 4361 * field is in the range 0 - 15.
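 * (Only the clearly invalid lengths -1 and 16 are exercised below; an
 * instruction length of 0 is architecturally permitted only when
 * IA32_VMX_MISC advertises support for zero-length injection.)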
4362 */ 4363 4364 for (cnt = 0; cnt < 3; cnt++) { 4365 switch (cnt) { 4366 case 0: 4367 ent_intr_info = ent_intr_info_base | 4368 INTR_TYPE_SOFT_INTR; 4369 break; 4370 case 1: 4371 ent_intr_info = ent_intr_info_base | 4372 INTR_TYPE_SOFT_EXCEPTION; 4373 break; 4374 case 2: 4375 ent_intr_info = ent_intr_info_base | 4376 INTR_TYPE_PRIV_SW_EXCEPTION; 4377 break; 4378 } 4379 report_prefix_pushf("%s, VM-entry intr info=0x%x", 4380 "VM-entry instruction-length check", 4381 ent_intr_info); 4382 vmcs_write(ENT_INTR_INFO, ent_intr_info); 4383 4384 /* Instruction length set to -1 (0xFFFFFFFF) should fail */ 4385 ent_intr_len = -1; 4386 report_prefix_pushf("VM-entry intr length = 0x%x [-]", 4387 ent_intr_len); 4388 vmcs_write(ENT_INST_LEN, ent_intr_len); 4389 test_vmx_invalid_controls(); 4390 report_prefix_pop(); 4391 4392 /* Instruction length set to 16 should fail */ 4393 ent_intr_len = 0x00000010; 4394 report_prefix_pushf("VM-entry intr length = 0x%x [-]", 4395 ent_intr_len); 4396 vmcs_write(ENT_INST_LEN, 0x00000010); 4397 test_vmx_invalid_controls(); 4398 report_prefix_pop(); 4399 4400 report_prefix_pop(); 4401 } 4402 4403 /* Cleanup */ 4404 vmcs_write(ENT_INTR_INFO, ent_intr_info_save); 4405 vmcs_write(ENT_INTR_ERROR, ent_intr_error_save); 4406 vmcs_write(ENT_INST_LEN, ent_inst_len_save); 4407 vmcs_write(CPU_EXEC_CTRL0, primary_save); 4408 vmcs_write(CPU_EXEC_CTRL1, secondary_save); 4409 vmcs_write(GUEST_CR0, guest_cr0_save); 4410 report_prefix_pop(); 4411 } 4412 4413 /* 4414 * Test interesting vTPR values for a given TPR threshold. 4415 */ 4416 static void test_vtpr_values(unsigned threshold) 4417 { 4418 try_tpr_threshold_and_vtpr(threshold, (threshold - 1) << 4); 4419 try_tpr_threshold_and_vtpr(threshold, threshold << 4); 4420 try_tpr_threshold_and_vtpr(threshold, (threshold + 1) << 4); 4421 } 4422 4423 static void try_tpr_threshold(unsigned threshold) 4424 { 4425 bool valid = true; 4426 4427 u32 primary = vmcs_read(CPU_EXEC_CTRL0); 4428 u32 secondary = vmcs_read(CPU_EXEC_CTRL1); 4429 4430 if ((primary & CPU_TPR_SHADOW) && !((primary & CPU_SECONDARY) && 4431 (secondary & CPU_VINTD))) 4432 valid = !(threshold >> 4); 4433 4434 set_vtpr(-1); 4435 vmcs_write(TPR_THRESHOLD, threshold); 4436 report_prefix_pushf("TPR threshold 0x%x, VTPR.class 0xf", threshold); 4437 if (valid) 4438 test_vmx_valid_controls(); 4439 else 4440 test_vmx_invalid_controls(); 4441 report_prefix_pop(); 4442 4443 if (valid) 4444 test_vtpr_values(threshold); 4445 } 4446 4447 /* 4448 * Test interesting TPR threshold values. 4449 */ 4450 static void test_tpr_threshold_values(void) 4451 { 4452 unsigned i; 4453 4454 for (i = 0; i < 0x10; i++) 4455 try_tpr_threshold(i); 4456 for (i = 4; i < 32; i++) 4457 try_tpr_threshold(1u << i); 4458 try_tpr_threshold(-1u); 4459 try_tpr_threshold(0x7fffffff); 4460 } 4461 4462 /* 4463 * This test covers the following two VM entry checks: 4464 * 4465 * i) If the "use TPR shadow" VM-execution control is 1 and the 4466 * "virtual-interrupt delivery" VM-execution control is 0, bits 4467 * 31:4 of the TPR threshold VM-execution control field must 4468 be 0. 4469 * [Intel SDM] 4470 * 4471 * ii) If the "use TPR shadow" VM-execution control is 1, the 4472 * "virtual-interrupt delivery" VM-execution control is 0 4473 * and the "virtualize APIC accesses" VM-execution control 4474 * is 0, the value of bits 3:0 of the TPR threshold VM-execution 4475 * control field must not be greater than the value of bits 4476 * 7:4 of VTPR. 
4477 * [Intel SDM] 4478 */ 4479 static void test_tpr_threshold(void) 4480 { 4481 u32 primary = vmcs_read(CPU_EXEC_CTRL0); 4482 u64 apic_virt_addr = vmcs_read(APIC_VIRT_ADDR); 4483 u64 threshold = vmcs_read(TPR_THRESHOLD); 4484 void *virtual_apic_page; 4485 4486 if (!(ctrl_cpu_rev[0].clr & CPU_TPR_SHADOW)) 4487 return; 4488 4489 virtual_apic_page = alloc_page(); 4490 memset(virtual_apic_page, 0xff, PAGE_SIZE); 4491 vmcs_write(APIC_VIRT_ADDR, virt_to_phys(virtual_apic_page)); 4492 4493 vmcs_write(CPU_EXEC_CTRL0, primary & ~(CPU_TPR_SHADOW | CPU_SECONDARY)); 4494 report_prefix_pushf("Use TPR shadow disabled, secondary controls disabled"); 4495 test_tpr_threshold_values(); 4496 report_prefix_pop(); 4497 vmcs_write(CPU_EXEC_CTRL0, vmcs_read(CPU_EXEC_CTRL0) | CPU_TPR_SHADOW); 4498 report_prefix_pushf("Use TPR shadow enabled, secondary controls disabled"); 4499 test_tpr_threshold_values(); 4500 report_prefix_pop(); 4501 4502 if (!((ctrl_cpu_rev[0].clr & CPU_SECONDARY) && 4503 (ctrl_cpu_rev[1].clr & (CPU_VINTD | CPU_VIRT_APIC_ACCESSES)))) 4504 goto out; 4505 u32 secondary = vmcs_read(CPU_EXEC_CTRL1); 4506 4507 if (ctrl_cpu_rev[1].clr & CPU_VINTD) { 4508 vmcs_write(CPU_EXEC_CTRL1, CPU_VINTD); 4509 report_prefix_pushf("Use TPR shadow enabled; secondary controls disabled; virtual-interrupt delivery enabled; virtualize APIC accesses disabled"); 4510 test_tpr_threshold_values(); 4511 report_prefix_pop(); 4512 4513 vmcs_write(CPU_EXEC_CTRL0, 4514 vmcs_read(CPU_EXEC_CTRL0) | CPU_SECONDARY); 4515 report_prefix_pushf("Use TPR shadow enabled; secondary controls enabled; virtual-interrupt delivery enabled; virtualize APIC accesses disabled"); 4516 test_tpr_threshold_values(); 4517 report_prefix_pop(); 4518 } 4519 4520 if (ctrl_cpu_rev[1].clr & CPU_VIRT_APIC_ACCESSES) { 4521 vmcs_write(CPU_EXEC_CTRL0, 4522 vmcs_read(CPU_EXEC_CTRL0) & ~CPU_SECONDARY); 4523 vmcs_write(CPU_EXEC_CTRL1, CPU_VIRT_APIC_ACCESSES); 4524 report_prefix_pushf("Use TPR shadow enabled; secondary controls disabled; virtual-interrupt delivery enabled; virtualize APIC accesses enabled"); 4525 test_tpr_threshold_values(); 4526 report_prefix_pop(); 4527 4528 vmcs_write(CPU_EXEC_CTRL0, 4529 vmcs_read(CPU_EXEC_CTRL0) | CPU_SECONDARY); 4530 report_prefix_pushf("Use TPR shadow enabled; secondary controls enabled; virtual-interrupt delivery enabled; virtualize APIC accesses enabled"); 4531 test_tpr_threshold_values(); 4532 report_prefix_pop(); 4533 } 4534 4535 if ((ctrl_cpu_rev[1].clr & 4536 (CPU_VINTD | CPU_VIRT_APIC_ACCESSES)) == 4537 (CPU_VINTD | CPU_VIRT_APIC_ACCESSES)) { 4538 vmcs_write(CPU_EXEC_CTRL0, 4539 vmcs_read(CPU_EXEC_CTRL0) & ~CPU_SECONDARY); 4540 vmcs_write(CPU_EXEC_CTRL1, 4541 CPU_VINTD | CPU_VIRT_APIC_ACCESSES); 4542 report_prefix_pushf("Use TPR shadow enabled; secondary controls disabled; virtual-interrupt delivery enabled; virtualize APIC accesses enabled"); 4543 test_tpr_threshold_values(); 4544 report_prefix_pop(); 4545 4546 vmcs_write(CPU_EXEC_CTRL0, 4547 vmcs_read(CPU_EXEC_CTRL0) | CPU_SECONDARY); 4548 report_prefix_pushf("Use TPR shadow enabled; secondary controls enabled; virtual-interrupt delivery enabled; virtualize APIC accesses enabled"); 4549 test_tpr_threshold_values(); 4550 report_prefix_pop(); 4551 } 4552 4553 vmcs_write(CPU_EXEC_CTRL1, secondary); 4554 out: 4555 vmcs_write(TPR_THRESHOLD, threshold); 4556 vmcs_write(APIC_VIRT_ADDR, apic_virt_addr); 4557 vmcs_write(CPU_EXEC_CTRL0, primary); 4558 } 4559 4560 /* 4561 * This test verifies the following two vmentry checks: 4562 * 4563 * If the "NMI exiting" VM-execution 
control is 0, "Virtual NMIs" 4564 * VM-execution control must be 0. 4565 * [Intel SDM] 4566 * 4567 * If the "virtual NMIs" VM-execution control is 0, the "NMI-window 4568 * exiting" VM-execution control must be 0. 4569 * [Intel SDM] 4570 */ 4571 static void test_nmi_ctrls(void) 4572 { 4573 u32 pin_ctrls, cpu_ctrls0, test_pin_ctrls, test_cpu_ctrls0; 4574 4575 if ((ctrl_pin_rev.clr & (PIN_NMI | PIN_VIRT_NMI)) != 4576 (PIN_NMI | PIN_VIRT_NMI)) { 4577 report_skip("%s : NMI exiting and/or Virtual NMIs not supported", __func__); 4578 return; 4579 } 4580 4581 /* Save the controls so that we can restore them after our tests */ 4582 pin_ctrls = vmcs_read(PIN_CONTROLS); 4583 cpu_ctrls0 = vmcs_read(CPU_EXEC_CTRL0); 4584 4585 test_pin_ctrls = pin_ctrls & ~(PIN_NMI | PIN_VIRT_NMI); 4586 test_cpu_ctrls0 = cpu_ctrls0 & ~CPU_NMI_WINDOW; 4587 4588 vmcs_write(PIN_CONTROLS, test_pin_ctrls); 4589 report_prefix_pushf("NMI-exiting disabled, virtual-NMIs disabled"); 4590 test_vmx_valid_controls(); 4591 report_prefix_pop(); 4592 4593 vmcs_write(PIN_CONTROLS, test_pin_ctrls | PIN_VIRT_NMI); 4594 report_prefix_pushf("NMI-exiting disabled, virtual-NMIs enabled"); 4595 test_vmx_invalid_controls(); 4596 report_prefix_pop(); 4597 4598 vmcs_write(PIN_CONTROLS, test_pin_ctrls | (PIN_NMI | PIN_VIRT_NMI)); 4599 report_prefix_pushf("NMI-exiting enabled, virtual-NMIs enabled"); 4600 test_vmx_valid_controls(); 4601 report_prefix_pop(); 4602 4603 vmcs_write(PIN_CONTROLS, test_pin_ctrls | PIN_NMI); 4604 report_prefix_pushf("NMI-exiting enabled, virtual-NMIs disabled"); 4605 test_vmx_valid_controls(); 4606 report_prefix_pop(); 4607 4608 if (!(ctrl_cpu_rev[0].clr & CPU_NMI_WINDOW)) { 4609 report_info("NMI-window exiting is not supported, skipping..."); 4610 goto done; 4611 } 4612 4613 vmcs_write(PIN_CONTROLS, test_pin_ctrls); 4614 vmcs_write(CPU_EXEC_CTRL0, test_cpu_ctrls0 | CPU_NMI_WINDOW); 4615 report_prefix_pushf("Virtual-NMIs disabled, NMI-window-exiting enabled"); 4616 test_vmx_invalid_controls(); 4617 report_prefix_pop(); 4618 4619 vmcs_write(PIN_CONTROLS, test_pin_ctrls); 4620 vmcs_write(CPU_EXEC_CTRL0, test_cpu_ctrls0); 4621 report_prefix_pushf("Virtual-NMIs disabled, NMI-window-exiting disabled"); 4622 test_vmx_valid_controls(); 4623 report_prefix_pop(); 4624 4625 vmcs_write(PIN_CONTROLS, test_pin_ctrls | (PIN_NMI | PIN_VIRT_NMI)); 4626 vmcs_write(CPU_EXEC_CTRL0, test_cpu_ctrls0 | CPU_NMI_WINDOW); 4627 report_prefix_pushf("Virtual-NMIs enabled, NMI-window-exiting enabled"); 4628 test_vmx_valid_controls(); 4629 report_prefix_pop(); 4630 4631 vmcs_write(PIN_CONTROLS, test_pin_ctrls | (PIN_NMI | PIN_VIRT_NMI)); 4632 vmcs_write(CPU_EXEC_CTRL0, test_cpu_ctrls0); 4633 report_prefix_pushf("Virtual-NMIs enabled, NMI-window-exiting disabled"); 4634 test_vmx_valid_controls(); 4635 report_prefix_pop(); 4636 4637 /* Restore the controls to their original values */ 4638 vmcs_write(CPU_EXEC_CTRL0, cpu_ctrls0); 4639 done: 4640 vmcs_write(PIN_CONTROLS, pin_ctrls); 4641 } 4642 4643 static void test_eptp_ad_bit(u64 eptp, bool is_ctrl_valid) 4644 { 4645 vmcs_write(EPTP, eptp); 4646 report_prefix_pushf("Enable-EPT enabled; EPT accessed and dirty flag %s", 4647 (eptp & EPTP_AD_FLAG) ? "1": "0"); 4648 if (is_ctrl_valid) 4649 test_vmx_valid_controls(); 4650 else 4651 test_vmx_invalid_controls(); 4652 report_prefix_pop(); 4653 4654 } 4655 4656 /* 4657 * 1. 
If the "enable EPT" VM-execution control is 1, the EPT-pointer (EPTP) 4658 * VM-execution control field must satisfy the following checks: 4659 * 4660 * - The EPT memory type (bits 2:0) must be a value supported by the 4661 * processor as indicated in the IA32_VMX_EPT_VPID_CAP MSR. 4662 * - Bits 5:3 (1 less than the EPT page-walk length) must indicate a 4663 * supported EPT page-walk length. 4664 * - Bit 6 (enable bit for accessed and dirty flags for EPT) must be 4665 * 0 if bit 21 of the IA32_VMX_EPT_VPID_CAP MSR is read as 0, 4666 * indicating that the processor does not support accessed and 4667 * dirty flags for EPT. 4668 * - Reserved bits 11:7 and 63:N (where N is the processor's 4669 * physical-address width) must all be 0. 4670 * 4671 * 2. If the "unrestricted guest" VM-execution control is 1, the 4672 * "enable EPT" VM-execution control must also be 1. 4673 */ 4674 static void test_ept_eptp(void) 4675 { 4676 u32 primary_saved = vmcs_read(CPU_EXEC_CTRL0); 4677 u32 secondary_saved = vmcs_read(CPU_EXEC_CTRL1); 4678 u64 eptp_saved = vmcs_read(EPTP); 4679 u32 secondary; 4680 u64 eptp; 4681 u32 i, maxphysaddr; 4682 u64 j, resv_bits_mask = 0; 4683 4684 if (__setup_ept(0xfed40000, false)) { 4685 report_skip("%s : EPT not supported", __func__); 4686 return; 4687 } 4688 4689 test_vmx_valid_controls(); 4690 4691 setup_dummy_ept(); 4692 4693 secondary = vmcs_read(CPU_EXEC_CTRL1); 4694 eptp = vmcs_read(EPTP); 4695 4696 for (i = 0; i < 8; i++) { 4697 eptp = (eptp & ~EPT_MEM_TYPE_MASK) | i; 4698 vmcs_write(EPTP, eptp); 4699 report_prefix_pushf("Enable-EPT enabled; EPT memory type %lu", 4700 eptp & EPT_MEM_TYPE_MASK); 4701 if (is_ept_memtype_supported(i)) 4702 test_vmx_valid_controls(); 4703 else 4704 test_vmx_invalid_controls(); 4705 report_prefix_pop(); 4706 } 4707 4708 eptp = (eptp & ~EPT_MEM_TYPE_MASK) | 6ul; 4709 4710 /* 4711 * Page walk length (bits 5:3). Note, the value in VMCS.EPTP "is 1 4712 * less than the EPT page-walk length".
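 * A 4-level walk is therefore encoded as 3 and, where supported, a
 * 5-level walk as 4; every other encoding is expected to fail.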
4713 */ 4714 for (i = 0; i < 8; i++) { 4715 eptp = (eptp & ~EPTP_PG_WALK_LEN_MASK) | 4716 (i << EPTP_PG_WALK_LEN_SHIFT); 4717 4718 vmcs_write(EPTP, eptp); 4719 report_prefix_pushf("Enable-EPT enabled; EPT page walk length %lu", 4720 eptp & EPTP_PG_WALK_LEN_MASK); 4721 if (i == 3 || (i == 4 && is_5_level_ept_supported())) 4722 test_vmx_valid_controls(); 4723 else 4724 test_vmx_invalid_controls(); 4725 report_prefix_pop(); 4726 } 4727 4728 eptp = (eptp & ~EPTP_PG_WALK_LEN_MASK) | 4729 3ul << EPTP_PG_WALK_LEN_SHIFT; 4730 4731 /* 4732 * Accessed and dirty flag (bit 6) 4733 */ 4734 if (ept_ad_bits_supported()) { 4735 report_info("Processor supports accessed and dirty flag"); 4736 eptp &= ~EPTP_AD_FLAG; 4737 test_eptp_ad_bit(eptp, true); 4738 4739 eptp |= EPTP_AD_FLAG; 4740 test_eptp_ad_bit(eptp, true); 4741 } else { 4742 report_info("Processor does not supports accessed and dirty flag"); 4743 eptp &= ~EPTP_AD_FLAG; 4744 test_eptp_ad_bit(eptp, true); 4745 4746 eptp |= EPTP_AD_FLAG; 4747 test_eptp_ad_bit(eptp, false); 4748 } 4749 4750 /* 4751 * Reserved bits [11:7] and [63:N] 4752 */ 4753 for (i = 0; i < 32; i++) { 4754 eptp = (eptp & 4755 ~(EPTP_RESERV_BITS_MASK << EPTP_RESERV_BITS_SHIFT)) | 4756 (i << EPTP_RESERV_BITS_SHIFT); 4757 vmcs_write(EPTP, eptp); 4758 report_prefix_pushf("Enable-EPT enabled; reserved bits [11:7] %lu", 4759 (eptp >> EPTP_RESERV_BITS_SHIFT) & 4760 EPTP_RESERV_BITS_MASK); 4761 if (i == 0) 4762 test_vmx_valid_controls(); 4763 else 4764 test_vmx_invalid_controls(); 4765 report_prefix_pop(); 4766 } 4767 4768 eptp = (eptp & ~(EPTP_RESERV_BITS_MASK << EPTP_RESERV_BITS_SHIFT)); 4769 4770 maxphysaddr = cpuid_maxphyaddr(); 4771 for (i = 0; i < (63 - maxphysaddr + 1); i++) { 4772 resv_bits_mask |= 1ul << i; 4773 } 4774 4775 for (j = maxphysaddr - 1; j <= 63; j++) { 4776 eptp = (eptp & ~(resv_bits_mask << maxphysaddr)) | 4777 (j < maxphysaddr ? 0 : 1ul << j); 4778 vmcs_write(EPTP, eptp); 4779 report_prefix_pushf("Enable-EPT enabled; reserved bits [63:N] %lu", 4780 (eptp >> maxphysaddr) & resv_bits_mask); 4781 if (j < maxphysaddr) 4782 test_vmx_valid_controls(); 4783 else 4784 test_vmx_invalid_controls(); 4785 report_prefix_pop(); 4786 } 4787 4788 secondary &= ~(CPU_EPT | CPU_URG); 4789 vmcs_write(CPU_EXEC_CTRL1, secondary); 4790 report_prefix_pushf("Enable-EPT disabled, unrestricted-guest disabled"); 4791 test_vmx_valid_controls(); 4792 report_prefix_pop(); 4793 4794 if (!(ctrl_cpu_rev[1].clr & CPU_URG)) 4795 goto skip_unrestricted_guest; 4796 4797 secondary |= CPU_URG; 4798 vmcs_write(CPU_EXEC_CTRL1, secondary); 4799 report_prefix_pushf("Enable-EPT disabled, unrestricted-guest enabled"); 4800 test_vmx_invalid_controls(); 4801 report_prefix_pop(); 4802 4803 secondary |= CPU_EPT; 4804 setup_dummy_ept(); 4805 report_prefix_pushf("Enable-EPT enabled, unrestricted-guest enabled"); 4806 test_vmx_valid_controls(); 4807 report_prefix_pop(); 4808 4809 skip_unrestricted_guest: 4810 secondary &= ~CPU_URG; 4811 vmcs_write(CPU_EXEC_CTRL1, secondary); 4812 report_prefix_pushf("Enable-EPT enabled, unrestricted-guest disabled"); 4813 test_vmx_valid_controls(); 4814 report_prefix_pop(); 4815 4816 vmcs_write(CPU_EXEC_CTRL0, primary_saved); 4817 vmcs_write(CPU_EXEC_CTRL1, secondary_saved); 4818 vmcs_write(EPTP, eptp_saved); 4819 } 4820 4821 /* 4822 * If the 'enable PML' VM-execution control is 1, the 'enable EPT' 4823 * VM-execution control must also be 1. In addition, the PML address 4824 * must satisfy the following checks: 4825 * 4826 * * Bits 11:0 of the address must be 0. 
4827 * * The address should not set any bits beyond the processor's 4828 * physical-address width. 4829 * 4830 * [Intel SDM] 4831 */ 4832 static void test_pml(void) 4833 { 4834 u32 primary_saved = vmcs_read(CPU_EXEC_CTRL0); 4835 u32 secondary_saved = vmcs_read(CPU_EXEC_CTRL1); 4836 u32 primary = primary_saved; 4837 u32 secondary = secondary_saved; 4838 4839 if (!((ctrl_cpu_rev[0].clr & CPU_SECONDARY) && 4840 (ctrl_cpu_rev[1].clr & CPU_EPT) && (ctrl_cpu_rev[1].clr & CPU_PML))) { 4841 report_skip("%s : \"Secondary execution\" or \"enable EPT\" or \"enable PML\" control not supported", __func__); 4842 return; 4843 } 4844 4845 primary |= CPU_SECONDARY; 4846 vmcs_write(CPU_EXEC_CTRL0, primary); 4847 secondary &= ~(CPU_PML | CPU_EPT); 4848 vmcs_write(CPU_EXEC_CTRL1, secondary); 4849 report_prefix_pushf("enable-PML disabled, enable-EPT disabled"); 4850 test_vmx_valid_controls(); 4851 report_prefix_pop(); 4852 4853 secondary |= CPU_PML; 4854 vmcs_write(CPU_EXEC_CTRL1, secondary); 4855 report_prefix_pushf("enable-PML enabled, enable-EPT disabled"); 4856 test_vmx_invalid_controls(); 4857 report_prefix_pop(); 4858 4859 secondary |= CPU_EPT; 4860 setup_dummy_ept(); 4861 report_prefix_pushf("enable-PML enabled, enable-EPT enabled"); 4862 test_vmx_valid_controls(); 4863 report_prefix_pop(); 4864 4865 secondary &= ~CPU_PML; 4866 vmcs_write(CPU_EXEC_CTRL1, secondary); 4867 report_prefix_pushf("enable-PML disabled, enable EPT enabled"); 4868 test_vmx_valid_controls(); 4869 report_prefix_pop(); 4870 4871 test_vmcs_addr_reference(CPU_PML, PMLADDR, "PML address", "PML", 4872 PAGE_SIZE, false, false); 4873 4874 vmcs_write(CPU_EXEC_CTRL0, primary_saved); 4875 vmcs_write(CPU_EXEC_CTRL1, secondary_saved); 4876 } 4877 4878 /* 4879 * If the "activate VMX-preemption timer" VM-execution control is 0, the 4880 * the "save VMX-preemption timer value" VM-exit control must also be 0. 
4881 * 4882 * [Intel SDM] 4883 */ 4884 static void test_vmx_preemption_timer(void) 4885 { 4886 u32 saved_pin = vmcs_read(PIN_CONTROLS); 4887 u32 saved_exit = vmcs_read(EXI_CONTROLS); 4888 u32 pin = saved_pin; 4889 u32 exit = saved_exit; 4890 4891 if (!((ctrl_exit_rev.clr & EXI_SAVE_PREEMPT) || 4892 (ctrl_pin_rev.clr & PIN_PREEMPT))) { 4893 report_skip("%s : \"Save-VMX-preemption-timer\" and/or \"Enable-VMX-preemption-timer\" control not supported", __func__); 4894 return; 4895 } 4896 4897 pin |= PIN_PREEMPT; 4898 vmcs_write(PIN_CONTROLS, pin); 4899 exit &= ~EXI_SAVE_PREEMPT; 4900 vmcs_write(EXI_CONTROLS, exit); 4901 report_prefix_pushf("enable-VMX-preemption-timer enabled, save-VMX-preemption-timer disabled"); 4902 test_vmx_valid_controls(); 4903 report_prefix_pop(); 4904 4905 exit |= EXI_SAVE_PREEMPT; 4906 vmcs_write(EXI_CONTROLS, exit); 4907 report_prefix_pushf("enable-VMX-preemption-timer enabled, save-VMX-preemption-timer enabled"); 4908 test_vmx_valid_controls(); 4909 report_prefix_pop(); 4910 4911 pin &= ~PIN_PREEMPT; 4912 vmcs_write(PIN_CONTROLS, pin); 4913 report_prefix_pushf("enable-VMX-preemption-timer disabled, save-VMX-preemption-timer enabled"); 4914 test_vmx_invalid_controls(); 4915 report_prefix_pop(); 4916 4917 exit &= ~EXI_SAVE_PREEMPT; 4918 vmcs_write(EXI_CONTROLS, exit); 4919 report_prefix_pushf("enable-VMX-preemption-timer disabled, save-VMX-preemption-timer disabled"); 4920 test_vmx_valid_controls(); 4921 report_prefix_pop(); 4922 4923 vmcs_write(PIN_CONTROLS, saved_pin); 4924 vmcs_write(EXI_CONTROLS, saved_exit); 4925 } 4926 4927 extern unsigned char test_mtf1; 4928 extern unsigned char test_mtf2; 4929 extern unsigned char test_mtf3; 4930 extern unsigned char test_mtf4; 4931 4932 static void test_mtf_guest(void) 4933 { 4934 asm ("vmcall;\n\t" 4935 "out %al, $0x80;\n\t" 4936 "test_mtf1:\n\t" 4937 "vmcall;\n\t" 4938 "out %al, $0x80;\n\t" 4939 "test_mtf2:\n\t" 4940 /* 4941 * Prepare for the 'MOV CR3' test. Attempt to induce a 4942 * general-protection fault by moving a non-canonical address into 4943 * CR3. The 'MOV CR3' instruction does not take an imm64 operand, 4944 * so we must MOV the desired value into a register first. 4945 * 4946 * MOV RAX is done before the VMCALL such that MTF is only enabled 4947 * for the instruction under test. 4948 */ 4949 "mov $0xaaaaaaaaaaaaaaaa, %rax;\n\t" 4950 "vmcall;\n\t" 4951 "mov %rax, %cr3;\n\t" 4952 "test_mtf3:\n\t" 4953 "vmcall;\n\t" 4954 /* 4955 * ICEBP/INT1 instruction. Though the instruction is now 4956 * documented, don't rely on assemblers enumerating the 4957 * instruction. Resort to hand assembly. 
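 * The single byte 0xf1 emitted below is the ICEBP opcode, which raises
 * a #DB trap.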
4958 */ 4959 ".byte 0xf1;\n\t" 4960 "vmcall;\n\t" 4961 "test_mtf4:\n\t" 4962 "mov $0, %eax;\n\t"); 4963 } 4964 4965 static void test_mtf_gp_handler(struct ex_regs *regs) 4966 { 4967 regs->rip = (unsigned long) &test_mtf3; 4968 } 4969 4970 static void test_mtf_db_handler(struct ex_regs *regs) 4971 { 4972 } 4973 4974 static void enable_mtf(void) 4975 { 4976 u32 ctrl0 = vmcs_read(CPU_EXEC_CTRL0); 4977 4978 vmcs_write(CPU_EXEC_CTRL0, ctrl0 | CPU_MTF); 4979 } 4980 4981 static void disable_mtf(void) 4982 { 4983 u32 ctrl0 = vmcs_read(CPU_EXEC_CTRL0); 4984 4985 vmcs_write(CPU_EXEC_CTRL0, ctrl0 & ~CPU_MTF); 4986 } 4987 4988 static void enable_tf(void) 4989 { 4990 unsigned long rflags = vmcs_read(GUEST_RFLAGS); 4991 4992 vmcs_write(GUEST_RFLAGS, rflags | X86_EFLAGS_TF); 4993 } 4994 4995 static void disable_tf(void) 4996 { 4997 unsigned long rflags = vmcs_read(GUEST_RFLAGS); 4998 4999 vmcs_write(GUEST_RFLAGS, rflags & ~X86_EFLAGS_TF); 5000 } 5001 5002 static void report_mtf(const char *insn_name, unsigned long exp_rip) 5003 { 5004 unsigned long rip = vmcs_read(GUEST_RIP); 5005 5006 assert_exit_reason(VMX_MTF); 5007 report(rip == exp_rip, "MTF VM-exit after %s. RIP: 0x%lx (expected 0x%lx)", 5008 insn_name, rip, exp_rip); 5009 } 5010 5011 static void vmx_mtf_test(void) 5012 { 5013 unsigned long pending_dbg; 5014 handler old_gp, old_db; 5015 5016 if (!(ctrl_cpu_rev[0].clr & CPU_MTF)) { 5017 report_skip("%s : \"Monitor trap flag\" exec control not supported", __func__); 5018 return; 5019 } 5020 5021 test_set_guest(test_mtf_guest); 5022 5023 /* Expect an MTF VM-exit after OUT instruction */ 5024 enter_guest(); 5025 skip_exit_vmcall(); 5026 5027 enable_mtf(); 5028 enter_guest(); 5029 report_mtf("OUT", (unsigned long) &test_mtf1); 5030 disable_mtf(); 5031 5032 /* 5033 * Concurrent #DB trap and MTF on instruction boundary. Expect MTF 5034 * VM-exit with populated 'pending debug exceptions' VMCS field. 5035 */ 5036 enter_guest(); 5037 skip_exit_vmcall(); 5038 5039 enable_mtf(); 5040 enable_tf(); 5041 5042 enter_guest(); 5043 report_mtf("OUT", (unsigned long) &test_mtf2); 5044 pending_dbg = vmcs_read(GUEST_PENDING_DEBUG); 5045 report(pending_dbg & DR6_BS, 5046 "'pending debug exceptions' field after MTF VM-exit: 0x%lx (expected 0x%lx)", 5047 pending_dbg, (unsigned long) DR6_BS); 5048 5049 disable_mtf(); 5050 disable_tf(); 5051 vmcs_write(GUEST_PENDING_DEBUG, 0); 5052 5053 /* 5054 * #GP exception takes priority over MTF. Expect MTF VM-exit with RIP 5055 * advanced to first instruction of #GP handler. 5056 */ 5057 enter_guest(); 5058 skip_exit_vmcall(); 5059 5060 old_gp = handle_exception(GP_VECTOR, test_mtf_gp_handler); 5061 5062 enable_mtf(); 5063 enter_guest(); 5064 report_mtf("MOV CR3", (unsigned long) get_idt_addr(&boot_idt[GP_VECTOR])); 5065 disable_mtf(); 5066 5067 /* 5068 * Concurrent MTF and privileged software exception (i.e. ICEBP/INT1). 5069 * MTF should follow the delivery of #DB trap, though the SDM doesn't 5070 * provide clear indication of the relative priority. 
5071 */ 5072 enter_guest(); 5073 skip_exit_vmcall(); 5074 5075 handle_exception(GP_VECTOR, old_gp); 5076 old_db = handle_exception(DB_VECTOR, test_mtf_db_handler); 5077 5078 enable_mtf(); 5079 enter_guest(); 5080 report_mtf("INT1", (unsigned long) get_idt_addr(&boot_idt[DB_VECTOR])); 5081 disable_mtf(); 5082 5083 enter_guest(); 5084 skip_exit_vmcall(); 5085 handle_exception(DB_VECTOR, old_db); 5086 vmcs_write(ENT_INTR_INFO, INTR_INFO_VALID_MASK | INTR_TYPE_OTHER_EVENT); 5087 enter_guest(); 5088 report_mtf("injected MTF", (unsigned long) &test_mtf4); 5089 enter_guest(); 5090 } 5091 5092 extern char vmx_mtf_pdpte_guest_begin; 5093 extern char vmx_mtf_pdpte_guest_end; 5094 5095 asm("vmx_mtf_pdpte_guest_begin:\n\t" 5096 "mov %cr0, %rax\n\t" /* save CR0 with PG=1 */ 5097 "vmcall\n\t" /* on return from this CR0.PG=0 */ 5098 "mov %rax, %cr0\n\t" /* restore CR0.PG=1 to enter PAE mode */ 5099 "vmcall\n\t" 5100 "retq\n\t" 5101 "vmx_mtf_pdpte_guest_end:"); 5102 5103 static void vmx_mtf_pdpte_test(void) 5104 { 5105 void *test_mtf_pdpte_guest; 5106 pteval_t *pdpt; 5107 u32 guest_ar_cs; 5108 u64 guest_efer; 5109 pteval_t *pte; 5110 u64 guest_cr0; 5111 u64 guest_cr3; 5112 u64 guest_cr4; 5113 u64 ent_ctls; 5114 int i; 5115 5116 if (setup_ept(false)) 5117 return; 5118 5119 if (!(ctrl_cpu_rev[0].clr & CPU_MTF)) { 5120 report_skip("%s : \"Monitor trap flag\" exec control not supported", __func__); 5121 return; 5122 } 5123 5124 if (!(ctrl_cpu_rev[1].clr & CPU_URG)) { 5125 report_skip("%s : \"Unrestricted guest\" exec control not supported", __func__); 5126 return; 5127 } 5128 5129 vmcs_write(EXC_BITMAP, ~0); 5130 vmcs_write(CPU_EXEC_CTRL1, vmcs_read(CPU_EXEC_CTRL1) | CPU_URG); 5131 5132 /* 5133 * Copy the guest code to an identity-mapped page. 5134 */ 5135 test_mtf_pdpte_guest = alloc_page(); 5136 memcpy(test_mtf_pdpte_guest, &vmx_mtf_pdpte_guest_begin, 5137 &vmx_mtf_pdpte_guest_end - &vmx_mtf_pdpte_guest_begin); 5138 5139 test_set_guest(test_mtf_pdpte_guest); 5140 5141 enter_guest(); 5142 skip_exit_vmcall(); 5143 5144 /* 5145 * Put the guest in non-paged 32-bit protected mode, ready to enter 5146 * PAE mode when CR0.PG is set. CR4.PAE will already have been set 5147 * when the guest started out in long mode. 5148 */ 5149 ent_ctls = vmcs_read(ENT_CONTROLS); 5150 vmcs_write(ENT_CONTROLS, ent_ctls & ~ENT_GUEST_64); 5151 5152 guest_efer = vmcs_read(GUEST_EFER); 5153 vmcs_write(GUEST_EFER, guest_efer & ~(EFER_LMA | EFER_LME)); 5154 5155 /* 5156 * Set CS access rights bits for 32-bit protected mode: 5157 * 3:0 B execute/read/accessed 5158 * 4 1 code or data 5159 * 6:5 0 descriptor privilege level 5160 * 7 1 present 5161 * 11:8 0 reserved 5162 * 12 0 available for use by system software 5163 * 13 0 64 bit mode not active 5164 * 14 1 default operation size 32-bit segment 5165 * 15 1 page granularity: segment limit in 4K units 5166 * 16 0 segment usable 5167 * 31:17 0 reserved 5168 */ 5169 guest_ar_cs = vmcs_read(GUEST_AR_CS); 5170 vmcs_write(GUEST_AR_CS, 0xc09b); 5171 5172 guest_cr0 = vmcs_read(GUEST_CR0); 5173 vmcs_write(GUEST_CR0, guest_cr0 & ~X86_CR0_PG); 5174 5175 guest_cr4 = vmcs_read(GUEST_CR4); 5176 vmcs_write(GUEST_CR4, guest_cr4 & ~X86_CR4_PCIDE); 5177 5178 guest_cr3 = vmcs_read(GUEST_CR3); 5179 5180 /* 5181 * Turn the 4-level page table into a PAE page table by following the 0th 5182 * PML4 entry to a PDPT page, and grab the first four PDPTEs from that 5183 * page. 5184 * 5185 * Why does this work? 
5186 * 5187 * PAE uses 32-bit addressing which implies: 5188 * Bits 11:0 page offset 5189 * Bits 20:12 entry into 512-entry page table 5190 * Bits 29:21 entry into a 512-entry directory table 5191 * Bits 31:30 entry into the page directory pointer table. 5192 * Bits 63:32 zero 5193 * 5194 * As only 2 bits are needed to select the PDPTEs for the entire 5195 * 32-bit address space, take the first 4 PDPTEs in the level 3 page 5196 * directory pointer table. It doesn't matter which of these PDPTEs 5197 * are present because they must cover the guest code given that it 5198 * has already run successfully. 5199 * 5200 * Get a pointer to PTE for GVA=0 in the page directory pointer table 5201 */ 5202 pte = get_pte_level( 5203 (pgd_t *)phys_to_virt(guest_cr3 & ~X86_CR3_PCID_MASK), 0, 5204 PDPT_LEVEL); 5205 5206 /* 5207 * Need some memory for the 4-entry PAE page directory pointer 5208 * table. Use the end of the identity-mapped page where the guest code 5209 * is stored. There is definitely space as the guest code is only a 5210 * few bytes. 5211 */ 5212 pdpt = test_mtf_pdpte_guest + PAGE_SIZE - 4 * sizeof(pteval_t); 5213 5214 /* 5215 * Copy the first four PDPTEs into the PAE page table with reserved 5216 * bits cleared. Note that permission bits from the PML4E and PDPTE 5217 * are not propagated. 5218 */ 5219 for (i = 0; i < 4; i++) { 5220 TEST_ASSERT_EQ_MSG(0, (pte[i] & PDPTE64_RSVD_MASK), 5221 "PDPTE has invalid reserved bits"); 5222 TEST_ASSERT_EQ_MSG(0, (pte[i] & PDPTE64_PAGE_SIZE_MASK), 5223 "Cannot use 1GB super pages for PAE"); 5224 pdpt[i] = pte[i] & ~(PAE_PDPTE_RSVD_MASK); 5225 } 5226 vmcs_write(GUEST_CR3, virt_to_phys(pdpt)); 5227 5228 enable_mtf(); 5229 enter_guest(); 5230 assert_exit_reason(VMX_MTF); 5231 disable_mtf(); 5232 5233 /* 5234 * The four PDPTEs should have been loaded into the VMCS when 5235 * the guest set CR0.PG to enter PAE mode. 5236 */ 5237 for (i = 0; i < 4; i++) { 5238 u64 pdpte = vmcs_read(GUEST_PDPTE + 2 * i); 5239 5240 report(pdpte == pdpt[i], "PDPTE%d is 0x%lx (expected 0x%lx)", 5241 i, pdpte, pdpt[i]); 5242 } 5243 5244 /* 5245 * Now, try to enter the guest in PAE mode. If the PDPTEs in the 5246 * vmcs are wrong, this will fail. 5247 */ 5248 enter_guest(); 5249 skip_exit_vmcall(); 5250 5251 /* 5252 * Return guest to 64-bit mode and wrap up. 5253 */ 5254 vmcs_write(ENT_CONTROLS, ent_ctls); 5255 vmcs_write(GUEST_EFER, guest_efer); 5256 vmcs_write(GUEST_AR_CS, guest_ar_cs); 5257 vmcs_write(GUEST_CR0, guest_cr0); 5258 vmcs_write(GUEST_CR4, guest_cr4); 5259 vmcs_write(GUEST_CR3, guest_cr3); 5260 5261 enter_guest(); 5262 } 5263 5264 /* 5265 * Tests for VM-execution control fields 5266 */ 5267 static void test_vm_execution_ctls(void) 5268 { 5269 test_pin_based_ctls(); 5270 test_primary_processor_based_ctls(); 5271 test_secondary_processor_based_ctls(); 5272 test_cr3_targets(); 5273 test_io_bitmaps(); 5274 test_msr_bitmap(); 5275 test_apic_ctls(); 5276 test_tpr_threshold(); 5277 test_nmi_ctrls(); 5278 test_pml(); 5279 test_vpid(); 5280 test_ept_eptp(); 5281 test_vmx_preemption_timer(); 5282 } 5283 5284 /* 5285 * The following checks are performed for the VM-entry MSR-load address if 5286 * the VM-entry MSR-load count field is non-zero: 5287 * 5288 * - The lower 4 bits of the VM-entry MSR-load address must be 0. 5289 * The address should not set any bits beyond the processor's 5290 * physical-address width. 5291 * 5292 * - The address of the last byte in the VM-entry MSR-load area 5293 * should not set any bits beyond the processor's physical-address 5294 * width. 
The address of this last byte is VM-entry MSR-load address 5295 * + (MSR count * 16) - 1. (The arithmetic used for the computation 5296 * uses more bits than the processor's physical-address width.) 5297 * 5298 * 5299 * [Intel SDM] 5300 */ 5301 static void test_entry_msr_load(void) 5302 { 5303 entry_msr_load = alloc_page(); 5304 u64 tmp; 5305 u32 entry_msr_ld_cnt = 1; 5306 int i; 5307 u32 addr_len = 64; 5308 5309 vmcs_write(ENT_MSR_LD_CNT, entry_msr_ld_cnt); 5310 5311 /* Check first 4 bits of VM-entry MSR-load address */ 5312 for (i = 0; i < 4; i++) { 5313 tmp = (u64)entry_msr_load | 1ull << i; 5314 vmcs_write(ENTER_MSR_LD_ADDR, tmp); 5315 report_prefix_pushf("VM-entry MSR-load addr [4:0] %lx", 5316 tmp & 0xf); 5317 test_vmx_invalid_controls(); 5318 report_prefix_pop(); 5319 } 5320 5321 if (basic_msr.val & (1ul << 48)) 5322 addr_len = 32; 5323 5324 test_vmcs_addr_values("VM-entry-MSR-load address", 5325 ENTER_MSR_LD_ADDR, 16, false, false, 5326 4, addr_len - 1); 5327 5328 /* 5329 * Check last byte of VM-entry MSR-load address 5330 */ 5331 entry_msr_load = (struct vmx_msr_entry *)((u64)entry_msr_load & ~0xf); 5332 5333 for (i = (addr_len == 64 ? cpuid_maxphyaddr(): addr_len); 5334 i < 64; i++) { 5335 tmp = ((u64)entry_msr_load + entry_msr_ld_cnt * 16 - 1) | 5336 1ul << i; 5337 vmcs_write(ENTER_MSR_LD_ADDR, 5338 tmp - (entry_msr_ld_cnt * 16 - 1)); 5339 test_vmx_invalid_controls(); 5340 } 5341 5342 vmcs_write(ENT_MSR_LD_CNT, 2); 5343 vmcs_write(ENTER_MSR_LD_ADDR, (1ULL << cpuid_maxphyaddr()) - 16); 5344 test_vmx_invalid_controls(); 5345 vmcs_write(ENTER_MSR_LD_ADDR, (1ULL << cpuid_maxphyaddr()) - 32); 5346 test_vmx_valid_controls(); 5347 vmcs_write(ENTER_MSR_LD_ADDR, (1ULL << cpuid_maxphyaddr()) - 48); 5348 test_vmx_valid_controls(); 5349 } 5350 5351 static struct vmx_state_area_test_data { 5352 u32 msr; 5353 u64 exp; 5354 bool enabled; 5355 } vmx_state_area_test_data; 5356 5357 static void guest_state_test_main(void) 5358 { 5359 u64 obs; 5360 struct vmx_state_area_test_data *data = &vmx_state_area_test_data; 5361 5362 while (1) { 5363 if (vmx_get_test_stage() == 2) 5364 break; 5365 5366 if (data->enabled) { 5367 obs = rdmsr(data->msr); 5368 report(data->exp == obs, 5369 "Guest state is 0x%lx (expected 0x%lx)", 5370 obs, data->exp); 5371 } 5372 5373 vmcall(); 5374 } 5375 5376 asm volatile("fnop"); 5377 } 5378 5379 static void test_guest_state(const char *test, bool xfail, u64 field, 5380 const char * field_name) 5381 { 5382 struct vmentry_result result; 5383 u8 abort_flags; 5384 5385 abort_flags = ABORT_ON_EARLY_VMENTRY_FAIL; 5386 if (!xfail) 5387 abort_flags = ABORT_ON_INVALID_GUEST_STATE; 5388 5389 __enter_guest(abort_flags, &result); 5390 5391 report(result.exit_reason.failed_vmentry == xfail && 5392 ((xfail && result.exit_reason.basic == VMX_FAIL_STATE) || 5393 (!xfail && result.exit_reason.basic == VMX_VMCALL)) && 5394 (!xfail || vmcs_read(EXI_QUALIFICATION) == ENTRY_FAIL_DEFAULT), 5395 "%s, %s = %lx", test, field_name, field); 5396 5397 if (!result.exit_reason.failed_vmentry) 5398 skip_exit_insn(); 5399 } 5400 5401 /* 5402 * Tests for VM-entry control fields 5403 */ 5404 static void test_vm_entry_ctls(void) 5405 { 5406 test_invalid_event_injection(); 5407 test_entry_msr_load(); 5408 } 5409 5410 /* 5411 * The following checks are performed for the VM-exit MSR-store address if 5412 * the VM-exit MSR-store count field is non-zero: 5413 * 5414 * - The lower 4 bits of the VM-exit MSR-store address must be 0. 
5415 * The address should not set any bits beyond the processor's 5416 * physical-address width. 5417 * 5418 * - The address of the last byte in the VM-exit MSR-store area 5419 * should not set any bits beyond the processor's physical-address 5420 * width. The address of this last byte is VM-exit MSR-store address 5421 * + (MSR count * 16) - 1. (The arithmetic used for the computation 5422 * uses more bits than the processor's physical-address width.) 5423 * 5424 * If IA32_VMX_BASIC[48] is read as 1, neither address should set any bits 5425 * in the range 63:32. 5426 * 5427 * [Intel SDM] 5428 */ 5429 static void test_exit_msr_store(void) 5430 { 5431 exit_msr_store = alloc_page(); 5432 u64 tmp; 5433 u32 exit_msr_st_cnt = 1; 5434 int i; 5435 u32 addr_len = 64; 5436 5437 vmcs_write(EXI_MSR_ST_CNT, exit_msr_st_cnt); 5438 5439 /* Check first 4 bits of VM-exit MSR-store address */ 5440 for (i = 0; i < 4; i++) { 5441 tmp = (u64)exit_msr_store | 1ull << i; 5442 vmcs_write(EXIT_MSR_ST_ADDR, tmp); 5443 report_prefix_pushf("VM-exit MSR-store addr [4:0] %lx", 5444 tmp & 0xf); 5445 test_vmx_invalid_controls(); 5446 report_prefix_pop(); 5447 } 5448 5449 if (basic_msr.val & (1ul << 48)) 5450 addr_len = 32; 5451 5452 test_vmcs_addr_values("VM-exit-MSR-store address", 5453 EXIT_MSR_ST_ADDR, 16, false, false, 5454 4, addr_len - 1); 5455 5456 /* 5457 * Check last byte of VM-exit MSR-store address 5458 */ 5459 exit_msr_store = (struct vmx_msr_entry *)((u64)exit_msr_store & ~0xf); 5460 5461 for (i = (addr_len == 64 ? cpuid_maxphyaddr(): addr_len); 5462 i < 64; i++) { 5463 tmp = ((u64)exit_msr_store + exit_msr_st_cnt * 16 - 1) | 5464 1ul << i; 5465 vmcs_write(EXIT_MSR_ST_ADDR, 5466 tmp - (exit_msr_st_cnt * 16 - 1)); 5467 test_vmx_invalid_controls(); 5468 } 5469 5470 vmcs_write(EXI_MSR_ST_CNT, 2); 5471 vmcs_write(EXIT_MSR_ST_ADDR, (1ULL << cpuid_maxphyaddr()) - 16); 5472 test_vmx_invalid_controls(); 5473 vmcs_write(EXIT_MSR_ST_ADDR, (1ULL << cpuid_maxphyaddr()) - 32); 5474 test_vmx_valid_controls(); 5475 vmcs_write(EXIT_MSR_ST_ADDR, (1ULL << cpuid_maxphyaddr()) - 48); 5476 test_vmx_valid_controls(); 5477 } 5478 5479 /* 5480 * Tests for VM-exit controls 5481 */ 5482 static void test_vm_exit_ctls(void) 5483 { 5484 test_exit_msr_store(); 5485 } 5486 5487 /* 5488 * Check that the virtual CPU checks all of the VMX controls as 5489 * documented in the Intel SDM. 5490 */ 5491 static void vmx_controls_test(void) 5492 { 5493 /* 5494 * Bit 1 of the guest's RFLAGS must be 1, or VM-entry will 5495 * fail due to invalid guest state, should we make it that 5496 * far. 5497 */ 5498 vmcs_write(GUEST_RFLAGS, 0); 5499 5500 test_vm_execution_ctls(); 5501 test_vm_exit_ctls(); 5502 test_vm_entry_ctls(); 5503 } 5504 5505 struct apic_reg_virt_config { 5506 bool apic_register_virtualization; 5507 bool use_tpr_shadow; 5508 bool virtualize_apic_accesses; 5509 bool virtualize_x2apic_mode; 5510 bool activate_secondary_controls; 5511 }; 5512 5513 struct apic_reg_test { 5514 const char *name; 5515 struct apic_reg_virt_config apic_reg_virt_config; 5516 }; 5517 5518 struct apic_reg_virt_expectation { 5519 enum Reason rd_exit_reason; 5520 enum Reason wr_exit_reason; 5521 u32 val; 5522 u32 (*virt_fn)(u32); 5523 5524 /* 5525 * If false, accessing the APIC access address from L2 is treated as a 5526 * normal memory operation, rather than triggering virtualization. 
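 * In that case reads and writes are expected to hit the backing page
 * directly, and the exit reason observed by L1 is simply VMX_VMCALL.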
5527 */ 5528 bool virtualize_apic_accesses; 5529 }; 5530 5531 static u32 apic_virt_identity(u32 val) 5532 { 5533 return val; 5534 } 5535 5536 static u32 apic_virt_nibble1(u32 val) 5537 { 5538 return val & 0xf0; 5539 } 5540 5541 static u32 apic_virt_byte3(u32 val) 5542 { 5543 return val & (0xff << 24); 5544 } 5545 5546 static bool apic_reg_virt_exit_expectation( 5547 u32 reg, struct apic_reg_virt_config *config, 5548 struct apic_reg_virt_expectation *expectation) 5549 { 5550 /* Good configs, where some L2 APIC accesses are virtualized. */ 5551 bool virtualize_apic_accesses_only = 5552 config->virtualize_apic_accesses && 5553 !config->use_tpr_shadow && 5554 !config->apic_register_virtualization && 5555 !config->virtualize_x2apic_mode && 5556 config->activate_secondary_controls; 5557 bool virtualize_apic_accesses_and_use_tpr_shadow = 5558 config->virtualize_apic_accesses && 5559 config->use_tpr_shadow && 5560 !config->apic_register_virtualization && 5561 !config->virtualize_x2apic_mode && 5562 config->activate_secondary_controls; 5563 bool apic_register_virtualization = 5564 config->virtualize_apic_accesses && 5565 config->use_tpr_shadow && 5566 config->apic_register_virtualization && 5567 !config->virtualize_x2apic_mode && 5568 config->activate_secondary_controls; 5569 5570 expectation->val = MAGIC_VAL_1; 5571 expectation->virt_fn = apic_virt_identity; 5572 expectation->virtualize_apic_accesses = 5573 config->virtualize_apic_accesses && 5574 config->activate_secondary_controls; 5575 if (virtualize_apic_accesses_only) { 5576 expectation->rd_exit_reason = VMX_APIC_ACCESS; 5577 expectation->wr_exit_reason = VMX_APIC_ACCESS; 5578 } else if (virtualize_apic_accesses_and_use_tpr_shadow) { 5579 switch (reg) { 5580 case APIC_TASKPRI: 5581 expectation->rd_exit_reason = VMX_VMCALL; 5582 expectation->wr_exit_reason = VMX_VMCALL; 5583 expectation->virt_fn = apic_virt_nibble1; 5584 break; 5585 default: 5586 expectation->rd_exit_reason = VMX_APIC_ACCESS; 5587 expectation->wr_exit_reason = VMX_APIC_ACCESS; 5588 } 5589 } else if (apic_register_virtualization) { 5590 expectation->rd_exit_reason = VMX_VMCALL; 5591 5592 switch (reg) { 5593 case APIC_ID: 5594 case APIC_EOI: 5595 case APIC_LDR: 5596 case APIC_DFR: 5597 case APIC_SPIV: 5598 case APIC_ESR: 5599 case APIC_ICR: 5600 case APIC_LVTT: 5601 case APIC_LVTTHMR: 5602 case APIC_LVTPC: 5603 case APIC_LVT0: 5604 case APIC_LVT1: 5605 case APIC_LVTERR: 5606 case APIC_TMICT: 5607 case APIC_TDCR: 5608 expectation->wr_exit_reason = VMX_APIC_WRITE; 5609 break; 5610 case APIC_LVR: 5611 case APIC_ISR ... APIC_ISR + 0x70: 5612 case APIC_TMR ... APIC_TMR + 0x70: 5613 case APIC_IRR ... APIC_IRR + 0x70: 5614 expectation->wr_exit_reason = VMX_APIC_ACCESS; 5615 break; 5616 case APIC_TASKPRI: 5617 expectation->wr_exit_reason = VMX_VMCALL; 5618 expectation->virt_fn = apic_virt_nibble1; 5619 break; 5620 case APIC_ICR2: 5621 expectation->wr_exit_reason = VMX_VMCALL; 5622 expectation->virt_fn = apic_virt_byte3; 5623 break; 5624 default: 5625 expectation->rd_exit_reason = VMX_APIC_ACCESS; 5626 expectation->wr_exit_reason = VMX_APIC_ACCESS; 5627 } 5628 } else if (!expectation->virtualize_apic_accesses) { 5629 /* 5630 * No APIC registers are directly virtualized. This includes 5631 * VTPR, which can be virtualized through MOV to/from CR8 via 5632 * the use TPR shadow control, but not through directly 5633 * accessing VTPR. 
5634 */ 5635 expectation->rd_exit_reason = VMX_VMCALL; 5636 expectation->wr_exit_reason = VMX_VMCALL; 5637 } else { 5638 printf("Cannot parse APIC register virtualization config:\n" 5639 "\tvirtualize_apic_accesses: %d\n" 5640 "\tuse_tpr_shadow: %d\n" 5641 "\tapic_register_virtualization: %d\n" 5642 "\tvirtualize_x2apic_mode: %d\n" 5643 "\tactivate_secondary_controls: %d\n", 5644 config->virtualize_apic_accesses, 5645 config->use_tpr_shadow, 5646 config->apic_register_virtualization, 5647 config->virtualize_x2apic_mode, 5648 config->activate_secondary_controls); 5649 5650 return false; 5651 } 5652 5653 return true; 5654 } 5655 5656 struct apic_reg_test apic_reg_tests[] = { 5657 /* Good configs, where some L2 APIC accesses are virtualized. */ 5658 { 5659 .name = "Virtualize APIC accesses", 5660 .apic_reg_virt_config = { 5661 .virtualize_apic_accesses = true, 5662 .use_tpr_shadow = false, 5663 .apic_register_virtualization = false, 5664 .virtualize_x2apic_mode = false, 5665 .activate_secondary_controls = true, 5666 }, 5667 }, 5668 { 5669 .name = "Virtualize APIC accesses + Use TPR shadow", 5670 .apic_reg_virt_config = { 5671 .virtualize_apic_accesses = true, 5672 .use_tpr_shadow = true, 5673 .apic_register_virtualization = false, 5674 .virtualize_x2apic_mode = false, 5675 .activate_secondary_controls = true, 5676 }, 5677 }, 5678 { 5679 .name = "APIC-register virtualization", 5680 .apic_reg_virt_config = { 5681 .virtualize_apic_accesses = true, 5682 .use_tpr_shadow = true, 5683 .apic_register_virtualization = true, 5684 .virtualize_x2apic_mode = false, 5685 .activate_secondary_controls = true, 5686 }, 5687 }, 5688 5689 /* 5690 * Test that the secondary processor-based VM-execution controls are 5691 * correctly ignored when "activate secondary controls" is disabled. 5692 */ 5693 { 5694 .name = "Activate secondary controls off", 5695 .apic_reg_virt_config = { 5696 .virtualize_apic_accesses = true, 5697 .use_tpr_shadow = false, 5698 .apic_register_virtualization = true, 5699 .virtualize_x2apic_mode = true, 5700 .activate_secondary_controls = false, 5701 }, 5702 }, 5703 { 5704 .name = "Activate secondary controls off + Use TPR shadow", 5705 .apic_reg_virt_config = { 5706 .virtualize_apic_accesses = true, 5707 .use_tpr_shadow = true, 5708 .apic_register_virtualization = true, 5709 .virtualize_x2apic_mode = true, 5710 .activate_secondary_controls = false, 5711 }, 5712 }, 5713 5714 /* 5715 * Test that the APIC access address is treated like an arbitrary memory 5716 * address when "virtualize APIC accesses" is disabled. 5717 */ 5718 { 5719 .name = "Virtualize APIC accesses off + Use TPR shadow", 5720 .apic_reg_virt_config = { 5721 .virtualize_apic_accesses = false, 5722 .use_tpr_shadow = true, 5723 .apic_register_virtualization = true, 5724 .virtualize_x2apic_mode = true, 5725 .activate_secondary_controls = true, 5726 }, 5727 }, 5728 5729 /* 5730 * Test that VM entry fails due to invalid controls when 5731 * "APIC-register virtualization" is enabled while "use TPR shadow" is 5732 * disabled. 5733 */ 5734 { 5735 .name = "APIC-register virtualization + Use TPR shadow off", 5736 .apic_reg_virt_config = { 5737 .virtualize_apic_accesses = true, 5738 .use_tpr_shadow = false, 5739 .apic_register_virtualization = true, 5740 .virtualize_x2apic_mode = false, 5741 .activate_secondary_controls = true, 5742 }, 5743 }, 5744 5745 /* 5746 * Test that VM entry fails due to invalid controls when 5747 * "Virtualize x2APIC mode" is enabled while "use TPR shadow" is 5748 * disabled. 
5749 */ 5750 { 5751 .name = "Virtualize x2APIC mode + Use TPR shadow off", 5752 .apic_reg_virt_config = { 5753 .virtualize_apic_accesses = false, 5754 .use_tpr_shadow = false, 5755 .apic_register_virtualization = false, 5756 .virtualize_x2apic_mode = true, 5757 .activate_secondary_controls = true, 5758 }, 5759 }, 5760 { 5761 .name = "Virtualize x2APIC mode + Use TPR shadow off v2", 5762 .apic_reg_virt_config = { 5763 .virtualize_apic_accesses = false, 5764 .use_tpr_shadow = false, 5765 .apic_register_virtualization = true, 5766 .virtualize_x2apic_mode = true, 5767 .activate_secondary_controls = true, 5768 }, 5769 }, 5770 5771 /* 5772 * Test that VM entry fails due to invalid controls when 5773 * "virtualize x2APIC mode" is enabled while "virtualize APIC accesses" 5774 * is enabled. 5775 */ 5776 { 5777 .name = "Virtualize x2APIC mode + Virtualize APIC accesses", 5778 .apic_reg_virt_config = { 5779 .virtualize_apic_accesses = true, 5780 .use_tpr_shadow = true, 5781 .apic_register_virtualization = false, 5782 .virtualize_x2apic_mode = true, 5783 .activate_secondary_controls = true, 5784 }, 5785 }, 5786 { 5787 .name = "Virtualize x2APIC mode + Virtualize APIC accesses v2", 5788 .apic_reg_virt_config = { 5789 .virtualize_apic_accesses = true, 5790 .use_tpr_shadow = true, 5791 .apic_register_virtualization = true, 5792 .virtualize_x2apic_mode = true, 5793 .activate_secondary_controls = true, 5794 }, 5795 }, 5796 }; 5797 5798 enum Apic_op { 5799 APIC_OP_XAPIC_RD, 5800 APIC_OP_XAPIC_WR, 5801 TERMINATE, 5802 }; 5803 5804 static u32 vmx_xapic_read(u32 *apic_access_address, u32 reg) 5805 { 5806 return *(volatile u32 *)((uintptr_t)apic_access_address + reg); 5807 } 5808 5809 static void vmx_xapic_write(u32 *apic_access_address, u32 reg, u32 val) 5810 { 5811 *(volatile u32 *)((uintptr_t)apic_access_address + reg) = val; 5812 } 5813 5814 struct apic_reg_virt_guest_args { 5815 enum Apic_op op; 5816 u32 *apic_access_address; 5817 u32 reg; 5818 u32 val; 5819 bool check_rd; 5820 u32 (*virt_fn)(u32); 5821 } apic_reg_virt_guest_args; 5822 5823 static void apic_reg_virt_guest(void) 5824 { 5825 volatile struct apic_reg_virt_guest_args *args = 5826 &apic_reg_virt_guest_args; 5827 5828 for (;;) { 5829 enum Apic_op op = args->op; 5830 u32 *apic_access_address = args->apic_access_address; 5831 u32 reg = args->reg; 5832 u32 val = args->val; 5833 bool check_rd = args->check_rd; 5834 u32 (*virt_fn)(u32) = args->virt_fn; 5835 5836 if (op == TERMINATE) 5837 break; 5838 5839 if (op == APIC_OP_XAPIC_RD) { 5840 u32 ret = vmx_xapic_read(apic_access_address, reg); 5841 5842 if (check_rd) { 5843 u32 want = virt_fn(val); 5844 u32 got = virt_fn(ret); 5845 5846 report(got == want, 5847 "read 0x%x, expected 0x%x.", got, want); 5848 } 5849 } else if (op == APIC_OP_XAPIC_WR) { 5850 vmx_xapic_write(apic_access_address, reg, val); 5851 } 5852 5853 /* 5854 * The L1 should always execute a vmcall after it's done testing 5855 * an individual APIC operation. This helps to validate that the 5856 * L1 and L2 are in sync with each other, as expected. 
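		 *
		 * Roughly, each iteration is one half of this handshake:
		 *   L1: set up args and call enter_guest()
		 *   L2 (this code): perform the requested access, then vmcall
		 *   L1: verify the exit, skip_exit_vmcall(), move to the next reg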
5857 */ 5858 vmcall(); 5859 } 5860 } 5861 5862 static void test_xapic_rd( 5863 u32 reg, struct apic_reg_virt_expectation *expectation, 5864 u32 *apic_access_address, u32 *virtual_apic_page) 5865 { 5866 u32 val = expectation->val; 5867 u32 exit_reason_want = expectation->rd_exit_reason; 5868 struct apic_reg_virt_guest_args *args = &apic_reg_virt_guest_args; 5869 5870 report_prefix_pushf("xapic - reading 0x%03x", reg); 5871 5872 /* Configure guest to do an xapic read */ 5873 args->op = APIC_OP_XAPIC_RD; 5874 args->apic_access_address = apic_access_address; 5875 args->reg = reg; 5876 args->val = val; 5877 args->check_rd = exit_reason_want == VMX_VMCALL; 5878 args->virt_fn = expectation->virt_fn; 5879 5880 /* Setup virtual APIC page */ 5881 if (!expectation->virtualize_apic_accesses) { 5882 apic_access_address[apic_reg_index(reg)] = val; 5883 virtual_apic_page[apic_reg_index(reg)] = 0; 5884 } else if (exit_reason_want == VMX_VMCALL) { 5885 apic_access_address[apic_reg_index(reg)] = 0; 5886 virtual_apic_page[apic_reg_index(reg)] = val; 5887 } 5888 5889 /* Enter guest */ 5890 enter_guest(); 5891 5892 /* 5893 * Validate the behavior and 5894 * pass a magic value back to the guest. 5895 */ 5896 if (exit_reason_want == VMX_APIC_ACCESS) { 5897 u32 apic_page_offset = vmcs_read(EXI_QUALIFICATION) & 0xfff; 5898 5899 assert_exit_reason(exit_reason_want); 5900 report(apic_page_offset == reg, 5901 "got APIC access exit @ page offset 0x%03x, want 0x%03x", 5902 apic_page_offset, reg); 5903 skip_exit_insn(); 5904 5905 /* Reenter guest so it can consume/check rcx and exit again. */ 5906 enter_guest(); 5907 } else if (exit_reason_want != VMX_VMCALL) { 5908 report_fail("Oops, bad exit expectation: %u.", exit_reason_want); 5909 } 5910 5911 skip_exit_vmcall(); 5912 report_prefix_pop(); 5913 } 5914 5915 static void test_xapic_wr( 5916 u32 reg, struct apic_reg_virt_expectation *expectation, 5917 u32 *apic_access_address, u32 *virtual_apic_page) 5918 { 5919 u32 val = expectation->val; 5920 u32 exit_reason_want = expectation->wr_exit_reason; 5921 struct apic_reg_virt_guest_args *args = &apic_reg_virt_guest_args; 5922 bool virtualized = 5923 expectation->virtualize_apic_accesses && 5924 (exit_reason_want == VMX_APIC_WRITE || 5925 exit_reason_want == VMX_VMCALL); 5926 bool checked = false; 5927 5928 report_prefix_pushf("xapic - writing 0x%x to 0x%03x", val, reg); 5929 5930 /* Configure guest to do an xapic read */ 5931 args->op = APIC_OP_XAPIC_WR; 5932 args->apic_access_address = apic_access_address; 5933 args->reg = reg; 5934 args->val = val; 5935 5936 /* Setup virtual APIC page */ 5937 if (virtualized || !expectation->virtualize_apic_accesses) { 5938 apic_access_address[apic_reg_index(reg)] = 0; 5939 virtual_apic_page[apic_reg_index(reg)] = 0; 5940 } 5941 5942 /* Enter guest */ 5943 enter_guest(); 5944 5945 /* 5946 * Validate the behavior and 5947 * pass a magic value back to the guest. 5948 */ 5949 if (exit_reason_want == VMX_APIC_ACCESS) { 5950 u32 apic_page_offset = vmcs_read(EXI_QUALIFICATION) & 0xfff; 5951 5952 assert_exit_reason(exit_reason_want); 5953 report(apic_page_offset == reg, 5954 "got APIC access exit @ page offset 0x%03x, want 0x%03x", 5955 apic_page_offset, reg); 5956 skip_exit_insn(); 5957 5958 /* Reenter guest so it can consume/check rcx and exit again. 
*/ 5959 enter_guest(); 5960 } else if (exit_reason_want == VMX_APIC_WRITE) { 5961 assert_exit_reason(exit_reason_want); 5962 report(virtual_apic_page[apic_reg_index(reg)] == val, 5963 "got APIC write exit @ page offset 0x%03x; val is 0x%x, want 0x%x", 5964 apic_reg_index(reg), 5965 virtual_apic_page[apic_reg_index(reg)], val); 5966 checked = true; 5967 5968 /* Reenter guest so it can consume/check rcx and exit again. */ 5969 enter_guest(); 5970 } else if (exit_reason_want != VMX_VMCALL) { 5971 report_fail("Oops, bad exit expectation: %u.", exit_reason_want); 5972 } 5973 5974 assert_exit_reason(VMX_VMCALL); 5975 if (virtualized && !checked) { 5976 u32 want = expectation->virt_fn(val); 5977 u32 got = virtual_apic_page[apic_reg_index(reg)]; 5978 got = expectation->virt_fn(got); 5979 5980 report(got == want, "exitless write; val is 0x%x, want 0x%x", 5981 got, want); 5982 } else if (!expectation->virtualize_apic_accesses && !checked) { 5983 u32 got = apic_access_address[apic_reg_index(reg)]; 5984 5985 report(got == val, 5986 "non-virtualized write; val is 0x%x, want 0x%x", got, 5987 val); 5988 } else if (!expectation->virtualize_apic_accesses && checked) { 5989 report_fail("Non-virtualized write was prematurely checked!"); 5990 } 5991 5992 skip_exit_vmcall(); 5993 report_prefix_pop(); 5994 } 5995 5996 enum Config_type { 5997 CONFIG_TYPE_GOOD, 5998 CONFIG_TYPE_UNSUPPORTED, 5999 CONFIG_TYPE_VMENTRY_FAILS_EARLY, 6000 }; 6001 6002 static enum Config_type configure_apic_reg_virt_test( 6003 struct apic_reg_virt_config *apic_reg_virt_config) 6004 { 6005 u32 cpu_exec_ctrl0 = vmcs_read(CPU_EXEC_CTRL0); 6006 u32 cpu_exec_ctrl1 = vmcs_read(CPU_EXEC_CTRL1); 6007 /* Configs where L2 entry fails early, due to invalid controls. */ 6008 bool use_tpr_shadow_incorrectly_off = 6009 !apic_reg_virt_config->use_tpr_shadow && 6010 (apic_reg_virt_config->apic_register_virtualization || 6011 apic_reg_virt_config->virtualize_x2apic_mode) && 6012 apic_reg_virt_config->activate_secondary_controls; 6013 bool virtualize_apic_accesses_incorrectly_on = 6014 apic_reg_virt_config->virtualize_apic_accesses && 6015 apic_reg_virt_config->virtualize_x2apic_mode && 6016 apic_reg_virt_config->activate_secondary_controls; 6017 bool vmentry_fails_early = 6018 use_tpr_shadow_incorrectly_off || 6019 virtualize_apic_accesses_incorrectly_on; 6020 6021 if (apic_reg_virt_config->activate_secondary_controls) { 6022 if (!(ctrl_cpu_rev[0].clr & CPU_SECONDARY)) { 6023 printf("VM-execution control \"activate secondary controls\" NOT supported.\n"); 6024 return CONFIG_TYPE_UNSUPPORTED; 6025 } 6026 cpu_exec_ctrl0 |= CPU_SECONDARY; 6027 } else { 6028 cpu_exec_ctrl0 &= ~CPU_SECONDARY; 6029 } 6030 6031 if (apic_reg_virt_config->virtualize_apic_accesses) { 6032 if (!(ctrl_cpu_rev[1].clr & CPU_VIRT_APIC_ACCESSES)) { 6033 printf("VM-execution control \"virtualize APIC accesses\" NOT supported.\n"); 6034 return CONFIG_TYPE_UNSUPPORTED; 6035 } 6036 cpu_exec_ctrl1 |= CPU_VIRT_APIC_ACCESSES; 6037 } else { 6038 cpu_exec_ctrl1 &= ~CPU_VIRT_APIC_ACCESSES; 6039 } 6040 6041 if (apic_reg_virt_config->use_tpr_shadow) { 6042 if (!(ctrl_cpu_rev[0].clr & CPU_TPR_SHADOW)) { 6043 printf("VM-execution control \"use TPR shadow\" NOT supported.\n"); 6044 return CONFIG_TYPE_UNSUPPORTED; 6045 } 6046 cpu_exec_ctrl0 |= CPU_TPR_SHADOW; 6047 } else { 6048 cpu_exec_ctrl0 &= ~CPU_TPR_SHADOW; 6049 } 6050 6051 if (apic_reg_virt_config->apic_register_virtualization) { 6052 if (!(ctrl_cpu_rev[1].clr & CPU_APIC_REG_VIRT)) { 6053 printf("VM-execution control \"APIC-register 
virtualization\" NOT supported.\n");
			return CONFIG_TYPE_UNSUPPORTED;
		}
		cpu_exec_ctrl1 |= CPU_APIC_REG_VIRT;
	} else {
		cpu_exec_ctrl1 &= ~CPU_APIC_REG_VIRT;
	}

	if (apic_reg_virt_config->virtualize_x2apic_mode) {
		if (!(ctrl_cpu_rev[1].clr & CPU_VIRT_X2APIC)) {
			printf("VM-execution control \"virtualize x2APIC mode\" NOT supported.\n");
			return CONFIG_TYPE_UNSUPPORTED;
		}
		cpu_exec_ctrl1 |= CPU_VIRT_X2APIC;
	} else {
		cpu_exec_ctrl1 &= ~CPU_VIRT_X2APIC;
	}

	vmcs_write(CPU_EXEC_CTRL0, cpu_exec_ctrl0);
	vmcs_write(CPU_EXEC_CTRL1, cpu_exec_ctrl1);

	if (vmentry_fails_early)
		return CONFIG_TYPE_VMENTRY_FAILS_EARLY;

	return CONFIG_TYPE_GOOD;
}

static bool cpu_has_apicv(void)
{
	return ((ctrl_cpu_rev[1].clr & CPU_APIC_REG_VIRT) &&
		(ctrl_cpu_rev[1].clr & CPU_VINTD) &&
		(ctrl_pin_rev.clr & PIN_POST_INTR));
}

/* Validates APIC register access across valid virtualization configurations. */
static void apic_reg_virt_test(void)
{
	u32 *apic_access_address;
	u32 *virtual_apic_page;
	u64 control;
	u64 cpu_exec_ctrl0 = vmcs_read(CPU_EXEC_CTRL0);
	u64 cpu_exec_ctrl1 = vmcs_read(CPU_EXEC_CTRL1);
	int i;
	struct apic_reg_virt_guest_args *args = &apic_reg_virt_guest_args;

	if (!cpu_has_apicv()) {
		report_skip("%s : Not all required APICv bits supported", __func__);
		return;
	}

	control = cpu_exec_ctrl1;
	control &= ~CPU_VINTD;
	vmcs_write(CPU_EXEC_CTRL1, control);

	test_set_guest(apic_reg_virt_guest);

	/*
	 * From the SDM: The 1-setting of the "virtualize APIC accesses"
	 * VM-execution control is guaranteed to apply only if translations
	 * to the APIC-access address use a 4-KByte page.
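	 * Concretely, once that control applies, an L2 read or write of, say,
	 * offset 0x080 on this page is expected to cause an APIC-access VM
	 * exit whose exit qualification holds the page offset (0x080), rather
	 * than touching the backing memory allocated below.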
6113 */ 6114 apic_access_address = alloc_page(); 6115 force_4k_page(apic_access_address); 6116 vmcs_write(APIC_ACCS_ADDR, virt_to_phys(apic_access_address)); 6117 6118 virtual_apic_page = alloc_page(); 6119 vmcs_write(APIC_VIRT_ADDR, virt_to_phys(virtual_apic_page)); 6120 6121 for (i = 0; i < ARRAY_SIZE(apic_reg_tests); i++) { 6122 struct apic_reg_test *apic_reg_test = &apic_reg_tests[i]; 6123 struct apic_reg_virt_config *apic_reg_virt_config = 6124 &apic_reg_test->apic_reg_virt_config; 6125 enum Config_type config_type; 6126 u32 reg; 6127 6128 printf("--- %s test ---\n", apic_reg_test->name); 6129 config_type = 6130 configure_apic_reg_virt_test(apic_reg_virt_config); 6131 if (config_type == CONFIG_TYPE_UNSUPPORTED) { 6132 printf("Skip because of missing features.\n"); 6133 continue; 6134 } 6135 6136 if (config_type == CONFIG_TYPE_VMENTRY_FAILS_EARLY) { 6137 enter_guest_with_bad_controls(); 6138 continue; 6139 } 6140 6141 for (reg = 0; reg < PAGE_SIZE / sizeof(u32); reg += 0x10) { 6142 struct apic_reg_virt_expectation expectation = {}; 6143 bool ok; 6144 6145 ok = apic_reg_virt_exit_expectation( 6146 reg, apic_reg_virt_config, &expectation); 6147 if (!ok) { 6148 report_fail("Malformed test."); 6149 break; 6150 } 6151 6152 test_xapic_rd(reg, &expectation, apic_access_address, 6153 virtual_apic_page); 6154 test_xapic_wr(reg, &expectation, apic_access_address, 6155 virtual_apic_page); 6156 } 6157 } 6158 6159 /* Terminate the guest */ 6160 vmcs_write(CPU_EXEC_CTRL0, cpu_exec_ctrl0); 6161 vmcs_write(CPU_EXEC_CTRL1, cpu_exec_ctrl1); 6162 args->op = TERMINATE; 6163 enter_guest(); 6164 assert_exit_reason(VMX_VMCALL); 6165 } 6166 6167 struct virt_x2apic_mode_config { 6168 struct apic_reg_virt_config apic_reg_virt_config; 6169 bool virtual_interrupt_delivery; 6170 bool use_msr_bitmaps; 6171 bool disable_x2apic_msr_intercepts; 6172 bool disable_x2apic; 6173 }; 6174 6175 struct virt_x2apic_mode_test_case { 6176 const char *name; 6177 struct virt_x2apic_mode_config virt_x2apic_mode_config; 6178 }; 6179 6180 enum Virt_x2apic_mode_behavior_type { 6181 X2APIC_ACCESS_VIRTUALIZED, 6182 X2APIC_ACCESS_PASSED_THROUGH, 6183 X2APIC_ACCESS_TRIGGERS_GP, 6184 }; 6185 6186 struct virt_x2apic_mode_expectation { 6187 enum Reason rd_exit_reason; 6188 enum Reason wr_exit_reason; 6189 6190 /* 6191 * RDMSR and WRMSR handle 64-bit values. However, except for ICR, all of 6192 * the x2APIC registers are 32 bits. Notice: 6193 * 1. vmx_x2apic_read() clears the upper 32 bits for 32-bit registers. 6194 * 2. vmx_x2apic_write() expects the val arg to be well-formed. 6195 */ 6196 u64 rd_val; 6197 u64 wr_val; 6198 6199 /* 6200 * Compares input to virtualized output; 6201 * 1st arg is pointer to return expected virtualization output. 
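	 * For example, when TPR reads are virtualized, virt_fn is
	 * virt_x2apic_mode_nibble1(), and both the expected and the observed
	 * values are run through it before comparison, so only bits 7:4
	 * participate in the check.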
6202 */ 6203 u64 (*virt_fn)(u64); 6204 6205 enum Virt_x2apic_mode_behavior_type rd_behavior; 6206 enum Virt_x2apic_mode_behavior_type wr_behavior; 6207 bool wr_only; 6208 }; 6209 6210 static u64 virt_x2apic_mode_identity(u64 val) 6211 { 6212 return val; 6213 } 6214 6215 static u64 virt_x2apic_mode_nibble1(u64 val) 6216 { 6217 return val & 0xf0; 6218 } 6219 6220 static void virt_x2apic_mode_rd_expectation( 6221 u32 reg, bool virt_x2apic_mode_on, bool disable_x2apic, 6222 bool apic_register_virtualization, bool virtual_interrupt_delivery, 6223 struct virt_x2apic_mode_expectation *expectation) 6224 { 6225 enum x2apic_reg_semantics semantics = get_x2apic_reg_semantics(reg); 6226 6227 expectation->rd_exit_reason = VMX_VMCALL; 6228 expectation->virt_fn = virt_x2apic_mode_identity; 6229 if (virt_x2apic_mode_on && apic_register_virtualization) { 6230 expectation->rd_val = MAGIC_VAL_1; 6231 if (reg == APIC_PROCPRI && virtual_interrupt_delivery) 6232 expectation->virt_fn = virt_x2apic_mode_nibble1; 6233 else if (reg == APIC_TASKPRI) 6234 expectation->virt_fn = virt_x2apic_mode_nibble1; 6235 expectation->rd_behavior = X2APIC_ACCESS_VIRTUALIZED; 6236 } else if (virt_x2apic_mode_on && !apic_register_virtualization && 6237 reg == APIC_TASKPRI) { 6238 expectation->rd_val = MAGIC_VAL_1; 6239 expectation->virt_fn = virt_x2apic_mode_nibble1; 6240 expectation->rd_behavior = X2APIC_ACCESS_VIRTUALIZED; 6241 } else if (!disable_x2apic && (semantics & X2APIC_READABLE)) { 6242 expectation->rd_val = apic_read(reg); 6243 expectation->rd_behavior = X2APIC_ACCESS_PASSED_THROUGH; 6244 } else { 6245 expectation->rd_behavior = X2APIC_ACCESS_TRIGGERS_GP; 6246 } 6247 } 6248 6249 /* 6250 * get_x2apic_wr_val() creates an innocuous write value for an x2APIC register. 6251 * 6252 * For writable registers, get_x2apic_wr_val() deposits the write value into the 6253 * val pointer arg and returns true. For non-writable registers, val is not 6254 * modified and get_x2apic_wr_val() returns false. 6255 */ 6256 static bool get_x2apic_wr_val(u32 reg, u64 *val) 6257 { 6258 switch (reg) { 6259 case APIC_TASKPRI: 6260 /* Bits 31:8 are reserved. */ 6261 *val &= 0xff; 6262 break; 6263 case APIC_EOI: 6264 case APIC_ESR: 6265 case APIC_TMICT: 6266 /* 6267 * EOI, ESR: WRMSR of a non-zero value causes #GP(0). 6268 * TMICT: A write of 0 to the initial-count register effectively 6269 * stops the local APIC timer, in both one-shot and 6270 * periodic mode. 6271 */ 6272 *val = 0; 6273 break; 6274 case APIC_SPIV: 6275 case APIC_LVTT: 6276 case APIC_LVTTHMR: 6277 case APIC_LVTPC: 6278 case APIC_LVT0: 6279 case APIC_LVT1: 6280 case APIC_LVTERR: 6281 case APIC_TDCR: 6282 /* 6283 * To avoid writing a 1 to a reserved bit or causing some other 6284 * unintended side effect, read the current value and use it as 6285 * the write value. 6286 */ 6287 *val = apic_read(reg); 6288 break; 6289 case APIC_CMCI: 6290 if (!apic_lvt_entry_supported(6)) 6291 return false; 6292 *val = apic_read(reg); 6293 break; 6294 case APIC_ICR: 6295 *val = 0x40000 | 0xf1; 6296 break; 6297 case APIC_SELF_IPI: 6298 /* 6299 * With special processing (i.e., virtualize x2APIC mode + 6300 * virtual interrupt delivery), writing zero causes an 6301 * APIC-write VM exit. We plan to add a test for enabling 6302 * "virtual-interrupt delivery" in VMCS12, and that's where we 6303 * will test a self IPI with special processing. 
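		 * (Illustrative only: such a self IPI would be roughly
		 * WRMSR(0x83f, vector), and with "virtualize x2APIC mode" plus
		 * "virtual-interrupt delivery" this file expects it to surface
		 * as an APIC-write VM exit; see virt_x2apic_mode_wr_expectation().)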
6304 */ 6305 *val = 0x0; 6306 break; 6307 default: 6308 return false; 6309 } 6310 6311 return true; 6312 } 6313 6314 static bool special_processing_applies(u32 reg, u64 *val, 6315 bool virt_int_delivery) 6316 { 6317 bool special_processing = 6318 (reg == APIC_TASKPRI) || 6319 (virt_int_delivery && 6320 (reg == APIC_EOI || reg == APIC_SELF_IPI)); 6321 6322 if (special_processing) { 6323 TEST_ASSERT(get_x2apic_wr_val(reg, val)); 6324 return true; 6325 } 6326 6327 return false; 6328 } 6329 6330 static void virt_x2apic_mode_wr_expectation( 6331 u32 reg, bool virt_x2apic_mode_on, bool disable_x2apic, 6332 bool virt_int_delivery, 6333 struct virt_x2apic_mode_expectation *expectation) 6334 { 6335 expectation->wr_exit_reason = VMX_VMCALL; 6336 expectation->wr_val = MAGIC_VAL_1; 6337 expectation->wr_only = false; 6338 6339 if (virt_x2apic_mode_on && 6340 special_processing_applies(reg, &expectation->wr_val, 6341 virt_int_delivery)) { 6342 expectation->wr_behavior = X2APIC_ACCESS_VIRTUALIZED; 6343 if (reg == APIC_SELF_IPI) 6344 expectation->wr_exit_reason = VMX_APIC_WRITE; 6345 } else if (!disable_x2apic && 6346 get_x2apic_wr_val(reg, &expectation->wr_val)) { 6347 expectation->wr_behavior = X2APIC_ACCESS_PASSED_THROUGH; 6348 if (reg == APIC_EOI || reg == APIC_SELF_IPI) 6349 expectation->wr_only = true; 6350 if (reg == APIC_ICR) 6351 expectation->wr_exit_reason = VMX_EXTINT; 6352 } else { 6353 expectation->wr_behavior = X2APIC_ACCESS_TRIGGERS_GP; 6354 /* 6355 * Writing 1 to a reserved bit triggers a #GP. 6356 * Thus, set the write value to 0, which seems 6357 * the most likely to detect a missed #GP. 6358 */ 6359 expectation->wr_val = 0; 6360 } 6361 } 6362 6363 static void virt_x2apic_mode_exit_expectation( 6364 u32 reg, struct virt_x2apic_mode_config *config, 6365 struct virt_x2apic_mode_expectation *expectation) 6366 { 6367 struct apic_reg_virt_config *base_config = 6368 &config->apic_reg_virt_config; 6369 bool virt_x2apic_mode_on = 6370 base_config->virtualize_x2apic_mode && 6371 config->use_msr_bitmaps && 6372 config->disable_x2apic_msr_intercepts && 6373 base_config->activate_secondary_controls; 6374 6375 virt_x2apic_mode_wr_expectation( 6376 reg, virt_x2apic_mode_on, config->disable_x2apic, 6377 config->virtual_interrupt_delivery, expectation); 6378 virt_x2apic_mode_rd_expectation( 6379 reg, virt_x2apic_mode_on, config->disable_x2apic, 6380 base_config->apic_register_virtualization, 6381 config->virtual_interrupt_delivery, expectation); 6382 } 6383 6384 struct virt_x2apic_mode_test_case virt_x2apic_mode_tests[] = { 6385 /* 6386 * Baseline "virtualize x2APIC mode" configuration: 6387 * - virtualize x2APIC mode 6388 * - virtual-interrupt delivery 6389 * - APIC-register virtualization 6390 * - x2APIC MSR intercepts disabled 6391 * 6392 * Reads come from virtual APIC page, special processing applies to 6393 * VTPR, EOI, and SELF IPI, and all other writes pass through to L1 6394 * APIC. 
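	 * As a concrete example, in the baseline configuration an L2 RDMSR of
	 * 0x808 (TPR) returns the value planted in the virtual-APIC page with
	 * no VM exit, an L2 WRMSR of 0x80b (EOI) is virtualized, and an L2
	 * WRMSR of 0x830 (ICR) passes through and lands in L1's APIC as a
	 * self-IPI.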
6395 */ 6396 { 6397 .name = "Baseline", 6398 .virt_x2apic_mode_config = { 6399 .virtual_interrupt_delivery = true, 6400 .use_msr_bitmaps = true, 6401 .disable_x2apic_msr_intercepts = true, 6402 .disable_x2apic = false, 6403 .apic_reg_virt_config = { 6404 .apic_register_virtualization = true, 6405 .use_tpr_shadow = true, 6406 .virtualize_apic_accesses = false, 6407 .virtualize_x2apic_mode = true, 6408 .activate_secondary_controls = true, 6409 }, 6410 }, 6411 }, 6412 { 6413 .name = "Baseline w/ x2apic disabled", 6414 .virt_x2apic_mode_config = { 6415 .virtual_interrupt_delivery = true, 6416 .use_msr_bitmaps = true, 6417 .disable_x2apic_msr_intercepts = true, 6418 .disable_x2apic = true, 6419 .apic_reg_virt_config = { 6420 .apic_register_virtualization = true, 6421 .use_tpr_shadow = true, 6422 .virtualize_apic_accesses = false, 6423 .virtualize_x2apic_mode = true, 6424 .activate_secondary_controls = true, 6425 }, 6426 }, 6427 }, 6428 6429 /* 6430 * Baseline, minus virtual-interrupt delivery. Reads come from virtual 6431 * APIC page, special processing applies to VTPR, and all other writes 6432 * pass through to L1 APIC. 6433 */ 6434 { 6435 .name = "Baseline - virtual interrupt delivery", 6436 .virt_x2apic_mode_config = { 6437 .virtual_interrupt_delivery = false, 6438 .use_msr_bitmaps = true, 6439 .disable_x2apic_msr_intercepts = true, 6440 .disable_x2apic = false, 6441 .apic_reg_virt_config = { 6442 .apic_register_virtualization = true, 6443 .use_tpr_shadow = true, 6444 .virtualize_apic_accesses = false, 6445 .virtualize_x2apic_mode = true, 6446 .activate_secondary_controls = true, 6447 }, 6448 }, 6449 }, 6450 6451 /* 6452 * Baseline, minus APIC-register virtualization. x2APIC reads pass 6453 * through to L1's APIC, unless reading VTPR 6454 */ 6455 { 6456 .name = "Virtualize x2APIC mode, no APIC reg virt", 6457 .virt_x2apic_mode_config = { 6458 .virtual_interrupt_delivery = true, 6459 .use_msr_bitmaps = true, 6460 .disable_x2apic_msr_intercepts = true, 6461 .disable_x2apic = false, 6462 .apic_reg_virt_config = { 6463 .apic_register_virtualization = false, 6464 .use_tpr_shadow = true, 6465 .virtualize_apic_accesses = false, 6466 .virtualize_x2apic_mode = true, 6467 .activate_secondary_controls = true, 6468 }, 6469 }, 6470 }, 6471 { 6472 .name = "Virtualize x2APIC mode, no APIC reg virt, x2APIC off", 6473 .virt_x2apic_mode_config = { 6474 .virtual_interrupt_delivery = true, 6475 .use_msr_bitmaps = true, 6476 .disable_x2apic_msr_intercepts = true, 6477 .disable_x2apic = true, 6478 .apic_reg_virt_config = { 6479 .apic_register_virtualization = false, 6480 .use_tpr_shadow = true, 6481 .virtualize_apic_accesses = false, 6482 .virtualize_x2apic_mode = true, 6483 .activate_secondary_controls = true, 6484 }, 6485 }, 6486 }, 6487 6488 /* 6489 * Enable "virtualize x2APIC mode" and "APIC-register virtualization", 6490 * and disable intercepts for the x2APIC MSRs, but fail to enable 6491 * "activate secondary controls" (i.e. L2 gets access to L1's x2APIC 6492 * MSRs). 
6493 */ 6494 { 6495 .name = "Fail to enable activate secondary controls", 6496 .virt_x2apic_mode_config = { 6497 .virtual_interrupt_delivery = true, 6498 .use_msr_bitmaps = true, 6499 .disable_x2apic_msr_intercepts = true, 6500 .disable_x2apic = false, 6501 .apic_reg_virt_config = { 6502 .apic_register_virtualization = true, 6503 .use_tpr_shadow = true, 6504 .virtualize_apic_accesses = false, 6505 .virtualize_x2apic_mode = true, 6506 .activate_secondary_controls = false, 6507 }, 6508 }, 6509 }, 6510 6511 /* 6512 * Enable "APIC-register virtualization" and enable "activate secondary 6513 * controls" and disable intercepts for the x2APIC MSRs, but do not 6514 * enable the "virtualize x2APIC mode" VM-execution control (i.e. L2 6515 * gets access to L1's x2APIC MSRs). 6516 */ 6517 { 6518 .name = "Fail to enable virtualize x2APIC mode", 6519 .virt_x2apic_mode_config = { 6520 .virtual_interrupt_delivery = true, 6521 .use_msr_bitmaps = true, 6522 .disable_x2apic_msr_intercepts = true, 6523 .disable_x2apic = false, 6524 .apic_reg_virt_config = { 6525 .apic_register_virtualization = true, 6526 .use_tpr_shadow = true, 6527 .virtualize_apic_accesses = false, 6528 .virtualize_x2apic_mode = false, 6529 .activate_secondary_controls = true, 6530 }, 6531 }, 6532 }, 6533 6534 /* 6535 * Disable "Virtualize x2APIC mode", disable x2APIC MSR intercepts, and 6536 * enable "APIC-register virtualization" --> L2 gets L1's x2APIC MSRs. 6537 */ 6538 { 6539 .name = "Baseline", 6540 .virt_x2apic_mode_config = { 6541 .virtual_interrupt_delivery = true, 6542 .use_msr_bitmaps = true, 6543 .disable_x2apic_msr_intercepts = true, 6544 .disable_x2apic = false, 6545 .apic_reg_virt_config = { 6546 .apic_register_virtualization = true, 6547 .use_tpr_shadow = true, 6548 .virtualize_apic_accesses = false, 6549 .virtualize_x2apic_mode = false, 6550 .activate_secondary_controls = true, 6551 }, 6552 }, 6553 }, 6554 }; 6555 6556 enum X2apic_op { 6557 X2APIC_OP_RD, 6558 X2APIC_OP_WR, 6559 X2APIC_TERMINATE, 6560 }; 6561 6562 static u64 vmx_x2apic_read(u32 reg) 6563 { 6564 u32 msr_addr = x2apic_msr(reg); 6565 u64 val; 6566 6567 val = rdmsr(msr_addr); 6568 6569 return val; 6570 } 6571 6572 static void vmx_x2apic_write(u32 reg, u64 val) 6573 { 6574 u32 msr_addr = x2apic_msr(reg); 6575 6576 wrmsr(msr_addr, val); 6577 } 6578 6579 struct virt_x2apic_mode_guest_args { 6580 enum X2apic_op op; 6581 u32 reg; 6582 u64 val; 6583 bool should_gp; 6584 u64 (*virt_fn)(u64); 6585 } virt_x2apic_mode_guest_args; 6586 6587 static volatile bool handle_x2apic_gp_ran; 6588 static volatile u32 handle_x2apic_gp_insn_len; 6589 static void handle_x2apic_gp(struct ex_regs *regs) 6590 { 6591 handle_x2apic_gp_ran = true; 6592 regs->rip += handle_x2apic_gp_insn_len; 6593 } 6594 6595 static handler setup_x2apic_gp_handler(void) 6596 { 6597 handler old_handler; 6598 6599 old_handler = handle_exception(GP_VECTOR, handle_x2apic_gp); 6600 /* RDMSR and WRMSR are both 2 bytes, assuming no prefixes. */ 6601 handle_x2apic_gp_insn_len = 2; 6602 6603 return old_handler; 6604 } 6605 6606 static void teardown_x2apic_gp_handler(handler old_handler) 6607 { 6608 handle_exception(GP_VECTOR, old_handler); 6609 6610 /* 6611 * Defensively reset instruction length, so that if the handler is 6612 * incorrectly used, it will loop infinitely, rather than run off into 6613 * la la land. 
	 */
	handle_x2apic_gp_insn_len = 0;
	handle_x2apic_gp_ran = false;
}

static void virt_x2apic_mode_guest(void)
{
	volatile struct virt_x2apic_mode_guest_args *args =
		&virt_x2apic_mode_guest_args;

	for (;;) {
		enum X2apic_op op = args->op;
		u32 reg = args->reg;
		u64 val = args->val;
		bool should_gp = args->should_gp;
		u64 (*virt_fn)(u64) = args->virt_fn;
		handler old_handler;

		if (op == X2APIC_TERMINATE)
			break;

		if (should_gp) {
			TEST_ASSERT(!handle_x2apic_gp_ran);
			old_handler = setup_x2apic_gp_handler();
		}

		if (op == X2APIC_OP_RD) {
			u64 ret = vmx_x2apic_read(reg);

			if (!should_gp) {
				u64 want = virt_fn(val);
				u64 got = virt_fn(ret);

				report(got == want,
				       "APIC read; got 0x%lx, want 0x%lx.",
				       got, want);
			}
		} else if (op == X2APIC_OP_WR) {
			vmx_x2apic_write(reg, val);
		}

		if (should_gp) {
			report(handle_x2apic_gp_ran,
			       "x2APIC op triggered GP.");
			teardown_x2apic_gp_handler(old_handler);
		}

		/*
		 * The L1 should always execute a vmcall after it's done testing
		 * an individual APIC operation. This helps to validate that the
		 * L1 and L2 are in sync with each other, as expected.
		 */
		vmcall();
	}
}

static void test_x2apic_rd(
	u32 reg, struct virt_x2apic_mode_expectation *expectation,
	u32 *virtual_apic_page)
{
	u64 val = expectation->rd_val;
	u32 exit_reason_want = expectation->rd_exit_reason;
	struct virt_x2apic_mode_guest_args *args = &virt_x2apic_mode_guest_args;

	report_prefix_pushf("x2apic - reading 0x%03x", reg);

	/* Configure guest to do an x2apic read */
	args->op = X2APIC_OP_RD;
	args->reg = reg;
	args->val = val;
	args->should_gp = expectation->rd_behavior == X2APIC_ACCESS_TRIGGERS_GP;
	args->virt_fn = expectation->virt_fn;

	/* Setup virtual APIC page */
	if (expectation->rd_behavior == X2APIC_ACCESS_VIRTUALIZED)
		virtual_apic_page[apic_reg_index(reg)] = (u32)val;

	/* Enter guest */
	enter_guest();

	if (exit_reason_want != VMX_VMCALL) {
		report_fail("Oops, bad exit expectation: %u.", exit_reason_want);
	}

	skip_exit_vmcall();
	report_prefix_pop();
}

static volatile bool handle_x2apic_ipi_ran;
static void handle_x2apic_ipi(isr_regs_t *regs)
{
	handle_x2apic_ipi_ran = true;
	eoi();
}

static void test_x2apic_wr(
	u32 reg, struct virt_x2apic_mode_expectation *expectation,
	u32 *virtual_apic_page)
{
	u64 val = expectation->wr_val;
	u32 exit_reason_want = expectation->wr_exit_reason;
	struct virt_x2apic_mode_guest_args *args = &virt_x2apic_mode_guest_args;
	int ipi_vector = 0xf1;
	u32 restore_val = 0;

	report_prefix_pushf("x2apic - writing 0x%lx to 0x%03x", val, reg);

	/* Configure guest to do an x2apic write */
	args->op = X2APIC_OP_WR;
	args->reg = reg;
	args->val = val;
	args->should_gp = expectation->wr_behavior == X2APIC_ACCESS_TRIGGERS_GP;

	/* Setup virtual APIC page */
	if (expectation->wr_behavior == X2APIC_ACCESS_VIRTUALIZED)
		virtual_apic_page[apic_reg_index(reg)] = 0;
	if (expectation->wr_behavior == X2APIC_ACCESS_PASSED_THROUGH && !expectation->wr_only)
		restore_val = apic_read(reg);

	/* Setup IPI handler */
	handle_x2apic_ipi_ran = false;
handle_irq(ipi_vector, handle_x2apic_ipi); 6736 6737 /* Enter guest */ 6738 enter_guest(); 6739 6740 /* 6741 * Validate the behavior and 6742 * pass a magic value back to the guest. 6743 */ 6744 if (exit_reason_want == VMX_EXTINT) { 6745 assert_exit_reason(exit_reason_want); 6746 6747 /* Clear the external interrupt. */ 6748 sti_nop_cli(); 6749 report(handle_x2apic_ipi_ran, 6750 "Got pending interrupt after IRQ enabled."); 6751 6752 enter_guest(); 6753 } else if (exit_reason_want == VMX_APIC_WRITE) { 6754 assert_exit_reason(exit_reason_want); 6755 report(virtual_apic_page[apic_reg_index(reg)] == val, 6756 "got APIC write exit @ page offset 0x%03x; val is 0x%x, want 0x%lx", 6757 apic_reg_index(reg), 6758 virtual_apic_page[apic_reg_index(reg)], val); 6759 6760 /* Reenter guest so it can consume/check rcx and exit again. */ 6761 enter_guest(); 6762 } else if (exit_reason_want != VMX_VMCALL) { 6763 report_fail("Oops, bad exit expectation: %u.", exit_reason_want); 6764 } 6765 6766 assert_exit_reason(VMX_VMCALL); 6767 if (expectation->wr_behavior == X2APIC_ACCESS_VIRTUALIZED) { 6768 u64 want = val; 6769 u32 got = virtual_apic_page[apic_reg_index(reg)]; 6770 6771 report(got == want, "x2APIC write; got 0x%x, want 0x%lx", got, 6772 want); 6773 } else if (expectation->wr_behavior == X2APIC_ACCESS_PASSED_THROUGH) { 6774 if (!expectation->wr_only) { 6775 u32 got = apic_read(reg); 6776 bool ok; 6777 6778 /* 6779 * When L1's TPR is passed through to L2, the lower 6780 * nibble can be lost. For example, if L2 executes 6781 * WRMSR(0x808, 0x78), then, L1 might read 0x70. 6782 * 6783 * Here's how the lower nibble can get lost: 6784 * 1. L2 executes WRMSR(0x808, 0x78). 6785 * 2. L2 exits to L0 with a WRMSR exit. 6786 * 3. L0 emulates WRMSR, by writing L1's TPR. 6787 * 4. L0 re-enters L2. 6788 * 5. L2 exits to L0 (reason doesn't matter). 6789 * 6. L0 reflects L2's exit to L1. 6790 * 7. Before entering L1, L0 exits to user-space 6791 * (e.g., to satisfy TPR access reporting). 6792 * 8. User-space executes KVM_SET_REGS ioctl, which 6793 * clears the lower nibble of L1's TPR. 
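			 * E.g. after the guest writes 0x78, a later
			 * apic_read(APIC_TASKPRI) may legitimately return
			 * 0x70, which is why both values are masked with
			 * apic_virt_nibble1() below before comparing.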
6794 */ 6795 if (reg == APIC_TASKPRI) { 6796 got = apic_virt_nibble1(got); 6797 val = apic_virt_nibble1(val); 6798 } 6799 6800 ok = got == val; 6801 report(ok, 6802 "non-virtualized write; val is 0x%x, want 0x%lx", 6803 got, val); 6804 apic_write(reg, restore_val); 6805 } else { 6806 report_pass("non-virtualized and write-only OK"); 6807 } 6808 } 6809 skip_exit_insn(); 6810 6811 report_prefix_pop(); 6812 } 6813 6814 static enum Config_type configure_virt_x2apic_mode_test( 6815 struct virt_x2apic_mode_config *virt_x2apic_mode_config, 6816 u8 *msr_bitmap_page) 6817 { 6818 int msr; 6819 u32 cpu_exec_ctrl0 = vmcs_read(CPU_EXEC_CTRL0); 6820 u64 cpu_exec_ctrl1 = vmcs_read(CPU_EXEC_CTRL1); 6821 6822 /* x2apic-specific VMCS config */ 6823 if (virt_x2apic_mode_config->use_msr_bitmaps) { 6824 /* virt_x2apic_mode_test() checks for MSR bitmaps support */ 6825 cpu_exec_ctrl0 |= CPU_MSR_BITMAP; 6826 } else { 6827 cpu_exec_ctrl0 &= ~CPU_MSR_BITMAP; 6828 } 6829 6830 if (virt_x2apic_mode_config->virtual_interrupt_delivery) { 6831 if (!(ctrl_cpu_rev[1].clr & CPU_VINTD)) { 6832 report_skip("%s : \"virtual-interrupt delivery\" exec control not supported", __func__); 6833 return CONFIG_TYPE_UNSUPPORTED; 6834 } 6835 cpu_exec_ctrl1 |= CPU_VINTD; 6836 } else { 6837 cpu_exec_ctrl1 &= ~CPU_VINTD; 6838 } 6839 6840 vmcs_write(CPU_EXEC_CTRL0, cpu_exec_ctrl0); 6841 vmcs_write(CPU_EXEC_CTRL1, cpu_exec_ctrl1); 6842 6843 /* x2APIC MSR intercepts are usually off for "Virtualize x2APIC mode" */ 6844 for (msr = 0x800; msr <= 0x8ff; msr++) { 6845 if (virt_x2apic_mode_config->disable_x2apic_msr_intercepts) { 6846 clear_bit(msr, msr_bitmap_page + 0x000); 6847 clear_bit(msr, msr_bitmap_page + 0x800); 6848 } else { 6849 set_bit(msr, msr_bitmap_page + 0x000); 6850 set_bit(msr, msr_bitmap_page + 0x800); 6851 } 6852 } 6853 6854 /* x2APIC mode can impact virtualization */ 6855 reset_apic(); 6856 if (!virt_x2apic_mode_config->disable_x2apic) 6857 enable_x2apic(); 6858 6859 return configure_apic_reg_virt_test( 6860 &virt_x2apic_mode_config->apic_reg_virt_config); 6861 } 6862 6863 static void virt_x2apic_mode_test(void) 6864 { 6865 u32 *virtual_apic_page; 6866 u8 *msr_bitmap_page; 6867 u64 cpu_exec_ctrl0 = vmcs_read(CPU_EXEC_CTRL0); 6868 u64 cpu_exec_ctrl1 = vmcs_read(CPU_EXEC_CTRL1); 6869 int i; 6870 struct virt_x2apic_mode_guest_args *args = &virt_x2apic_mode_guest_args; 6871 6872 if (!cpu_has_apicv()) { 6873 report_skip("%s : Not all required APICv bits supported", __func__); 6874 return; 6875 } 6876 6877 /* 6878 * This is to exercise an issue in KVM's logic to merge L0's and L1's 6879 * MSR bitmaps. Previously, an L1 could get at L0's x2APIC MSRs by 6880 * writing the IA32_SPEC_CTRL MSR or the IA32_PRED_CMD MSRs. KVM would 6881 * then proceed to manipulate the MSR bitmaps, as if VMCS12 had the 6882 * "Virtualize x2APIC mod" control set, even when it didn't. 
6883 */ 6884 if (this_cpu_has(X86_FEATURE_SPEC_CTRL)) 6885 wrmsr(MSR_IA32_SPEC_CTRL, 1); 6886 6887 /* 6888 * Check that VMCS12 supports: 6889 * - "Virtual-APIC address", indicated by "use TPR shadow" 6890 * - "MSR-bitmap address", indicated by "use MSR bitmaps" 6891 */ 6892 if (!(ctrl_cpu_rev[0].clr & CPU_TPR_SHADOW)) { 6893 report_skip("%s : \"Use TPR shadow\" exec control not supported", __func__); 6894 return; 6895 } else if (!(ctrl_cpu_rev[0].clr & CPU_MSR_BITMAP)) { 6896 report_skip("%s : \"Use MSR bitmaps\" exec control not supported", __func__); 6897 return; 6898 } 6899 6900 test_set_guest(virt_x2apic_mode_guest); 6901 6902 virtual_apic_page = alloc_page(); 6903 vmcs_write(APIC_VIRT_ADDR, virt_to_phys(virtual_apic_page)); 6904 6905 msr_bitmap_page = alloc_page(); 6906 memset(msr_bitmap_page, 0xff, PAGE_SIZE); 6907 vmcs_write(MSR_BITMAP, virt_to_phys(msr_bitmap_page)); 6908 6909 for (i = 0; i < ARRAY_SIZE(virt_x2apic_mode_tests); i++) { 6910 struct virt_x2apic_mode_test_case *virt_x2apic_mode_test_case = 6911 &virt_x2apic_mode_tests[i]; 6912 struct virt_x2apic_mode_config *virt_x2apic_mode_config = 6913 &virt_x2apic_mode_test_case->virt_x2apic_mode_config; 6914 enum Config_type config_type; 6915 u32 reg; 6916 6917 printf("--- %s test ---\n", virt_x2apic_mode_test_case->name); 6918 config_type = 6919 configure_virt_x2apic_mode_test(virt_x2apic_mode_config, 6920 msr_bitmap_page); 6921 if (config_type == CONFIG_TYPE_UNSUPPORTED) { 6922 report_skip("Skip because of missing features."); 6923 continue; 6924 } else if (config_type == CONFIG_TYPE_VMENTRY_FAILS_EARLY) { 6925 enter_guest_with_bad_controls(); 6926 continue; 6927 } 6928 6929 for (reg = 0; reg < PAGE_SIZE / sizeof(u32); reg += 0x10) { 6930 struct virt_x2apic_mode_expectation expectation; 6931 6932 virt_x2apic_mode_exit_expectation( 6933 reg, virt_x2apic_mode_config, &expectation); 6934 6935 test_x2apic_rd(reg, &expectation, virtual_apic_page); 6936 test_x2apic_wr(reg, &expectation, virtual_apic_page); 6937 } 6938 } 6939 6940 6941 /* Terminate the guest */ 6942 vmcs_write(CPU_EXEC_CTRL0, cpu_exec_ctrl0); 6943 vmcs_write(CPU_EXEC_CTRL1, cpu_exec_ctrl1); 6944 args->op = X2APIC_TERMINATE; 6945 enter_guest(); 6946 assert_exit_reason(VMX_VMCALL); 6947 } 6948 6949 static void test_ctl_reg(const char *cr_name, u64 cr, u64 fixed0, u64 fixed1) 6950 { 6951 u64 val; 6952 u64 cr_saved = vmcs_read(cr); 6953 int i; 6954 6955 val = fixed0 & fixed1; 6956 if (cr == HOST_CR4) 6957 vmcs_write(cr, val | X86_CR4_PAE); 6958 else 6959 vmcs_write(cr, val); 6960 report_prefix_pushf("%s %lx", cr_name, val); 6961 if (val == fixed0) 6962 test_vmx_vmlaunch(0); 6963 else 6964 test_vmx_vmlaunch(VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); 6965 report_prefix_pop(); 6966 6967 for (i = 0; i < 64; i++) { 6968 6969 /* Set a bit when the corresponding bit in fixed1 is 0 */ 6970 if ((fixed1 & (1ull << i)) == 0) { 6971 if (cr == HOST_CR4 && ((1ull << i) & X86_CR4_SMEP || 6972 (1ull << i) & X86_CR4_SMAP)) 6973 continue; 6974 6975 vmcs_write(cr, cr_saved | (1ull << i)); 6976 report_prefix_pushf("%s %llx", cr_name, 6977 cr_saved | (1ull << i)); 6978 test_vmx_vmlaunch( 6979 VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); 6980 report_prefix_pop(); 6981 } 6982 6983 /* Unset a bit when the corresponding bit in fixed0 is 1 */ 6984 if (fixed0 & (1ull << i)) { 6985 vmcs_write(cr, cr_saved & ~(1ull << i)); 6986 report_prefix_pushf("%s %llx", cr_name, 6987 cr_saved & ~(1ull << i)); 6988 test_vmx_vmlaunch( 6989 VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); 6990 report_prefix_pop(); 6991 } 6992 } 6993 
	vmcs_write(cr, cr_saved);
}

/*
 * 1. The CR0 field must not set any bit to a value not supported in VMX
 *    operation.
 * 2. The CR4 field must not set any bit to a value not supported in VMX
 *    operation.
 * 3. On processors that support Intel 64 architecture, the CR3 field must
 *    be such that bits 63:52 and bits in the range 51:32 beyond the
 *    processor's physical-address width are 0.
 *
 * [Intel SDM]
 */
static void test_host_ctl_regs(void)
{
	u64 fixed0, fixed1, cr3, cr3_saved;
	int i;

	/* Test CR0 */
	fixed0 = rdmsr(MSR_IA32_VMX_CR0_FIXED0);
	fixed1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1);
	test_ctl_reg("HOST_CR0", HOST_CR0, fixed0, fixed1);

	/* Test CR4 */
	fixed0 = rdmsr(MSR_IA32_VMX_CR4_FIXED0);
	fixed1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1) &
		 ~(X86_CR4_SMEP | X86_CR4_SMAP);
	test_ctl_reg("HOST_CR4", HOST_CR4, fixed0, fixed1);

	/* Test CR3 */
	cr3_saved = vmcs_read(HOST_CR3);
	for (i = cpuid_maxphyaddr(); i < 64; i++) {
		cr3 = cr3_saved | (1ul << i);
		vmcs_write(HOST_CR3, cr3);
		report_prefix_pushf("HOST_CR3 %lx", cr3);
		test_vmx_vmlaunch(VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
		report_prefix_pop();
	}

	vmcs_write(HOST_CR3, cr3_saved);
}

static void test_efer_vmlaunch(u32 fld, bool ok)
{
	if (fld == HOST_EFER) {
		if (ok)
			test_vmx_vmlaunch(0);
		else
			test_vmx_vmlaunch2(VMXERR_ENTRY_INVALID_CONTROL_FIELD,
					   VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
	} else {
		test_guest_state("EFER test", !ok, GUEST_EFER, "GUEST_EFER");
	}
}

static void test_efer_one(u32 fld, const char * fld_name, u64 efer,
			  u32 ctrl_fld, u64 ctrl,
			  int i, const char *efer_bit_name)
{
	bool ok;

	ok = true;
	if (ctrl_fld == EXI_CONTROLS && (ctrl & EXI_LOAD_EFER)) {
		if (!!(efer & EFER_LMA) != !!(ctrl & EXI_HOST_64))
			ok = false;
		if (!!(efer & EFER_LME) != !!(ctrl & EXI_HOST_64))
			ok = false;
	}
	if (ctrl_fld == ENT_CONTROLS && (ctrl & ENT_LOAD_EFER)) {
		/* Check LMA too since CR0.PG is set. */
		if (!!(efer & EFER_LMA) != !!(ctrl & ENT_GUEST_64))
			ok = false;
		if (!!(efer & EFER_LME) != !!(ctrl & ENT_GUEST_64))
			ok = false;
	}

	/*
	 * Skip the test if it would enter the guest in 32-bit mode.
	 * Perhaps write the test in assembly and make sure it
	 * can be run in either mode?
	 */
	if (fld == GUEST_EFER && ok && !(ctrl & ENT_GUEST_64))
		return;

	vmcs_write(ctrl_fld, ctrl);
	vmcs_write(fld, efer);
	report_prefix_pushf("%s %s bit turned %s, controls %s",
			    fld_name, efer_bit_name,
			    (i & 1) ? "on" : "off",
			    (i & 2) ? "on" : "off");

	test_efer_vmlaunch(fld, ok);
	report_prefix_pop();
}

static void test_efer_bit(u32 fld, const char * fld_name,
			  u32 ctrl_fld, u64 ctrl_bit, u64 efer_bit,
			  const char *efer_bit_name)
{
	u64 efer_saved = vmcs_read(fld);
	u32 ctrl_saved = vmcs_read(ctrl_fld);
	int i;

	for (i = 0; i < 4; i++) {
		u64 efer = efer_saved & ~efer_bit;
		u64 ctrl = ctrl_saved & ~ctrl_bit;

		if (i & 1)
			efer |= efer_bit;
		if (i & 2)
			ctrl |= ctrl_bit;

		test_efer_one(fld, fld_name, efer, ctrl_fld, ctrl,
			      i, efer_bit_name);
	}

	vmcs_write(ctrl_fld, ctrl_saved);
	vmcs_write(fld, efer_saved);
}

static void test_efer(u32 fld, const char * fld_name, u32 ctrl_fld,
		      u64 ctrl_bit1, u64 ctrl_bit2)
{
	u64 efer_saved = vmcs_read(fld);
	u32 ctrl_saved = vmcs_read(ctrl_fld);
	u64 efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
	u64 i;
	u64 efer;

	if (this_cpu_has(X86_FEATURE_NX))
		efer_reserved_bits &= ~EFER_NX;

	if (!ctrl_bit1) {
		report_skip("%s : \"Load-IA32-EFER\" exit control not supported", __func__);
		goto test_entry_exit_mode;
	}

	report_prefix_pushf("%s %lx", fld_name, efer_saved);
	test_efer_vmlaunch(fld, true);
	report_prefix_pop();

	/*
	 * Check reserved bits
	 */
	vmcs_write(ctrl_fld, ctrl_saved & ~ctrl_bit1);
	for (i = 0; i < 64; i++) {
		if ((1ull << i) & efer_reserved_bits) {
			efer = efer_saved | (1ull << i);
			vmcs_write(fld, efer);
			report_prefix_pushf("%s %lx", fld_name, efer);
			test_efer_vmlaunch(fld, true);
			report_prefix_pop();
		}
	}

	vmcs_write(ctrl_fld, ctrl_saved | ctrl_bit1);
	for (i = 0; i < 64; i++) {
		if ((1ull << i) & efer_reserved_bits) {
			efer = efer_saved | (1ull << i);
			vmcs_write(fld, efer);
			report_prefix_pushf("%s %lx", fld_name, efer);
			test_efer_vmlaunch(fld, false);
			report_prefix_pop();
		}
	}

	vmcs_write(ctrl_fld, ctrl_saved);
	vmcs_write(fld, efer_saved);

	/*
	 * Check LMA and LME bits
	 */
	test_efer_bit(fld, fld_name,
		      ctrl_fld, ctrl_bit1,
		      EFER_LMA,
		      "EFER_LMA");
	test_efer_bit(fld, fld_name,
		      ctrl_fld, ctrl_bit1,
		      EFER_LME,
		      "EFER_LME");

test_entry_exit_mode:
	test_efer_bit(fld, fld_name,
		      ctrl_fld, ctrl_bit2,
		      EFER_LMA,
		      "EFER_LMA");
	test_efer_bit(fld, fld_name,
		      ctrl_fld, ctrl_bit2,
		      EFER_LME,
		      "EFER_LME");
}

/*
 * If the 'load IA32_EFER' VM-exit control is 1, bits reserved in the
 * IA32_EFER MSR must be 0 in the field for that register. In addition,
 * the values of the LMA and LME bits in the field must each be that of
 * the 'host address-space size' VM-exit control.
 *
 * [Intel SDM]
 */
static void test_host_efer(void)
{
	test_efer(HOST_EFER, "HOST_EFER", EXI_CONTROLS,
		  ctrl_exit_rev.clr & EXI_LOAD_EFER,
		  EXI_HOST_64);
}

/*
 * If the 'load IA32_EFER' VM-entry control is 1, bits reserved in the
 * IA32_EFER MSR must be 0 in the field for that register. In addition,
 * the values of the LMA and LME bits in the field must each be that of
 * the 'IA-32e mode guest' VM-entry control.
7207 */ 7208 static void test_guest_efer(void) 7209 { 7210 if (!(ctrl_enter_rev.clr & ENT_LOAD_EFER)) { 7211 report_skip("%s : \"Load-IA32-EFER\" entry control not supported", __func__); 7212 return; 7213 } 7214 7215 vmcs_write(GUEST_EFER, rdmsr(MSR_EFER)); 7216 test_efer(GUEST_EFER, "GUEST_EFER", ENT_CONTROLS, 7217 ctrl_enter_rev.clr & ENT_LOAD_EFER, 7218 ENT_GUEST_64); 7219 } 7220 7221 /* 7222 * PAT values higher than 8 are uninteresting since they're likely lumped 7223 * in with "8". We only test values above 8 one bit at a time, 7224 * in order to reduce the number of VM-Entries and keep the runtime reasonable. 7225 */ 7226 #define PAT_VAL_LIMIT 8 7227 7228 static void test_pat(u32 field, const char * field_name, u32 ctrl_field, 7229 u64 ctrl_bit) 7230 { 7231 u32 ctrl_saved = vmcs_read(ctrl_field); 7232 u64 pat_saved = vmcs_read(field); 7233 u64 i, val; 7234 u32 j; 7235 int error; 7236 7237 vmcs_clear_bits(ctrl_field, ctrl_bit); 7238 7239 for (i = 0; i < 256; i = (i < PAT_VAL_LIMIT) ? i + 1 : i * 2) { 7240 /* Test PAT0..PAT7 fields */ 7241 for (j = 0; j < (i ? 8 : 1); j++) { 7242 val = i << j * 8; 7243 vmcs_write(field, val); 7244 if (field == HOST_PAT) { 7245 report_prefix_pushf("%s %lx", field_name, val); 7246 test_vmx_vmlaunch(0); 7247 report_prefix_pop(); 7248 7249 } else { // GUEST_PAT 7250 test_guest_state("ENT_LOAD_PAT enabled", false, 7251 val, "GUEST_PAT"); 7252 } 7253 } 7254 } 7255 7256 vmcs_set_bits(ctrl_field, ctrl_bit); 7257 for (i = 0; i < 256; i = (i < PAT_VAL_LIMIT) ? i + 1 : i * 2) { 7258 /* Test PAT0..PAT7 fields */ 7259 for (j = 0; j < (i ? 8 : 1); j++) { 7260 val = i << j * 8; 7261 vmcs_write(field, val); 7262 7263 if (field == HOST_PAT) { 7264 report_prefix_pushf("%s %lx", field_name, val); 7265 if (i == 0x2 || i == 0x3 || i >= 0x8) 7266 error = 7267 VMXERR_ENTRY_INVALID_HOST_STATE_FIELD; 7268 else 7269 error = 0; 7270 7271 test_vmx_vmlaunch(error); 7272 report_prefix_pop(); 7273 7274 } else { // GUEST_PAT 7275 error = (i == 0x2 || i == 0x3 || i >= 0x8); 7276 test_guest_state("ENT_LOAD_PAT enabled", !!error, 7277 val, "GUEST_PAT"); 7278 } 7279 7280 } 7281 } 7282 7283 vmcs_write(ctrl_field, ctrl_saved); 7284 vmcs_write(field, pat_saved); 7285 } 7286 7287 /* 7288 * If the "load IA32_PAT" VM-exit control is 1, the value of the field 7289 * for the IA32_PAT MSR must be one that could be written by WRMSR 7290 * without fault at CPL 0. Specifically, each of the 8 bytes in the 7291 * field must have one of the values 0 (UC), 1 (WC), 4 (WT), 5 (WP), 7292 * 6 (WB), or 7 (UC-). 
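 * For example (illustration, not SDM text): the power-on PAT value
 * 0x0007040600070406 uses only those encodings and should be accepted,
 * while any byte equal to 02H, 03H, or 08H and above must fail with
 * VMXERR_ENTRY_INVALID_HOST_STATE_FIELD, which is what test_pat()
 * encodes via its i == 0x2 || i == 0x3 || i >= 0x8 check.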
7293 * 7294 * [Intel SDM] 7295 */ 7296 static void test_load_host_pat(void) 7297 { 7298 /* 7299 * "load IA32_PAT" VM-exit control 7300 */ 7301 if (!(ctrl_exit_rev.clr & EXI_LOAD_PAT)) { 7302 report_skip("%s : \"Load-IA32-PAT\" exit control not supported", __func__); 7303 return; 7304 } 7305 7306 test_pat(HOST_PAT, "HOST_PAT", EXI_CONTROLS, EXI_LOAD_PAT); 7307 } 7308 7309 union cpuidA_eax { 7310 struct { 7311 unsigned int version_id:8; 7312 unsigned int num_counters_gp:8; 7313 unsigned int bit_width:8; 7314 unsigned int mask_length:8; 7315 } split; 7316 unsigned int full; 7317 }; 7318 7319 union cpuidA_edx { 7320 struct { 7321 unsigned int num_counters_fixed:5; 7322 unsigned int bit_width_fixed:8; 7323 unsigned int reserved:9; 7324 } split; 7325 unsigned int full; 7326 }; 7327 7328 static bool valid_pgc(u64 val) 7329 { 7330 struct cpuid id; 7331 union cpuidA_eax eax; 7332 union cpuidA_edx edx; 7333 u64 mask; 7334 7335 id = cpuid(0xA); 7336 eax.full = id.a; 7337 edx.full = id.d; 7338 mask = ~(((1ull << eax.split.num_counters_gp) - 1) | 7339 (((1ull << edx.split.num_counters_fixed) - 1) << 32)); 7340 7341 return !(val & mask); 7342 } 7343 7344 static void test_pgc_vmlaunch(u32 xerror, u32 xreason, bool xfail, bool host) 7345 { 7346 u32 inst_err; 7347 u64 obs; 7348 bool success; 7349 struct vmx_state_area_test_data *data = &vmx_state_area_test_data; 7350 7351 if (host) { 7352 success = vmlaunch(); 7353 obs = rdmsr(data->msr); 7354 if (!success) { 7355 inst_err = vmcs_read(VMX_INST_ERROR); 7356 report(xerror == inst_err, "vmlaunch failed, " 7357 "VMX Inst Error is %d (expected %d)", 7358 inst_err, xerror); 7359 } else { 7360 report(!data->enabled || data->exp == obs, 7361 "Host state is 0x%lx (expected 0x%lx)", 7362 obs, data->exp); 7363 report(success != xfail, "vmlaunch succeeded"); 7364 } 7365 } else { 7366 test_guest_state("load GUEST_PERF_GLOBAL_CTRL", xfail, 7367 GUEST_PERF_GLOBAL_CTRL, 7368 "GUEST_PERF_GLOBAL_CTRL"); 7369 } 7370 } 7371 7372 /* 7373 * test_load_perf_global_ctrl is a generic function for testing the 7374 * "load IA32_PERF_GLOBAL_CTRL" VM-{Entry,Exit} controls. This test function 7375 * tests the provided ctrl_val when disabled and enabled. 
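 * When the control is enabled, only bits allowed by CPUID.0AH are
 * accepted (see valid_pgc()); e.g. with 8 general-purpose and 3 fixed
 * counters, 1ull << 32 is a valid value while 1ull << 35 is expected to
 * fail the consistency checks.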
7376 * 7377 * @nr: VMCS field number corresponding to the host/guest state field 7378 * @name: Name of the above VMCS field for printing in test report 7379 * @ctrl_nr: VMCS field number corresponding to the VM-{Entry,Exit} control 7380 * @ctrl_val: Bit to set on the ctrl_field 7381 */ 7382 static void test_perf_global_ctrl(u32 nr, const char *name, u32 ctrl_nr, 7383 const char *ctrl_name, u64 ctrl_val) 7384 { 7385 u64 ctrl_saved = vmcs_read(ctrl_nr); 7386 u64 pgc_saved = vmcs_read(nr); 7387 u64 i, val; 7388 bool host = nr == HOST_PERF_GLOBAL_CTRL; 7389 struct vmx_state_area_test_data *data = &vmx_state_area_test_data; 7390 7391 data->msr = MSR_CORE_PERF_GLOBAL_CTRL; 7392 msr_bmp_init(); 7393 vmcs_write(ctrl_nr, ctrl_saved & ~ctrl_val); 7394 data->enabled = false; 7395 report_prefix_pushf("\"load IA32_PERF_GLOBAL_CTRL\"=0 on %s", 7396 ctrl_name); 7397 7398 for (i = 0; i < 64; i++) { 7399 val = 1ull << i; 7400 vmcs_write(nr, val); 7401 report_prefix_pushf("%s = 0x%lx", name, val); 7402 test_pgc_vmlaunch(0, VMX_VMCALL, false, host); 7403 report_prefix_pop(); 7404 } 7405 report_prefix_pop(); 7406 7407 vmcs_write(ctrl_nr, ctrl_saved | ctrl_val); 7408 data->enabled = true; 7409 report_prefix_pushf("\"load IA32_PERF_GLOBAL_CTRL\"=1 on %s", 7410 ctrl_name); 7411 for (i = 0; i < 64; i++) { 7412 val = 1ull << i; 7413 data->exp = val; 7414 vmcs_write(nr, val); 7415 report_prefix_pushf("%s = 0x%lx", name, val); 7416 if (valid_pgc(val)) { 7417 test_pgc_vmlaunch(0, VMX_VMCALL, false, host); 7418 } else { 7419 if (host) 7420 test_pgc_vmlaunch( 7421 VMXERR_ENTRY_INVALID_HOST_STATE_FIELD, 7422 0, 7423 true, 7424 host); 7425 else 7426 test_pgc_vmlaunch( 7427 0, 7428 VMX_ENTRY_FAILURE | VMX_FAIL_STATE, 7429 true, 7430 host); 7431 } 7432 report_prefix_pop(); 7433 } 7434 7435 data->enabled = false; 7436 report_prefix_pop(); 7437 vmcs_write(ctrl_nr, ctrl_saved); 7438 vmcs_write(nr, pgc_saved); 7439 } 7440 7441 static void test_load_host_perf_global_ctrl(void) 7442 { 7443 if (!this_cpu_has_perf_global_ctrl()) { 7444 report_skip("%s : \"IA32_PERF_GLOBAL_CTRL\" MSR not supported", __func__); 7445 return; 7446 } 7447 7448 if (!(ctrl_exit_rev.clr & EXI_LOAD_PERF)) { 7449 report_skip("%s : \"Load IA32_PERF_GLOBAL_CTRL\" exit control not supported", __func__); 7450 return; 7451 } 7452 7453 test_perf_global_ctrl(HOST_PERF_GLOBAL_CTRL, "HOST_PERF_GLOBAL_CTRL", 7454 EXI_CONTROLS, "EXI_CONTROLS", EXI_LOAD_PERF); 7455 } 7456 7457 7458 static void test_load_guest_perf_global_ctrl(void) 7459 { 7460 if (!this_cpu_has_perf_global_ctrl()) { 7461 report_skip("%s : \"IA32_PERF_GLOBAL_CTRL\" MSR not supported", __func__); 7462 return; 7463 } 7464 7465 if (!(ctrl_enter_rev.clr & ENT_LOAD_PERF)) { 7466 report_skip("%s : \"Load IA32_PERF_GLOBAL_CTRL\" entry control not supported", __func__); 7467 return; 7468 } 7469 7470 test_perf_global_ctrl(GUEST_PERF_GLOBAL_CTRL, "GUEST_PERF_GLOBAL_CTRL", 7471 ENT_CONTROLS, "ENT_CONTROLS", ENT_LOAD_PERF); 7472 } 7473 7474 7475 /* 7476 * test_vmcs_field - test a value for the given VMCS field 7477 * @field: VMCS field 7478 * @field_name: string name of VMCS field 7479 * @bit_start: starting bit 7480 * @bit_end: ending bit 7481 * @val: value that the bit range must or must not contain 7482 * @valid_val: whether value given in 'val' must be valid or not 7483 * @error: expected VMCS error when vmentry fails for an invalid value 7484 */ 7485 static void test_vmcs_field(u64 field, const char *field_name, u32 bit_start, 7486 u32 bit_end, u64 val, bool valid_val, u32 error) 7487 { 7488 u64 field_saved = 
vmcs_read(field);
	u32 i;
	u64 tmp;
	u32 bit_on;
	u64 mask = ~0ull;

	mask = (mask >> bit_end) << bit_end;
	mask = mask | ((1 << bit_start) - 1);
	tmp = (field_saved & mask) | (val << bit_start);

	vmcs_write(field, tmp);
	report_prefix_pushf("%s %lx", field_name, tmp);
	if (valid_val)
		test_vmx_vmlaunch(0);
	else
		test_vmx_vmlaunch(error);
	report_prefix_pop();

	for (i = bit_start; i <= bit_end; i = i + 2) {
		bit_on = ((1ull << i) & (val << bit_start)) ? 0 : 1;
		if (bit_on)
			tmp = field_saved | (1ull << i);
		else
			tmp = field_saved & ~(1ull << i);
		vmcs_write(field, tmp);
		report_prefix_pushf("%s %lx", field_name, tmp);
		if (valid_val)
			test_vmx_vmlaunch(error);
		else
			test_vmx_vmlaunch(0);
		report_prefix_pop();
	}

	vmcs_write(field, field_saved);
}

static void test_canonical(u64 field, const char * field_name, bool host)
{
	u64 addr_saved = vmcs_read(field);

	/*
	 * Use the existing value if possible. Writing a random canonical
	 * value is not an option as doing so would corrupt the field being
	 * tested and likely hose the test.
	 */
	if (is_canonical(addr_saved)) {
		if (host) {
			report_prefix_pushf("%s %lx", field_name, addr_saved);
			test_vmx_vmlaunch(0);
			report_prefix_pop();
		} else {
			test_guest_state("Test canonical address", false,
					 addr_saved, field_name);
		}
	}

	vmcs_write(field, NONCANONICAL);

	if (host) {
		report_prefix_pushf("%s %llx", field_name, NONCANONICAL);
		test_vmx_vmlaunch(VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
		report_prefix_pop();
	} else {
		test_guest_state("Test non-canonical address", true,
				 NONCANONICAL, field_name);
	}

	vmcs_write(field, addr_saved);
}

#define TEST_RPL_TI_FLAGS(reg, name)				\
	test_vmcs_field(reg, name, 0, 2, 0x0, true,		\
			VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);

#define TEST_CS_TR_FLAGS(reg, name)				\
	test_vmcs_field(reg, name, 3, 15, 0x0000, false,	\
			VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);

/*
 * 1. In the selector field for each of CS, SS, DS, ES, FS, GS and TR, the
 *    RPL (bits 1:0) and the TI flag (bit 2) must be 0.
 * 2. The selector fields for CS and TR cannot be 0000H.
 * 3. The selector field for SS cannot be 0000H if the "host address-space
 *    size" VM-exit control is 0.
 * 4. On processors that support Intel 64 architecture, the base-address
 *    fields for FS, GS and TR must contain canonical addresses.
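 *
 * Recall that a selector is (index << 3) | TI | RPL, so e.g. 0x0010
 * (GDT index 2, RPL 0) satisfies check 1, while 0x0013 (RPL 3) or
 * 0x0014 (TI set) must be rejected, and 0x0000 is additionally illegal
 * for CS and TR.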
7574 */ 7575 static void test_host_segment_regs(void) 7576 { 7577 u16 selector_saved; 7578 7579 /* 7580 * Test RPL and TI flags 7581 */ 7582 TEST_RPL_TI_FLAGS(HOST_SEL_CS, "HOST_SEL_CS"); 7583 TEST_RPL_TI_FLAGS(HOST_SEL_SS, "HOST_SEL_SS"); 7584 TEST_RPL_TI_FLAGS(HOST_SEL_DS, "HOST_SEL_DS"); 7585 TEST_RPL_TI_FLAGS(HOST_SEL_ES, "HOST_SEL_ES"); 7586 TEST_RPL_TI_FLAGS(HOST_SEL_FS, "HOST_SEL_FS"); 7587 TEST_RPL_TI_FLAGS(HOST_SEL_GS, "HOST_SEL_GS"); 7588 TEST_RPL_TI_FLAGS(HOST_SEL_TR, "HOST_SEL_TR"); 7589 7590 /* 7591 * Test that CS and TR fields can not be 0x0000 7592 */ 7593 TEST_CS_TR_FLAGS(HOST_SEL_CS, "HOST_SEL_CS"); 7594 TEST_CS_TR_FLAGS(HOST_SEL_TR, "HOST_SEL_TR"); 7595 7596 /* 7597 * SS field can not be 0x0000 if "host address-space size" VM-exit 7598 * control is 0 7599 */ 7600 selector_saved = vmcs_read(HOST_SEL_SS); 7601 vmcs_write(HOST_SEL_SS, 0); 7602 report_prefix_pushf("HOST_SEL_SS 0"); 7603 if (vmcs_read(EXI_CONTROLS) & EXI_HOST_64) { 7604 test_vmx_vmlaunch(0); 7605 } else { 7606 test_vmx_vmlaunch(VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); 7607 } 7608 report_prefix_pop(); 7609 7610 vmcs_write(HOST_SEL_SS, selector_saved); 7611 7612 /* 7613 * Base address for FS, GS and TR must be canonical 7614 */ 7615 test_canonical(HOST_BASE_FS, "HOST_BASE_FS", true); 7616 test_canonical(HOST_BASE_GS, "HOST_BASE_GS", true); 7617 test_canonical(HOST_BASE_TR, "HOST_BASE_TR", true); 7618 } 7619 7620 /* 7621 * On processors that support Intel 64 architecture, the base-address 7622 * fields for GDTR and IDTR must contain canonical addresses. 7623 */ 7624 static void test_host_desc_tables(void) 7625 { 7626 test_canonical(HOST_BASE_GDTR, "HOST_BASE_GDTR", true); 7627 test_canonical(HOST_BASE_IDTR, "HOST_BASE_IDTR", true); 7628 } 7629 7630 /* 7631 * If the "host address-space size" VM-exit control is 0, the following must 7632 * hold: 7633 * - The "IA-32e mode guest" VM-entry control is 0. 7634 * - Bit 17 of the CR4 field (corresponding to CR4.PCIDE) is 0. 7635 * - Bits 63:32 in the RIP field are 0. 7636 * 7637 * If the "host address-space size" VM-exit control is 1, the following must 7638 * hold: 7639 * - Bit 5 of the CR4 field (corresponding to CR4.PAE) is 1. 7640 * - The RIP field contains a canonical address. 
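 *
 * (Illustration: on a CPU with 48 linear-address bits, a kernel-style
 * RIP such as 0xffffffff81000000 is canonical, whereas
 * 0x0000800000000000 is not, since bit 47 is set but bits 63:48 are
 * clear.)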
7641 * 7642 */ 7643 static void test_host_addr_size(void) 7644 { 7645 u64 cr4_saved = vmcs_read(HOST_CR4); 7646 u64 rip_saved = vmcs_read(HOST_RIP); 7647 u64 entry_ctrl_saved = vmcs_read(ENT_CONTROLS); 7648 7649 assert(vmcs_read(EXI_CONTROLS) & EXI_HOST_64); 7650 assert(cr4_saved & X86_CR4_PAE); 7651 7652 vmcs_write(ENT_CONTROLS, entry_ctrl_saved | ENT_GUEST_64); 7653 report_prefix_pushf("\"IA-32e mode guest\" enabled"); 7654 test_vmx_vmlaunch(0); 7655 report_prefix_pop(); 7656 7657 if (this_cpu_has(X86_FEATURE_PCID)) { 7658 vmcs_write(HOST_CR4, cr4_saved | X86_CR4_PCIDE); 7659 report_prefix_pushf("\"CR4.PCIDE\" set"); 7660 test_vmx_vmlaunch(0); 7661 report_prefix_pop(); 7662 } 7663 7664 vmcs_write(HOST_CR4, cr4_saved & ~X86_CR4_PAE); 7665 report_prefix_pushf("\"CR4.PAE\" unset"); 7666 test_vmx_vmlaunch(VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); 7667 vmcs_write(HOST_CR4, cr4_saved); 7668 report_prefix_pop(); 7669 7670 vmcs_write(HOST_RIP, NONCANONICAL); 7671 report_prefix_pushf("HOST_RIP %llx", NONCANONICAL); 7672 test_vmx_vmlaunch_must_fail(VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); 7673 report_prefix_pop(); 7674 7675 vmcs_write(ENT_CONTROLS, entry_ctrl_saved | ENT_GUEST_64); 7676 vmcs_write(HOST_RIP, rip_saved); 7677 vmcs_write(HOST_CR4, cr4_saved); 7678 7679 /* 7680 * Restore host's active CR4 and RIP values by triggering a VM-Exit. 7681 * The original CR4 and RIP values in the VMCS are restored between 7682 * testcases as needed, but don't guarantee a VM-Exit and so the active 7683 * CR4 and RIP may still hold a test value. Running with the test CR4 7684 * and RIP values at some point is unavoidable, and the active values 7685 * are unlikely to affect VM-Enter, so the above doesn't force a VM-exit 7686 * between testcases. Note, if VM-Enter is surrounded by CALL+RET then 7687 * the active RIP will already be restored, but that's also not 7688 * guaranteed, and CR4 needs to be restored regardless. 7689 */ 7690 report_prefix_pushf("restore host state"); 7691 test_vmx_vmlaunch(0); 7692 report_prefix_pop(); 7693 } 7694 7695 /* 7696 * Check that the virtual CPU checks the VMX Host State Area as 7697 * documented in the Intel SDM. 7698 */ 7699 static void vmx_host_state_area_test(void) 7700 { 7701 /* 7702 * Bit 1 of the guest's RFLAGS must be 1, or VM-entry will 7703 * fail due to invalid guest state, should we make it that 7704 * far. 7705 */ 7706 vmcs_write(GUEST_RFLAGS, 0); 7707 7708 test_host_ctl_regs(); 7709 7710 test_canonical(HOST_SYSENTER_ESP, "HOST_SYSENTER_ESP", true); 7711 test_canonical(HOST_SYSENTER_EIP, "HOST_SYSENTER_EIP", true); 7712 7713 test_host_efer(); 7714 test_load_host_pat(); 7715 test_host_segment_regs(); 7716 test_host_desc_tables(); 7717 test_host_addr_size(); 7718 test_load_host_perf_global_ctrl(); 7719 } 7720 7721 /* 7722 * If the "load debug controls" VM-entry control is 1, bits 63:32 in 7723 * the DR7 field must be 0. 
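 *
 * For example (illustrative): with ENT_LOAD_DBGCTLS set, GUEST_DR7 = 1ull << 32
 * must fail the VM-entry checks, while a low bit such as 1ull << 10 must not;
 * the loops below walk every single-bit value to cover both halves.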
7724 * 7725 * [Intel SDM] 7726 */ 7727 static void test_guest_dr7(void) 7728 { 7729 u32 ent_saved = vmcs_read(ENT_CONTROLS); 7730 u64 dr7_saved = vmcs_read(GUEST_DR7); 7731 u64 val; 7732 int i; 7733 7734 if (ctrl_enter_rev.set & ENT_LOAD_DBGCTLS) { 7735 vmcs_clear_bits(ENT_CONTROLS, ENT_LOAD_DBGCTLS); 7736 for (i = 0; i < 64; i++) { 7737 val = 1ull << i; 7738 vmcs_write(GUEST_DR7, val); 7739 test_guest_state("ENT_LOAD_DBGCTLS disabled", false, 7740 val, "GUEST_DR7"); 7741 } 7742 } 7743 if (ctrl_enter_rev.clr & ENT_LOAD_DBGCTLS) { 7744 vmcs_set_bits(ENT_CONTROLS, ENT_LOAD_DBGCTLS); 7745 for (i = 0; i < 64; i++) { 7746 val = 1ull << i; 7747 vmcs_write(GUEST_DR7, val); 7748 test_guest_state("ENT_LOAD_DBGCTLS enabled", i >= 32, 7749 val, "GUEST_DR7"); 7750 } 7751 } 7752 vmcs_write(GUEST_DR7, dr7_saved); 7753 vmcs_write(ENT_CONTROLS, ent_saved); 7754 } 7755 7756 /* 7757 * If the "load IA32_PAT" VM-entry control is 1, the value of the field 7758 * for the IA32_PAT MSR must be one that could be written by WRMSR 7759 * without fault at CPL 0. Specifically, each of the 8 bytes in the 7760 * field must have one of the values 0 (UC), 1 (WC), 4 (WT), 5 (WP), 7761 * 6 (WB), or 7 (UC-). 7762 * 7763 * [Intel SDM] 7764 */ 7765 static void test_load_guest_pat(void) 7766 { 7767 /* 7768 * "load IA32_PAT" VM-entry control 7769 */ 7770 if (!(ctrl_enter_rev.clr & ENT_LOAD_PAT)) { 7771 report_skip("%s : \"Load-IA32-PAT\" entry control not supported", __func__); 7772 return; 7773 } 7774 7775 test_pat(GUEST_PAT, "GUEST_PAT", ENT_CONTROLS, ENT_LOAD_PAT); 7776 } 7777 7778 #define MSR_IA32_BNDCFGS_RSVD_MASK 0x00000ffc 7779 7780 /* 7781 * If the "load IA32_BNDCFGS" VM-entry control is 1, the following 7782 * checks are performed on the field for the IA32_BNDCFGS MSR: 7783 * 7784 * - Bits reserved in the IA32_BNDCFGS MSR must be 0. 7785 * - The linear address in bits 63:12 must be canonical. 
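 *
 * For example (illustrative): with ENT_LOAD_BNDCFGS set, a value with any of
 * bits 11:2 set (MSR_IA32_BNDCFGS_RSVD_MASK above) or with a non-canonical
 * base address must fail VM-entry; with the control clear, the same values
 * are ignored and the guest state is accepted.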
7786 * 7787 * [Intel SDM] 7788 */ 7789 static void test_load_guest_bndcfgs(void) 7790 { 7791 u64 bndcfgs_saved = vmcs_read(GUEST_BNDCFGS); 7792 u64 bndcfgs; 7793 7794 if (!(ctrl_enter_rev.clr & ENT_LOAD_BNDCFGS)) { 7795 report_skip("%s : \"Load-IA32-BNDCFGS\" entry control not supported", __func__); 7796 return; 7797 } 7798 7799 vmcs_clear_bits(ENT_CONTROLS, ENT_LOAD_BNDCFGS); 7800 7801 vmcs_write(GUEST_BNDCFGS, NONCANONICAL); 7802 test_guest_state("ENT_LOAD_BNDCFGS disabled", false, 7803 GUEST_BNDCFGS, "GUEST_BNDCFGS"); 7804 bndcfgs = bndcfgs_saved | MSR_IA32_BNDCFGS_RSVD_MASK; 7805 vmcs_write(GUEST_BNDCFGS, bndcfgs); 7806 test_guest_state("ENT_LOAD_BNDCFGS disabled", false, 7807 GUEST_BNDCFGS, "GUEST_BNDCFGS"); 7808 7809 vmcs_set_bits(ENT_CONTROLS, ENT_LOAD_BNDCFGS); 7810 7811 vmcs_write(GUEST_BNDCFGS, NONCANONICAL); 7812 test_guest_state("ENT_LOAD_BNDCFGS enabled", true, 7813 GUEST_BNDCFGS, "GUEST_BNDCFGS"); 7814 bndcfgs = bndcfgs_saved | MSR_IA32_BNDCFGS_RSVD_MASK; 7815 vmcs_write(GUEST_BNDCFGS, bndcfgs); 7816 test_guest_state("ENT_LOAD_BNDCFGS enabled", true, 7817 GUEST_BNDCFGS, "GUEST_BNDCFGS"); 7818 7819 vmcs_write(GUEST_BNDCFGS, bndcfgs_saved); 7820 } 7821 7822 #define GUEST_SEG_UNUSABLE_MASK (1u << 16) 7823 #define GUEST_SEG_SEL_TI_MASK (1u << 2) 7824 7825 7826 #define TEST_SEGMENT_SEL(test, xfail, sel, val) \ 7827 do { \ 7828 vmcs_write(sel, val); \ 7829 test_guest_state(test " segment", xfail, val, xstr(sel)); \ 7830 } while (0) 7831 7832 #define TEST_INVALID_SEG_SEL(sel, val) \ 7833 TEST_SEGMENT_SEL("Invalid: " xstr(val), true, sel, val); 7834 7835 #define TEST_VALID_SEG_SEL(sel, val) \ 7836 TEST_SEGMENT_SEL("Valid: " xstr(val), false, sel, val); 7837 7838 /* 7839 * The following checks are done on the Selector field of the Guest Segment 7840 * Registers: 7841 * - TR. The TI flag (bit 2) must be 0. 7842 * - LDTR. If LDTR is usable, the TI flag (bit 2) must be 0. 7843 * - SS. If the guest will not be virtual-8086 and the "unrestricted 7844 * guest" VM-execution control is 0, the RPL (bits 1:0) must equal 7845 * the RPL of the selector field for CS. 
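 *
 * For example (illustrative): if GUEST_SEL_CS has RPL 0, a usable GUEST_SEL_SS
 * with RPL 3 must fail the checks unless the "unrestricted guest" control is
 * enabled, which is what the SS cases below toggle.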
7846 * 7847 * [Intel SDM] 7848 */ 7849 static void test_guest_segment_sel_fields(void) 7850 { 7851 u16 sel_saved; 7852 u32 ar_saved; 7853 u32 cpu_ctrl0_saved; 7854 u32 cpu_ctrl1_saved; 7855 u16 cs_rpl_bits; 7856 7857 /* 7858 * Test for GUEST_SEL_TR 7859 */ 7860 sel_saved = vmcs_read(GUEST_SEL_TR); 7861 TEST_INVALID_SEG_SEL(GUEST_SEL_TR, sel_saved | GUEST_SEG_SEL_TI_MASK); 7862 vmcs_write(GUEST_SEL_TR, sel_saved); 7863 7864 /* 7865 * Test for GUEST_SEL_LDTR 7866 */ 7867 sel_saved = vmcs_read(GUEST_SEL_LDTR); 7868 ar_saved = vmcs_read(GUEST_AR_LDTR); 7869 /* LDTR is set unusable */ 7870 vmcs_write(GUEST_AR_LDTR, ar_saved | GUEST_SEG_UNUSABLE_MASK); 7871 TEST_VALID_SEG_SEL(GUEST_SEL_LDTR, sel_saved | GUEST_SEG_SEL_TI_MASK); 7872 TEST_VALID_SEG_SEL(GUEST_SEL_LDTR, sel_saved & ~GUEST_SEG_SEL_TI_MASK); 7873 /* LDTR is set usable */ 7874 vmcs_write(GUEST_AR_LDTR, ar_saved & ~GUEST_SEG_UNUSABLE_MASK); 7875 TEST_INVALID_SEG_SEL(GUEST_SEL_LDTR, sel_saved | GUEST_SEG_SEL_TI_MASK); 7876 7877 TEST_VALID_SEG_SEL(GUEST_SEL_LDTR, sel_saved & ~GUEST_SEG_SEL_TI_MASK); 7878 7879 vmcs_write(GUEST_AR_LDTR, ar_saved); 7880 vmcs_write(GUEST_SEL_LDTR, sel_saved); 7881 7882 /* 7883 * Test for GUEST_SEL_SS 7884 */ 7885 cpu_ctrl0_saved = vmcs_read(CPU_EXEC_CTRL0); 7886 cpu_ctrl1_saved = vmcs_read(CPU_EXEC_CTRL1); 7887 ar_saved = vmcs_read(GUEST_AR_SS); 7888 /* Turn off "unrestricted guest" vm-execution control */ 7889 vmcs_write(CPU_EXEC_CTRL1, cpu_ctrl1_saved & ~CPU_URG); 7890 cs_rpl_bits = vmcs_read(GUEST_SEL_CS) & 0x3; 7891 sel_saved = vmcs_read(GUEST_SEL_SS); 7892 TEST_INVALID_SEG_SEL(GUEST_SEL_SS, ((sel_saved & ~0x3) | (~cs_rpl_bits & 0x3))); 7893 TEST_VALID_SEG_SEL(GUEST_SEL_SS, ((sel_saved & ~0x3) | (cs_rpl_bits & 0x3))); 7894 /* Make SS usable if it's unusable or vice-versa */ 7895 if (ar_saved & GUEST_SEG_UNUSABLE_MASK) 7896 vmcs_write(GUEST_AR_SS, ar_saved & ~GUEST_SEG_UNUSABLE_MASK); 7897 else 7898 vmcs_write(GUEST_AR_SS, ar_saved | GUEST_SEG_UNUSABLE_MASK); 7899 TEST_INVALID_SEG_SEL(GUEST_SEL_SS, ((sel_saved & ~0x3) | (~cs_rpl_bits & 0x3))); 7900 TEST_VALID_SEG_SEL(GUEST_SEL_SS, ((sel_saved & ~0x3) | (cs_rpl_bits & 0x3))); 7901 7902 /* Need a valid EPTP as the passing case fully enters the guest. 
*/ 7903 if (enable_unrestricted_guest(true)) 7904 goto skip_ss_tests; 7905 7906 TEST_VALID_SEG_SEL(GUEST_SEL_SS, ((sel_saved & ~0x3) | (~cs_rpl_bits & 0x3))); 7907 TEST_VALID_SEG_SEL(GUEST_SEL_SS, ((sel_saved & ~0x3) | (cs_rpl_bits & 0x3))); 7908 7909 /* Make SS usable if it's unusable or vice-versa */ 7910 if (vmcs_read(GUEST_AR_SS) & GUEST_SEG_UNUSABLE_MASK) 7911 vmcs_write(GUEST_AR_SS, ar_saved & ~GUEST_SEG_UNUSABLE_MASK); 7912 else 7913 vmcs_write(GUEST_AR_SS, ar_saved | GUEST_SEG_UNUSABLE_MASK); 7914 TEST_VALID_SEG_SEL(GUEST_SEL_SS, ((sel_saved & ~0x3) | (~cs_rpl_bits & 0x3))); 7915 TEST_VALID_SEG_SEL(GUEST_SEL_SS, ((sel_saved & ~0x3) | (cs_rpl_bits & 0x3))); 7916 skip_ss_tests: 7917 7918 vmcs_write(GUEST_AR_SS, ar_saved); 7919 vmcs_write(GUEST_SEL_SS, sel_saved); 7920 vmcs_write(CPU_EXEC_CTRL0, cpu_ctrl0_saved); 7921 vmcs_write(CPU_EXEC_CTRL1, cpu_ctrl1_saved); 7922 } 7923 7924 #define TEST_SEGMENT_BASE_ADDR_UPPER_BITS(xfail, seg_base) \ 7925 do { \ 7926 addr_saved = vmcs_read(seg_base); \ 7927 for (i = 32; i < 63; i = i + 4) { \ 7928 addr = addr_saved | 1ull << i; \ 7929 vmcs_write(seg_base, addr); \ 7930 test_guest_state("seg.BASE[63:32] != 0, usable = " xstr(xfail), \ 7931 xfail, addr, xstr(seg_base)); \ 7932 } \ 7933 vmcs_write(seg_base, addr_saved); \ 7934 } while (0) 7935 7936 #define TEST_SEGMENT_BASE_ADDR_CANONICAL(xfail, seg_base) \ 7937 do { \ 7938 addr_saved = vmcs_read(seg_base); \ 7939 vmcs_write(seg_base, NONCANONICAL); \ 7940 test_guest_state("seg.BASE non-canonical, usable = " xstr(xfail), \ 7941 xfail, NONCANONICAL, xstr(seg_base)); \ 7942 vmcs_write(seg_base, addr_saved); \ 7943 } while (0) 7944 7945 /* 7946 * The following checks are done on the Base Address field of the Guest 7947 * Segment Registers on processors that support Intel 64 architecture: 7948 * - TR, FS, GS : The address must be canonical. 7949 * - LDTR : If LDTR is usable, the address must be canonical. 7950 * - CS : Bits 63:32 of the address must be zero. 7951 * - SS, DS, ES : If the register is usable, bits 63:32 of the address 7952 * must be zero. 7953 * 7954 * [Intel SDM] 7955 */ 7956 static void test_guest_segment_base_addr_fields(void) 7957 { 7958 u64 addr_saved; 7959 u64 addr; 7960 u32 ar_saved; 7961 int i; 7962 7963 /* 7964 * The address of TR, FS, GS and LDTR must be canonical. 
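 * LDTR is only subject to the canonical check while it is usable, so the
 * access-rights toggling below covers both the usable and unusable cases.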
7965 */ 7966 TEST_SEGMENT_BASE_ADDR_CANONICAL(true, GUEST_BASE_TR); 7967 TEST_SEGMENT_BASE_ADDR_CANONICAL(true, GUEST_BASE_FS); 7968 TEST_SEGMENT_BASE_ADDR_CANONICAL(true, GUEST_BASE_GS); 7969 ar_saved = vmcs_read(GUEST_AR_LDTR); 7970 /* Make LDTR unusable */ 7971 vmcs_write(GUEST_AR_LDTR, ar_saved | GUEST_SEG_UNUSABLE_MASK); 7972 TEST_SEGMENT_BASE_ADDR_CANONICAL(false, GUEST_BASE_LDTR); 7973 /* Make LDTR usable */ 7974 vmcs_write(GUEST_AR_LDTR, ar_saved & ~GUEST_SEG_UNUSABLE_MASK); 7975 TEST_SEGMENT_BASE_ADDR_CANONICAL(true, GUEST_BASE_LDTR); 7976 7977 vmcs_write(GUEST_AR_LDTR, ar_saved); 7978 7979 /* 7980 * Bits 63:32 in CS, SS, DS and ES base address must be zero 7981 */ 7982 TEST_SEGMENT_BASE_ADDR_UPPER_BITS(true, GUEST_BASE_CS); 7983 ar_saved = vmcs_read(GUEST_AR_SS); 7984 /* Make SS unusable */ 7985 vmcs_write(GUEST_AR_SS, ar_saved | GUEST_SEG_UNUSABLE_MASK); 7986 TEST_SEGMENT_BASE_ADDR_UPPER_BITS(false, GUEST_BASE_SS); 7987 /* Make SS usable */ 7988 vmcs_write(GUEST_AR_SS, ar_saved & ~GUEST_SEG_UNUSABLE_MASK); 7989 TEST_SEGMENT_BASE_ADDR_UPPER_BITS(true, GUEST_BASE_SS); 7990 vmcs_write(GUEST_AR_SS, ar_saved); 7991 7992 ar_saved = vmcs_read(GUEST_AR_DS); 7993 /* Make DS unusable */ 7994 vmcs_write(GUEST_AR_DS, ar_saved | GUEST_SEG_UNUSABLE_MASK); 7995 TEST_SEGMENT_BASE_ADDR_UPPER_BITS(false, GUEST_BASE_DS); 7996 /* Make DS usable */ 7997 vmcs_write(GUEST_AR_DS, ar_saved & ~GUEST_SEG_UNUSABLE_MASK); 7998 TEST_SEGMENT_BASE_ADDR_UPPER_BITS(true, GUEST_BASE_DS); 7999 vmcs_write(GUEST_AR_DS, ar_saved); 8000 8001 ar_saved = vmcs_read(GUEST_AR_ES); 8002 /* Make ES unusable */ 8003 vmcs_write(GUEST_AR_ES, ar_saved | GUEST_SEG_UNUSABLE_MASK); 8004 TEST_SEGMENT_BASE_ADDR_UPPER_BITS(false, GUEST_BASE_ES); 8005 /* Make ES usable */ 8006 vmcs_write(GUEST_AR_ES, ar_saved & ~GUEST_SEG_UNUSABLE_MASK); 8007 TEST_SEGMENT_BASE_ADDR_UPPER_BITS(true, GUEST_BASE_ES); 8008 vmcs_write(GUEST_AR_ES, ar_saved); 8009 } 8010 8011 /* 8012 * Check that the virtual CPU checks the VMX Guest State Area as 8013 * documented in the Intel SDM. 8014 */ 8015 static void vmx_guest_state_area_test(void) 8016 { 8017 vmx_set_test_stage(1); 8018 test_set_guest(guest_state_test_main); 8019 8020 /* 8021 * The IA32_SYSENTER_ESP field and the IA32_SYSENTER_EIP field 8022 * must each contain a canonical address. 
8023 */ 8024 test_canonical(GUEST_SYSENTER_ESP, "GUEST_SYSENTER_ESP", false); 8025 test_canonical(GUEST_SYSENTER_EIP, "GUEST_SYSENTER_EIP", false); 8026 8027 test_guest_dr7(); 8028 test_load_guest_pat(); 8029 test_guest_efer(); 8030 test_load_guest_perf_global_ctrl(); 8031 test_load_guest_bndcfgs(); 8032 8033 test_guest_segment_sel_fields(); 8034 test_guest_segment_base_addr_fields(); 8035 8036 test_canonical(GUEST_BASE_GDTR, "GUEST_BASE_GDTR", false); 8037 test_canonical(GUEST_BASE_IDTR, "GUEST_BASE_IDTR", false); 8038 8039 u32 guest_desc_limit_saved = vmcs_read(GUEST_LIMIT_GDTR); 8040 int i; 8041 for (i = 16; i <= 31; i++) { 8042 u32 tmp = guest_desc_limit_saved | (1ull << i); 8043 vmcs_write(GUEST_LIMIT_GDTR, tmp); 8044 test_guest_state("GDT.limit > 0xffff", true, tmp, "GUEST_LIMIT_GDTR"); 8045 } 8046 vmcs_write(GUEST_LIMIT_GDTR, guest_desc_limit_saved); 8047 8048 guest_desc_limit_saved = vmcs_read(GUEST_LIMIT_IDTR); 8049 for (i = 16; i <= 31; i++) { 8050 u32 tmp = guest_desc_limit_saved | (1ull << i); 8051 vmcs_write(GUEST_LIMIT_IDTR, tmp); 8052 test_guest_state("IDT.limit > 0xffff", true, tmp, "GUEST_LIMIT_IDTR"); 8053 } 8054 vmcs_write(GUEST_LIMIT_IDTR, guest_desc_limit_saved); 8055 8056 /* 8057 * Let the guest finish execution 8058 */ 8059 vmx_set_test_stage(2); 8060 enter_guest(); 8061 } 8062 8063 extern void unrestricted_guest_main(void); 8064 asm (".code32\n" 8065 "unrestricted_guest_main:\n" 8066 "vmcall\n" 8067 "nop\n" 8068 "mov $1, %edi\n" 8069 "call hypercall\n" 8070 ".code64\n"); 8071 8072 static void setup_unrestricted_guest(void) 8073 { 8074 vmcs_write(GUEST_CR0, vmcs_read(GUEST_CR0) & ~(X86_CR0_PG)); 8075 vmcs_write(ENT_CONTROLS, vmcs_read(ENT_CONTROLS) & ~ENT_GUEST_64); 8076 vmcs_write(GUEST_EFER, vmcs_read(GUEST_EFER) & ~EFER_LMA); 8077 vmcs_write(GUEST_RIP, virt_to_phys(unrestricted_guest_main)); 8078 } 8079 8080 static void unsetup_unrestricted_guest(void) 8081 { 8082 vmcs_write(GUEST_CR0, vmcs_read(GUEST_CR0) | X86_CR0_PG); 8083 vmcs_write(ENT_CONTROLS, vmcs_read(ENT_CONTROLS) | ENT_GUEST_64); 8084 vmcs_write(GUEST_EFER, vmcs_read(GUEST_EFER) | EFER_LMA); 8085 vmcs_write(GUEST_RIP, (u64) phys_to_virt(vmcs_read(GUEST_RIP))); 8086 vmcs_write(GUEST_RSP, (u64) phys_to_virt(vmcs_read(GUEST_RSP))); 8087 } 8088 8089 /* 8090 * If "unrestricted guest" secondary VM-execution control is set, guests 8091 * can run in unpaged protected mode. 8092 */ 8093 static void vmentry_unrestricted_guest_test(void) 8094 { 8095 if (enable_unrestricted_guest(true)) { 8096 report_skip("%s: \"Unrestricted guest\" exec control not supported", __func__); 8097 return; 8098 } 8099 8100 test_set_guest(unrestricted_guest_main); 8101 setup_unrestricted_guest(); 8102 test_guest_state("Unrestricted guest test", false, CPU_URG, "CPU_URG"); 8103 8104 /* 8105 * Let the guest finish execution as a regular guest 8106 */ 8107 unsetup_unrestricted_guest(); 8108 vmcs_write(CPU_EXEC_CTRL1, vmcs_read(CPU_EXEC_CTRL1) & ~CPU_URG); 8109 enter_guest(); 8110 } 8111 8112 static bool valid_vmcs_for_vmentry(void) 8113 { 8114 struct vmcs *current_vmcs = NULL; 8115 8116 if (vmcs_save(¤t_vmcs)) 8117 return false; 8118 8119 return current_vmcs && !current_vmcs->hdr.shadow_vmcs; 8120 } 8121 8122 static void try_vmentry_in_movss_shadow(void) 8123 { 8124 u32 vm_inst_err; 8125 u32 flags; 8126 bool early_failure = false; 8127 u32 expected_flags = X86_EFLAGS_FIXED; 8128 bool valid_vmcs = valid_vmcs_for_vmentry(); 8129 8130 expected_flags |= valid_vmcs ? 
X86_EFLAGS_ZF : X86_EFLAGS_CF; 8131 8132 /* 8133 * Indirectly set VM_INST_ERR to 12 ("VMREAD/VMWRITE from/to 8134 * unsupported VMCS component"). 8135 */ 8136 vmcs_write(~0u, 0); 8137 8138 __asm__ __volatile__ ("mov %[host_rsp], %%edx;" 8139 "vmwrite %%rsp, %%rdx;" 8140 "mov 0f, %%rax;" 8141 "mov %[host_rip], %%edx;" 8142 "vmwrite %%rax, %%rdx;" 8143 "mov $-1, %%ah;" 8144 "sahf;" 8145 "mov %%ss, %%ax;" 8146 "mov %%ax, %%ss;" 8147 "vmlaunch;" 8148 "mov $1, %[early_failure];" 8149 "0: lahf;" 8150 "movzbl %%ah, %[flags]" 8151 : [early_failure] "+r" (early_failure), 8152 [flags] "=&a" (flags) 8153 : [host_rsp] "i" (HOST_RSP), 8154 [host_rip] "i" (HOST_RIP) 8155 : "rdx", "cc", "memory"); 8156 vm_inst_err = vmcs_read(VMX_INST_ERROR); 8157 8158 report(early_failure, "Early VM-entry failure"); 8159 report(flags == expected_flags, "RFLAGS[8:0] is %x (actual %x)", 8160 expected_flags, flags); 8161 if (valid_vmcs) 8162 report(vm_inst_err == VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS, 8163 "VM-instruction error is %d (actual %d)", 8164 VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS, vm_inst_err); 8165 } 8166 8167 static void vmentry_movss_shadow_test(void) 8168 { 8169 struct vmcs *orig_vmcs; 8170 8171 TEST_ASSERT(!vmcs_save(&orig_vmcs)); 8172 8173 /* 8174 * Set the launched flag on the current VMCS to verify the correct 8175 * error priority, below. 8176 */ 8177 test_set_guest(v2_null_test_guest); 8178 enter_guest(); 8179 8180 /* 8181 * With bit 1 of the guest's RFLAGS clear, VM-entry should 8182 * fail due to invalid guest state (if we make it that far). 8183 */ 8184 vmcs_write(GUEST_RFLAGS, 0); 8185 8186 /* 8187 * "VM entry with events blocked by MOV SS" takes precedence over 8188 * "VMLAUNCH with non-clear VMCS." 8189 */ 8190 report_prefix_push("valid current-VMCS"); 8191 try_vmentry_in_movss_shadow(); 8192 report_prefix_pop(); 8193 8194 /* 8195 * VMfailInvalid takes precedence over "VM entry with events 8196 * blocked by MOV SS." 8197 */ 8198 TEST_ASSERT(!vmcs_clear(orig_vmcs)); 8199 report_prefix_push("no current-VMCS"); 8200 try_vmentry_in_movss_shadow(); 8201 report_prefix_pop(); 8202 8203 TEST_ASSERT(!make_vmcs_current(orig_vmcs)); 8204 vmcs_write(GUEST_RFLAGS, X86_EFLAGS_FIXED); 8205 } 8206 8207 static void vmx_ldtr_test_guest(void) 8208 { 8209 u16 ldtr = sldt(); 8210 8211 report(ldtr == NP_SEL, "Expected %x for L2 LDTR selector (got %x)", 8212 NP_SEL, ldtr); 8213 } 8214 8215 /* 8216 * Ensure that the L1 LDTR is set to 0 on VM-exit. 8217 */ 8218 static void vmx_ldtr_test(void) 8219 { 8220 const u8 ldt_ar = 0x82; /* Present LDT */ 8221 u16 sel = FIRST_SPARE_SEL; 8222 8223 /* Set up a non-zero L1 LDTR prior to VM-entry. */ 8224 set_gdt_entry(sel, 0, 0, ldt_ar, 0); 8225 lldt(sel); 8226 8227 test_set_guest(vmx_ldtr_test_guest); 8228 /* 8229 * Set up a different LDTR for L2. The actual GDT contents are 8230 * irrelevant, since we stuff the hidden descriptor state 8231 * straight into the VMCS rather than reading it from the GDT. 8232 */ 8233 vmcs_write(GUEST_SEL_LDTR, NP_SEL); 8234 vmcs_write(GUEST_AR_LDTR, ldt_ar); 8235 enter_guest(); 8236 8237 /* 8238 * VM-exit should clear LDTR (and make it unusable, but we 8239 * won't verify that here). 
8240 */ 8241 sel = sldt(); 8242 report(!sel, "Expected 0 for L1 LDTR selector (got %x)", sel); 8243 } 8244 8245 static void vmx_single_vmcall_guest(void) 8246 { 8247 vmcall(); 8248 } 8249 8250 static void vmx_cr_load_test(void) 8251 { 8252 unsigned long cr3, cr4, orig_cr3, orig_cr4; 8253 u32 ctrls[2] = {0}; 8254 pgd_t *pml5; 8255 8256 orig_cr4 = read_cr4(); 8257 orig_cr3 = read_cr3(); 8258 8259 if (!this_cpu_has(X86_FEATURE_PCID)) { 8260 report_skip("%s : PCID not detected", __func__); 8261 return; 8262 } 8263 if (!this_cpu_has(X86_FEATURE_MCE)) { 8264 report_skip("%s : MCE not detected", __func__); 8265 return; 8266 } 8267 8268 TEST_ASSERT(!(orig_cr3 & X86_CR3_PCID_MASK)); 8269 8270 /* Enable PCID for L1. */ 8271 cr4 = orig_cr4 | X86_CR4_PCIDE; 8272 cr3 = orig_cr3 | 0x1; 8273 TEST_ASSERT(!write_cr4_safe(cr4)); 8274 write_cr3(cr3); 8275 8276 test_set_guest(vmx_single_vmcall_guest); 8277 vmcs_write(HOST_CR4, cr4); 8278 vmcs_write(HOST_CR3, cr3); 8279 enter_guest(); 8280 8281 /* 8282 * No exception is expected. 8283 * 8284 * NB. KVM loads the last guest write to CR4 into CR4 read 8285 * shadow. In order to trigger an exit to KVM, we can toggle a 8286 * bit that is owned by KVM. We use CR4.MCE, which shall 8287 * have no side effect because normally no guest MCE (e.g., as the 8288 * result of bad memory) would happen during this test. 8289 */ 8290 TEST_ASSERT(!write_cr4_safe(cr4 ^ X86_CR4_MCE)); 8291 8292 /* Cleanup L1 state. */ 8293 write_cr3(orig_cr3); 8294 TEST_ASSERT(!write_cr4_safe(orig_cr4)); 8295 8296 if (!this_cpu_has(X86_FEATURE_LA57)) 8297 goto done; 8298 8299 /* 8300 * Allocate a full page for PML5 to guarantee alignment, though only 8301 * the first entry needs to be filled (the test's virtual addresses 8302 * most definitely do not have any of bits 56:48 set). 8303 */ 8304 pml5 = alloc_page(); 8305 *pml5 = orig_cr3 | PT_PRESENT_MASK | PT_WRITABLE_MASK; 8306 8307 /* 8308 * Transition to/from 5-level paging in the host via VM-Exit. CR4.LA57 8309 * can't be toggled while long is active via MOV CR4, but there are no 8310 * such restrictions on VM-Exit. 8311 */ 8312 lol_5level: 8313 vmcs_write(HOST_CR4, orig_cr4 | X86_CR4_LA57); 8314 vmcs_write(HOST_CR3, virt_to_phys(pml5)); 8315 enter_guest(); 8316 8317 /* 8318 * VMREAD with a memory operand to verify KVM detects the LA57 change, 8319 * e.g. uses the correct guest root level in gva_to_gpa(). 8320 */ 8321 TEST_ASSERT(vmcs_readm(HOST_CR3) == virt_to_phys(pml5)); 8322 TEST_ASSERT(vmcs_readm(HOST_CR4) == (orig_cr4 | X86_CR4_LA57)); 8323 8324 vmcs_write(HOST_CR4, orig_cr4); 8325 vmcs_write(HOST_CR3, orig_cr3); 8326 enter_guest(); 8327 8328 TEST_ASSERT(vmcs_readm(HOST_CR3) == orig_cr3); 8329 TEST_ASSERT(vmcs_readm(HOST_CR4) == orig_cr4); 8330 8331 /* 8332 * And now do the same LA57 shenanigans with EPT enabled. KVM uses 8333 * two separate MMUs when L1 uses TDP, whereas the above shadow paging 8334 * version shares an MMU between L1 and L2. 8335 * 8336 * If the saved execution controls are non-zero then the EPT version 8337 * has already run. In that case, restore the old controls. If EPT 8338 * setup fails, e.g. EPT isn't supported, fall through and finish up. 
8339 */ 8340 if (ctrls[0]) { 8341 vmcs_write(CPU_EXEC_CTRL0, ctrls[0]); 8342 vmcs_write(CPU_EXEC_CTRL1, ctrls[1]); 8343 } else if (!setup_ept(false)) { 8344 ctrls[0] = vmcs_read(CPU_EXEC_CTRL0); 8345 ctrls[1] = vmcs_read(CPU_EXEC_CTRL1); 8346 goto lol_5level; 8347 } 8348 8349 free_page(pml5); 8350 8351 done: 8352 skip_exit_vmcall(); 8353 enter_guest(); 8354 } 8355 8356 static void vmx_cr4_osxsave_test_guest(void) 8357 { 8358 write_cr4(read_cr4() & ~X86_CR4_OSXSAVE); 8359 } 8360 8361 /* 8362 * Ensure that kvm recalculates the L1 guest's CPUID.01H:ECX.OSXSAVE 8363 * after VM-exit from an L2 guest that sets CR4.OSXSAVE to a different 8364 * value than in L1. 8365 */ 8366 static void vmx_cr4_osxsave_test(void) 8367 { 8368 if (!this_cpu_has(X86_FEATURE_XSAVE)) { 8369 report_skip("%s : XSAVE not detected", __func__); 8370 return; 8371 } 8372 8373 if (!(read_cr4() & X86_CR4_OSXSAVE)) { 8374 unsigned long cr4 = read_cr4() | X86_CR4_OSXSAVE; 8375 8376 write_cr4(cr4); 8377 vmcs_write(GUEST_CR4, cr4); 8378 vmcs_write(HOST_CR4, cr4); 8379 } 8380 8381 TEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); 8382 8383 test_set_guest(vmx_cr4_osxsave_test_guest); 8384 enter_guest(); 8385 8386 TEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); 8387 } 8388 8389 /* 8390 * FNOP with both CR0.TS and CR0.EM clear should not generate #NM, and the L2 8391 * guest should exit normally. 8392 */ 8393 static void vmx_no_nm_test(void) 8394 { 8395 test_set_guest(fnop); 8396 vmcs_write(GUEST_CR0, read_cr0() & ~(X86_CR0_TS | X86_CR0_EM)); 8397 enter_guest(); 8398 } 8399 8400 bool vmx_pending_event_ipi_fired; 8401 static void vmx_pending_event_ipi_isr(isr_regs_t *regs) 8402 { 8403 vmx_pending_event_ipi_fired = true; 8404 eoi(); 8405 } 8406 8407 bool vmx_pending_event_guest_run; 8408 static void vmx_pending_event_guest(void) 8409 { 8410 vmcall(); 8411 vmx_pending_event_guest_run = true; 8412 } 8413 8414 static void vmx_pending_event_test_core(bool guest_hlt) 8415 { 8416 int ipi_vector = 0xf1; 8417 8418 vmx_pending_event_ipi_fired = false; 8419 handle_irq(ipi_vector, vmx_pending_event_ipi_isr); 8420 8421 vmx_pending_event_guest_run = false; 8422 test_set_guest(vmx_pending_event_guest); 8423 8424 vmcs_set_bits(PIN_CONTROLS, PIN_EXTINT); 8425 8426 enter_guest(); 8427 skip_exit_vmcall(); 8428 8429 if (guest_hlt) 8430 vmcs_write(GUEST_ACTV_STATE, ACTV_HLT); 8431 8432 cli(); 8433 apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL | 8434 APIC_DM_FIXED | ipi_vector, 8435 0); 8436 8437 enter_guest(); 8438 8439 assert_exit_reason(VMX_EXTINT); 8440 report(!vmx_pending_event_guest_run, 8441 "Guest did not run before host received IPI"); 8442 8443 sti_nop_cli(); 8444 report(vmx_pending_event_ipi_fired, 8445 "Got pending interrupt after IRQ enabled"); 8446 8447 if (guest_hlt) 8448 vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE); 8449 8450 enter_guest(); 8451 report(vmx_pending_event_guest_run, 8452 "Guest finished running when no interrupt"); 8453 } 8454 8455 static void vmx_pending_event_test(void) 8456 { 8457 vmx_pending_event_test_core(false); 8458 } 8459 8460 static void vmx_pending_event_hlt_test(void) 8461 { 8462 vmx_pending_event_test_core(true); 8463 } 8464 8465 static int vmx_window_test_db_count; 8466 8467 static void vmx_window_test_db_handler(struct ex_regs *regs) 8468 { 8469 vmx_window_test_db_count++; 8470 } 8471 8472 static void vmx_nmi_window_test_guest(void) 8473 { 8474 handle_exception(DB_VECTOR, vmx_window_test_db_handler); 8475 8476 asm volatile("vmcall\n\t" 8477 "nop\n\t"); 8478 8479 handle_exception(DB_VECTOR, NULL); 8480 } 8481 
8482 static void verify_nmi_window_exit(u64 rip) 8483 { 8484 u32 exit_reason = vmcs_read(EXI_REASON); 8485 8486 report(exit_reason == VMX_NMI_WINDOW, 8487 "Exit reason (%d) is 'NMI window'", exit_reason); 8488 report(vmcs_read(GUEST_RIP) == rip, "RIP (%#lx) is %#lx", 8489 vmcs_read(GUEST_RIP), rip); 8490 vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE); 8491 } 8492 8493 static void vmx_nmi_window_test(void) 8494 { 8495 u64 nop_addr; 8496 void *db_fault_addr = get_idt_addr(&boot_idt[DB_VECTOR]); 8497 8498 if (!(ctrl_pin_rev.clr & PIN_VIRT_NMI)) { 8499 report_skip("%s : \"Virtual NMIs\" exec control not supported", __func__); 8500 return; 8501 } 8502 8503 if (!(ctrl_cpu_rev[0].clr & CPU_NMI_WINDOW)) { 8504 report_skip("%s : \"NMI-window exiting\" exec control not supported", __func__); 8505 return; 8506 } 8507 8508 vmx_window_test_db_count = 0; 8509 8510 report_prefix_push("NMI-window"); 8511 test_set_guest(vmx_nmi_window_test_guest); 8512 vmcs_set_bits(PIN_CONTROLS, PIN_VIRT_NMI); 8513 enter_guest(); 8514 skip_exit_vmcall(); 8515 nop_addr = vmcs_read(GUEST_RIP); 8516 8517 /* 8518 * Ask for "NMI-window exiting," and expect an immediate VM-exit. 8519 * RIP will not advance. 8520 */ 8521 report_prefix_push("active, no blocking"); 8522 vmcs_set_bits(CPU_EXEC_CTRL0, CPU_NMI_WINDOW); 8523 enter_guest(); 8524 verify_nmi_window_exit(nop_addr); 8525 report_prefix_pop(); 8526 8527 /* 8528 * Ask for "NMI-window exiting" in a MOV-SS shadow, and expect 8529 * a VM-exit on the next instruction after the nop. (The nop 8530 * is one byte.) 8531 */ 8532 report_prefix_push("active, blocking by MOV-SS"); 8533 vmcs_write(GUEST_INTR_STATE, GUEST_INTR_STATE_MOVSS); 8534 enter_guest(); 8535 verify_nmi_window_exit(nop_addr + 1); 8536 report_prefix_pop(); 8537 8538 /* 8539 * Ask for "NMI-window exiting" (with event injection), and 8540 * expect a VM-exit after the event is injected. (RIP should 8541 * be at the address specified in the IDT entry for #DB.) 8542 */ 8543 report_prefix_push("active, no blocking, injecting #DB"); 8544 vmcs_write(ENT_INTR_INFO, 8545 INTR_INFO_VALID_MASK | INTR_TYPE_HARD_EXCEPTION | DB_VECTOR); 8546 enter_guest(); 8547 verify_nmi_window_exit((u64)db_fault_addr); 8548 report_prefix_pop(); 8549 8550 /* 8551 * Ask for "NMI-window exiting" with NMI blocking, and expect 8552 * a VM-exit after the next IRET (i.e. after the #DB handler 8553 * returns). So, RIP should be back at one byte past the nop. 8554 */ 8555 report_prefix_push("active, blocking by NMI"); 8556 vmcs_write(GUEST_INTR_STATE, GUEST_INTR_STATE_NMI); 8557 enter_guest(); 8558 verify_nmi_window_exit(nop_addr + 1); 8559 report(vmx_window_test_db_count == 1, 8560 "#DB handler executed once (actual %d times)", 8561 vmx_window_test_db_count); 8562 report_prefix_pop(); 8563 8564 if (!(rdmsr(MSR_IA32_VMX_MISC) & (1 << 6))) { 8565 report_skip("CPU does not support activity state HLT."); 8566 } else { 8567 /* 8568 * Ask for "NMI-window exiting" when entering activity 8569 * state HLT, and expect an immediate VM-exit. RIP is 8570 * still one byte past the nop. 8571 */ 8572 report_prefix_push("halted, no blocking"); 8573 vmcs_write(GUEST_ACTV_STATE, ACTV_HLT); 8574 enter_guest(); 8575 verify_nmi_window_exit(nop_addr + 1); 8576 report_prefix_pop(); 8577 8578 /* 8579 * Ask for "NMI-window exiting" when entering activity 8580 * state HLT (with event injection), and expect a 8581 * VM-exit after the event is injected. (RIP should be 8582 * at the address specified in the IDT entry for #DB.) 
8583 */ 8584 report_prefix_push("halted, no blocking, injecting #DB"); 8585 vmcs_write(GUEST_ACTV_STATE, ACTV_HLT); 8586 vmcs_write(ENT_INTR_INFO, 8587 INTR_INFO_VALID_MASK | INTR_TYPE_HARD_EXCEPTION | 8588 DB_VECTOR); 8589 enter_guest(); 8590 verify_nmi_window_exit((u64)db_fault_addr); 8591 report_prefix_pop(); 8592 } 8593 8594 vmcs_clear_bits(CPU_EXEC_CTRL0, CPU_NMI_WINDOW); 8595 enter_guest(); 8596 report_prefix_pop(); 8597 } 8598 8599 static void vmx_intr_window_test_guest(void) 8600 { 8601 handle_exception(DB_VECTOR, vmx_window_test_db_handler); 8602 8603 /* 8604 * The two consecutive STIs are to ensure that only the first 8605 * one has a shadow. Note that NOP and STI are one byte 8606 * instructions. 8607 */ 8608 asm volatile("vmcall\n\t" 8609 "nop\n\t" 8610 "sti\n\t" 8611 "sti\n\t"); 8612 8613 handle_exception(DB_VECTOR, NULL); 8614 } 8615 8616 static void verify_intr_window_exit(u64 rip) 8617 { 8618 u32 exit_reason = vmcs_read(EXI_REASON); 8619 8620 report(exit_reason == VMX_INTR_WINDOW, 8621 "Exit reason (%d) is 'interrupt window'", exit_reason); 8622 report(vmcs_read(GUEST_RIP) == rip, "RIP (%#lx) is %#lx", 8623 vmcs_read(GUEST_RIP), rip); 8624 vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE); 8625 } 8626 8627 static void vmx_intr_window_test(void) 8628 { 8629 u64 vmcall_addr; 8630 u64 nop_addr; 8631 unsigned int orig_db_gate_type; 8632 void *db_fault_addr = get_idt_addr(&boot_idt[DB_VECTOR]); 8633 8634 if (!(ctrl_cpu_rev[0].clr & CPU_INTR_WINDOW)) { 8635 report_skip("%s : \"Interrupt-window exiting\" exec control not supported", __func__); 8636 return; 8637 } 8638 8639 /* 8640 * Change the IDT entry for #DB from interrupt gate to trap gate, 8641 * so that it won't clear RFLAGS.IF. We don't want interrupts to 8642 * be disabled after vectoring a #DB. 8643 */ 8644 orig_db_gate_type = boot_idt[DB_VECTOR].type; 8645 boot_idt[DB_VECTOR].type = 15; 8646 8647 report_prefix_push("interrupt-window"); 8648 test_set_guest(vmx_intr_window_test_guest); 8649 enter_guest(); 8650 assert_exit_reason(VMX_VMCALL); 8651 vmcall_addr = vmcs_read(GUEST_RIP); 8652 8653 /* 8654 * Ask for "interrupt-window exiting" with RFLAGS.IF set and 8655 * no blocking; expect an immediate VM-exit. Note that we have 8656 * not advanced past the vmcall instruction yet, so RIP should 8657 * point to the vmcall instruction. 8658 */ 8659 report_prefix_push("active, no blocking, RFLAGS.IF=1"); 8660 vmcs_set_bits(CPU_EXEC_CTRL0, CPU_INTR_WINDOW); 8661 vmcs_write(GUEST_RFLAGS, X86_EFLAGS_FIXED | X86_EFLAGS_IF); 8662 enter_guest(); 8663 verify_intr_window_exit(vmcall_addr); 8664 report_prefix_pop(); 8665 8666 /* 8667 * Ask for "interrupt-window exiting" (with event injection) 8668 * with RFLAGS.IF set and no blocking; expect a VM-exit after 8669 * the event is injected. That is, RIP should should be at the 8670 * address specified in the IDT entry for #DB. 8671 */ 8672 report_prefix_push("active, no blocking, RFLAGS.IF=1, injecting #DB"); 8673 vmcs_write(ENT_INTR_INFO, 8674 INTR_INFO_VALID_MASK | INTR_TYPE_HARD_EXCEPTION | DB_VECTOR); 8675 vmcall_addr = vmcs_read(GUEST_RIP); 8676 enter_guest(); 8677 verify_intr_window_exit((u64)db_fault_addr); 8678 report_prefix_pop(); 8679 8680 /* 8681 * Let the L2 guest run through the IRET, back to the VMCALL. 8682 * We have to clear the "interrupt-window exiting" 8683 * VM-execution control, or it would just keep causing 8684 * VM-exits. Then, advance past the VMCALL and set the 8685 * "interrupt-window exiting" VM-execution control again. 
8686 */ 8687 vmcs_clear_bits(CPU_EXEC_CTRL0, CPU_INTR_WINDOW); 8688 enter_guest(); 8689 skip_exit_vmcall(); 8690 nop_addr = vmcs_read(GUEST_RIP); 8691 vmcs_set_bits(CPU_EXEC_CTRL0, CPU_INTR_WINDOW); 8692 8693 /* 8694 * Ask for "interrupt-window exiting" in a MOV-SS shadow with 8695 * RFLAGS.IF set, and expect a VM-exit on the next 8696 * instruction. (NOP is one byte.) 8697 */ 8698 report_prefix_push("active, blocking by MOV-SS, RFLAGS.IF=1"); 8699 vmcs_write(GUEST_INTR_STATE, GUEST_INTR_STATE_MOVSS); 8700 enter_guest(); 8701 verify_intr_window_exit(nop_addr + 1); 8702 report_prefix_pop(); 8703 8704 /* 8705 * Back up to the NOP and ask for "interrupt-window exiting" 8706 * in an STI shadow with RFLAGS.IF set, and expect a VM-exit 8707 * on the next instruction. (NOP is one byte.) 8708 */ 8709 report_prefix_push("active, blocking by STI, RFLAGS.IF=1"); 8710 vmcs_write(GUEST_RIP, nop_addr); 8711 vmcs_write(GUEST_INTR_STATE, GUEST_INTR_STATE_STI); 8712 enter_guest(); 8713 verify_intr_window_exit(nop_addr + 1); 8714 report_prefix_pop(); 8715 8716 /* 8717 * Ask for "interrupt-window exiting" with RFLAGS.IF clear, 8718 * and expect a VM-exit on the instruction following the STI 8719 * shadow. Only the first STI (which is one byte past the NOP) 8720 * should have a shadow. The second STI (which is two bytes 8721 * past the NOP) has no shadow. Therefore, the interrupt 8722 * window opens at three bytes past the NOP. 8723 */ 8724 report_prefix_push("active, RFLAGS.IF = 0"); 8725 vmcs_write(GUEST_RFLAGS, X86_EFLAGS_FIXED); 8726 enter_guest(); 8727 verify_intr_window_exit(nop_addr + 3); 8728 report_prefix_pop(); 8729 8730 if (!(rdmsr(MSR_IA32_VMX_MISC) & (1 << 6))) { 8731 report_skip("CPU does not support activity state HLT."); 8732 } else { 8733 /* 8734 * Ask for "interrupt-window exiting" when entering 8735 * activity state HLT, and expect an immediate 8736 * VM-exit. RIP is still three bytes past the nop. 8737 */ 8738 report_prefix_push("halted, no blocking"); 8739 vmcs_write(GUEST_ACTV_STATE, ACTV_HLT); 8740 enter_guest(); 8741 verify_intr_window_exit(nop_addr + 3); 8742 report_prefix_pop(); 8743 8744 /* 8745 * Ask for "interrupt-window exiting" when entering 8746 * activity state HLT (with event injection), and 8747 * expect a VM-exit after the event is injected. That 8748 * is, RIP should should be at the address specified 8749 * in the IDT entry for #DB. 8750 */ 8751 report_prefix_push("halted, no blocking, injecting #DB"); 8752 vmcs_write(GUEST_ACTV_STATE, ACTV_HLT); 8753 vmcs_write(ENT_INTR_INFO, 8754 INTR_INFO_VALID_MASK | INTR_TYPE_HARD_EXCEPTION | 8755 DB_VECTOR); 8756 enter_guest(); 8757 verify_intr_window_exit((u64)db_fault_addr); 8758 report_prefix_pop(); 8759 } 8760 8761 boot_idt[DB_VECTOR].type = orig_db_gate_type; 8762 vmcs_clear_bits(CPU_EXEC_CTRL0, CPU_INTR_WINDOW); 8763 enter_guest(); 8764 report_prefix_pop(); 8765 } 8766 8767 #define GUEST_TSC_OFFSET (1u << 30) 8768 8769 static u64 guest_tsc; 8770 8771 static void vmx_store_tsc_test_guest(void) 8772 { 8773 guest_tsc = rdtsc(); 8774 } 8775 8776 /* 8777 * This test ensures that when IA32_TSC is in the VM-exit MSR-store 8778 * list, the value saved is not subject to the TSC offset that is 8779 * applied to RDTSC/RDTSCP/RDMSR(IA32_TSC) in guest execution. 
8780 */ 8781 static void vmx_store_tsc_test(void) 8782 { 8783 struct vmx_msr_entry msr_entry = { .index = MSR_IA32_TSC }; 8784 u64 low, high; 8785 8786 if (!(ctrl_cpu_rev[0].clr & CPU_USE_TSC_OFFSET)) { 8787 report_skip("%s : \"Use TSC offsetting\" exec control not supported", __func__); 8788 return; 8789 } 8790 8791 test_set_guest(vmx_store_tsc_test_guest); 8792 8793 vmcs_set_bits(CPU_EXEC_CTRL0, CPU_USE_TSC_OFFSET); 8794 vmcs_write(EXI_MSR_ST_CNT, 1); 8795 vmcs_write(EXIT_MSR_ST_ADDR, virt_to_phys(&msr_entry)); 8796 vmcs_write(TSC_OFFSET, GUEST_TSC_OFFSET); 8797 8798 low = rdtsc(); 8799 enter_guest(); 8800 high = rdtsc(); 8801 8802 report(low + GUEST_TSC_OFFSET <= guest_tsc && 8803 guest_tsc <= high + GUEST_TSC_OFFSET, 8804 "RDTSC value in the guest (%lu) is in range [%lu, %lu]", 8805 guest_tsc, low + GUEST_TSC_OFFSET, high + GUEST_TSC_OFFSET); 8806 report(low <= msr_entry.value && msr_entry.value <= high, 8807 "IA32_TSC value saved in the VM-exit MSR-store list (%lu) is in range [%lu, %lu]", 8808 msr_entry.value, low, high); 8809 } 8810 8811 static void vmx_preemption_timer_zero_test_db_handler(struct ex_regs *regs) 8812 { 8813 } 8814 8815 static void vmx_preemption_timer_zero_test_guest(void) 8816 { 8817 while (vmx_get_test_stage() < 3) 8818 vmcall(); 8819 } 8820 8821 static void vmx_preemption_timer_zero_activate_preemption_timer(void) 8822 { 8823 vmcs_set_bits(PIN_CONTROLS, PIN_PREEMPT); 8824 vmcs_write(PREEMPT_TIMER_VALUE, 0); 8825 } 8826 8827 static void vmx_preemption_timer_zero_advance_past_vmcall(void) 8828 { 8829 vmcs_clear_bits(PIN_CONTROLS, PIN_PREEMPT); 8830 enter_guest(); 8831 skip_exit_vmcall(); 8832 } 8833 8834 static void vmx_preemption_timer_zero_inject_db(bool intercept_db) 8835 { 8836 vmx_preemption_timer_zero_activate_preemption_timer(); 8837 vmcs_write(ENT_INTR_INFO, INTR_INFO_VALID_MASK | 8838 INTR_TYPE_HARD_EXCEPTION | DB_VECTOR); 8839 vmcs_write(EXC_BITMAP, intercept_db ? 1 << DB_VECTOR : 0); 8840 enter_guest(); 8841 } 8842 8843 static void vmx_preemption_timer_zero_set_pending_dbg(u32 exception_bitmap) 8844 { 8845 vmx_preemption_timer_zero_activate_preemption_timer(); 8846 vmcs_write(GUEST_PENDING_DEBUG, PENDING_DBG_TRAP | DR6_TRAP1); 8847 vmcs_write(EXC_BITMAP, exception_bitmap); 8848 enter_guest(); 8849 } 8850 8851 static void vmx_preemption_timer_zero_expect_preempt_at_rip(u64 expected_rip) 8852 { 8853 u32 reason = (u32)vmcs_read(EXI_REASON); 8854 u64 guest_rip = vmcs_read(GUEST_RIP); 8855 8856 report(reason == VMX_PREEMPT && guest_rip == expected_rip, 8857 "Exit reason is 0x%x (expected 0x%x) and guest RIP is %lx (0x%lx expected).", 8858 reason, VMX_PREEMPT, guest_rip, expected_rip); 8859 } 8860 8861 /* 8862 * This test ensures that when the VMX preemption timer is zero at 8863 * VM-entry, a VM-exit occurs after any event injection and after any 8864 * pending debug exceptions are raised, but before execution of any 8865 * guest instructions. 8866 */ 8867 static void vmx_preemption_timer_zero_test(void) 8868 { 8869 u64 db_fault_address = (u64)get_idt_addr(&boot_idt[DB_VECTOR]); 8870 handler old_db; 8871 u32 reason; 8872 8873 if (!(ctrl_pin_rev.clr & PIN_PREEMPT)) { 8874 report_skip("%s : \"Activate VMX-preemption timer\" pin control not supported", __func__); 8875 return; 8876 } 8877 8878 /* 8879 * Install a custom #DB handler that doesn't abort. 
8880 */ 8881 old_db = handle_exception(DB_VECTOR, 8882 vmx_preemption_timer_zero_test_db_handler); 8883 8884 test_set_guest(vmx_preemption_timer_zero_test_guest); 8885 8886 /* 8887 * VMX-preemption timer should fire after event injection. 8888 */ 8889 vmx_set_test_stage(0); 8890 vmx_preemption_timer_zero_inject_db(0); 8891 vmx_preemption_timer_zero_expect_preempt_at_rip(db_fault_address); 8892 vmx_preemption_timer_zero_advance_past_vmcall(); 8893 8894 /* 8895 * VMX-preemption timer should fire after event injection. 8896 * Exception bitmap is irrelevant, since you can't intercept 8897 * an event that you injected. 8898 */ 8899 vmx_set_test_stage(1); 8900 vmx_preemption_timer_zero_inject_db(true); 8901 vmx_preemption_timer_zero_expect_preempt_at_rip(db_fault_address); 8902 vmx_preemption_timer_zero_advance_past_vmcall(); 8903 8904 /* 8905 * VMX-preemption timer should fire after pending debug exceptions 8906 * have delivered a #DB trap. 8907 */ 8908 vmx_set_test_stage(2); 8909 vmx_preemption_timer_zero_set_pending_dbg(0); 8910 vmx_preemption_timer_zero_expect_preempt_at_rip(db_fault_address); 8911 vmx_preemption_timer_zero_advance_past_vmcall(); 8912 8913 /* 8914 * VMX-preemption timer would fire after pending debug exceptions 8915 * have delivered a #DB trap, but in this case, the #DB trap is 8916 * intercepted. 8917 */ 8918 vmx_set_test_stage(3); 8919 vmx_preemption_timer_zero_set_pending_dbg(1 << DB_VECTOR); 8920 reason = (u32)vmcs_read(EXI_REASON); 8921 report(reason == VMX_EXC_NMI, "Exit reason is 0x%x (expected 0x%x)", 8922 reason, VMX_EXC_NMI); 8923 8924 vmcs_clear_bits(PIN_CONTROLS, PIN_PREEMPT); 8925 enter_guest(); 8926 8927 handle_exception(DB_VECTOR, old_db); 8928 } 8929 8930 static u64 vmx_preemption_timer_tf_test_prev_rip; 8931 8932 static void vmx_preemption_timer_tf_test_db_handler(struct ex_regs *regs) 8933 { 8934 extern char vmx_preemption_timer_tf_test_endloop; 8935 8936 if (vmx_get_test_stage() == 2) { 8937 /* 8938 * Stage 2 means that we're done, one way or another. 8939 * Arrange for the iret to drop us out of the wbinvd 8940 * loop and stop single-stepping. 8941 */ 8942 regs->rip = (u64)&vmx_preemption_timer_tf_test_endloop; 8943 regs->rflags &= ~X86_EFLAGS_TF; 8944 } else if (regs->rip == vmx_preemption_timer_tf_test_prev_rip) { 8945 /* 8946 * The RIP should alternate between the wbinvd and the 8947 * jmp instruction in the code below. If we ever see 8948 * the same instruction twice in a row, that means a 8949 * single-step trap has been dropped. Let the 8950 * hypervisor know about the failure by executing a 8951 * VMCALL. 8952 */ 8953 vmcall(); 8954 } 8955 vmx_preemption_timer_tf_test_prev_rip = regs->rip; 8956 } 8957 8958 static void vmx_preemption_timer_tf_test_guest(void) 8959 { 8960 /* 8961 * The hypervisor doesn't intercept WBINVD, so the loop below 8962 * shouldn't be a problem--it's just two instructions 8963 * executing in VMX non-root mode. However, when the 8964 * hypervisor is running in a virtual environment, the parent 8965 * hypervisor might intercept WBINVD and emulate it. If the 8966 * parent hypervisor is broken, the single-step trap after the 8967 * WBINVD might be lost. 8968 */ 8969 asm volatile("vmcall\n\t" 8970 "0: wbinvd\n\t" 8971 "1: jmp 0b\n\t" 8972 "vmx_preemption_timer_tf_test_endloop:"); 8973 } 8974 8975 /* 8976 * Ensure that the delivery of a "VMX-preemption timer expired" 8977 * VM-exit doesn't disrupt single-stepping in the guest. 
Note that 8978 * passing this test doesn't ensure correctness, because the test will 8979 * only fail if the VMX-preemption timer fires at the right time (or 8980 * the wrong time, as it were). 8981 */ 8982 static void vmx_preemption_timer_tf_test(void) 8983 { 8984 handler old_db; 8985 u32 reason; 8986 int i; 8987 8988 if (!(ctrl_pin_rev.clr & PIN_PREEMPT)) { 8989 report_skip("%s : \"Activate VMX-preemption timer\" pin control not supported", __func__); 8990 return; 8991 } 8992 8993 old_db = handle_exception(DB_VECTOR, 8994 vmx_preemption_timer_tf_test_db_handler); 8995 8996 test_set_guest(vmx_preemption_timer_tf_test_guest); 8997 8998 enter_guest(); 8999 skip_exit_vmcall(); 9000 9001 vmx_set_test_stage(1); 9002 vmcs_set_bits(PIN_CONTROLS, PIN_PREEMPT); 9003 vmcs_write(PREEMPT_TIMER_VALUE, 50000); 9004 vmcs_write(GUEST_RFLAGS, X86_EFLAGS_FIXED | X86_EFLAGS_TF); 9005 9006 /* 9007 * The only exit we should see is "VMX-preemption timer 9008 * expired." If we get a VMCALL exit, that means the #DB 9009 * handler has detected a missing single-step trap. It doesn't 9010 * matter where the guest RIP is when the VMX-preemption timer 9011 * expires (whether it's in the WBINVD loop or in the #DB 9012 * handler)--a single-step trap should never be discarded. 9013 */ 9014 for (i = 0; i < 10000; i++) { 9015 enter_guest(); 9016 reason = (u32)vmcs_read(EXI_REASON); 9017 if (reason == VMX_PREEMPT) 9018 continue; 9019 TEST_ASSERT(reason == VMX_VMCALL); 9020 skip_exit_insn(); 9021 break; 9022 } 9023 9024 report(reason == VMX_PREEMPT, "No single-step traps skipped"); 9025 9026 vmx_set_test_stage(2); 9027 vmcs_clear_bits(PIN_CONTROLS, PIN_PREEMPT); 9028 enter_guest(); 9029 9030 handle_exception(DB_VECTOR, old_db); 9031 } 9032 9033 #define VMX_PREEMPTION_TIMER_EXPIRY_CYCLES 1000000 9034 9035 static u64 vmx_preemption_timer_expiry_start; 9036 static u64 vmx_preemption_timer_expiry_finish; 9037 9038 static void vmx_preemption_timer_expiry_test_guest(void) 9039 { 9040 vmcall(); 9041 vmx_preemption_timer_expiry_start = fenced_rdtsc(); 9042 9043 while (vmx_get_test_stage() == 0) 9044 vmx_preemption_timer_expiry_finish = fenced_rdtsc(); 9045 } 9046 9047 /* 9048 * Test that the VMX-preemption timer is not excessively delayed. 9049 * 9050 * Per the SDM, volume 3, VM-entry starts the VMX-preemption timer 9051 * with the unsigned value in the VMX-preemption timer-value field, 9052 * and the VMX-preemption timer counts down by 1 every time bit X in 9053 * the TSC changes due to a TSC increment (where X is 9054 * IA32_VMX_MISC[4:0]). If the timer counts down to zero in any state 9055 * other than the wait-for-SIPI state, the logical processor 9056 * transitions to the C0 C-state and causes a VM-exit. 9057 * 9058 * The guest code above reads the starting TSC after VM-entry. At this 9059 * point, the VMX-preemption timer has already been activated. Next, 9060 * the guest code reads the current TSC in a loop, storing the value 9061 * read to memory. 9062 * 9063 * If the RDTSC in the loop reads a value past the VMX-preemption 9064 * timer deadline, then the VMX-preemption timer VM-exit must be 9065 * delivered before the next instruction retires. Even if a higher 9066 * priority SMI is delivered first, the VMX-preemption timer VM-exit 9067 * must be delivered before the next instruction retires. Hence, a TSC 9068 * value past the VMX-preemption timer deadline might be read, but it 9069 * cannot be stored. If a TSC value past the deadline *is* stored, 9070 * then the architectural specification has been violated.
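 *
 * As a worked example (illustrative): if IA32_VMX_MISC[4:0] is 5 and the
 * timer is programmed to N, the deadline computed below is
 * (start_TSC & ~0x1f) + (N << 5); any TSC value the guest manages to store
 * at or beyond that deadline means the VM-exit was delivered too late.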
9071 */ 9072 static void vmx_preemption_timer_expiry_test(void) 9073 { 9074 u32 preemption_timer_value; 9075 union vmx_misc misc; 9076 u64 tsc_deadline; 9077 u32 reason; 9078 9079 if (!(ctrl_pin_rev.clr & PIN_PREEMPT)) { 9080 report_skip("%s : \"Activate VMX-preemption timer\" pin control not supported", __func__); 9081 return; 9082 } 9083 9084 test_set_guest(vmx_preemption_timer_expiry_test_guest); 9085 9086 enter_guest(); 9087 skip_exit_vmcall(); 9088 9089 misc.val = rdmsr(MSR_IA32_VMX_MISC); 9090 preemption_timer_value = 9091 VMX_PREEMPTION_TIMER_EXPIRY_CYCLES >> misc.pt_bit; 9092 9093 vmcs_set_bits(PIN_CONTROLS, PIN_PREEMPT); 9094 vmcs_write(PREEMPT_TIMER_VALUE, preemption_timer_value); 9095 vmx_set_test_stage(0); 9096 9097 enter_guest(); 9098 reason = (u32)vmcs_read(EXI_REASON); 9099 TEST_ASSERT(reason == VMX_PREEMPT); 9100 9101 tsc_deadline = ((vmx_preemption_timer_expiry_start >> misc.pt_bit) << 9102 misc.pt_bit) + (preemption_timer_value << misc.pt_bit); 9103 9104 report(vmx_preemption_timer_expiry_finish < tsc_deadline, 9105 "Last stored guest TSC (%lu) < TSC deadline (%lu)", 9106 vmx_preemption_timer_expiry_finish, tsc_deadline); 9107 9108 vmcs_clear_bits(PIN_CONTROLS, PIN_PREEMPT); 9109 vmx_set_test_stage(1); 9110 enter_guest(); 9111 } 9112 9113 static void vmx_db_test_guest(void) 9114 { 9115 /* 9116 * For a hardware generated single-step #DB. 9117 */ 9118 asm volatile("vmcall;" 9119 "nop;" 9120 ".Lpost_nop:"); 9121 /* 9122 * ...in a MOVSS shadow, with pending debug exceptions. 9123 */ 9124 asm volatile("vmcall;" 9125 "nop;" 9126 ".Lpost_movss_nop:"); 9127 /* 9128 * For an L0 synthesized single-step #DB. (L0 intercepts WBINVD and 9129 * emulates it in software.) 9130 */ 9131 asm volatile("vmcall;" 9132 "wbinvd;" 9133 ".Lpost_wbinvd:"); 9134 /* 9135 * ...in a MOVSS shadow, with pending debug exceptions. 9136 */ 9137 asm volatile("vmcall;" 9138 "wbinvd;" 9139 ".Lpost_movss_wbinvd:"); 9140 /* 9141 * For a hardware generated single-step #DB in a transactional region. 9142 */ 9143 asm volatile("vmcall;" 9144 ".Lxbegin: xbegin .Lskip_rtm;" 9145 "xend;" 9146 ".Lskip_rtm:"); 9147 } 9148 9149 /* 9150 * Clear the pending debug exceptions and RFLAGS.TF and re-enter 9151 * L2. No #DB is delivered and L2 continues to the next point of 9152 * interest. 9153 */ 9154 static void dismiss_db(void) 9155 { 9156 vmcs_write(GUEST_PENDING_DEBUG, 0); 9157 vmcs_write(GUEST_RFLAGS, X86_EFLAGS_FIXED); 9158 enter_guest(); 9159 } 9160 9161 /* 9162 * Check a variety of VMCS fields relevant to an intercepted #DB exception. 9163 * Then throw away the #DB exception and resume L2. 
9164 */ 9165 static void check_db_exit(bool xfail_qual, bool xfail_dr6, bool xfail_pdbg, 9166 void *expected_rip, u64 expected_exit_qual, 9167 u64 expected_dr6) 9168 { 9169 u32 reason = vmcs_read(EXI_REASON); 9170 u32 intr_info = vmcs_read(EXI_INTR_INFO); 9171 u64 exit_qual = vmcs_read(EXI_QUALIFICATION); 9172 u64 guest_rip = vmcs_read(GUEST_RIP); 9173 u64 guest_pending_dbg = vmcs_read(GUEST_PENDING_DEBUG); 9174 u64 dr6 = read_dr6(); 9175 const u32 expected_intr_info = INTR_INFO_VALID_MASK | 9176 INTR_TYPE_HARD_EXCEPTION | DB_VECTOR; 9177 9178 report(reason == VMX_EXC_NMI && intr_info == expected_intr_info, 9179 "Expected #DB VM-exit"); 9180 report((u64)expected_rip == guest_rip, "Expected RIP %p (actual %lx)", 9181 expected_rip, guest_rip); 9182 report_xfail(xfail_pdbg, 0 == guest_pending_dbg, 9183 "Expected pending debug exceptions 0 (actual %lx)", 9184 guest_pending_dbg); 9185 report_xfail(xfail_qual, expected_exit_qual == exit_qual, 9186 "Expected exit qualification %lx (actual %lx)", 9187 expected_exit_qual, exit_qual); 9188 report_xfail(xfail_dr6, expected_dr6 == dr6, 9189 "Expected DR6 %lx (actual %lx)", expected_dr6, dr6); 9190 dismiss_db(); 9191 } 9192 9193 /* 9194 * Assuming the guest has just exited on a VMCALL instruction, skip 9195 * over the vmcall, and set the guest's RFLAGS.TF in the VMCS. If 9196 * pending debug exceptions are non-zero, set the VMCS up as if the 9197 * previous instruction was a MOVSS that generated the indicated 9198 * pending debug exceptions. Then enter L2. 9199 */ 9200 static void single_step_guest(const char *test_name, u64 starting_dr6, 9201 u64 pending_debug_exceptions) 9202 { 9203 printf("\n%s\n", test_name); 9204 skip_exit_vmcall(); 9205 write_dr6(starting_dr6); 9206 vmcs_write(GUEST_RFLAGS, X86_EFLAGS_FIXED | X86_EFLAGS_TF); 9207 if (pending_debug_exceptions) { 9208 vmcs_write(GUEST_PENDING_DEBUG, pending_debug_exceptions); 9209 vmcs_write(GUEST_INTR_STATE, GUEST_INTR_STATE_MOVSS); 9210 } 9211 enter_guest(); 9212 } 9213 9214 /* 9215 * When L1 intercepts #DB, verify that a single-step trap clears 9216 * pending debug exceptions, populates the exit qualification field 9217 * properly, and that DR6 is not prematurely clobbered. In a 9218 * (simulated) MOVSS shadow, make sure that the pending debug 9219 * exception bits are properly accumulated into the exit qualification 9220 * field. 9221 */ 9222 static void vmx_db_test(void) 9223 { 9224 /* 9225 * We are going to set a few arbitrary bits in DR6 to verify that 9226 * (a) DR6 is not modified by an intercepted #DB, and 9227 * (b) stale bits in DR6 (DR6.BD, in particular) don't leak into 9228 * the exit qualification field for a subsequent #DB exception. 9229 */ 9230 const u64 starting_dr6 = DR6_ACTIVE_LOW | DR6_BS | DR6_TRAP3 | DR6_TRAP1; 9231 extern char post_nop asm(".Lpost_nop"); 9232 extern char post_movss_nop asm(".Lpost_movss_nop"); 9233 extern char post_wbinvd asm(".Lpost_wbinvd"); 9234 extern char post_movss_wbinvd asm(".Lpost_movss_wbinvd"); 9235 extern char xbegin asm(".Lxbegin"); 9236 extern char skip_rtm asm(".Lskip_rtm"); 9237 9238 /* 9239 * L1 wants to intercept #DB exceptions encountered in L2. 9240 */ 9241 vmcs_write(EXC_BITMAP, BIT(DB_VECTOR)); 9242 9243 /* 9244 * Start L2 and run it up to the first point of interest. 9245 */ 9246 test_set_guest(vmx_db_test_guest); 9247 enter_guest(); 9248 9249 /* 9250 * Hardware-delivered #DB trap for single-step sets the 9251 * standard that L0 has to follow for emulated instructions. 
9252 */ 9253 single_step_guest("Hardware delivered single-step", starting_dr6, 0); 9254 check_db_exit(false, false, false, &post_nop, DR6_BS, starting_dr6); 9255 9256 /* 9257 * Hardware-delivered #DB trap for single-step in MOVSS shadow 9258 * also sets the standard that L0 has to follow for emulated 9259 * instructions. Here, we establish the VMCS pending debug 9260 * exceptions to indicate that the simulated MOVSS triggered a 9261 * data breakpoint as well as the single-step trap. 9262 */ 9263 single_step_guest("Hardware delivered single-step in MOVSS shadow", 9264 starting_dr6, DR6_BS | PENDING_DBG_TRAP | DR6_TRAP0); 9265 check_db_exit(false, false, false, &post_movss_nop, DR6_BS | DR6_TRAP0, 9266 starting_dr6); 9267 9268 /* 9269 * L0 synthesized #DB trap for single-step is buggy, because 9270 * kvm (a) clobbers DR6 too early, and (b) tries its best to 9271 * reconstitute the exit qualification from the prematurely 9272 * modified DR6, but fails miserably. 9273 */ 9274 single_step_guest("Software synthesized single-step", starting_dr6, 0); 9275 check_db_exit(false, false, false, &post_wbinvd, DR6_BS, starting_dr6); 9276 9277 /* 9278 * L0 synthesized #DB trap for single-step in MOVSS shadow is 9279 * even worse, because L0 also leaves the pending debug 9280 * exceptions in the VMCS instead of accumulating them into 9281 * the exit qualification field for the #DB exception. 9282 */ 9283 single_step_guest("Software synthesized single-step in MOVSS shadow", 9284 starting_dr6, DR6_BS | PENDING_DBG_TRAP | DR6_TRAP0); 9285 check_db_exit(true, false, true, &post_movss_wbinvd, DR6_BS | DR6_TRAP0, 9286 starting_dr6); 9287 9288 /* 9289 * Optional RTM test for hardware that supports RTM, to 9290 * demonstrate that the current volume 3 of the SDM 9291 * (325384-067US), table 27-1 is incorrect. Bit 16 of the exit 9292 * qualification for debug exceptions is not reserved. It is 9293 * set to 1 if a debug exception (#DB) or a breakpoint 9294 * exception (#BP) occurs inside an RTM region while advanced 9295 * debugging of RTM transactional regions is enabled. 9296 */ 9297 if (this_cpu_has(X86_FEATURE_RTM)) { 9298 vmcs_write(ENT_CONTROLS, 9299 vmcs_read(ENT_CONTROLS) | ENT_LOAD_DBGCTLS); 9300 /* 9301 * Set DR7.RTM[bit 11] and IA32_DEBUGCTL.RTM[bit 15] 9302 * in the guest to enable advanced debugging of RTM 9303 * transactional regions. 
9304 */ 9305 vmcs_write(GUEST_DR7, BIT(11)); 9306 vmcs_write(GUEST_DEBUGCTL, BIT(15)); 9307 single_step_guest("Hardware delivered single-step in " 9308 "transactional region", starting_dr6, 0); 9309 check_db_exit(false, false, false, &xbegin, BIT(16), 9310 starting_dr6); 9311 } else { 9312 vmcs_write(GUEST_RIP, (u64)&skip_rtm); 9313 enter_guest(); 9314 } 9315 } 9316 9317 static void enable_vid(void) 9318 { 9319 void *virtual_apic_page; 9320 9321 assert(cpu_has_apicv()); 9322 9323 enable_x2apic(); 9324 disable_intercept_for_x2apic_msrs(); 9325 9326 virtual_apic_page = alloc_page(); 9327 vmcs_write(APIC_VIRT_ADDR, (u64)virtual_apic_page); 9328 9329 vmcs_set_bits(PIN_CONTROLS, PIN_EXTINT); 9330 9331 vmcs_write(EOI_EXIT_BITMAP0, 0x0); 9332 vmcs_write(EOI_EXIT_BITMAP1, 0x0); 9333 vmcs_write(EOI_EXIT_BITMAP2, 0x0); 9334 vmcs_write(EOI_EXIT_BITMAP3, 0x0); 9335 9336 vmcs_set_bits(CPU_EXEC_CTRL0, CPU_SECONDARY | CPU_TPR_SHADOW); 9337 vmcs_set_bits(CPU_EXEC_CTRL1, CPU_VINTD | CPU_VIRT_X2APIC); 9338 } 9339 9340 static void trigger_ioapic_scan_thread(void *data) 9341 { 9342 /* Wait until other CPU entered L2 */ 9343 while (vmx_get_test_stage() != 1) 9344 ; 9345 9346 /* Trigger ioapic scan */ 9347 ioapic_set_redir(0xf, 0x79, TRIGGER_LEVEL); 9348 vmx_set_test_stage(2); 9349 } 9350 9351 static void irq_79_handler_guest(isr_regs_t *regs) 9352 { 9353 eoi(); 9354 9355 /* L1 expects vmexit on VMX_VMCALL and not VMX_EOI_INDUCED */ 9356 vmcall(); 9357 } 9358 9359 /* 9360 * Constant for num of busy-loop iterations after which 9361 * a timer interrupt should have happened in host 9362 */ 9363 #define TIMER_INTERRUPT_DELAY 100000000 9364 9365 static void vmx_eoi_bitmap_ioapic_scan_test_guest(void) 9366 { 9367 handle_irq(0x79, irq_79_handler_guest); 9368 sti(); 9369 9370 /* Signal to L1 CPU to trigger ioapic scan */ 9371 vmx_set_test_stage(1); 9372 /* Wait until L1 CPU to trigger ioapic scan */ 9373 while (vmx_get_test_stage() != 2) 9374 ; 9375 9376 /* 9377 * Wait for L0 timer interrupt to be raised while we run in L2 9378 * such that L0 will process the IOAPIC scan request before 9379 * resuming L2 9380 */ 9381 delay(TIMER_INTERRUPT_DELAY); 9382 9383 asm volatile ("int $0x79"); 9384 } 9385 9386 static void vmx_eoi_bitmap_ioapic_scan_test(void) 9387 { 9388 if (!cpu_has_apicv() || (cpu_count() < 2)) { 9389 report_skip("%s : Not all required APICv bits supported or CPU count < 2", __func__); 9390 return; 9391 } 9392 9393 enable_vid(); 9394 9395 on_cpu_async(1, trigger_ioapic_scan_thread, NULL); 9396 test_set_guest(vmx_eoi_bitmap_ioapic_scan_test_guest); 9397 9398 /* 9399 * Launch L2. 9400 * We expect the exit reason to be VMX_VMCALL (and not EOI INDUCED). 9401 * In case the reason isn't VMX_VMCALL, the assertion inside 9402 * skip_exit_vmcall() will fail. 
9403 */ 9404 enter_guest(); 9405 skip_exit_vmcall(); 9406 9407 /* Let L2 finish */ 9408 enter_guest(); 9409 report_pass(__func__); 9410 } 9411 9412 #define HLT_WITH_RVI_VECTOR (0xf1) 9413 9414 bool vmx_hlt_with_rvi_guest_isr_fired; 9415 static void vmx_hlt_with_rvi_guest_isr(isr_regs_t *regs) 9416 { 9417 vmx_hlt_with_rvi_guest_isr_fired = true; 9418 eoi(); 9419 } 9420 9421 static void vmx_hlt_with_rvi_guest(void) 9422 { 9423 handle_irq(HLT_WITH_RVI_VECTOR, vmx_hlt_with_rvi_guest_isr); 9424 9425 sti_nop(); 9426 asm volatile ("nop"); 9427 9428 vmcall(); 9429 } 9430 9431 static void vmx_hlt_with_rvi_test(void) 9432 { 9433 if (!cpu_has_apicv()) { 9434 report_skip("%s : Not all required APICv bits supported", __func__); 9435 return; 9436 } 9437 9438 enable_vid(); 9439 9440 vmx_hlt_with_rvi_guest_isr_fired = false; 9441 test_set_guest(vmx_hlt_with_rvi_guest); 9442 9443 enter_guest(); 9444 skip_exit_vmcall(); 9445 9446 vmcs_write(GUEST_ACTV_STATE, ACTV_HLT); 9447 vmcs_write(GUEST_INT_STATUS, HLT_WITH_RVI_VECTOR); 9448 enter_guest(); 9449 9450 report(vmx_hlt_with_rvi_guest_isr_fired, "Interrupt raised in guest"); 9451 } 9452 9453 static void set_irq_line_thread(void *data) 9454 { 9455 /* Wait until other CPU entered L2 */ 9456 while (vmx_get_test_stage() != 1) 9457 ; 9458 9459 /* Set irq-line 0xf to raise vector 0x78 for vCPU 0 */ 9460 ioapic_set_redir(0xf, 0x78, TRIGGER_LEVEL); 9461 vmx_set_test_stage(2); 9462 } 9463 9464 static bool irq_78_handler_vmcall_before_eoi; 9465 static void irq_78_handler_guest(isr_regs_t *regs) 9466 { 9467 set_irq_line(0xf, 0); 9468 if (irq_78_handler_vmcall_before_eoi) 9469 vmcall(); 9470 eoi(); 9471 vmcall(); 9472 } 9473 9474 static void vmx_apic_passthrough_guest(void) 9475 { 9476 handle_irq(0x78, irq_78_handler_guest); 9477 sti(); 9478 9479 /* If requested, wait for other CPU to trigger ioapic scan */ 9480 if (vmx_get_test_stage() < 1) { 9481 vmx_set_test_stage(1); 9482 while (vmx_get_test_stage() != 2) 9483 ; 9484 } 9485 9486 set_irq_line(0xf, 1); 9487 } 9488 9489 static void vmx_apic_passthrough(bool set_irq_line_from_thread) 9490 { 9491 if (set_irq_line_from_thread && (cpu_count() < 2)) { 9492 report_skip("%s : CPU count < 2", __func__); 9493 return; 9494 } 9495 9496 /* Test device is required for generating IRQs */ 9497 if (!test_device_enabled()) { 9498 report_skip("%s : No test device enabled", __func__); 9499 return; 9500 } 9501 u64 cpu_ctrl_0 = CPU_SECONDARY; 9502 u64 cpu_ctrl_1 = 0; 9503 9504 disable_intercept_for_x2apic_msrs(); 9505 9506 vmcs_write(PIN_CONTROLS, vmcs_read(PIN_CONTROLS) & ~PIN_EXTINT); 9507 9508 vmcs_write(CPU_EXEC_CTRL0, vmcs_read(CPU_EXEC_CTRL0) | cpu_ctrl_0); 9509 vmcs_write(CPU_EXEC_CTRL1, vmcs_read(CPU_EXEC_CTRL1) | cpu_ctrl_1); 9510 9511 if (set_irq_line_from_thread) { 9512 irq_78_handler_vmcall_before_eoi = false; 9513 on_cpu_async(1, set_irq_line_thread, NULL); 9514 } else { 9515 irq_78_handler_vmcall_before_eoi = true; 9516 ioapic_set_redir(0xf, 0x78, TRIGGER_LEVEL); 9517 vmx_set_test_stage(2); 9518 } 9519 test_set_guest(vmx_apic_passthrough_guest); 9520 9521 if (irq_78_handler_vmcall_before_eoi) { 9522 /* Before EOI remote_irr should still be set */ 9523 enter_guest(); 9524 skip_exit_vmcall(); 9525 TEST_ASSERT_EQ_MSG(1, (int)ioapic_read_redir(0xf).remote_irr, 9526 "IOAPIC pass-through: remote_irr=1 before EOI"); 9527 } 9528 9529 /* After EOI remote_irr should be cleared */ 9530 enter_guest(); 9531 skip_exit_vmcall(); 9532 TEST_ASSERT_EQ_MSG(0, (int)ioapic_read_redir(0xf).remote_irr, 9533 "IOAPIC pass-through: remote_irr=0 after 
EOI"); 9534 9535 /* Let L2 finish */ 9536 enter_guest(); 9537 report_pass(__func__); 9538 } 9539 9540 static void vmx_apic_passthrough_test(void) 9541 { 9542 vmx_apic_passthrough(false); 9543 } 9544 9545 static void vmx_apic_passthrough_thread_test(void) 9546 { 9547 vmx_apic_passthrough(true); 9548 } 9549 9550 static void vmx_apic_passthrough_tpr_threshold_guest(void) 9551 { 9552 cli(); 9553 apic_set_tpr(0); 9554 } 9555 9556 static bool vmx_apic_passthrough_tpr_threshold_ipi_isr_fired; 9557 static void vmx_apic_passthrough_tpr_threshold_ipi_isr(isr_regs_t *regs) 9558 { 9559 vmx_apic_passthrough_tpr_threshold_ipi_isr_fired = true; 9560 eoi(); 9561 } 9562 9563 static void vmx_apic_passthrough_tpr_threshold_test(void) 9564 { 9565 int ipi_vector = 0xe1; 9566 9567 disable_intercept_for_x2apic_msrs(); 9568 vmcs_clear_bits(PIN_CONTROLS, PIN_EXTINT); 9569 9570 /* Raise L0 TPR-threshold by queueing vector in LAPIC IRR */ 9571 cli(); 9572 apic_set_tpr((ipi_vector >> 4) + 1); 9573 apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL | 9574 APIC_DM_FIXED | ipi_vector, 9575 0); 9576 9577 test_set_guest(vmx_apic_passthrough_tpr_threshold_guest); 9578 enter_guest(); 9579 9580 report(apic_get_tpr() == 0, "TPR was zero by guest"); 9581 9582 /* Clean pending self-IPI */ 9583 vmx_apic_passthrough_tpr_threshold_ipi_isr_fired = false; 9584 handle_irq(ipi_vector, vmx_apic_passthrough_tpr_threshold_ipi_isr); 9585 sti_nop(); 9586 report(vmx_apic_passthrough_tpr_threshold_ipi_isr_fired, "self-IPI fired"); 9587 9588 report_pass(__func__); 9589 } 9590 9591 static u64 init_signal_test_exit_reason; 9592 static bool init_signal_test_thread_continued; 9593 9594 static void init_signal_test_thread(void *data) 9595 { 9596 struct vmcs *test_vmcs = data; 9597 9598 /* Enter VMX operation (i.e. exec VMXON) */ 9599 u64 *ap_vmxon_region = alloc_page(); 9600 enable_vmx(); 9601 init_vmx(ap_vmxon_region); 9602 TEST_ASSERT(!__vmxon_safe(ap_vmxon_region)); 9603 9604 /* Signal CPU have entered VMX operation */ 9605 vmx_set_test_stage(1); 9606 9607 /* Wait for BSP CPU to send INIT signal */ 9608 while (vmx_get_test_stage() != 2) 9609 ; 9610 9611 /* 9612 * Signal that we continue as usual as INIT signal 9613 * should be blocked while CPU is in VMX operation 9614 */ 9615 vmx_set_test_stage(3); 9616 9617 /* Wait for signal to enter VMX non-root mode */ 9618 while (vmx_get_test_stage() != 4) 9619 ; 9620 9621 /* Enter VMX non-root mode */ 9622 test_set_guest(v2_null_test_guest); 9623 make_vmcs_current(test_vmcs); 9624 enter_guest(); 9625 /* Save exit reason for BSP CPU to compare to expected result */ 9626 init_signal_test_exit_reason = vmcs_read(EXI_REASON); 9627 /* VMCLEAR test-vmcs so it could be loaded by BSP CPU */ 9628 vmcs_clear(test_vmcs); 9629 launched = false; 9630 /* Signal that CPU exited to VMX root mode */ 9631 vmx_set_test_stage(5); 9632 9633 /* Wait for BSP CPU to signal to exit VMX operation */ 9634 while (vmx_get_test_stage() != 6) 9635 ; 9636 9637 /* Exit VMX operation (i.e. exec VMXOFF) */ 9638 vmx_off(); 9639 9640 /* 9641 * Signal to BSP CPU that we continue as usual as INIT signal 9642 * should have been consumed by VMX_INIT exit from guest 9643 */ 9644 vmx_set_test_stage(7); 9645 9646 /* Wait for BSP CPU to signal to enter VMX operation */ 9647 while (vmx_get_test_stage() != 8) 9648 ; 9649 /* Enter VMX operation (i.e. 
exec VMXON) */ 9650 TEST_ASSERT(!__vmxon_safe(ap_vmxon_region)); 9651 /* Signal to BSP we are in VMX operation */ 9652 vmx_set_test_stage(9); 9653 9654 /* Wait for BSP CPU to send INIT signal */ 9655 while (vmx_get_test_stage() != 10) 9656 ; 9657 9658 /* Exit VMX operation (i.e. exec VMXOFF) */ 9659 vmx_off(); 9660 9661 /* 9662 * Exiting VMX operation should result in latched 9663 * INIT signal being processed. Therefore, we should 9664 * never reach the below code. Thus, signal to BSP 9665 * CPU if we have reached here so it is able to 9666 * report an issue if it happens. 9667 */ 9668 init_signal_test_thread_continued = true; 9669 } 9670 9671 #define INIT_SIGNAL_TEST_DELAY 100000000ULL 9672 9673 static void vmx_init_signal_test(void) 9674 { 9675 struct vmcs *test_vmcs; 9676 9677 if (cpu_count() < 2) { 9678 report_skip("%s : CPU count < 2", __func__); 9679 return; 9680 } 9681 9682 /* VMCLEAR test-vmcs so it could be loaded by other CPU */ 9683 vmcs_save(&test_vmcs); 9684 vmcs_clear(test_vmcs); 9685 9686 vmx_set_test_stage(0); 9687 on_cpu_async(1, init_signal_test_thread, test_vmcs); 9688 9689 /* Wait for other CPU to enter VMX operation */ 9690 while (vmx_get_test_stage() != 1) 9691 ; 9692 9693 /* Send INIT signal to other CPU */ 9694 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_INIT | APIC_INT_ASSERT, 9695 id_map[1]); 9696 /* Signal other CPU we have sent INIT signal */ 9697 vmx_set_test_stage(2); 9698 9699 /* 9700 * Wait reasonable amount of time for INIT signal to 9701 * be received on other CPU and verify that other CPU 9702 * have proceed as usual to next test stage as INIT 9703 * signal should be blocked while other CPU in 9704 * VMX operation 9705 */ 9706 delay(INIT_SIGNAL_TEST_DELAY); 9707 report(vmx_get_test_stage() == 3, 9708 "INIT signal blocked when CPU in VMX operation"); 9709 /* No point to continue if we failed at this point */ 9710 if (vmx_get_test_stage() != 3) 9711 return; 9712 9713 /* Signal other CPU to enter VMX non-root mode */ 9714 init_signal_test_exit_reason = -1ull; 9715 vmx_set_test_stage(4); 9716 /* 9717 * Wait reasonable amount of time for other CPU 9718 * to exit to VMX root mode 9719 */ 9720 delay(INIT_SIGNAL_TEST_DELAY); 9721 if (vmx_get_test_stage() != 5) { 9722 report_fail("Pending INIT signal didn't result in VMX exit"); 9723 return; 9724 } 9725 report(init_signal_test_exit_reason == VMX_INIT, 9726 "INIT signal during VMX non-root mode result in exit-reason %s (%lu)", 9727 exit_reason_description(init_signal_test_exit_reason), 9728 init_signal_test_exit_reason); 9729 9730 /* Run guest to completion */ 9731 make_vmcs_current(test_vmcs); 9732 enter_guest(); 9733 9734 /* Signal other CPU to exit VMX operation */ 9735 init_signal_test_thread_continued = false; 9736 vmx_set_test_stage(6); 9737 9738 /* Wait reasonable amount of time for other CPU to exit VMX operation */ 9739 delay(INIT_SIGNAL_TEST_DELAY); 9740 report(vmx_get_test_stage() == 7, 9741 "INIT signal consumed on VMX_INIT exit"); 9742 /* No point to continue if we failed at this point */ 9743 if (vmx_get_test_stage() != 7) 9744 return; 9745 9746 /* Signal other CPU to enter VMX operation */ 9747 vmx_set_test_stage(8); 9748 /* Wait for other CPU to enter VMX operation */ 9749 while (vmx_get_test_stage() != 9) 9750 ; 9751 9752 /* Send INIT signal to other CPU */ 9753 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_INIT | APIC_INT_ASSERT, 9754 id_map[1]); 9755 /* Signal other CPU we have sent INIT signal */ 9756 vmx_set_test_stage(10); 9757 9758 /* 9759 * Wait reasonable amount of time for other CPU 9760 * 
to exit VMX operation and process INIT signal 9761 */ 9762 delay(INIT_SIGNAL_TEST_DELAY); 9763 report(!init_signal_test_thread_continued, 9764 "INIT signal processed after exit VMX operation"); 9765 9766 /* 9767 * TODO: Send SIPI to other CPU to sipi_entry (See x86/cstart64.S) 9768 * to re-init it to kvm-unit-tests standard environment. 9769 * Somehow (?) verify that SIPI was indeed received. 9770 */ 9771 } 9772 9773 #define SIPI_SIGNAL_TEST_DELAY 100000000ULL 9774 9775 static void vmx_sipi_test_guest(void) 9776 { 9777 if (apic_id() == 0) { 9778 /* wait AP enter guest with activity=WAIT_SIPI */ 9779 while (vmx_get_test_stage() != 1) 9780 ; 9781 delay(SIPI_SIGNAL_TEST_DELAY); 9782 9783 /* First SIPI signal */ 9784 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_STARTUP | APIC_INT_ASSERT, id_map[1]); 9785 report_pass("BSP(L2): Send first SIPI to cpu[%d]", id_map[1]); 9786 9787 /* wait AP enter guest */ 9788 while (vmx_get_test_stage() != 2) 9789 ; 9790 delay(SIPI_SIGNAL_TEST_DELAY); 9791 9792 /* Second SIPI signal should be ignored since AP is not in WAIT_SIPI state */ 9793 apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_STARTUP | APIC_INT_ASSERT, id_map[1]); 9794 report_pass("BSP(L2): Send second SIPI to cpu[%d]", id_map[1]); 9795 9796 /* Delay a while to check whether second SIPI would cause VMExit */ 9797 delay(SIPI_SIGNAL_TEST_DELAY); 9798 9799 /* Test is done, notify AP to exit test */ 9800 vmx_set_test_stage(3); 9801 9802 /* wait AP exit non-root mode */ 9803 while (vmx_get_test_stage() != 5) 9804 ; 9805 } else { 9806 /* wait BSP notify test is done */ 9807 while (vmx_get_test_stage() != 3) 9808 ; 9809 9810 /* AP exit guest */ 9811 vmx_set_test_stage(4); 9812 } 9813 } 9814 9815 static void sipi_test_ap_thread(void *data) 9816 { 9817 struct vmcs *ap_vmcs; 9818 u64 *ap_vmxon_region; 9819 void *ap_stack, *ap_syscall_stack; 9820 u64 cpu_ctrl_0 = CPU_SECONDARY; 9821 u64 cpu_ctrl_1 = 0; 9822 9823 /* Enter VMX operation (i.e. 
exec VMXON) */ 9824 ap_vmxon_region = alloc_page(); 9825 enable_vmx(); 9826 init_vmx(ap_vmxon_region); 9827 TEST_ASSERT(!__vmxon_safe(ap_vmxon_region)); 9828 init_vmcs(&ap_vmcs); 9829 make_vmcs_current(ap_vmcs); 9830 9831 /* Set stack for AP */ 9832 ap_stack = alloc_page(); 9833 ap_syscall_stack = alloc_page(); 9834 vmcs_write(GUEST_RSP, (u64)(ap_stack + PAGE_SIZE - 1)); 9835 vmcs_write(GUEST_SYSENTER_ESP, (u64)(ap_syscall_stack + PAGE_SIZE - 1)); 9836 9837 /* passthrough lapic to L2 */ 9838 disable_intercept_for_x2apic_msrs(); 9839 vmcs_write(PIN_CONTROLS, vmcs_read(PIN_CONTROLS) & ~PIN_EXTINT); 9840 vmcs_write(CPU_EXEC_CTRL0, vmcs_read(CPU_EXEC_CTRL0) | cpu_ctrl_0); 9841 vmcs_write(CPU_EXEC_CTRL1, vmcs_read(CPU_EXEC_CTRL1) | cpu_ctrl_1); 9842 9843 /* Set guest activity state to wait-for-SIPI state */ 9844 vmcs_write(GUEST_ACTV_STATE, ACTV_WAIT_SIPI); 9845 9846 vmx_set_test_stage(1); 9847 9848 /* AP enter guest */ 9849 enter_guest(); 9850 9851 if (vmcs_read(EXI_REASON) == VMX_SIPI) { 9852 report_pass("AP: Handle SIPI VMExit"); 9853 vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE); 9854 vmx_set_test_stage(2); 9855 } else { 9856 report_fail("AP: Unexpected VMExit, reason=%ld", vmcs_read(EXI_REASON)); 9857 vmx_off(); 9858 return; 9859 } 9860 9861 /* AP enter guest */ 9862 enter_guest(); 9863 9864 report(vmcs_read(EXI_REASON) != VMX_SIPI, 9865 "AP: should no SIPI VMExit since activity is not in WAIT_SIPI state"); 9866 9867 /* notify BSP that AP is already exit from non-root mode */ 9868 vmx_set_test_stage(5); 9869 9870 /* Leave VMX operation */ 9871 vmx_off(); 9872 } 9873 9874 static void vmx_sipi_signal_test(void) 9875 { 9876 if (!(rdmsr(MSR_IA32_VMX_MISC) & MSR_IA32_VMX_MISC_ACTIVITY_WAIT_SIPI)) { 9877 report_skip("%s : \"ACTIVITY_WAIT_SIPI state\" not supported", __func__); 9878 return; 9879 } 9880 9881 if (cpu_count() < 2) { 9882 report_skip("%s : CPU count < 2", __func__); 9883 return; 9884 } 9885 9886 u64 cpu_ctrl_0 = CPU_SECONDARY; 9887 u64 cpu_ctrl_1 = 0; 9888 9889 /* passthrough lapic to L2 */ 9890 disable_intercept_for_x2apic_msrs(); 9891 vmcs_write(PIN_CONTROLS, vmcs_read(PIN_CONTROLS) & ~PIN_EXTINT); 9892 vmcs_write(CPU_EXEC_CTRL0, vmcs_read(CPU_EXEC_CTRL0) | cpu_ctrl_0); 9893 vmcs_write(CPU_EXEC_CTRL1, vmcs_read(CPU_EXEC_CTRL1) | cpu_ctrl_1); 9894 9895 test_set_guest(vmx_sipi_test_guest); 9896 9897 /* update CR3 on AP */ 9898 on_cpu(1, update_cr3, (void *)read_cr3()); 9899 9900 /* start AP */ 9901 on_cpu_async(1, sipi_test_ap_thread, NULL); 9902 9903 vmx_set_test_stage(0); 9904 9905 /* BSP enter guest */ 9906 enter_guest(); 9907 } 9908 9909 9910 enum vmcs_access { 9911 ACCESS_VMREAD, 9912 ACCESS_VMWRITE, 9913 ACCESS_NONE, 9914 }; 9915 9916 struct vmcs_shadow_test_common { 9917 enum vmcs_access op; 9918 enum Reason reason; 9919 u64 field; 9920 u64 value; 9921 u64 flags; 9922 u64 time; 9923 } l1_l2_common; 9924 9925 static inline u64 vmread_flags(u64 field, u64 *val) 9926 { 9927 u64 flags; 9928 9929 asm volatile ("vmread %2, %1; pushf; pop %0" 9930 : "=r" (flags), "=rm" (*val) : "r" (field) : "cc"); 9931 return flags & X86_EFLAGS_ALU; 9932 } 9933 9934 static inline u64 vmwrite_flags(u64 field, u64 val) 9935 { 9936 u64 flags; 9937 9938 asm volatile ("vmwrite %1, %2; pushf; pop %0" 9939 : "=r"(flags) : "rm" (val), "r" (field) : "cc"); 9940 return flags & X86_EFLAGS_ALU; 9941 } 9942 9943 static void vmx_vmcs_shadow_test_guest(void) 9944 { 9945 struct vmcs_shadow_test_common *c = &l1_l2_common; 9946 u64 start; 9947 9948 while (c->op != ACCESS_NONE) { 9949 start = rdtsc(); 9950 switch (c->op) 
{ 9951 default: 9952 c->flags = -1ull; 9953 break; 9954 case ACCESS_VMREAD: 9955 c->flags = vmread_flags(c->field, &c->value); 9956 break; 9957 case ACCESS_VMWRITE: 9958 c->flags = vmwrite_flags(c->field, 0); 9959 break; 9960 } 9961 c->time = rdtsc() - start; 9962 vmcall(); 9963 } 9964 } 9965 9966 static u64 vmread_from_shadow(u64 field) 9967 { 9968 struct vmcs *primary; 9969 struct vmcs *shadow; 9970 u64 value; 9971 9972 TEST_ASSERT(!vmcs_save(&primary)); 9973 shadow = (struct vmcs *)vmcs_read(VMCS_LINK_PTR); 9974 TEST_ASSERT(!make_vmcs_current(shadow)); 9975 value = vmcs_read(field); 9976 TEST_ASSERT(!make_vmcs_current(primary)); 9977 return value; 9978 } 9979 9980 static u64 vmwrite_to_shadow(u64 field, u64 value) 9981 { 9982 struct vmcs *primary; 9983 struct vmcs *shadow; 9984 9985 TEST_ASSERT(!vmcs_save(&primary)); 9986 shadow = (struct vmcs *)vmcs_read(VMCS_LINK_PTR); 9987 TEST_ASSERT(!make_vmcs_current(shadow)); 9988 vmcs_write(field, value); 9989 value = vmcs_read(field); 9990 TEST_ASSERT(!make_vmcs_current(primary)); 9991 return value; 9992 } 9993 9994 static void vmcs_shadow_test_access(u8 *bitmap[2], enum vmcs_access access) 9995 { 9996 struct vmcs_shadow_test_common *c = &l1_l2_common; 9997 9998 c->op = access; 9999 vmcs_write(VMX_INST_ERROR, 0); 10000 enter_guest(); 10001 c->reason = vmcs_read(EXI_REASON) & 0xffff; 10002 if (c->reason != VMX_VMCALL) { 10003 skip_exit_insn(); 10004 enter_guest(); 10005 } 10006 skip_exit_vmcall(); 10007 } 10008 10009 static void vmcs_shadow_test_field(u8 *bitmap[2], u64 field) 10010 { 10011 struct vmcs_shadow_test_common *c = &l1_l2_common; 10012 struct vmcs *shadow; 10013 u64 value; 10014 uintptr_t flags[2]; 10015 bool good_shadow; 10016 u32 vmx_inst_error; 10017 10018 report_prefix_pushf("field %lx", field); 10019 c->field = field; 10020 10021 shadow = (struct vmcs *)vmcs_read(VMCS_LINK_PTR); 10022 if (shadow != (struct vmcs *)-1ull) { 10023 flags[ACCESS_VMREAD] = vmread_flags(field, &value); 10024 flags[ACCESS_VMWRITE] = vmwrite_flags(field, value); 10025 good_shadow = !flags[ACCESS_VMREAD] && !flags[ACCESS_VMWRITE]; 10026 } else { 10027 /* 10028 * When VMCS link pointer is -1ull, VMWRITE/VMREAD on 10029 * shadowed-fields should fail with setting RFLAGS.CF. 10030 */ 10031 flags[ACCESS_VMREAD] = X86_EFLAGS_CF; 10032 flags[ACCESS_VMWRITE] = X86_EFLAGS_CF; 10033 good_shadow = false; 10034 } 10035 10036 /* Intercept both VMREAD and VMWRITE. */ 10037 report_prefix_push("no VMREAD/VMWRITE permission"); 10038 /* VMWRITE/VMREAD done on reserved-bit should always intercept */ 10039 if (!(field >> VMCS_FIELD_RESERVED_SHIFT)) { 10040 set_bit(field, bitmap[ACCESS_VMREAD]); 10041 set_bit(field, bitmap[ACCESS_VMWRITE]); 10042 } 10043 vmcs_shadow_test_access(bitmap, ACCESS_VMWRITE); 10044 report(c->reason == VMX_VMWRITE, "not shadowed for VMWRITE"); 10045 vmcs_shadow_test_access(bitmap, ACCESS_VMREAD); 10046 report(c->reason == VMX_VMREAD, "not shadowed for VMREAD"); 10047 report_prefix_pop(); 10048 10049 if (field >> VMCS_FIELD_RESERVED_SHIFT) 10050 goto out; 10051 10052 /* Permit shadowed VMREAD. 
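 * That is, clear the field's bit in the VMREAD bitmap while leaving it
 * set in the VMWRITE bitmap: L2's VMREAD should now be satisfied from
 * the shadow VMCS (so the exit reason is VMX_VMCALL), while its VMWRITE
 * should still be intercepted (exit reason VMX_VMWRITE).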
*/ 10053 report_prefix_push("VMREAD permission only"); 10054 clear_bit(field, bitmap[ACCESS_VMREAD]); 10055 set_bit(field, bitmap[ACCESS_VMWRITE]); 10056 if (good_shadow) 10057 value = vmwrite_to_shadow(field, MAGIC_VAL_1 + field); 10058 vmcs_shadow_test_access(bitmap, ACCESS_VMWRITE); 10059 report(c->reason == VMX_VMWRITE, "not shadowed for VMWRITE"); 10060 vmcs_shadow_test_access(bitmap, ACCESS_VMREAD); 10061 vmx_inst_error = vmcs_read(VMX_INST_ERROR); 10062 report(c->reason == VMX_VMCALL, "shadowed for VMREAD (in %ld cycles)", 10063 c->time); 10064 report(c->flags == flags[ACCESS_VMREAD], 10065 "ALU flags after VMREAD (%lx) are as expected (%lx)", 10066 c->flags, flags[ACCESS_VMREAD]); 10067 if (good_shadow) 10068 report(c->value == value, 10069 "value read from shadow (%lx) is as expected (%lx)", 10070 c->value, value); 10071 else if (shadow != (struct vmcs *)-1ull && flags[ACCESS_VMREAD]) 10072 report(vmx_inst_error == VMXERR_UNSUPPORTED_VMCS_COMPONENT, 10073 "VMX_INST_ERROR (%d) is as expected (%d)", 10074 vmx_inst_error, VMXERR_UNSUPPORTED_VMCS_COMPONENT); 10075 report_prefix_pop(); 10076 10077 /* Permit shadowed VMWRITE. */ 10078 report_prefix_push("VMWRITE permission only"); 10079 set_bit(field, bitmap[ACCESS_VMREAD]); 10080 clear_bit(field, bitmap[ACCESS_VMWRITE]); 10081 if (good_shadow) 10082 vmwrite_to_shadow(field, MAGIC_VAL_1 + field); 10083 vmcs_shadow_test_access(bitmap, ACCESS_VMWRITE); 10084 vmx_inst_error = vmcs_read(VMX_INST_ERROR); 10085 report(c->reason == VMX_VMCALL, 10086 "shadowed for VMWRITE (in %ld cycles)", 10087 c->time); 10088 report(c->flags == flags[ACCESS_VMREAD], 10089 "ALU flags after VMWRITE (%lx) are as expected (%lx)", 10090 c->flags, flags[ACCESS_VMREAD]); 10091 if (good_shadow) { 10092 value = vmread_from_shadow(field); 10093 report(value == 0, 10094 "shadow VMCS value (%lx) is as expected (%lx)", value, 10095 0ul); 10096 } else if (shadow != (struct vmcs *)-1ull && flags[ACCESS_VMWRITE]) { 10097 report(vmx_inst_error == VMXERR_UNSUPPORTED_VMCS_COMPONENT, 10098 "VMX_INST_ERROR (%d) is as expected (%d)", 10099 vmx_inst_error, VMXERR_UNSUPPORTED_VMCS_COMPONENT); 10100 } 10101 vmcs_shadow_test_access(bitmap, ACCESS_VMREAD); 10102 report(c->reason == VMX_VMREAD, "not shadowed for VMREAD"); 10103 report_prefix_pop(); 10104 10105 /* Permit shadowed VMREAD and VMWRITE. 
*/ 10106 report_prefix_push("VMREAD and VMWRITE permission"); 10107 clear_bit(field, bitmap[ACCESS_VMREAD]); 10108 clear_bit(field, bitmap[ACCESS_VMWRITE]); 10109 if (good_shadow) 10110 vmwrite_to_shadow(field, MAGIC_VAL_1 + field); 10111 vmcs_shadow_test_access(bitmap, ACCESS_VMWRITE); 10112 vmx_inst_error = vmcs_read(VMX_INST_ERROR); 10113 report(c->reason == VMX_VMCALL, 10114 "shadowed for VMWRITE (in %ld cycles)", 10115 c->time); 10116 report(c->flags == flags[ACCESS_VMREAD], 10117 "ALU flags after VMWRITE (%lx) are as expected (%lx)", 10118 c->flags, flags[ACCESS_VMREAD]); 10119 if (good_shadow) { 10120 value = vmread_from_shadow(field); 10121 report(value == 0, 10122 "shadow VMCS value (%lx) is as expected (%lx)", value, 10123 0ul); 10124 } else if (shadow != (struct vmcs *)-1ull && flags[ACCESS_VMWRITE]) { 10125 report(vmx_inst_error == VMXERR_UNSUPPORTED_VMCS_COMPONENT, 10126 "VMX_INST_ERROR (%d) is as expected (%d)", 10127 vmx_inst_error, VMXERR_UNSUPPORTED_VMCS_COMPONENT); 10128 } 10129 vmcs_shadow_test_access(bitmap, ACCESS_VMREAD); 10130 vmx_inst_error = vmcs_read(VMX_INST_ERROR); 10131 report(c->reason == VMX_VMCALL, "shadowed for VMREAD (in %ld cycles)", 10132 c->time); 10133 report(c->flags == flags[ACCESS_VMREAD], 10134 "ALU flags after VMREAD (%lx) are as expected (%lx)", 10135 c->flags, flags[ACCESS_VMREAD]); 10136 if (good_shadow) 10137 report(c->value == 0, 10138 "value read from shadow (%lx) is as expected (%lx)", 10139 c->value, 0ul); 10140 else if (shadow != (struct vmcs *)-1ull && flags[ACCESS_VMREAD]) 10141 report(vmx_inst_error == VMXERR_UNSUPPORTED_VMCS_COMPONENT, 10142 "VMX_INST_ERROR (%d) is as expected (%d)", 10143 vmx_inst_error, VMXERR_UNSUPPORTED_VMCS_COMPONENT); 10144 report_prefix_pop(); 10145 10146 out: 10147 report_prefix_pop(); 10148 } 10149 10150 static void vmx_vmcs_shadow_test_body(u8 *bitmap[2]) 10151 { 10152 unsigned base; 10153 unsigned index; 10154 unsigned bit; 10155 unsigned highest_index = rdmsr(MSR_IA32_VMX_VMCS_ENUM); 10156 10157 /* Run test on all possible valid VMCS fields */ 10158 for (base = 0; 10159 base < (1 << VMCS_FIELD_RESERVED_SHIFT); 10160 base += (1 << VMCS_FIELD_TYPE_SHIFT)) 10161 for (index = 0; index <= highest_index; index++) 10162 vmcs_shadow_test_field(bitmap, base + index); 10163 10164 /* 10165 * Run tests on some invalid VMCS fields 10166 * (Have reserved bit set). 
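 * Encodings with a reserved bit set must be intercepted regardless of
 * the VMREAD/VMWRITE bitmaps, so vmcs_shadow_test_field() only runs the
 * "no permission" case for them.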
10167 */ 10168 for (bit = VMCS_FIELD_RESERVED_SHIFT; bit < VMCS_FIELD_BIT_SIZE; bit++) 10169 vmcs_shadow_test_field(bitmap, (1ull << bit)); 10170 } 10171 10172 static void vmx_vmcs_shadow_test(void) 10173 { 10174 u8 *bitmap[2]; 10175 struct vmcs *shadow; 10176 10177 if (!(ctrl_cpu_rev[0].clr & CPU_SECONDARY)) { 10178 report_skip("%s : \"Activate secondary controls\" not supported", __func__); 10179 return; 10180 } 10181 10182 if (!(ctrl_cpu_rev[1].clr & CPU_SHADOW_VMCS)) { 10183 report_skip("%s : \"VMCS shadowing\" not supported", __func__); 10184 return; 10185 } 10186 10187 if (!(rdmsr(MSR_IA32_VMX_MISC) & 10188 MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS)) { 10189 report_skip("%s : VMWRITE can't modify VM-exit information fields.", __func__); 10190 return; 10191 } 10192 10193 test_set_guest(vmx_vmcs_shadow_test_guest); 10194 10195 bitmap[ACCESS_VMREAD] = alloc_page(); 10196 bitmap[ACCESS_VMWRITE] = alloc_page(); 10197 10198 vmcs_write(VMREAD_BITMAP, virt_to_phys(bitmap[ACCESS_VMREAD])); 10199 vmcs_write(VMWRITE_BITMAP, virt_to_phys(bitmap[ACCESS_VMWRITE])); 10200 10201 shadow = alloc_page(); 10202 shadow->hdr.revision_id = basic_msr.revision; 10203 shadow->hdr.shadow_vmcs = 1; 10204 TEST_ASSERT(!vmcs_clear(shadow)); 10205 10206 vmcs_clear_bits(CPU_EXEC_CTRL0, CPU_RDTSC); 10207 vmcs_set_bits(CPU_EXEC_CTRL0, CPU_SECONDARY); 10208 vmcs_set_bits(CPU_EXEC_CTRL1, CPU_SHADOW_VMCS); 10209 10210 vmcs_write(VMCS_LINK_PTR, virt_to_phys(shadow)); 10211 report_prefix_push("valid link pointer"); 10212 vmx_vmcs_shadow_test_body(bitmap); 10213 report_prefix_pop(); 10214 10215 vmcs_write(VMCS_LINK_PTR, -1ull); 10216 report_prefix_push("invalid link pointer"); 10217 vmx_vmcs_shadow_test_body(bitmap); 10218 report_prefix_pop(); 10219 10220 l1_l2_common.op = ACCESS_NONE; 10221 enter_guest(); 10222 } 10223 10224 /* 10225 * This test monitors the difference between a guest RDTSC instruction 10226 * and the IA32_TIME_STAMP_COUNTER MSR value stored in the VMCS12 10227 * VM-exit MSR-store list when taking a VM-exit on the instruction 10228 * following RDTSC. 10229 */ 10230 #define RDTSC_DIFF_ITERS 100000 10231 #define RDTSC_DIFF_FAILS 100 10232 #define HOST_CAPTURED_GUEST_TSC_DIFF_THRESHOLD 750 10233 10234 /* 10235 * Set 'use TSC offsetting' and set the guest offset to the 10236 * inverse of the host's current TSC value, so that the guest starts running 10237 * with an effective TSC value of 0. 10238 */ 10239 static void reset_guest_tsc_to_zero(void) 10240 { 10241 vmcs_set_bits(CPU_EXEC_CTRL0, CPU_USE_TSC_OFFSET); 10242 vmcs_write(TSC_OFFSET, -rdtsc()); 10243 } 10244 10245 static void rdtsc_vmexit_diff_test_guest(void) 10246 { 10247 int i; 10248 10249 for (i = 0; i < RDTSC_DIFF_ITERS; i++) 10250 /* Ensure rdtsc is the last instruction before the vmcall. */ 10251 asm volatile("rdtsc; vmcall" : : : "eax", "edx"); 10252 } 10253 10254 /* 10255 * This function only considers the "use TSC offsetting" VM-execution 10256 * control. It does not handle "use TSC scaling" (because the latter 10257 * isn't available to the host today.) 
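 * (With only offsetting enabled the conversion is simply
 * guest_tsc = host_tsc + TSC_OFFSET; with scaling, the SDM formula
 * would also multiply by the 48-bit fixed-point TSC multiplier before
 * adding the offset, roughly ((host_tsc * multiplier) >> 48) + offset,
 * which this helper deliberately does not attempt.)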
10258 */ 10259 static unsigned long long host_time_to_guest_time(unsigned long long t) 10260 { 10261 TEST_ASSERT(!(ctrl_cpu_rev[0].clr & CPU_SECONDARY) || 10262 !(vmcs_read(CPU_EXEC_CTRL1) & CPU_USE_TSC_SCALING)); 10263 10264 if (vmcs_read(CPU_EXEC_CTRL0) & CPU_USE_TSC_OFFSET) 10265 t += vmcs_read(TSC_OFFSET); 10266 10267 return t; 10268 } 10269 10270 static unsigned long long rdtsc_vmexit_diff_test_iteration(void) 10271 { 10272 unsigned long long guest_tsc, host_to_guest_tsc; 10273 10274 enter_guest(); 10275 skip_exit_vmcall(); 10276 guest_tsc = (u32) regs.rax + (regs.rdx << 32); 10277 host_to_guest_tsc = host_time_to_guest_time(exit_msr_store[0].value); 10278 10279 return host_to_guest_tsc - guest_tsc; 10280 } 10281 10282 static void rdtsc_vmexit_diff_test(void) 10283 { 10284 unsigned long long delta; 10285 int fail = 0; 10286 int i; 10287 10288 if (!(ctrl_cpu_rev[0].clr & CPU_USE_TSC_OFFSET)) 10289 test_skip("CPU doesn't support the 'use TSC offsetting' processor-based VM-execution control.\n"); 10290 10291 test_set_guest(rdtsc_vmexit_diff_test_guest); 10292 10293 reset_guest_tsc_to_zero(); 10294 10295 /* 10296 * Set up the VMCS12 VM-exit MSR-store list to store just one 10297 * MSR: IA32_TIME_STAMP_COUNTER. Note that the value stored is 10298 * in the host time domain (i.e., it is not adjusted according 10299 * to the TSC multiplier and TSC offset fields in the VMCS12, 10300 * as a guest RDTSC would be.) 10301 */ 10302 exit_msr_store = alloc_page(); 10303 exit_msr_store[0].index = MSR_IA32_TSC; 10304 vmcs_write(EXI_MSR_ST_CNT, 1); 10305 vmcs_write(EXIT_MSR_ST_ADDR, virt_to_phys(exit_msr_store)); 10306 10307 for (i = 0; i < RDTSC_DIFF_ITERS && fail < RDTSC_DIFF_FAILS; i++) { 10308 delta = rdtsc_vmexit_diff_test_iteration(); 10309 if (delta >= HOST_CAPTURED_GUEST_TSC_DIFF_THRESHOLD) 10310 fail++; 10311 } 10312 10313 enter_guest(); 10314 10315 report(fail < RDTSC_DIFF_FAILS, 10316 "RDTSC to VM-exit delta too high in %d of %d iterations, last = %llu", 10317 fail, i, delta); 10318 } 10319 10320 static int invalid_msr_init(struct vmcs *vmcs) 10321 { 10322 if (!(ctrl_pin_rev.clr & PIN_PREEMPT)) { 10323 printf("\tPreemption timer is not supported\n"); 10324 return VMX_TEST_EXIT; 10325 } 10326 vmcs_write(PIN_CONTROLS, vmcs_read(PIN_CONTROLS) | PIN_PREEMPT); 10327 preempt_val = 10000000; 10328 vmcs_write(PREEMPT_TIMER_VALUE, preempt_val); 10329 preempt_scale = rdmsr(MSR_IA32_VMX_MISC) & 0x1F; 10330 10331 if (!(ctrl_exit_rev.clr & EXI_SAVE_PREEMPT)) 10332 printf("\tSave preemption value is not supported\n"); 10333 10334 vmcs_write(ENT_MSR_LD_CNT, 1); 10335 vmcs_write(ENTER_MSR_LD_ADDR, (u64)0x13370000); 10336 10337 return VMX_TEST_START; 10338 } 10339 10340 10341 static void invalid_msr_main(void) 10342 { 10343 report_fail("Invalid MSR load"); 10344 } 10345 10346 static int invalid_msr_exit_handler(union exit_reason exit_reason) 10347 { 10348 report_fail("Invalid MSR load"); 10349 print_vmexit_info(exit_reason); 10350 return VMX_TEST_EXIT; 10351 } 10352 10353 static int invalid_msr_entry_failure(struct vmentry_result *result) 10354 { 10355 report(result->exit_reason.failed_vmentry && 10356 result->exit_reason.basic == VMX_FAIL_MSR, "Invalid MSR load"); 10357 return VMX_TEST_VMEXIT; 10358 } 10359 10360 /* 10361 * The max number of MSRs in an atomic switch MSR list is: 10362 * (111B + 1) * 512 = 4096 10363 * 10364 * Each list entry consumes: 10365 * 4-byte MSR index + 4 bytes reserved + 8-byte data = 16 bytes 10366 * 10367 * Allocate 128 kB to cover max_msr_list_size (i.e., 64 kB) and then some. 
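 * That is, a maximally sized list is 4096 entries * 16 bytes = 64 kB,
 * and 2^msr_list_page_order pages = 32 * 4 kB = 128 kB of backing
 * storage, so even the one-past-the-limit list used by the overflow
 * test fits with room to spare.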
10368 */ 10369 static const u32 msr_list_page_order = 5; 10370 10371 static void atomic_switch_msr_limit_test_guest(void) 10372 { 10373 vmcall(); 10374 } 10375 10376 static void populate_msr_list(struct vmx_msr_entry *msr_list, 10377 size_t byte_capacity, int count) 10378 { 10379 int i; 10380 10381 for (i = 0; i < count; i++) { 10382 msr_list[i].index = MSR_IA32_TSC; 10383 msr_list[i].reserved = 0; 10384 msr_list[i].value = 0x1234567890abcdef; 10385 } 10386 10387 memset(msr_list + count, 0xff, 10388 byte_capacity - count * sizeof(*msr_list)); 10389 } 10390 10391 static int max_msr_list_size(void) 10392 { 10393 u32 vmx_misc = rdmsr(MSR_IA32_VMX_MISC); 10394 u32 factor = ((vmx_misc & GENMASK(27, 25)) >> 25) + 1; 10395 10396 return factor * 512; 10397 } 10398 10399 static void atomic_switch_msrs_test(int count) 10400 { 10401 struct vmx_msr_entry *vm_enter_load; 10402 struct vmx_msr_entry *vm_exit_load; 10403 struct vmx_msr_entry *vm_exit_store; 10404 int max_allowed = max_msr_list_size(); 10405 int byte_capacity = 1ul << (msr_list_page_order + PAGE_SHIFT); 10406 /* Exceeding the max MSR list size at exit triggers KVM to abort. */ 10407 int exit_count = count > max_allowed ? max_allowed : count; 10408 int cleanup_count = count > max_allowed ? 2 : 1; 10409 int i; 10410 10411 /* 10412 * Check for the IA32_TSC MSR, 10413 * available with the "TSC flag" and used to populate the MSR lists. 10414 */ 10415 if (!(cpuid(1).d & (1 << 4))) { 10416 report_skip("%s : \"Time Stamp Counter\" not supported", __func__); 10417 return; 10418 } 10419 10420 /* Set L2 guest. */ 10421 test_set_guest(atomic_switch_msr_limit_test_guest); 10422 10423 /* Setup atomic MSR switch lists. */ 10424 vm_enter_load = alloc_pages(msr_list_page_order); 10425 vm_exit_load = alloc_pages(msr_list_page_order); 10426 vm_exit_store = alloc_pages(msr_list_page_order); 10427 10428 vmcs_write(ENTER_MSR_LD_ADDR, (u64)vm_enter_load); 10429 vmcs_write(EXIT_MSR_LD_ADDR, (u64)vm_exit_load); 10430 vmcs_write(EXIT_MSR_ST_ADDR, (u64)vm_exit_store); 10431 10432 /* 10433 * VM-Enter should succeed up to the max number of MSRs per list, and 10434 * should not consume junk beyond the last entry. 10435 */ 10436 populate_msr_list(vm_enter_load, byte_capacity, count); 10437 populate_msr_list(vm_exit_load, byte_capacity, exit_count); 10438 populate_msr_list(vm_exit_store, byte_capacity, exit_count); 10439 10440 vmcs_write(ENT_MSR_LD_CNT, count); 10441 vmcs_write(EXI_MSR_LD_CNT, exit_count); 10442 vmcs_write(EXI_MSR_ST_CNT, exit_count); 10443 10444 if (count <= max_allowed) { 10445 enter_guest(); 10446 assert_exit_reason(VMX_VMCALL); 10447 skip_exit_vmcall(); 10448 } else { 10449 u32 exit_qual; 10450 10451 test_guest_state("Invalid MSR Load Count", true, count, 10452 "ENT_MSR_LD_CNT"); 10453 10454 exit_qual = vmcs_read(EXI_QUALIFICATION); 10455 report(exit_qual == max_allowed + 1, "exit_qual, %u, is %u.", 10456 exit_qual, max_allowed + 1); 10457 } 10458 10459 /* Cleanup. 
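 * Zero the three MSR counts so the list contents are ignored, then
 * re-enter the guest until it completes (twice in the overflow case,
 * where the failed VM-entry meant the guest never reached its vmcall)
 * before freeing the lists.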
*/ 10460 vmcs_write(ENT_MSR_LD_CNT, 0); 10461 vmcs_write(EXI_MSR_LD_CNT, 0); 10462 vmcs_write(EXI_MSR_ST_CNT, 0); 10463 for (i = 0; i < cleanup_count; i++) { 10464 enter_guest(); 10465 skip_exit_vmcall(); 10466 } 10467 free_pages_by_order(vm_enter_load, msr_list_page_order); 10468 free_pages_by_order(vm_exit_load, msr_list_page_order); 10469 free_pages_by_order(vm_exit_store, msr_list_page_order); 10470 } 10471 10472 static void atomic_switch_max_msrs_test(void) 10473 { 10474 atomic_switch_msrs_test(max_msr_list_size()); 10475 } 10476 10477 static void atomic_switch_overflow_msrs_test(void) 10478 { 10479 if (test_device_enabled()) 10480 atomic_switch_msrs_test(max_msr_list_size() + 1); 10481 else 10482 test_skip("Test is only supported on KVM"); 10483 } 10484 10485 static void vmx_pf_exception_test_guest(void) 10486 { 10487 ac_test_run(PT_LEVEL_PML4, false); 10488 } 10489 10490 static void vmx_pf_exception_forced_emulation_test_guest(void) 10491 { 10492 ac_test_run(PT_LEVEL_PML4, true); 10493 } 10494 10495 typedef void (*invalidate_tlb_t)(void *data); 10496 typedef void (*pf_exception_test_guest_t)(void); 10497 10498 10499 static void __vmx_pf_exception_test(invalidate_tlb_t inv_fn, void *data, 10500 pf_exception_test_guest_t guest_fn) 10501 { 10502 u64 efer; 10503 struct cpuid cpuid; 10504 10505 test_set_guest(guest_fn); 10506 10507 /* Intercept INVLPG when to perform TLB invalidation from L1 (this). */ 10508 if (inv_fn) 10509 vmcs_set_bits(CPU_EXEC_CTRL0, CPU_INVLPG); 10510 else 10511 vmcs_clear_bits(CPU_EXEC_CTRL0, CPU_INVLPG); 10512 10513 enter_guest(); 10514 10515 while (vmcs_read(EXI_REASON) != VMX_VMCALL) { 10516 switch (vmcs_read(EXI_REASON)) { 10517 case VMX_RDMSR: 10518 assert(regs.rcx == MSR_EFER); 10519 efer = vmcs_read(GUEST_EFER); 10520 regs.rdx = efer >> 32; 10521 regs.rax = efer & 0xffffffff; 10522 break; 10523 case VMX_WRMSR: 10524 assert(regs.rcx == MSR_EFER); 10525 efer = regs.rdx << 32 | (regs.rax & 0xffffffff); 10526 vmcs_write(GUEST_EFER, efer); 10527 break; 10528 case VMX_CPUID: 10529 cpuid = (struct cpuid) {0, 0, 0, 0}; 10530 cpuid = raw_cpuid(regs.rax, regs.rcx); 10531 regs.rax = cpuid.a; 10532 regs.rbx = cpuid.b; 10533 regs.rcx = cpuid.c; 10534 regs.rdx = cpuid.d; 10535 break; 10536 case VMX_INVLPG: 10537 inv_fn(data); 10538 break; 10539 default: 10540 assert_msg(false, 10541 "Unexpected exit to L1, exit_reason: %s (0x%lx)", 10542 exit_reason_description(vmcs_read(EXI_REASON)), 10543 vmcs_read(EXI_REASON)); 10544 } 10545 skip_exit_insn(); 10546 enter_guest(); 10547 } 10548 10549 assert_exit_reason(VMX_VMCALL); 10550 } 10551 10552 static void vmx_pf_exception_test(void) 10553 { 10554 __vmx_pf_exception_test(NULL, NULL, vmx_pf_exception_test_guest); 10555 } 10556 10557 static void vmx_pf_exception_forced_emulation_test(void) 10558 { 10559 __vmx_pf_exception_test(NULL, NULL, vmx_pf_exception_forced_emulation_test_guest); 10560 } 10561 10562 static void invalidate_tlb_no_vpid(void *data) 10563 { 10564 /* If VPID is disabled, the TLB is flushed on VM-Enter and VM-Exit. 
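 * There is nothing for L1 to do here; the empty callback exists only so
 * that __vmx_pf_exception_test() enables INVLPG exiting and the
 * VMX_INVLPG exit path still gets exercised.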
*/ 10565 } 10566 10567 static void vmx_pf_no_vpid_test(void) 10568 { 10569 if (is_vpid_supported()) 10570 vmcs_clear_bits(CPU_EXEC_CTRL1, CPU_VPID); 10571 10572 __vmx_pf_exception_test(invalidate_tlb_no_vpid, NULL, 10573 vmx_pf_exception_test_guest); 10574 } 10575 10576 static void invalidate_tlb_invvpid_addr(void *data) 10577 { 10578 invvpid(INVVPID_ALL, *(u16 *)data, vmcs_read(EXI_QUALIFICATION)); 10579 } 10580 10581 static void invalidate_tlb_new_vpid(void *data) 10582 { 10583 u16 *vpid = data; 10584 10585 /* 10586 * Bump VPID to effectively flush L2's TLB from L0's perspective. 10587 * Invalidate all VPIDs when the VPID wraps to zero as hardware/KVM is 10588 * architecturally allowed to keep TLB entries indefinitely. 10589 */ 10590 ++(*vpid); 10591 if (*vpid == 0) { 10592 ++(*vpid); 10593 invvpid(INVVPID_ALL, 0, 0); 10594 } 10595 vmcs_write(VPID, *vpid); 10596 } 10597 10598 static void __vmx_pf_vpid_test(invalidate_tlb_t inv_fn, u16 vpid) 10599 { 10600 if (!is_vpid_supported()) 10601 test_skip("VPID unsupported"); 10602 10603 if (!is_invvpid_supported()) 10604 test_skip("INVVPID unsupported"); 10605 10606 vmcs_set_bits(CPU_EXEC_CTRL0, CPU_SECONDARY); 10607 vmcs_set_bits(CPU_EXEC_CTRL1, CPU_VPID); 10608 vmcs_write(VPID, vpid); 10609 10610 __vmx_pf_exception_test(inv_fn, &vpid, vmx_pf_exception_test_guest); 10611 } 10612 10613 static void vmx_pf_invvpid_test(void) 10614 { 10615 if (!is_invvpid_type_supported(INVVPID_ADDR)) 10616 test_skip("INVVPID ADDR unsupported"); 10617 10618 __vmx_pf_vpid_test(invalidate_tlb_invvpid_addr, 0xaaaa); 10619 } 10620 10621 static void vmx_pf_vpid_test(void) 10622 { 10623 /* Need INVVPID(ALL) to flush VPIDs upon wrap/reuse. */ 10624 if (!is_invvpid_type_supported(INVVPID_ALL)) 10625 test_skip("INVVPID ALL unsupported"); 10626 10627 __vmx_pf_vpid_test(invalidate_tlb_new_vpid, 1); 10628 } 10629 10630 static void vmx_l2_ac_test(void) 10631 { 10632 bool hit_ac = false; 10633 10634 write_cr0(read_cr0() | X86_CR0_AM); 10635 write_rflags(read_rflags() | X86_EFLAGS_AC); 10636 10637 run_in_user(generate_usermode_ac, AC_VECTOR, 0, 0, 0, 0, &hit_ac); 10638 report(hit_ac, "Usermode #AC handled in L2"); 10639 vmcall(); 10640 } 10641 10642 struct vmx_exception_test { 10643 u8 vector; 10644 void (*guest_code)(void); 10645 }; 10646 10647 struct vmx_exception_test vmx_exception_tests[] = { 10648 { GP_VECTOR, generate_non_canonical_gp }, 10649 { UD_VECTOR, generate_ud }, 10650 { DE_VECTOR, generate_de }, 10651 { DB_VECTOR, generate_single_step_db }, 10652 { BP_VECTOR, generate_bp }, 10653 { AC_VECTOR, vmx_l2_ac_test }, 10654 { OF_VECTOR, generate_of }, 10655 { NM_VECTOR, generate_cr0_ts_nm }, 10656 { NM_VECTOR, generate_cr0_em_nm }, 10657 }; 10658 10659 static u8 vmx_exception_test_vector; 10660 10661 static void vmx_exception_handler(struct ex_regs *regs) 10662 { 10663 report(regs->vector == vmx_exception_test_vector, 10664 "Handling %s in L2's exception handler", 10665 exception_mnemonic(vmx_exception_test_vector)); 10666 vmcall(); 10667 } 10668 10669 static void handle_exception_in_l2(u8 vector) 10670 { 10671 handler old_handler = handle_exception(vector, vmx_exception_handler); 10672 10673 vmx_exception_test_vector = vector; 10674 10675 enter_guest(); 10676 report(vmcs_read(EXI_REASON) == VMX_VMCALL, 10677 "%s handled by L2", exception_mnemonic(vector)); 10678 10679 handle_exception(vector, old_handler); 10680 } 10681 10682 static void handle_exception_in_l1(u32 vector) 10683 { 10684 u32 old_eb = vmcs_read(EXC_BITMAP); 10685 u32 intr_type; 10686 u32 intr_info; 10687 
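	/*
	 * Intercept the vector in L1 by setting its bit in the exception
	 * bitmap, run L2, and then verify that the resulting exit is
	 * VMX_EXC_NMI with a valid interruption-information field whose
	 * vector and type match the exception generated in L2.
	 */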
10688 vmcs_write(EXC_BITMAP, old_eb | (1u << vector)); 10689 10690 enter_guest(); 10691 10692 if (vector == BP_VECTOR || vector == OF_VECTOR) 10693 intr_type = VMX_INTR_TYPE_SOFT_EXCEPTION; 10694 else 10695 intr_type = VMX_INTR_TYPE_HARD_EXCEPTION; 10696 10697 intr_info = vmcs_read(EXI_INTR_INFO); 10698 report((vmcs_read(EXI_REASON) == VMX_EXC_NMI) && 10699 (intr_info & INTR_INFO_VALID_MASK) && 10700 (intr_info & INTR_INFO_VECTOR_MASK) == vector && 10701 ((intr_info & INTR_INFO_INTR_TYPE_MASK) >> INTR_INFO_INTR_TYPE_SHIFT) == intr_type, 10702 "%s correctly routed to L1", exception_mnemonic(vector)); 10703 10704 vmcs_write(EXC_BITMAP, old_eb); 10705 } 10706 10707 static void vmx_exception_test(void) 10708 { 10709 struct vmx_exception_test *t; 10710 int i; 10711 10712 for (i = 0; i < ARRAY_SIZE(vmx_exception_tests); i++) { 10713 t = &vmx_exception_tests[i]; 10714 10715 /* 10716 * Override the guest code before each run even though it's the 10717 * same code, the VMCS guest state needs to be reinitialized. 10718 */ 10719 test_override_guest(t->guest_code); 10720 handle_exception_in_l2(t->vector); 10721 10722 test_override_guest(t->guest_code); 10723 handle_exception_in_l1(t->vector); 10724 } 10725 10726 test_set_guest_finished(); 10727 } 10728 10729 enum Vid_op { 10730 VID_OP_SET_ISR, 10731 VID_OP_NOP, 10732 VID_OP_SET_CR8, 10733 VID_OP_SELF_IPI, 10734 VID_OP_TERMINATE, 10735 }; 10736 10737 struct vmx_basic_vid_test_guest_args { 10738 enum Vid_op op; 10739 u8 nr; 10740 u32 isr_exec_cnt; 10741 } vmx_basic_vid_test_guest_args; 10742 10743 /* 10744 * From the SDM, Bit x of the VIRR is 10745 * at bit position (x & 1FH) 10746 * at offset (200H | ((x & E0H) >> 1)). 10747 */ 10748 static void set_virr_bit(volatile u32 *virtual_apic_page, u8 nr) 10749 { 10750 u32 page_offset = (0x200 | ((nr & 0xE0) >> 1)) / sizeof(u32); 10751 u32 mask = 1 << (nr & 0x1f); 10752 10753 virtual_apic_page[page_offset] |= mask; 10754 } 10755 10756 static bool get_virr_bit(volatile u32 *virtual_apic_page, u8 nr) 10757 { 10758 u32 page_offset = (0x200 | ((nr & 0xE0) >> 1)) / sizeof(u32); 10759 u32 mask = 1 << (nr & 0x1f); 10760 10761 return virtual_apic_page[page_offset] & mask; 10762 } 10763 10764 static void vmx_vid_test_isr(isr_regs_t *regs) 10765 { 10766 volatile struct vmx_basic_vid_test_guest_args *args = 10767 &vmx_basic_vid_test_guest_args; 10768 10769 args->isr_exec_cnt++; 10770 barrier(); 10771 eoi(); 10772 } 10773 10774 static void vmx_basic_vid_test_guest(void) 10775 { 10776 volatile struct vmx_basic_vid_test_guest_args *args = 10777 &vmx_basic_vid_test_guest_args; 10778 10779 sti_nop(); 10780 for (;;) { 10781 enum Vid_op op = args->op; 10782 u8 nr = args->nr; 10783 10784 switch (op) { 10785 case VID_OP_TERMINATE: 10786 return; 10787 case VID_OP_SET_ISR: 10788 handle_irq(nr, vmx_vid_test_isr); 10789 break; 10790 case VID_OP_SET_CR8: 10791 write_cr8(nr); 10792 break; 10793 case VID_OP_SELF_IPI: 10794 vmx_x2apic_write(APIC_SELF_IPI, nr); 10795 break; 10796 default: 10797 break; 10798 } 10799 10800 vmcall(); 10801 } 10802 } 10803 10804 static void set_isrs_for_vmx_basic_vid_test(void) 10805 { 10806 volatile struct vmx_basic_vid_test_guest_args *args = 10807 &vmx_basic_vid_test_guest_args; 10808 u16 nr; 10809 10810 /* 10811 * kvm-unit-tests uses vector 32 for IPIs, so don't install a test ISR 10812 * for that vector. 
10813 */ 10814 for (nr = 0x21; nr < 0x100; nr++) { 10815 vmcs_write(GUEST_INT_STATUS, 0); 10816 args->op = VID_OP_SET_ISR; 10817 args->nr = nr; 10818 args->isr_exec_cnt = 0; 10819 enter_guest(); 10820 skip_exit_vmcall(); 10821 } 10822 report(true, "Set ISR for vectors 33-255."); 10823 } 10824 10825 /* 10826 * Test virtual interrupt delivery (VID) at VM-entry or TPR virtualization 10827 * 10828 * Args: 10829 * nr: vector under test 10830 * tpr: task priority under test 10831 * tpr_virt: If true, then test VID during TPR virtualization. Otherwise, 10832 * test VID during VM-entry. 10833 */ 10834 static void test_basic_vid(u8 nr, u8 tpr, enum Vid_op op, u32 isr_exec_cnt_want, 10835 bool eoi_exit_induced) 10836 { 10837 volatile struct vmx_basic_vid_test_guest_args *args = 10838 &vmx_basic_vid_test_guest_args; 10839 u16 rvi_want = isr_exec_cnt_want ? 0 : nr; 10840 u16 int_status; 10841 10842 /* 10843 * From the SDM: 10844 * IF "interrupt-window exiting" is 0 AND 10845 * RVI[7:4] > VPPR[7:4] (see Section 29.1.1 for definition of VPPR) 10846 * THEN recognize a pending virtual interrupt; 10847 * ELSE 10848 * do not recognize a pending virtual interrupt; 10849 * FI; 10850 * 10851 * Thus, VPPR dictates whether a virtual interrupt is recognized. 10852 * However, PPR virtualization, which occurs before virtual interrupt 10853 * delivery, sets VPPR to VTPR, when SVI is 0. 10854 */ 10855 args->isr_exec_cnt = 0; 10856 args->op = op; 10857 switch (op) { 10858 case VID_OP_SELF_IPI: 10859 vmcs_write(GUEST_INT_STATUS, 0); 10860 args->nr = nr; 10861 set_vtpr(0); 10862 break; 10863 case VID_OP_SET_CR8: 10864 vmcs_write(GUEST_INT_STATUS, nr); 10865 args->nr = task_priority_class(tpr); 10866 set_vtpr(0xff); 10867 break; 10868 default: 10869 vmcs_write(GUEST_INT_STATUS, nr); 10870 set_vtpr(tpr); 10871 break; 10872 } 10873 10874 enter_guest(); 10875 if (eoi_exit_induced) { 10876 u32 exit_cnt; 10877 10878 assert_exit_reason(VMX_EOI_INDUCED); 10879 for (exit_cnt = 1; exit_cnt < isr_exec_cnt_want; exit_cnt++) { 10880 enter_guest(); 10881 assert_exit_reason(VMX_EOI_INDUCED); 10882 } 10883 enter_guest(); 10884 } 10885 skip_exit_vmcall(); 10886 TEST_ASSERT_EQ(args->isr_exec_cnt, isr_exec_cnt_want); 10887 int_status = vmcs_read(GUEST_INT_STATUS); 10888 TEST_ASSERT_EQ(int_status, rvi_want); 10889 } 10890 10891 /* 10892 * Test recognizing and delivering virtual interrupts via "Virtual-interrupt 10893 * delivery" for two scenarios: 10894 * 1. When there is a pending interrupt at VM-entry. 10895 * 2. When there is a pending interrupt during TPR virtualization. 10896 */ 10897 static void vmx_basic_vid_test(void) 10898 { 10899 volatile struct vmx_basic_vid_test_guest_args *args = 10900 &vmx_basic_vid_test_guest_args; 10901 u8 nr_class; 10902 10903 if (!cpu_has_apicv()) { 10904 report_skip("%s : Not all required APICv bits supported", __func__); 10905 return; 10906 } 10907 10908 enable_vid(); 10909 test_set_guest(vmx_basic_vid_test_guest); 10910 set_isrs_for_vmx_basic_vid_test(); 10911 10912 for (nr_class = 2; nr_class < 16; nr_class++) { 10913 u16 nr; 10914 u8 nr_sub_class; 10915 10916 for (nr_sub_class = 0; nr_sub_class < 16; nr_sub_class++) { 10917 u16 tpr; 10918 10919 nr = (nr_class << 4) | nr_sub_class; 10920 10921 /* 10922 * Don't test the reserved IPI vector, as the test ISR 10923 * was not installed. 
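 * Also note that, per the RVI[7:4] > VPPR[7:4] rule described in
 * test_basic_vid(), a vector is only delivered when its priority class
 * exceeds the TPR's class, which is what isr_exec_cnt_want below
 * encodes: e.g., vector 0x45 (class 4) is delivered with TPR 0x3f
 * (class 3) but not with TPR 0x4f (class 4).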
10924 */ 10925 if (nr == 0x20) 10926 continue; 10927 10928 test_basic_vid(nr, /*tpr=*/0, VID_OP_SELF_IPI, 10929 /*isr_exec_cnt_want=*/1, 10930 /*eoi_exit_induced=*/false); 10931 for (tpr = 0; tpr < 256; tpr++) { 10932 u32 isr_exec_cnt_want = 10933 task_priority_class(nr) > 10934 task_priority_class(tpr) ? 1 : 0; 10935 10936 test_basic_vid(nr, tpr, VID_OP_NOP, 10937 isr_exec_cnt_want, 10938 /*eoi_exit_induced=*/false); 10939 test_basic_vid(nr, tpr, VID_OP_SET_CR8, 10940 isr_exec_cnt_want, 10941 /*eoi_exit_induced=*/false); 10942 } 10943 report(true, "TPR 0-255 for vector 0x%x.", nr); 10944 } 10945 } 10946 10947 /* Terminate the guest */ 10948 args->op = VID_OP_TERMINATE; 10949 enter_guest(); 10950 assert_exit_reason(VMX_VMCALL); 10951 } 10952 10953 static void test_eoi_virt(u8 nr, u8 lo_pri_nr, bool eoi_exit_induced) 10954 { 10955 u32 *virtual_apic_page = get_vapic_page(); 10956 10957 set_virr_bit(virtual_apic_page, lo_pri_nr); 10958 test_basic_vid(nr, /*tpr=*/0, VID_OP_NOP, /*isr_exec_cnt_want=*/2, 10959 eoi_exit_induced); 10960 TEST_ASSERT(!get_virr_bit(virtual_apic_page, lo_pri_nr)); 10961 TEST_ASSERT(!get_virr_bit(virtual_apic_page, nr)); 10962 } 10963 10964 static void vmx_eoi_virt_test(void) 10965 { 10966 volatile struct vmx_basic_vid_test_guest_args *args = 10967 &vmx_basic_vid_test_guest_args; 10968 u16 nr; 10969 u16 lo_pri_nr; 10970 10971 if (!cpu_has_apicv()) { 10972 report_skip("%s : Not all required APICv bits supported", __func__); 10973 return; 10974 } 10975 10976 enable_vid(); /* Note, enable_vid sets APIC_VIRT_ADDR field in VMCS. */ 10977 test_set_guest(vmx_basic_vid_test_guest); 10978 set_isrs_for_vmx_basic_vid_test(); 10979 10980 /* Now test EOI virtualization without induced EOI exits. */ 10981 for (nr = 0x22; nr < 0x100; nr++) { 10982 for (lo_pri_nr = 0x21; lo_pri_nr < nr; lo_pri_nr++) 10983 test_eoi_virt(nr, lo_pri_nr, 10984 /*eoi_exit_induced=*/false); 10985 10986 report(true, "Low priority nrs 0x21-0x%x for nr 0x%x.", 10987 nr - 1, nr); 10988 } 10989 10990 /* Finally, test EOI virtualization with induced EOI exits. 
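 * With every bit set in the EOI-exit bitmaps below, each virtualized
 * EOI triggers a VMX_EOI_INDUCED exit, so delivering the two nested
 * vectors produces two such exits before the final vmcall;
 * test_basic_vid() consumes them in its eoi_exit_induced loop.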
*/ 10991 vmcs_write(EOI_EXIT_BITMAP0, GENMASK_ULL(63, 0)); 10992 vmcs_write(EOI_EXIT_BITMAP1, GENMASK_ULL(63, 0)); 10993 vmcs_write(EOI_EXIT_BITMAP2, GENMASK_ULL(63, 0)); 10994 vmcs_write(EOI_EXIT_BITMAP3, GENMASK_ULL(63, 0)); 10995 for (nr = 0x22; nr < 0x100; nr++) { 10996 for (lo_pri_nr = 0x21; lo_pri_nr < nr; lo_pri_nr++) 10997 test_eoi_virt(nr, lo_pri_nr, 10998 /*eoi_exit_induced=*/true); 10999 11000 report(true, 11001 "Low priority nrs 0x21-0x%x for nr 0x%x, with induced EOI exits.", 11002 nr - 1, nr); 11003 } 11004 11005 /* Terminate the guest */ 11006 args->op = VID_OP_TERMINATE; 11007 enter_guest(); 11008 assert_exit_reason(VMX_VMCALL); 11009 } 11010 11011 #define TEST(name) { #name, .v2 = name } 11012 11013 /* name/init/guest_main/exit_handler/syscall_handler/guest_regs */ 11014 struct vmx_test vmx_tests[] = { 11015 { "null", NULL, basic_guest_main, basic_exit_handler, NULL, {0} }, 11016 { "vmenter", NULL, vmenter_main, vmenter_exit_handler, NULL, {0} }, 11017 { "preemption timer", preemption_timer_init, preemption_timer_main, 11018 preemption_timer_exit_handler, NULL, {0} }, 11019 { "control field PAT", test_ctrl_pat_init, test_ctrl_pat_main, 11020 test_ctrl_pat_exit_handler, NULL, {0} }, 11021 { "control field EFER", test_ctrl_efer_init, test_ctrl_efer_main, 11022 test_ctrl_efer_exit_handler, NULL, {0} }, 11023 { "CR shadowing", NULL, cr_shadowing_main, 11024 cr_shadowing_exit_handler, NULL, {0} }, 11025 { "I/O bitmap", iobmp_init, iobmp_main, iobmp_exit_handler, 11026 NULL, {0} }, 11027 { "instruction intercept", insn_intercept_init, insn_intercept_main, 11028 insn_intercept_exit_handler, NULL, {0} }, 11029 { "EPT A/D disabled", ept_init, ept_main, ept_exit_handler, NULL, {0} }, 11030 { "EPT A/D enabled", eptad_init, eptad_main, eptad_exit_handler, NULL, {0} }, 11031 { "PML", pml_init, pml_main, pml_exit_handler, NULL, {0} }, 11032 { "interrupt", interrupt_init, interrupt_main, 11033 interrupt_exit_handler, NULL, {0} }, 11034 { "nmi_hlt", nmi_hlt_init, nmi_hlt_main, 11035 nmi_hlt_exit_handler, NULL, {0} }, 11036 { "debug controls", dbgctls_init, dbgctls_main, dbgctls_exit_handler, 11037 NULL, {0} }, 11038 { "MSR switch", msr_switch_init, msr_switch_main, 11039 msr_switch_exit_handler, NULL, {0}, msr_switch_entry_failure }, 11040 { "vmmcall", vmmcall_init, vmmcall_main, vmmcall_exit_handler, NULL, {0} }, 11041 { "disable RDTSCP", disable_rdtscp_init, disable_rdtscp_main, 11042 disable_rdtscp_exit_handler, NULL, {0} }, 11043 { "exit_monitor_from_l2_test", NULL, exit_monitor_from_l2_main, 11044 exit_monitor_from_l2_handler, NULL, {0} }, 11045 { "invalid_msr", invalid_msr_init, invalid_msr_main, 11046 invalid_msr_exit_handler, NULL, {0}, invalid_msr_entry_failure}, 11047 /* Basic V2 tests. */ 11048 TEST(v2_null_test), 11049 TEST(v2_multiple_entries_test), 11050 TEST(fixture_test_case1), 11051 TEST(fixture_test_case2), 11052 /* Opcode tests. 
*/ 11053 TEST(invvpid_test), 11054 /* VM-entry tests */ 11055 TEST(vmx_controls_test), 11056 TEST(vmx_host_state_area_test), 11057 TEST(vmx_guest_state_area_test), 11058 TEST(vmentry_movss_shadow_test), 11059 TEST(vmentry_unrestricted_guest_test), 11060 /* APICv tests */ 11061 TEST(vmx_eoi_bitmap_ioapic_scan_test), 11062 TEST(vmx_hlt_with_rvi_test), 11063 TEST(apic_reg_virt_test), 11064 TEST(virt_x2apic_mode_test), 11065 TEST(vmx_basic_vid_test), 11066 TEST(vmx_eoi_virt_test), 11067 /* APIC pass-through tests */ 11068 TEST(vmx_apic_passthrough_test), 11069 TEST(vmx_apic_passthrough_thread_test), 11070 TEST(vmx_apic_passthrough_tpr_threshold_test), 11071 TEST(vmx_init_signal_test), 11072 TEST(vmx_sipi_signal_test), 11073 /* VMCS Shadowing tests */ 11074 TEST(vmx_vmcs_shadow_test), 11075 /* Regression tests */ 11076 TEST(vmx_ldtr_test), 11077 TEST(vmx_cr_load_test), 11078 TEST(vmx_cr4_osxsave_test), 11079 TEST(vmx_no_nm_test), 11080 TEST(vmx_db_test), 11081 TEST(vmx_nmi_window_test), 11082 TEST(vmx_intr_window_test), 11083 TEST(vmx_pending_event_test), 11084 TEST(vmx_pending_event_hlt_test), 11085 TEST(vmx_store_tsc_test), 11086 TEST(vmx_preemption_timer_zero_test), 11087 TEST(vmx_preemption_timer_tf_test), 11088 TEST(vmx_preemption_timer_expiry_test), 11089 /* EPT access tests. */ 11090 TEST(ept_access_test_not_present), 11091 TEST(ept_access_test_read_only), 11092 TEST(ept_access_test_write_only), 11093 TEST(ept_access_test_read_write), 11094 TEST(ept_access_test_execute_only), 11095 TEST(ept_access_test_read_execute), 11096 TEST(ept_access_test_write_execute), 11097 TEST(ept_access_test_read_write_execute), 11098 TEST(ept_access_test_reserved_bits), 11099 TEST(ept_access_test_ignored_bits), 11100 TEST(ept_access_test_paddr_not_present_ad_disabled), 11101 TEST(ept_access_test_paddr_not_present_ad_enabled), 11102 TEST(ept_access_test_paddr_read_only_ad_disabled), 11103 TEST(ept_access_test_paddr_read_only_ad_enabled), 11104 TEST(ept_access_test_paddr_read_write), 11105 TEST(ept_access_test_paddr_read_write_execute), 11106 TEST(ept_access_test_paddr_read_execute_ad_disabled), 11107 TEST(ept_access_test_paddr_read_execute_ad_enabled), 11108 TEST(ept_access_test_paddr_not_present_page_fault), 11109 TEST(ept_access_test_force_2m_page), 11110 /* Atomic MSR switch tests. */ 11111 TEST(atomic_switch_max_msrs_test), 11112 TEST(atomic_switch_overflow_msrs_test), 11113 TEST(rdtsc_vmexit_diff_test), 11114 TEST(vmx_mtf_test), 11115 TEST(vmx_mtf_pdpte_test), 11116 TEST(vmx_pf_exception_test), 11117 TEST(vmx_pf_exception_forced_emulation_test), 11118 TEST(vmx_pf_no_vpid_test), 11119 TEST(vmx_pf_invvpid_test), 11120 TEST(vmx_pf_vpid_test), 11121 TEST(vmx_exception_test), 11122 { NULL, NULL, NULL, NULL, NULL, {0} }, 11123 }; 11124
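/*
 * Adding a new V2-style test boils down to a guest function, a host-side
 * driver that reports a result, and a TEST() entry in vmx_tests[] above.
 * A minimal hypothetical sketch (the my_feature names are illustrative
 * only, not part of this file):
 *
 *	static void my_feature_guest(void)
 *	{
 *		vmcall();
 *	}
 *
 *	static void vmx_my_feature_test(void)
 *	{
 *		test_set_guest(my_feature_guest);
 *		enter_guest();
 *		skip_exit_vmcall();
 *		enter_guest();
 *		report_pass(__func__);
 *	}
 *
 * registered with TEST(vmx_my_feature_test) in the list above.
 */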