1 #include "libcflat.h" 2 #include "desc.h" 3 #include "processor.h" 4 #include "asm/page.h" 5 #include "x86/vm.h" 6 #include "access.h" 7 8 static bool verbose = false; 9 10 typedef unsigned long pt_element_t; 11 static int invalid_mask; 12 13 /* Test code/data is at 32MiB, paging structures at 33MiB. */ 14 #define AT_CODE_DATA_PHYS 32 * 1024 * 1024 15 #define AT_PAGING_STRUCTURES_PHYS 33 * 1024 * 1024 16 17 #define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 36) - 1) & PAGE_MASK)) 18 #define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(1ull << 21)) 19 20 #define PFERR_PRESENT_MASK (1U << 0) 21 #define PFERR_WRITE_MASK (1U << 1) 22 #define PFERR_USER_MASK (1U << 2) 23 #define PFERR_RESERVED_MASK (1U << 3) 24 #define PFERR_FETCH_MASK (1U << 4) 25 #define PFERR_PK_MASK (1U << 5) 26 27 #define MSR_EFER 0xc0000080 28 #define EFER_NX_MASK (1ull << 11) 29 30 #define PT_INDEX(address, level) \ 31 (((address) >> (12 + ((level)-1) * 9)) & 511) 32 33 /* 34 * Page table access check tests. Each number/bit represent an individual 35 * test case. The main test will bump a counter by 1 to run all permutations 36 * of the below test cases (sans illegal combinations). 37 * 38 * Keep the PRESENT and reserved bits in the higher numbers so that they aren't 39 * toggled on every test, e.g. to keep entries in the TLB. 40 */ 41 enum { 42 AC_PTE_WRITABLE_BIT, 43 AC_PTE_USER_BIT, 44 AC_PTE_ACCESSED_BIT, 45 AC_PTE_DIRTY_BIT, 46 AC_PTE_NX_BIT, 47 AC_PTE_PRESENT_BIT, 48 AC_PTE_BIT51_BIT, 49 AC_PTE_BIT36_BIT, 50 51 AC_PDE_WRITABLE_BIT, 52 AC_PDE_USER_BIT, 53 AC_PDE_ACCESSED_BIT, 54 AC_PDE_DIRTY_BIT, 55 AC_PDE_PSE_BIT, 56 AC_PDE_NX_BIT, 57 AC_PDE_PRESENT_BIT, 58 AC_PDE_BIT51_BIT, 59 AC_PDE_BIT36_BIT, 60 AC_PDE_BIT13_BIT, 61 62 /* 63 * special test case to DISABLE writable bit on page directory 64 * pointer table entry. 65 */ 66 AC_PDPTE_NO_WRITABLE_BIT, 67 68 AC_PKU_AD_BIT, 69 AC_PKU_WD_BIT, 70 AC_PKU_PKEY_BIT, 71 72 AC_ACCESS_USER_BIT, 73 AC_ACCESS_WRITE_BIT, 74 AC_ACCESS_FETCH_BIT, 75 AC_ACCESS_TWICE_BIT, 76 77 AC_CPU_EFER_NX_BIT, 78 AC_CPU_CR0_WP_BIT, 79 AC_CPU_CR4_SMEP_BIT, 80 AC_CPU_CR4_PKE_BIT, 81 82 AC_FEP_BIT, 83 84 NR_AC_FLAGS, 85 }; 86 87 #define AC_PTE_PRESENT_MASK (1 << AC_PTE_PRESENT_BIT) 88 #define AC_PTE_WRITABLE_MASK (1 << AC_PTE_WRITABLE_BIT) 89 #define AC_PTE_USER_MASK (1 << AC_PTE_USER_BIT) 90 #define AC_PTE_ACCESSED_MASK (1 << AC_PTE_ACCESSED_BIT) 91 #define AC_PTE_DIRTY_MASK (1 << AC_PTE_DIRTY_BIT) 92 #define AC_PTE_NX_MASK (1 << AC_PTE_NX_BIT) 93 #define AC_PTE_BIT51_MASK (1 << AC_PTE_BIT51_BIT) 94 #define AC_PTE_BIT36_MASK (1 << AC_PTE_BIT36_BIT) 95 96 #define AC_PDE_PRESENT_MASK (1 << AC_PDE_PRESENT_BIT) 97 #define AC_PDE_WRITABLE_MASK (1 << AC_PDE_WRITABLE_BIT) 98 #define AC_PDE_USER_MASK (1 << AC_PDE_USER_BIT) 99 #define AC_PDE_ACCESSED_MASK (1 << AC_PDE_ACCESSED_BIT) 100 #define AC_PDE_DIRTY_MASK (1 << AC_PDE_DIRTY_BIT) 101 #define AC_PDE_PSE_MASK (1 << AC_PDE_PSE_BIT) 102 #define AC_PDE_NX_MASK (1 << AC_PDE_NX_BIT) 103 #define AC_PDE_BIT51_MASK (1 << AC_PDE_BIT51_BIT) 104 #define AC_PDE_BIT36_MASK (1 << AC_PDE_BIT36_BIT) 105 #define AC_PDE_BIT13_MASK (1 << AC_PDE_BIT13_BIT) 106 107 #define AC_PDPTE_NO_WRITABLE_MASK (1 << AC_PDPTE_NO_WRITABLE_BIT) 108 109 #define AC_PKU_AD_MASK (1 << AC_PKU_AD_BIT) 110 #define AC_PKU_WD_MASK (1 << AC_PKU_WD_BIT) 111 #define AC_PKU_PKEY_MASK (1 << AC_PKU_PKEY_BIT) 112 113 #define AC_ACCESS_USER_MASK (1 << AC_ACCESS_USER_BIT) 114 #define AC_ACCESS_WRITE_MASK (1 << AC_ACCESS_WRITE_BIT) 115 #define AC_ACCESS_FETCH_MASK (1 << AC_ACCESS_FETCH_BIT) 116 #define AC_ACCESS_TWICE_MASK (1 << AC_ACCESS_TWICE_BIT) 117 118 #define AC_CPU_EFER_NX_MASK (1 << AC_CPU_EFER_NX_BIT) 119 #define AC_CPU_CR0_WP_MASK (1 << AC_CPU_CR0_WP_BIT) 120 #define AC_CPU_CR4_SMEP_MASK (1 << AC_CPU_CR4_SMEP_BIT) 121 #define AC_CPU_CR4_PKE_MASK (1 << AC_CPU_CR4_PKE_BIT) 122 123 #define AC_FEP_MASK (1 << AC_FEP_BIT) 124 125 const char *ac_names[] = { 126 [AC_PTE_PRESENT_BIT] = "pte.p", 127 [AC_PTE_ACCESSED_BIT] = "pte.a", 128 [AC_PTE_WRITABLE_BIT] = "pte.rw", 129 [AC_PTE_USER_BIT] = "pte.user", 130 [AC_PTE_DIRTY_BIT] = "pte.d", 131 [AC_PTE_NX_BIT] = "pte.nx", 132 [AC_PTE_BIT51_BIT] = "pte.51", 133 [AC_PTE_BIT36_BIT] = "pte.36", 134 [AC_PDE_PRESENT_BIT] = "pde.p", 135 [AC_PDE_ACCESSED_BIT] = "pde.a", 136 [AC_PDE_WRITABLE_BIT] = "pde.rw", 137 [AC_PDE_USER_BIT] = "pde.user", 138 [AC_PDE_DIRTY_BIT] = "pde.d", 139 [AC_PDE_PSE_BIT] = "pde.pse", 140 [AC_PDE_NX_BIT] = "pde.nx", 141 [AC_PDE_BIT51_BIT] = "pde.51", 142 [AC_PDE_BIT36_BIT] = "pde.36", 143 [AC_PDE_BIT13_BIT] = "pde.13", 144 [AC_PDPTE_NO_WRITABLE_BIT] = "pdpte.ro", 145 [AC_PKU_AD_BIT] = "pkru.ad", 146 [AC_PKU_WD_BIT] = "pkru.wd", 147 [AC_PKU_PKEY_BIT] = "pkey=1", 148 [AC_ACCESS_WRITE_BIT] = "write", 149 [AC_ACCESS_USER_BIT] = "user", 150 [AC_ACCESS_FETCH_BIT] = "fetch", 151 [AC_ACCESS_TWICE_BIT] = "twice", 152 [AC_CPU_EFER_NX_BIT] = "efer.nx", 153 [AC_CPU_CR0_WP_BIT] = "cr0.wp", 154 [AC_CPU_CR4_SMEP_BIT] = "cr4.smep", 155 [AC_CPU_CR4_PKE_BIT] = "cr4.pke", 156 [AC_FEP_BIT] = "fep", 157 }; 158 159 static inline void *va(pt_element_t phys) 160 { 161 return (void *)phys; 162 } 163 164 typedef struct { 165 pt_element_t pt_pool_pa; 166 unsigned int pt_pool_current; 167 int pt_levels; 168 } ac_pt_env_t; 169 170 typedef struct { 171 unsigned flags; 172 void *virt; 173 pt_element_t phys; 174 pt_element_t *ptep; 175 pt_element_t expected_pte; 176 pt_element_t *pdep; 177 pt_element_t expected_pde; 178 pt_element_t ignore_pde; 179 int expected_fault; 180 unsigned expected_error; 181 int pt_levels; 182 183 /* 5-level paging, 1-based to avoid math. */ 184 pt_element_t page_tables[6]; 185 } ac_test_t; 186 187 typedef struct { 188 unsigned short limit; 189 unsigned long linear_addr; 190 } __attribute__((packed)) descriptor_table_t; 191 192 193 static void ac_test_show(ac_test_t *at); 194 195 static unsigned long shadow_cr0; 196 static unsigned long shadow_cr3; 197 static unsigned long shadow_cr4; 198 static unsigned long long shadow_efer; 199 200 typedef void (*walk_fn)(pt_element_t *ptep, int level, unsigned long virt); 201 202 /* Returns the size of the range covered by the last processed entry. */ 203 static unsigned long walk_va(ac_test_t *at, int min_level, unsigned long virt, 204 walk_fn callback, bool leaf_only) 205 { 206 unsigned long parent_pte = shadow_cr3; 207 int i; 208 209 for (i = at->pt_levels; i >= min_level; --i) { 210 pt_element_t *parent_pt = va(parent_pte & PT_BASE_ADDR_MASK); 211 unsigned int index = PT_INDEX(virt, i); 212 pt_element_t *ptep = &parent_pt[index]; 213 214 assert(!leaf_only || (*ptep & PT_PRESENT_MASK)); 215 216 if (!leaf_only || i == 1 || (*ptep & PT_PAGE_SIZE_MASK)) 217 callback(ptep, i, virt); 218 219 if (i == 1 || *ptep & PT_PAGE_SIZE_MASK) 220 break; 221 222 parent_pte = *ptep; 223 } 224 225 return 1ul << PGDIR_BITS(i); 226 } 227 228 static void walk_ptes(ac_test_t *at, unsigned long virt, unsigned long end, 229 walk_fn callback) 230 { 231 unsigned long page_size; 232 233 for ( ; virt < end; virt = ALIGN_DOWN(virt + page_size, page_size)) 234 page_size = walk_va(at, 1, virt, callback, true); 235 } 236 237 static void set_cr0_wp(int wp) 238 { 239 unsigned long cr0 = shadow_cr0; 240 241 cr0 &= ~X86_CR0_WP; 242 if (wp) 243 cr0 |= X86_CR0_WP; 244 if (cr0 != shadow_cr0) { 245 write_cr0(cr0); 246 shadow_cr0 = cr0; 247 } 248 } 249 250 static void clear_user_mask(pt_element_t *ptep, int level, unsigned long virt) 251 { 252 *ptep &= ~PT_USER_MASK; 253 254 /* Flush to avoid spurious #PF */ 255 invlpg((void*)virt); 256 } 257 258 static void set_user_mask(pt_element_t *ptep, int level, unsigned long virt) 259 { 260 *ptep |= PT_USER_MASK; 261 262 /* Flush to avoid spurious #PF */ 263 invlpg((void*)virt); 264 } 265 266 static unsigned set_cr4_smep(ac_test_t *at, int smep) 267 { 268 extern char stext, etext; 269 unsigned long code_start = (unsigned long)&stext; 270 unsigned long code_end = (unsigned long)&etext; 271 unsigned long cr4 = shadow_cr4; 272 unsigned r; 273 274 cr4 &= ~X86_CR4_SMEP; 275 if (smep) 276 cr4 |= X86_CR4_SMEP; 277 if (cr4 == shadow_cr4) 278 return 0; 279 280 if (smep) 281 walk_ptes(at, code_start, code_end, clear_user_mask); 282 r = write_cr4_safe(cr4); 283 if (r || !smep) 284 walk_ptes(at, code_start, code_end, set_user_mask); 285 if (!r) 286 shadow_cr4 = cr4; 287 return r; 288 } 289 290 static void set_cr4_pke(int pke) 291 { 292 unsigned long cr4 = shadow_cr4; 293 294 cr4 &= ~X86_CR4_PKE; 295 if (pke) 296 cr4 |= X86_CR4_PKE; 297 if (cr4 == shadow_cr4) 298 return; 299 300 /* Check that protection keys do not affect accesses when CR4.PKE=0. */ 301 if ((shadow_cr4 & X86_CR4_PKE) && !pke) 302 write_pkru(0xfffffffc); 303 write_cr4(cr4); 304 shadow_cr4 = cr4; 305 } 306 307 static void set_efer_nx(int nx) 308 { 309 unsigned long long efer = shadow_efer; 310 311 efer &= ~EFER_NX_MASK; 312 if (nx) 313 efer |= EFER_NX_MASK; 314 if (efer != shadow_efer) { 315 wrmsr(MSR_EFER, efer); 316 shadow_efer = efer; 317 } 318 } 319 320 static void ac_env_int(ac_pt_env_t *pt_env, int page_table_levels) 321 { 322 extern char page_fault, kernel_entry; 323 set_idt_entry(14, &page_fault, 0); 324 set_idt_entry(0x20, &kernel_entry, 3); 325 326 pt_env->pt_pool_pa = AT_PAGING_STRUCTURES_PHYS; 327 pt_env->pt_pool_current = 0; 328 pt_env->pt_levels = page_table_levels; 329 } 330 331 static pt_element_t ac_test_alloc_pt(ac_pt_env_t *pt_env) 332 { 333 pt_element_t pt; 334 335 /* 336 * Each test needs at most pt_levels-1 structures per virtual address, 337 * and no existing scenario uses more than four addresses. 338 */ 339 assert(pt_env->pt_pool_current < (4 * (pt_env->pt_levels - 1))); 340 341 pt = pt_env->pt_pool_pa + (pt_env->pt_pool_current * PAGE_SIZE); 342 pt_env->pt_pool_current++; 343 memset(va(pt), 0, PAGE_SIZE); 344 return pt; 345 } 346 347 static void __ac_test_init(ac_test_t *at, unsigned long virt, 348 ac_pt_env_t *pt_env, ac_test_t *buddy) 349 { 350 unsigned long buddy_virt = buddy ? (unsigned long)buddy->virt : 0; 351 pt_element_t *root_pt = va(shadow_cr3 & PT_BASE_ADDR_MASK); 352 int i; 353 354 /* 355 * The test infrastructure, e.g. this function, must use a different 356 * top-level SPTE than the test, otherwise modifying SPTEs can affect 357 * normal behavior, e.g. crash the test due to marking code SPTEs 358 * USER when CR4.SMEP=1. 359 */ 360 assert(PT_INDEX(virt, pt_env->pt_levels) != 361 PT_INDEX((unsigned long)__ac_test_init, pt_env->pt_levels)); 362 363 set_efer_nx(1); 364 set_cr0_wp(1); 365 at->flags = 0; 366 at->virt = (void *)virt; 367 at->phys = AT_CODE_DATA_PHYS; 368 at->pt_levels = pt_env->pt_levels; 369 370 at->page_tables[0] = -1ull; 371 at->page_tables[1] = -1ull; 372 373 /* 374 * Zap the existing top-level PTE as it may be reused from a previous 375 * sub-test. This allows runtime PTE modification to assert that two 376 * overlapping walks don't try to install different paging structures. 377 */ 378 root_pt[PT_INDEX(virt, pt_env->pt_levels)] = 0; 379 380 for (i = at->pt_levels; i > 1; i--) { 381 /* 382 * Buddies can reuse any part of the walk that share the same 383 * index. This is weird, but intentional, as several tests 384 * want different walks to merge at lower levels. 385 */ 386 if (buddy && PT_INDEX(virt, i) == PT_INDEX(buddy_virt, i)) 387 at->page_tables[i] = buddy->page_tables[i]; 388 else 389 at->page_tables[i] = ac_test_alloc_pt(pt_env); 390 } 391 } 392 393 static void ac_test_init(ac_test_t *at, unsigned long virt, ac_pt_env_t *pt_env) 394 { 395 __ac_test_init(at, virt, pt_env, NULL); 396 } 397 398 static int ac_test_bump_one(ac_test_t *at) 399 { 400 at->flags = ((at->flags | invalid_mask) + 1) & ~invalid_mask; 401 return at->flags < (1 << NR_AC_FLAGS); 402 } 403 404 #define F(x) ((flags & x##_MASK) != 0) 405 406 static bool ac_test_legal(ac_test_t *at) 407 { 408 int flags = at->flags; 409 unsigned reserved; 410 411 if (F(AC_CPU_CR4_SMEP)) 412 return false; 413 414 if (F(AC_ACCESS_FETCH) && F(AC_ACCESS_WRITE)) 415 return false; 416 417 /* 418 * Since we convert current page to kernel page when cr4.smep=1, 419 * we can't switch to user mode. 420 */ 421 if (F(AC_ACCESS_USER) && F(AC_CPU_CR4_SMEP)) 422 return false; 423 424 /* 425 * Only test protection key faults if CR4.PKE=1. 426 */ 427 if (!F(AC_CPU_CR4_PKE) && 428 (F(AC_PKU_AD) || F(AC_PKU_WD))) { 429 return false; 430 } 431 432 /* 433 * pde.bit13 checks handling of reserved bits in largepage PDEs. It is 434 * meaningless if there is a PTE. 435 */ 436 if (!F(AC_PDE_PSE) && F(AC_PDE_BIT13)) 437 return false; 438 439 /* 440 * Shorten the test by avoiding testing too many reserved bit combinations. 441 * Skip testing multiple reserved bits to shorten the test. Reserved bit 442 * page faults are terminal and multiple reserved bits do not affect the 443 * error code; the odds of a KVM bug are super low, and the odds of actually 444 * being able to detect a bug are even lower. 445 */ 446 reserved = (AC_PDE_BIT51_MASK | AC_PDE_BIT36_MASK | AC_PDE_BIT13_MASK | 447 AC_PTE_BIT51_MASK | AC_PTE_BIT36_MASK); 448 if (!F(AC_CPU_EFER_NX)) 449 reserved |= AC_PDE_NX_MASK | AC_PTE_NX_MASK; 450 451 /* Only test one reserved bit at a time. */ 452 reserved &= flags; 453 if (reserved & (reserved - 1)) 454 return false; 455 456 return true; 457 } 458 459 static int ac_test_bump(ac_test_t *at) 460 { 461 int ret; 462 463 do { 464 ret = ac_test_bump_one(at); 465 } while (ret && !ac_test_legal(at)); 466 467 return ret; 468 } 469 470 static pt_element_t ac_test_permissions(ac_test_t *at, unsigned flags, 471 bool writable, bool user, 472 bool executable) 473 { 474 bool kwritable = !F(AC_CPU_CR0_WP) && !F(AC_ACCESS_USER); 475 pt_element_t expected = 0; 476 477 if (F(AC_ACCESS_USER) && !user) 478 at->expected_fault = 1; 479 480 if (F(AC_ACCESS_WRITE) && !writable && !kwritable) 481 at->expected_fault = 1; 482 483 if (F(AC_ACCESS_FETCH) && !executable) 484 at->expected_fault = 1; 485 486 if (F(AC_ACCESS_FETCH) && user && F(AC_CPU_CR4_SMEP)) 487 at->expected_fault = 1; 488 489 if (user && !F(AC_ACCESS_FETCH) && F(AC_PKU_PKEY) && F(AC_CPU_CR4_PKE)) { 490 if (F(AC_PKU_AD)) { 491 at->expected_fault = 1; 492 at->expected_error |= PFERR_PK_MASK; 493 } else if (F(AC_ACCESS_WRITE) && F(AC_PKU_WD) && !kwritable) { 494 at->expected_fault = 1; 495 at->expected_error |= PFERR_PK_MASK; 496 } 497 } 498 499 if (!at->expected_fault) { 500 expected |= PT_ACCESSED_MASK; 501 if (F(AC_ACCESS_WRITE)) 502 expected |= PT_DIRTY_MASK; 503 } 504 505 return expected; 506 } 507 508 static void ac_emulate_access(ac_test_t *at, unsigned flags) 509 { 510 bool pde_valid, pte_valid; 511 bool user, writable, executable; 512 513 if (F(AC_ACCESS_USER)) 514 at->expected_error |= PFERR_USER_MASK; 515 516 if (F(AC_ACCESS_WRITE)) 517 at->expected_error |= PFERR_WRITE_MASK; 518 519 if (F(AC_ACCESS_FETCH)) 520 at->expected_error |= PFERR_FETCH_MASK; 521 522 if (!F(AC_PDE_ACCESSED)) 523 at->ignore_pde = PT_ACCESSED_MASK; 524 525 pde_valid = F(AC_PDE_PRESENT) 526 && !F(AC_PDE_BIT51) && !F(AC_PDE_BIT36) && !F(AC_PDE_BIT13) 527 && !(F(AC_PDE_NX) && !F(AC_CPU_EFER_NX)); 528 529 if (!pde_valid) { 530 at->expected_fault = 1; 531 if (F(AC_PDE_PRESENT)) { 532 at->expected_error |= PFERR_RESERVED_MASK; 533 } else { 534 at->expected_error &= ~PFERR_PRESENT_MASK; 535 } 536 goto fault; 537 } 538 539 writable = !F(AC_PDPTE_NO_WRITABLE) && F(AC_PDE_WRITABLE); 540 user = F(AC_PDE_USER); 541 executable = !F(AC_PDE_NX); 542 543 if (F(AC_PDE_PSE)) { 544 at->expected_pde |= ac_test_permissions(at, flags, writable, 545 user, executable); 546 goto no_pte; 547 } 548 549 at->expected_pde |= PT_ACCESSED_MASK; 550 551 pte_valid = F(AC_PTE_PRESENT) 552 && !F(AC_PTE_BIT51) && !F(AC_PTE_BIT36) 553 && !(F(AC_PTE_NX) && !F(AC_CPU_EFER_NX)); 554 555 if (!pte_valid) { 556 at->expected_fault = 1; 557 if (F(AC_PTE_PRESENT)) { 558 at->expected_error |= PFERR_RESERVED_MASK; 559 } else { 560 at->expected_error &= ~PFERR_PRESENT_MASK; 561 } 562 goto fault; 563 } 564 565 writable &= F(AC_PTE_WRITABLE); 566 user &= F(AC_PTE_USER); 567 executable &= !F(AC_PTE_NX); 568 569 at->expected_pte |= ac_test_permissions(at, flags, writable, user, 570 executable); 571 572 no_pte: 573 fault: 574 if (!at->expected_fault) 575 at->ignore_pde = 0; 576 if (!F(AC_CPU_EFER_NX) && !F(AC_CPU_CR4_SMEP)) 577 at->expected_error &= ~PFERR_FETCH_MASK; 578 } 579 580 static void __ac_set_expected_status(ac_test_t *at, bool flush) 581 { 582 if (flush) 583 invlpg(at->virt); 584 585 if (at->ptep) 586 at->expected_pte = *at->ptep; 587 at->expected_pde = *at->pdep; 588 at->ignore_pde = 0; 589 at->expected_fault = 0; 590 at->expected_error = PFERR_PRESENT_MASK; 591 592 if (at->flags & AC_ACCESS_TWICE_MASK) { 593 ac_emulate_access(at, at->flags & 594 ~AC_ACCESS_WRITE_MASK & 595 ~AC_ACCESS_FETCH_MASK & 596 ~AC_ACCESS_USER_MASK); 597 at->expected_fault = 0; 598 at->expected_error = PFERR_PRESENT_MASK; 599 at->ignore_pde = 0; 600 } 601 602 ac_emulate_access(at, at->flags); 603 } 604 605 static void ac_set_expected_status(ac_test_t *at) 606 { 607 __ac_set_expected_status(at, true); 608 } 609 610 static pt_element_t ac_get_pt(ac_test_t *at, int i, pt_element_t *ptep) 611 { 612 pt_element_t pte; 613 614 pte = *ptep; 615 if (pte && !(pte & PT_PAGE_SIZE_MASK) && 616 (pte & PT_BASE_ADDR_MASK) != at->page_tables[i]) { 617 printf("\nPT collision. VA = 0x%lx, level = %d, index = %ld, found PT = 0x%lx, want PT = 0x%lx\n", 618 (unsigned long)at->virt, i, 619 PT_INDEX((unsigned long)at->virt, i), 620 pte, at->page_tables[i]); 621 abort(); 622 } 623 624 /* 625 * Preserve A/D bits to avoid writing upper level PTEs, 626 * which cannot be unsyc'd when KVM uses shadow paging. 627 */ 628 pte = at->page_tables[i] | (pte & (PT_DIRTY_MASK | PT_ACCESSED_MASK)); 629 return pte; 630 } 631 632 static void ac_test_setup_ptes(ac_test_t *at) 633 { 634 unsigned long parent_pte = shadow_cr3; 635 int flags = at->flags; 636 int i; 637 638 at->ptep = 0; 639 for (i = at->pt_levels; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) { 640 pt_element_t *parent_pt = va(parent_pte & PT_BASE_ADDR_MASK); 641 unsigned index = PT_INDEX((unsigned long)at->virt, i); 642 pt_element_t *ptep = &parent_pt[index]; 643 pt_element_t pte; 644 645 switch (i) { 646 case 5: 647 case 4: 648 pte = ac_get_pt(at, i, ptep); 649 pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; 650 break; 651 case 3: 652 pte = ac_get_pt(at, i, ptep); 653 pte |= PT_PRESENT_MASK | PT_USER_MASK; 654 if (!F(AC_PDPTE_NO_WRITABLE)) 655 pte |= PT_WRITABLE_MASK; 656 break; 657 case 2: 658 if (!F(AC_PDE_PSE)) { 659 pte = ac_get_pt(at, i, ptep); 660 661 /* The protection key is ignored on non-leaf entries. */ 662 if (F(AC_PKU_PKEY)) 663 pte |= 2ull << 59; 664 } else { 665 pte = at->phys & PT_PSE_BASE_ADDR_MASK; 666 pte |= PT_PAGE_SIZE_MASK; 667 if (F(AC_PKU_PKEY)) 668 pte |= 1ull << 59; 669 } 670 if (F(AC_PDE_PRESENT)) 671 pte |= PT_PRESENT_MASK; 672 if (F(AC_PDE_WRITABLE)) 673 pte |= PT_WRITABLE_MASK; 674 if (F(AC_PDE_USER)) 675 pte |= PT_USER_MASK; 676 if (F(AC_PDE_ACCESSED)) 677 pte |= PT_ACCESSED_MASK; 678 if (F(AC_PDE_DIRTY)) 679 pte |= PT_DIRTY_MASK; 680 if (F(AC_PDE_NX)) 681 pte |= PT64_NX_MASK; 682 if (F(AC_PDE_BIT51)) 683 pte |= 1ull << 51; 684 if (F(AC_PDE_BIT36)) 685 pte |= 1ull << 36; 686 if (F(AC_PDE_BIT13)) 687 pte |= 1ull << 13; 688 at->pdep = ptep; 689 break; 690 case 1: 691 pte = at->phys & PT_BASE_ADDR_MASK; 692 if (F(AC_PKU_PKEY)) 693 pte |= 1ull << 59; 694 if (F(AC_PTE_PRESENT)) 695 pte |= PT_PRESENT_MASK; 696 if (F(AC_PTE_WRITABLE)) 697 pte |= PT_WRITABLE_MASK; 698 if (F(AC_PTE_USER)) 699 pte |= PT_USER_MASK; 700 if (F(AC_PTE_ACCESSED)) 701 pte |= PT_ACCESSED_MASK; 702 if (F(AC_PTE_DIRTY)) 703 pte |= PT_DIRTY_MASK; 704 if (F(AC_PTE_NX)) 705 pte |= PT64_NX_MASK; 706 if (F(AC_PTE_BIT51)) 707 pte |= 1ull << 51; 708 if (F(AC_PTE_BIT36)) 709 pte |= 1ull << 36; 710 at->ptep = ptep; 711 break; 712 default: 713 assert(0); 714 } 715 716 if (pte != *ptep) 717 *ptep = pte; 718 719 parent_pte = pte; 720 } 721 ac_set_expected_status(at); 722 } 723 724 static void __dump_pte(pt_element_t *ptep, int level, unsigned long virt) 725 { 726 printf("------L%d I%lu: %lx\n", level, PT_INDEX(virt, level), *ptep); 727 } 728 729 static void dump_mapping(ac_test_t *at) 730 { 731 unsigned long virt = (unsigned long)at->virt; 732 int flags = at->flags; 733 734 printf("Dump mapping: address: %p\n", at->virt); 735 walk_va(at, F(AC_PDE_PSE) ? 2 : 1, virt, __dump_pte, false); 736 } 737 738 static void ac_test_check(ac_test_t *at, bool *success_ret, bool cond, 739 const char *fmt, ...) 740 { 741 va_list ap; 742 char buf[500]; 743 744 if (!*success_ret) { 745 return; 746 } 747 748 if (!cond) { 749 return; 750 } 751 752 *success_ret = false; 753 754 if (!verbose) { 755 puts("\n"); 756 ac_test_show(at); 757 } 758 759 va_start(ap, fmt); 760 vsnprintf(buf, sizeof(buf), fmt, ap); 761 va_end(ap); 762 printf("FAIL: %s\n", buf); 763 dump_mapping(at); 764 } 765 766 static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore) 767 { 768 pte1 &= ~ignore; 769 pte2 &= ~ignore; 770 return pte1 == pte2; 771 } 772 773 static int ac_test_do_access(ac_test_t *at) 774 { 775 static unsigned unique = 42; 776 int fault = 0; 777 unsigned e; 778 static unsigned char user_stack[4096]; 779 unsigned long rsp; 780 bool success = true; 781 int flags = at->flags; 782 783 ++unique; 784 if (!(unique & 65535)) { 785 puts("."); 786 } 787 788 *((unsigned char *)at->phys) = 0xc3; /* ret */ 789 790 unsigned r = unique; 791 set_cr0_wp(F(AC_CPU_CR0_WP)); 792 set_efer_nx(F(AC_CPU_EFER_NX)); 793 set_cr4_pke(F(AC_CPU_CR4_PKE)); 794 if (F(AC_CPU_CR4_PKE)) { 795 /* WD2=AD2=1, WD1=F(AC_PKU_WD), AD1=F(AC_PKU_AD) */ 796 write_pkru(0x30 | (F(AC_PKU_WD) ? 8 : 0) | 797 (F(AC_PKU_AD) ? 4 : 0)); 798 } 799 800 set_cr4_smep(at, F(AC_CPU_CR4_SMEP)); 801 802 if (F(AC_ACCESS_TWICE)) { 803 asm volatile ("mov $fixed2, %%rsi \n\t" 804 "cmp $0, %[fep] \n\t" 805 "jz 1f \n\t" 806 KVM_FEP 807 "1: mov (%[addr]), %[reg] \n\t" 808 "fixed2:" 809 : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e) 810 : [addr]"r"(at->virt), [fep]"r"(F(AC_FEP)) 811 : "rsi"); 812 fault = 0; 813 } 814 815 asm volatile ("mov $fixed1, %%rsi \n\t" 816 "mov %%rsp, %[rsp0] \n\t" 817 "cmp $0, %[user] \n\t" 818 "jz do_access \n\t" 819 "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax \n\t" 820 "pushq %[user_ds] \n\t" 821 "pushq %[user_stack_top] \n\t" 822 "pushfq \n\t" 823 "pushq %[user_cs] \n\t" 824 "pushq $do_access \n\t" 825 "iretq \n" 826 "do_access: \n\t" 827 "cmp $0, %[fetch] \n\t" 828 "jnz 2f \n\t" 829 "cmp $0, %[write] \n\t" 830 "jnz 1f \n\t" 831 "cmp $0, %[fep] \n\t" 832 "jz 0f \n\t" 833 KVM_FEP 834 "0: mov (%[addr]), %[reg] \n\t" 835 "jmp done \n\t" 836 "1: cmp $0, %[fep] \n\t" 837 "jz 0f \n\t" 838 KVM_FEP 839 "0: mov %[reg], (%[addr]) \n\t" 840 "jmp done \n\t" 841 "2: call *%[addr] \n\t" 842 "done: \n" 843 "fixed1: \n" 844 "int %[kernel_entry_vector] \n\t" 845 ".section .text.entry \n\t" 846 "kernel_entry: \n\t" 847 "mov %[rsp0], %%rsp \n\t" 848 "jmp back_to_kernel \n\t" 849 ".section .text \n\t" 850 "back_to_kernel:" 851 : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp), 852 [rsp0]"=m"(tss[0].rsp0) 853 : [addr]"r"(at->virt), 854 [write]"r"(F(AC_ACCESS_WRITE)), 855 [user]"r"(F(AC_ACCESS_USER)), 856 [fetch]"r"(F(AC_ACCESS_FETCH)), 857 [fep]"r"(F(AC_FEP)), 858 [user_ds]"i"(USER_DS), 859 [user_cs]"i"(USER_CS), 860 [user_stack_top]"r"(user_stack + sizeof user_stack), 861 [kernel_entry_vector]"i"(0x20) 862 : "rsi"); 863 864 asm volatile (".section .text.pf \n\t" 865 "page_fault: \n\t" 866 "pop %rbx \n\t" 867 "mov %rsi, (%rsp) \n\t" 868 "movl $1, %eax \n\t" 869 "iretq \n\t" 870 ".section .text"); 871 872 ac_test_check(at, &success, fault && !at->expected_fault, 873 "unexpected fault"); 874 ac_test_check(at, &success, !fault && at->expected_fault, 875 "unexpected access"); 876 ac_test_check(at, &success, fault && e != at->expected_error, 877 "error code %x expected %x", e, at->expected_error); 878 if (at->ptep) 879 ac_test_check(at, &success, *at->ptep != at->expected_pte, 880 "pte %x expected %x", *at->ptep, at->expected_pte); 881 ac_test_check(at, &success, 882 !pt_match(*at->pdep, at->expected_pde, at->ignore_pde), 883 "pde %x expected %x", *at->pdep, at->expected_pde); 884 885 if (success && verbose) { 886 if (at->expected_fault) { 887 printf("PASS (%x)\n", at->expected_error); 888 } else { 889 printf("PASS\n"); 890 } 891 } 892 return success; 893 } 894 895 static void ac_test_show(ac_test_t *at) 896 { 897 char line[5000]; 898 899 *line = 0; 900 strcat(line, "test"); 901 for (int i = 0; i < NR_AC_FLAGS; ++i) 902 if (at->flags & (1 << i)) { 903 strcat(line, " "); 904 strcat(line, ac_names[i]); 905 } 906 907 strcat(line, ": "); 908 printf("%s", line); 909 } 910 911 /* 912 * This test case is used to trigger the bug which is fixed by 913 * commit e09e90a5 in the kvm tree 914 */ 915 static int corrupt_hugepage_trigger(ac_pt_env_t *pt_env) 916 { 917 ac_test_t at1, at2; 918 919 ac_test_init(&at1, 0xffff923400000000ul, pt_env); 920 __ac_test_init(&at2, 0xffffe66600000000ul, pt_env, &at1); 921 922 at2.flags = AC_CPU_CR0_WP_MASK | AC_PDE_PSE_MASK | AC_PDE_PRESENT_MASK; 923 ac_test_setup_ptes(&at2); 924 if (!ac_test_do_access(&at2)) 925 goto err; 926 927 at1.flags = at2.flags | AC_PDE_WRITABLE_MASK; 928 ac_test_setup_ptes(&at1); 929 if (!ac_test_do_access(&at1)) 930 goto err; 931 932 at1.flags |= AC_ACCESS_WRITE_MASK; 933 ac_set_expected_status(&at1); 934 if (!ac_test_do_access(&at1)) 935 goto err; 936 937 at2.flags |= AC_ACCESS_WRITE_MASK; 938 ac_set_expected_status(&at2); 939 if (!ac_test_do_access(&at2)) 940 goto err; 941 942 return 1; 943 944 err: 945 printf("corrupt_hugepage_trigger test fail\n"); 946 return 0; 947 } 948 949 /* 950 * This test case is used to trigger the bug which is fixed by 951 * commit 3ddf6c06e13e in the kvm tree 952 */ 953 static int check_pfec_on_prefetch_pte(ac_pt_env_t *pt_env) 954 { 955 ac_test_t at1, at2; 956 957 ac_test_init(&at1, 0xffff923406001000ul, pt_env); 958 __ac_test_init(&at2, 0xffff923406003000ul, pt_env, &at1); 959 960 at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK; 961 ac_test_setup_ptes(&at1); 962 963 at2.flags = at1.flags | AC_PTE_NX_MASK; 964 ac_test_setup_ptes(&at2); 965 966 if (!ac_test_do_access(&at1)) { 967 printf("%s: prepare fail\n", __FUNCTION__); 968 goto err; 969 } 970 971 if (!ac_test_do_access(&at2)) { 972 printf("%s: check PFEC on prefetch pte path fail\n", 973 __FUNCTION__); 974 goto err; 975 } 976 977 return 1; 978 979 err: 980 return 0; 981 } 982 983 /* 984 * If the write-fault access is from supervisor and CR0.WP is not set on the 985 * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte 986 * and clears U bit. This is the chance that kvm can change pte access from 987 * readonly to writable. 988 * 989 * Unfortunately, the pte access is the access of 'direct' shadow page table, 990 * means direct sp.role.access = pte_access, then we will create a writable 991 * spte entry on the readonly shadow page table. It will cause Dirty bit is 992 * not tracked when two guest ptes point to the same large page. Note, it 993 * does not have other impact except Dirty bit since cr0.wp is encoded into 994 * sp.role. 995 * 996 * Note: to trigger this bug, hugepage should be disabled on host. 997 */ 998 static int check_large_pte_dirty_for_nowp(ac_pt_env_t *pt_env) 999 { 1000 ac_test_t at1, at2; 1001 1002 ac_test_init(&at1, 0xffff923403000000ul, pt_env); 1003 __ac_test_init(&at2, 0xffffe66606000000ul, pt_env, &at1); 1004 1005 at2.flags = AC_PDE_PRESENT_MASK | AC_PDE_PSE_MASK; 1006 ac_test_setup_ptes(&at2); 1007 if (!ac_test_do_access(&at2)) { 1008 printf("%s: read on the first mapping fail.\n", __FUNCTION__); 1009 goto err; 1010 } 1011 1012 at1.flags = at2.flags | AC_ACCESS_WRITE_MASK; 1013 ac_test_setup_ptes(&at1); 1014 if (!ac_test_do_access(&at1)) { 1015 printf("%s: write on the second mapping fail.\n", __FUNCTION__); 1016 goto err; 1017 } 1018 1019 at2.flags |= AC_ACCESS_WRITE_MASK; 1020 ac_set_expected_status(&at2); 1021 if (!ac_test_do_access(&at2)) { 1022 printf("%s: write on the first mapping fail.\n", __FUNCTION__); 1023 goto err; 1024 } 1025 1026 return 1; 1027 1028 err: 1029 return 0; 1030 } 1031 1032 static int check_smep_andnot_wp(ac_pt_env_t *pt_env) 1033 { 1034 ac_test_t at1; 1035 int err_prepare_andnot_wp, err_smep_andnot_wp; 1036 1037 if (!this_cpu_has(X86_FEATURE_SMEP)) { 1038 return 1; 1039 } 1040 1041 ac_test_init(&at1, 0xffff923406001000ul, pt_env); 1042 1043 at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK | 1044 AC_PDE_USER_MASK | AC_PTE_USER_MASK | 1045 AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK | 1046 AC_CPU_CR4_SMEP_MASK | 1047 AC_CPU_CR0_WP_MASK | 1048 AC_ACCESS_WRITE_MASK; 1049 ac_test_setup_ptes(&at1); 1050 1051 /* 1052 * Here we write the ro user page when 1053 * cr0.wp=0, then we execute it and SMEP 1054 * fault should happen. 1055 */ 1056 err_prepare_andnot_wp = ac_test_do_access(&at1); 1057 if (!err_prepare_andnot_wp) { 1058 printf("%s: SMEP prepare fail\n", __FUNCTION__); 1059 goto clean_up; 1060 } 1061 1062 at1.flags &= ~AC_ACCESS_WRITE_MASK; 1063 at1.flags |= AC_ACCESS_FETCH_MASK; 1064 ac_set_expected_status(&at1); 1065 err_smep_andnot_wp = ac_test_do_access(&at1); 1066 1067 clean_up: 1068 set_cr4_smep(&at1, 0); 1069 1070 if (!err_prepare_andnot_wp) 1071 goto err; 1072 if (!err_smep_andnot_wp) { 1073 printf("%s: check SMEP without wp fail\n", __FUNCTION__); 1074 goto err; 1075 } 1076 return 1; 1077 1078 err: 1079 return 0; 1080 } 1081 1082 #define TOGGLE_CR0_WP_TEST_BASE_FLAGS \ 1083 (AC_PDE_PRESENT_MASK | AC_PDE_ACCESSED_MASK | \ 1084 AC_PTE_PRESENT_MASK | AC_PTE_ACCESSED_MASK | \ 1085 AC_ACCESS_WRITE_MASK) 1086 1087 static int do_cr0_wp_access(ac_test_t *at, int flags) 1088 { 1089 const bool cr0_wp = !!(flags & AC_CPU_CR0_WP_MASK); 1090 1091 at->flags = TOGGLE_CR0_WP_TEST_BASE_FLAGS | flags; 1092 __ac_set_expected_status(at, false); 1093 1094 /* 1095 * Under VMX the guest might own the CR0.WP bit, requiring KVM to 1096 * manually keep track of it where needed, e.g. in the guest page 1097 * table walker. 1098 * 1099 * Load CR0.WP with the inverse value of what will be used during 1100 * the access test and toggle EFER.NX to coerce KVM into rebuilding 1101 * the current MMU context based on the soon-to-be-stale CR0.WP. 1102 */ 1103 set_cr0_wp(!cr0_wp); 1104 set_efer_nx(1); 1105 set_efer_nx(0); 1106 1107 if (!ac_test_do_access(at)) { 1108 printf("%s: %ssupervisor write with CR0.WP=%d did not %s\n", 1109 __FUNCTION__, (flags & AC_FEP_MASK) ? "emulated " : "", 1110 cr0_wp, cr0_wp ? "FAULT" : "SUCCEED"); 1111 return 1; 1112 } 1113 1114 return 0; 1115 } 1116 1117 static int check_toggle_cr0_wp(ac_pt_env_t *pt_env) 1118 { 1119 ac_test_t at; 1120 int err = 0; 1121 1122 ac_test_init(&at, 0xffff923042007000ul, pt_env); 1123 at.flags = TOGGLE_CR0_WP_TEST_BASE_FLAGS; 1124 ac_test_setup_ptes(&at); 1125 1126 err += do_cr0_wp_access(&at, 0); 1127 err += do_cr0_wp_access(&at, AC_CPU_CR0_WP_MASK); 1128 if (!(invalid_mask & AC_FEP_MASK)) { 1129 err += do_cr0_wp_access(&at, AC_FEP_MASK); 1130 err += do_cr0_wp_access(&at, AC_FEP_MASK | AC_CPU_CR0_WP_MASK); 1131 } 1132 1133 return err == 0; 1134 } 1135 1136 static int check_effective_sp_permissions(ac_pt_env_t *pt_env) 1137 { 1138 unsigned long ptr1 = 0xffff923480000000; 1139 unsigned long ptr2 = ptr1 + SZ_2M; 1140 unsigned long ptr3 = ptr1 + SZ_1G; 1141 unsigned long ptr4 = ptr3 + SZ_2M; 1142 ac_test_t at1, at2, at3, at4; 1143 int err_read_at1, err_write_at2; 1144 int err_read_at3, err_write_at4; 1145 1146 /* 1147 * pgd[] pud[] pmd[] virtual address pointers 1148 * /->pmd(u--)->pte1(uw-)->page1 <- ptr1 (u--) 1149 * /->pud1(uw-)--->pmd(uw-)->pte2(uw-)->page2 <- ptr2 (uw-) 1150 * pgd-| 1151 * \->pud2(u--)--->pmd(u--)->pte1(uw-)->page1 <- ptr3 (u--) 1152 * \->pmd(uw-)->pte2(uw-)->page2 <- ptr4 (u--) 1153 * pud1 and pud2 point to the same pmd page. 1154 */ 1155 1156 ac_test_init(&at1, ptr1, pt_env); 1157 at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK | 1158 AC_PDE_USER_MASK | AC_PTE_USER_MASK | 1159 AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK | 1160 AC_PTE_WRITABLE_MASK | AC_ACCESS_USER_MASK; 1161 ac_test_setup_ptes(&at1); 1162 1163 __ac_test_init(&at2, ptr2, pt_env, &at1); 1164 at2.flags = at1.flags | AC_PDE_WRITABLE_MASK | AC_PTE_DIRTY_MASK | AC_ACCESS_WRITE_MASK; 1165 ac_test_setup_ptes(&at2); 1166 1167 __ac_test_init(&at3, ptr3, pt_env, &at1); 1168 /* Override the PMD (1-based index) to point at ptr1's PMD. */ 1169 at3.page_tables[3] = at1.page_tables[3]; 1170 at3.flags = AC_PDPTE_NO_WRITABLE_MASK | at1.flags; 1171 ac_test_setup_ptes(&at3); 1172 1173 /* Alias ptr2, only the PMD will differ; manually override the PMD. */ 1174 __ac_test_init(&at4, ptr4, pt_env, &at2); 1175 at4.page_tables[3] = at1.page_tables[3]; 1176 at4.flags = AC_PDPTE_NO_WRITABLE_MASK | at2.flags; 1177 ac_test_setup_ptes(&at4); 1178 1179 err_read_at1 = ac_test_do_access(&at1); 1180 if (!err_read_at1) { 1181 printf("%s: read access at1 fail\n", __FUNCTION__); 1182 return 0; 1183 } 1184 1185 err_write_at2 = ac_test_do_access(&at2); 1186 if (!err_write_at2) { 1187 printf("%s: write access at2 fail\n", __FUNCTION__); 1188 return 0; 1189 } 1190 1191 err_read_at3 = ac_test_do_access(&at3); 1192 if (!err_read_at3) { 1193 printf("%s: read access at3 fail\n", __FUNCTION__); 1194 return 0; 1195 } 1196 1197 err_write_at4 = ac_test_do_access(&at4); 1198 if (!err_write_at4) { 1199 printf("%s: write access at4 should fail\n", __FUNCTION__); 1200 return 0; 1201 } 1202 1203 return 1; 1204 } 1205 1206 static int ac_test_exec(ac_test_t *at, ac_pt_env_t *pt_env) 1207 { 1208 int r; 1209 1210 if (verbose) { 1211 ac_test_show(at); 1212 } 1213 ac_test_setup_ptes(at); 1214 r = ac_test_do_access(at); 1215 return r; 1216 } 1217 1218 typedef int (*ac_test_fn)(ac_pt_env_t *pt_env); 1219 const ac_test_fn ac_test_cases[] = 1220 { 1221 corrupt_hugepage_trigger, 1222 check_pfec_on_prefetch_pte, 1223 check_large_pte_dirty_for_nowp, 1224 check_smep_andnot_wp, 1225 check_toggle_cr0_wp, 1226 check_effective_sp_permissions, 1227 }; 1228 1229 void ac_test_run(int pt_levels, bool force_emulation) 1230 { 1231 ac_test_t at; 1232 ac_pt_env_t pt_env; 1233 int i, tests, successes; 1234 1235 if (force_emulation && !is_fep_available()) { 1236 report_skip("Forced emulation prefix (FEP) not available\n"); 1237 return; 1238 } 1239 1240 printf("run\n"); 1241 tests = successes = 0; 1242 1243 shadow_cr0 = read_cr0(); 1244 shadow_cr4 = read_cr4(); 1245 shadow_cr3 = read_cr3(); 1246 shadow_efer = rdmsr(MSR_EFER); 1247 1248 if (cpuid_maxphyaddr() >= 52) { 1249 invalid_mask |= AC_PDE_BIT51_MASK; 1250 invalid_mask |= AC_PTE_BIT51_MASK; 1251 } 1252 if (cpuid_maxphyaddr() >= 37) { 1253 invalid_mask |= AC_PDE_BIT36_MASK; 1254 invalid_mask |= AC_PTE_BIT36_MASK; 1255 } 1256 1257 if (!force_emulation) 1258 invalid_mask |= AC_FEP_MASK; 1259 1260 ac_env_int(&pt_env, pt_levels); 1261 ac_test_init(&at, 0xffff923400000000ul, &pt_env); 1262 1263 if (this_cpu_has(X86_FEATURE_PKU)) { 1264 set_cr4_pke(1); 1265 set_cr4_pke(0); 1266 /* Now PKRU = 0xFFFFFFFF. */ 1267 } else { 1268 tests++; 1269 if (write_cr4_safe(shadow_cr4 | X86_CR4_PKE) == GP_VECTOR) { 1270 successes++; 1271 invalid_mask |= AC_PKU_AD_MASK; 1272 invalid_mask |= AC_PKU_WD_MASK; 1273 invalid_mask |= AC_PKU_PKEY_MASK; 1274 invalid_mask |= AC_CPU_CR4_PKE_MASK; 1275 printf("CR4.PKE not available, disabling PKE tests\n"); 1276 } else { 1277 printf("Set PKE in CR4 - expect #GP: FAIL!\n"); 1278 set_cr4_pke(0); 1279 } 1280 } 1281 1282 if (!this_cpu_has(X86_FEATURE_SMEP)) { 1283 tests++; 1284 if (set_cr4_smep(&at, 1) == GP_VECTOR) { 1285 successes++; 1286 invalid_mask |= AC_CPU_CR4_SMEP_MASK; 1287 printf("CR4.SMEP not available, disabling SMEP tests\n"); 1288 } else { 1289 printf("Set SMEP in CR4 - expect #GP: FAIL!\n"); 1290 set_cr4_smep(&at, 0); 1291 } 1292 } 1293 1294 /* Toggling LA57 in 64-bit mode (guaranteed for this test) is illegal. */ 1295 if (this_cpu_has(X86_FEATURE_LA57)) { 1296 tests++; 1297 if (write_cr4_safe(shadow_cr4 ^ X86_CR4_LA57) == GP_VECTOR) 1298 successes++; 1299 1300 /* Force a VM-Exit on KVM, which doesn't intercept LA57 itself. */ 1301 tests++; 1302 if (write_cr4_safe(shadow_cr4 ^ (X86_CR4_LA57 | X86_CR4_PSE)) == GP_VECTOR) 1303 successes++; 1304 } 1305 1306 do { 1307 ++tests; 1308 successes += ac_test_exec(&at, &pt_env); 1309 } while (ac_test_bump(&at)); 1310 1311 for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) { 1312 ac_env_int(&pt_env, pt_levels); 1313 1314 ++tests; 1315 successes += ac_test_cases[i](&pt_env); 1316 } 1317 1318 printf("\n%d tests, %d failures\n", tests, tests - successes); 1319 1320 report(successes == tests, "%d-level paging tests%s", pt_levels, 1321 force_emulation ? " (with forced emulation)" : ""); 1322 } 1323