1 #include "libcflat.h" 2 #include "desc.h" 3 #include "processor.h" 4 #include "asm/page.h" 5 #include "x86/vm.h" 6 #include "access.h" 7 8 #define true 1 9 #define false 0 10 11 static _Bool verbose = false; 12 13 typedef unsigned long pt_element_t; 14 static int invalid_mask; 15 16 /* Test code/data is at 32MiB, paging structures at 33MiB. */ 17 #define AT_CODE_DATA_PHYS 32 * 1024 * 1024 18 #define AT_PAGING_STRUCTURES_PHYS 33 * 1024 * 1024 19 20 #define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 36) - 1) & PAGE_MASK)) 21 #define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(1ull << 21)) 22 23 #define PFERR_PRESENT_MASK (1U << 0) 24 #define PFERR_WRITE_MASK (1U << 1) 25 #define PFERR_USER_MASK (1U << 2) 26 #define PFERR_RESERVED_MASK (1U << 3) 27 #define PFERR_FETCH_MASK (1U << 4) 28 #define PFERR_PK_MASK (1U << 5) 29 30 #define MSR_EFER 0xc0000080 31 #define EFER_NX_MASK (1ull << 11) 32 33 #define PT_INDEX(address, level) \ 34 (((address) >> (12 + ((level)-1) * 9)) & 511) 35 36 /* 37 * Page table access check tests. Each number/bit represent an individual 38 * test case. The main test will bump a counter by 1 to run all permutations 39 * of the below test cases (sans illegal combinations). 40 * 41 * Keep the PRESENT and reserved bits in the higher numbers so that they aren't 42 * toggled on every test, e.g. to keep entries in the TLB. 43 */ 44 enum { 45 AC_PTE_WRITABLE_BIT, 46 AC_PTE_USER_BIT, 47 AC_PTE_ACCESSED_BIT, 48 AC_PTE_DIRTY_BIT, 49 AC_PTE_NX_BIT, 50 AC_PTE_PRESENT_BIT, 51 AC_PTE_BIT51_BIT, 52 AC_PTE_BIT36_BIT, 53 54 AC_PDE_WRITABLE_BIT, 55 AC_PDE_USER_BIT, 56 AC_PDE_ACCESSED_BIT, 57 AC_PDE_DIRTY_BIT, 58 AC_PDE_PSE_BIT, 59 AC_PDE_NX_BIT, 60 AC_PDE_PRESENT_BIT, 61 AC_PDE_BIT51_BIT, 62 AC_PDE_BIT36_BIT, 63 AC_PDE_BIT13_BIT, 64 65 /* 66 * special test case to DISABLE writable bit on page directory 67 * pointer table entry. 68 */ 69 AC_PDPTE_NO_WRITABLE_BIT, 70 71 AC_PKU_AD_BIT, 72 AC_PKU_WD_BIT, 73 AC_PKU_PKEY_BIT, 74 75 AC_ACCESS_USER_BIT, 76 AC_ACCESS_WRITE_BIT, 77 AC_ACCESS_FETCH_BIT, 78 AC_ACCESS_TWICE_BIT, 79 80 AC_CPU_EFER_NX_BIT, 81 AC_CPU_CR0_WP_BIT, 82 AC_CPU_CR4_SMEP_BIT, 83 AC_CPU_CR4_PKE_BIT, 84 85 AC_FEP_BIT, 86 87 NR_AC_FLAGS, 88 }; 89 90 #define AC_PTE_PRESENT_MASK (1 << AC_PTE_PRESENT_BIT) 91 #define AC_PTE_WRITABLE_MASK (1 << AC_PTE_WRITABLE_BIT) 92 #define AC_PTE_USER_MASK (1 << AC_PTE_USER_BIT) 93 #define AC_PTE_ACCESSED_MASK (1 << AC_PTE_ACCESSED_BIT) 94 #define AC_PTE_DIRTY_MASK (1 << AC_PTE_DIRTY_BIT) 95 #define AC_PTE_NX_MASK (1 << AC_PTE_NX_BIT) 96 #define AC_PTE_BIT51_MASK (1 << AC_PTE_BIT51_BIT) 97 #define AC_PTE_BIT36_MASK (1 << AC_PTE_BIT36_BIT) 98 99 #define AC_PDE_PRESENT_MASK (1 << AC_PDE_PRESENT_BIT) 100 #define AC_PDE_WRITABLE_MASK (1 << AC_PDE_WRITABLE_BIT) 101 #define AC_PDE_USER_MASK (1 << AC_PDE_USER_BIT) 102 #define AC_PDE_ACCESSED_MASK (1 << AC_PDE_ACCESSED_BIT) 103 #define AC_PDE_DIRTY_MASK (1 << AC_PDE_DIRTY_BIT) 104 #define AC_PDE_PSE_MASK (1 << AC_PDE_PSE_BIT) 105 #define AC_PDE_NX_MASK (1 << AC_PDE_NX_BIT) 106 #define AC_PDE_BIT51_MASK (1 << AC_PDE_BIT51_BIT) 107 #define AC_PDE_BIT36_MASK (1 << AC_PDE_BIT36_BIT) 108 #define AC_PDE_BIT13_MASK (1 << AC_PDE_BIT13_BIT) 109 110 #define AC_PDPTE_NO_WRITABLE_MASK (1 << AC_PDPTE_NO_WRITABLE_BIT) 111 112 #define AC_PKU_AD_MASK (1 << AC_PKU_AD_BIT) 113 #define AC_PKU_WD_MASK (1 << AC_PKU_WD_BIT) 114 #define AC_PKU_PKEY_MASK (1 << AC_PKU_PKEY_BIT) 115 116 #define AC_ACCESS_USER_MASK (1 << AC_ACCESS_USER_BIT) 117 #define AC_ACCESS_WRITE_MASK (1 << AC_ACCESS_WRITE_BIT) 118 #define AC_ACCESS_FETCH_MASK (1 << AC_ACCESS_FETCH_BIT) 119 #define AC_ACCESS_TWICE_MASK (1 << AC_ACCESS_TWICE_BIT) 120 121 #define AC_CPU_EFER_NX_MASK (1 << AC_CPU_EFER_NX_BIT) 122 #define AC_CPU_CR0_WP_MASK (1 << AC_CPU_CR0_WP_BIT) 123 #define AC_CPU_CR4_SMEP_MASK (1 << AC_CPU_CR4_SMEP_BIT) 124 #define AC_CPU_CR4_PKE_MASK (1 << AC_CPU_CR4_PKE_BIT) 125 126 #define AC_FEP_MASK (1 << AC_FEP_BIT) 127 128 const char *ac_names[] = { 129 [AC_PTE_PRESENT_BIT] = "pte.p", 130 [AC_PTE_ACCESSED_BIT] = "pte.a", 131 [AC_PTE_WRITABLE_BIT] = "pte.rw", 132 [AC_PTE_USER_BIT] = "pte.user", 133 [AC_PTE_DIRTY_BIT] = "pte.d", 134 [AC_PTE_NX_BIT] = "pte.nx", 135 [AC_PTE_BIT51_BIT] = "pte.51", 136 [AC_PTE_BIT36_BIT] = "pte.36", 137 [AC_PDE_PRESENT_BIT] = "pde.p", 138 [AC_PDE_ACCESSED_BIT] = "pde.a", 139 [AC_PDE_WRITABLE_BIT] = "pde.rw", 140 [AC_PDE_USER_BIT] = "pde.user", 141 [AC_PDE_DIRTY_BIT] = "pde.d", 142 [AC_PDE_PSE_BIT] = "pde.pse", 143 [AC_PDE_NX_BIT] = "pde.nx", 144 [AC_PDE_BIT51_BIT] = "pde.51", 145 [AC_PDE_BIT36_BIT] = "pde.36", 146 [AC_PDE_BIT13_BIT] = "pde.13", 147 [AC_PDPTE_NO_WRITABLE_BIT] = "pdpte.ro", 148 [AC_PKU_AD_BIT] = "pkru.ad", 149 [AC_PKU_WD_BIT] = "pkru.wd", 150 [AC_PKU_PKEY_BIT] = "pkey=1", 151 [AC_ACCESS_WRITE_BIT] = "write", 152 [AC_ACCESS_USER_BIT] = "user", 153 [AC_ACCESS_FETCH_BIT] = "fetch", 154 [AC_ACCESS_TWICE_BIT] = "twice", 155 [AC_CPU_EFER_NX_BIT] = "efer.nx", 156 [AC_CPU_CR0_WP_BIT] = "cr0.wp", 157 [AC_CPU_CR4_SMEP_BIT] = "cr4.smep", 158 [AC_CPU_CR4_PKE_BIT] = "cr4.pke", 159 [AC_FEP_BIT] = "fep", 160 }; 161 162 static inline void *va(pt_element_t phys) 163 { 164 return (void *)phys; 165 } 166 167 typedef struct { 168 pt_element_t pt_pool_pa; 169 unsigned int pt_pool_current; 170 int pt_levels; 171 } ac_pt_env_t; 172 173 typedef struct { 174 unsigned flags; 175 void *virt; 176 pt_element_t phys; 177 pt_element_t *ptep; 178 pt_element_t expected_pte; 179 pt_element_t *pdep; 180 pt_element_t expected_pde; 181 pt_element_t ignore_pde; 182 int expected_fault; 183 unsigned expected_error; 184 int pt_levels; 185 186 /* 5-level paging, 1-based to avoid math. */ 187 pt_element_t page_tables[6]; 188 } ac_test_t; 189 190 typedef struct { 191 unsigned short limit; 192 unsigned long linear_addr; 193 } __attribute__((packed)) descriptor_table_t; 194 195 196 static void ac_test_show(ac_test_t *at); 197 198 static unsigned long shadow_cr0; 199 static unsigned long shadow_cr3; 200 static unsigned long shadow_cr4; 201 static unsigned long long shadow_efer; 202 203 typedef void (*walk_fn)(pt_element_t *ptep, int level, unsigned long virt); 204 205 /* Returns the size of the range covered by the last processed entry. */ 206 static unsigned long walk_va(ac_test_t *at, int min_level, unsigned long virt, 207 walk_fn callback, bool leaf_only) 208 { 209 unsigned long parent_pte = shadow_cr3; 210 int i; 211 212 for (i = at->pt_levels; i >= min_level; --i) { 213 pt_element_t *parent_pt = va(parent_pte & PT_BASE_ADDR_MASK); 214 unsigned int index = PT_INDEX(virt, i); 215 pt_element_t *ptep = &parent_pt[index]; 216 217 assert(!leaf_only || (*ptep & PT_PRESENT_MASK)); 218 219 if (!leaf_only || i == 1 || (*ptep & PT_PAGE_SIZE_MASK)) 220 callback(ptep, i, virt); 221 222 if (i == 1 || *ptep & PT_PAGE_SIZE_MASK) 223 break; 224 225 parent_pte = *ptep; 226 } 227 228 return 1ul << PGDIR_BITS(i); 229 } 230 231 static void walk_ptes(ac_test_t *at, unsigned long virt, unsigned long end, 232 walk_fn callback) 233 { 234 unsigned long page_size; 235 236 for ( ; virt < end; virt = ALIGN_DOWN(virt + page_size, page_size)) 237 page_size = walk_va(at, 1, virt, callback, true); 238 } 239 240 static void set_cr0_wp(int wp) 241 { 242 unsigned long cr0 = shadow_cr0; 243 244 cr0 &= ~X86_CR0_WP; 245 if (wp) 246 cr0 |= X86_CR0_WP; 247 if (cr0 != shadow_cr0) { 248 write_cr0(cr0); 249 shadow_cr0 = cr0; 250 } 251 } 252 253 static void clear_user_mask(pt_element_t *ptep, int level, unsigned long virt) 254 { 255 *ptep &= ~PT_USER_MASK; 256 257 /* Flush to avoid spurious #PF */ 258 invlpg((void*)virt); 259 } 260 261 static void set_user_mask(pt_element_t *ptep, int level, unsigned long virt) 262 { 263 *ptep |= PT_USER_MASK; 264 265 /* Flush to avoid spurious #PF */ 266 invlpg((void*)virt); 267 } 268 269 static unsigned set_cr4_smep(ac_test_t *at, int smep) 270 { 271 extern char stext, etext; 272 unsigned long code_start = (unsigned long)&stext; 273 unsigned long code_end = (unsigned long)&etext; 274 unsigned long cr4 = shadow_cr4; 275 unsigned r; 276 277 cr4 &= ~X86_CR4_SMEP; 278 if (smep) 279 cr4 |= X86_CR4_SMEP; 280 if (cr4 == shadow_cr4) 281 return 0; 282 283 if (smep) 284 walk_ptes(at, code_start, code_end, clear_user_mask); 285 r = write_cr4_safe(cr4); 286 if (r || !smep) 287 walk_ptes(at, code_start, code_end, set_user_mask); 288 if (!r) 289 shadow_cr4 = cr4; 290 return r; 291 } 292 293 static void set_cr4_pke(int pke) 294 { 295 unsigned long cr4 = shadow_cr4; 296 297 cr4 &= ~X86_CR4_PKE; 298 if (pke) 299 cr4 |= X86_CR4_PKE; 300 if (cr4 == shadow_cr4) 301 return; 302 303 /* Check that protection keys do not affect accesses when CR4.PKE=0. */ 304 if ((shadow_cr4 & X86_CR4_PKE) && !pke) 305 write_pkru(0xfffffffc); 306 write_cr4(cr4); 307 shadow_cr4 = cr4; 308 } 309 310 static void set_efer_nx(int nx) 311 { 312 unsigned long long efer = shadow_efer; 313 314 efer &= ~EFER_NX_MASK; 315 if (nx) 316 efer |= EFER_NX_MASK; 317 if (efer != shadow_efer) { 318 wrmsr(MSR_EFER, efer); 319 shadow_efer = efer; 320 } 321 } 322 323 static void ac_env_int(ac_pt_env_t *pt_env, int page_table_levels) 324 { 325 extern char page_fault, kernel_entry; 326 set_idt_entry(14, &page_fault, 0); 327 set_idt_entry(0x20, &kernel_entry, 3); 328 329 pt_env->pt_pool_pa = AT_PAGING_STRUCTURES_PHYS; 330 pt_env->pt_pool_current = 0; 331 pt_env->pt_levels = page_table_levels; 332 } 333 334 static pt_element_t ac_test_alloc_pt(ac_pt_env_t *pt_env) 335 { 336 pt_element_t pt; 337 338 /* 339 * Each test needs at most pt_levels-1 structures per virtual address, 340 * and no existing scenario uses more than four addresses. 341 */ 342 assert(pt_env->pt_pool_current < (4 * (pt_env->pt_levels - 1))); 343 344 pt = pt_env->pt_pool_pa + (pt_env->pt_pool_current * PAGE_SIZE); 345 pt_env->pt_pool_current++; 346 memset(va(pt), 0, PAGE_SIZE); 347 return pt; 348 } 349 350 static void __ac_test_init(ac_test_t *at, unsigned long virt, 351 ac_pt_env_t *pt_env, ac_test_t *buddy) 352 { 353 unsigned long buddy_virt = buddy ? (unsigned long)buddy->virt : 0; 354 pt_element_t *root_pt = va(shadow_cr3 & PT_BASE_ADDR_MASK); 355 int i; 356 357 /* 358 * The test infrastructure, e.g. this function, must use a different 359 * top-level SPTE than the test, otherwise modifying SPTEs can affect 360 * normal behavior, e.g. crash the test due to marking code SPTEs 361 * USER when CR4.SMEP=1. 362 */ 363 assert(PT_INDEX(virt, pt_env->pt_levels) != 364 PT_INDEX((unsigned long)__ac_test_init, pt_env->pt_levels)); 365 366 set_efer_nx(1); 367 set_cr0_wp(1); 368 at->flags = 0; 369 at->virt = (void *)virt; 370 at->phys = AT_CODE_DATA_PHYS; 371 at->pt_levels = pt_env->pt_levels; 372 373 at->page_tables[0] = -1ull; 374 at->page_tables[1] = -1ull; 375 376 /* 377 * Zap the existing top-level PTE as it may be reused from a previous 378 * sub-test. This allows runtime PTE modification to assert that two 379 * overlapping walks don't try to install different paging structures. 380 */ 381 root_pt[PT_INDEX(virt, pt_env->pt_levels)] = 0; 382 383 for (i = at->pt_levels; i > 1; i--) { 384 /* 385 * Buddies can reuse any part of the walk that share the same 386 * index. This is weird, but intentional, as several tests 387 * want different walks to merge at lower levels. 388 */ 389 if (buddy && PT_INDEX(virt, i) == PT_INDEX(buddy_virt, i)) 390 at->page_tables[i] = buddy->page_tables[i]; 391 else 392 at->page_tables[i] = ac_test_alloc_pt(pt_env); 393 } 394 } 395 396 static void ac_test_init(ac_test_t *at, unsigned long virt, ac_pt_env_t *pt_env) 397 { 398 __ac_test_init(at, virt, pt_env, NULL); 399 } 400 401 static int ac_test_bump_one(ac_test_t *at) 402 { 403 at->flags = ((at->flags | invalid_mask) + 1) & ~invalid_mask; 404 return at->flags < (1 << NR_AC_FLAGS); 405 } 406 407 #define F(x) ((flags & x##_MASK) != 0) 408 409 static _Bool ac_test_legal(ac_test_t *at) 410 { 411 int flags = at->flags; 412 unsigned reserved; 413 414 if (F(AC_CPU_CR4_SMEP)) 415 return false; 416 417 if (F(AC_ACCESS_FETCH) && F(AC_ACCESS_WRITE)) 418 return false; 419 420 /* 421 * Since we convert current page to kernel page when cr4.smep=1, 422 * we can't switch to user mode. 423 */ 424 if (F(AC_ACCESS_USER) && F(AC_CPU_CR4_SMEP)) 425 return false; 426 427 /* 428 * Only test protection key faults if CR4.PKE=1. 429 */ 430 if (!F(AC_CPU_CR4_PKE) && 431 (F(AC_PKU_AD) || F(AC_PKU_WD))) { 432 return false; 433 } 434 435 /* 436 * pde.bit13 checks handling of reserved bits in largepage PDEs. It is 437 * meaningless if there is a PTE. 438 */ 439 if (!F(AC_PDE_PSE) && F(AC_PDE_BIT13)) 440 return false; 441 442 /* 443 * Shorten the test by avoiding testing too many reserved bit combinations. 444 * Skip testing multiple reserved bits to shorten the test. Reserved bit 445 * page faults are terminal and multiple reserved bits do not affect the 446 * error code; the odds of a KVM bug are super low, and the odds of actually 447 * being able to detect a bug are even lower. 448 */ 449 reserved = (AC_PDE_BIT51_MASK | AC_PDE_BIT36_MASK | AC_PDE_BIT13_MASK | 450 AC_PTE_BIT51_MASK | AC_PTE_BIT36_MASK); 451 if (!F(AC_CPU_EFER_NX)) 452 reserved |= AC_PDE_NX_MASK | AC_PTE_NX_MASK; 453 454 /* Only test one reserved bit at a time. */ 455 reserved &= flags; 456 if (reserved & (reserved - 1)) 457 return false; 458 459 return true; 460 } 461 462 static int ac_test_bump(ac_test_t *at) 463 { 464 int ret; 465 466 do { 467 ret = ac_test_bump_one(at); 468 } while (ret && !ac_test_legal(at)); 469 470 return ret; 471 } 472 473 static pt_element_t ac_test_permissions(ac_test_t *at, unsigned flags, 474 bool writable, bool user, 475 bool executable) 476 { 477 bool kwritable = !F(AC_CPU_CR0_WP) && !F(AC_ACCESS_USER); 478 pt_element_t expected = 0; 479 480 if (F(AC_ACCESS_USER) && !user) 481 at->expected_fault = 1; 482 483 if (F(AC_ACCESS_WRITE) && !writable && !kwritable) 484 at->expected_fault = 1; 485 486 if (F(AC_ACCESS_FETCH) && !executable) 487 at->expected_fault = 1; 488 489 if (F(AC_ACCESS_FETCH) && user && F(AC_CPU_CR4_SMEP)) 490 at->expected_fault = 1; 491 492 if (user && !F(AC_ACCESS_FETCH) && F(AC_PKU_PKEY) && F(AC_CPU_CR4_PKE)) { 493 if (F(AC_PKU_AD)) { 494 at->expected_fault = 1; 495 at->expected_error |= PFERR_PK_MASK; 496 } else if (F(AC_ACCESS_WRITE) && F(AC_PKU_WD) && !kwritable) { 497 at->expected_fault = 1; 498 at->expected_error |= PFERR_PK_MASK; 499 } 500 } 501 502 if (!at->expected_fault) { 503 expected |= PT_ACCESSED_MASK; 504 if (F(AC_ACCESS_WRITE)) 505 expected |= PT_DIRTY_MASK; 506 } 507 508 return expected; 509 } 510 511 static void ac_emulate_access(ac_test_t *at, unsigned flags) 512 { 513 bool pde_valid, pte_valid; 514 bool user, writable, executable; 515 516 if (F(AC_ACCESS_USER)) 517 at->expected_error |= PFERR_USER_MASK; 518 519 if (F(AC_ACCESS_WRITE)) 520 at->expected_error |= PFERR_WRITE_MASK; 521 522 if (F(AC_ACCESS_FETCH)) 523 at->expected_error |= PFERR_FETCH_MASK; 524 525 if (!F(AC_PDE_ACCESSED)) 526 at->ignore_pde = PT_ACCESSED_MASK; 527 528 pde_valid = F(AC_PDE_PRESENT) 529 && !F(AC_PDE_BIT51) && !F(AC_PDE_BIT36) && !F(AC_PDE_BIT13) 530 && !(F(AC_PDE_NX) && !F(AC_CPU_EFER_NX)); 531 532 if (!pde_valid) { 533 at->expected_fault = 1; 534 if (F(AC_PDE_PRESENT)) { 535 at->expected_error |= PFERR_RESERVED_MASK; 536 } else { 537 at->expected_error &= ~PFERR_PRESENT_MASK; 538 } 539 goto fault; 540 } 541 542 writable = !F(AC_PDPTE_NO_WRITABLE) && F(AC_PDE_WRITABLE); 543 user = F(AC_PDE_USER); 544 executable = !F(AC_PDE_NX); 545 546 if (F(AC_PDE_PSE)) { 547 at->expected_pde |= ac_test_permissions(at, flags, writable, 548 user, executable); 549 goto no_pte; 550 } 551 552 at->expected_pde |= PT_ACCESSED_MASK; 553 554 pte_valid = F(AC_PTE_PRESENT) 555 && !F(AC_PTE_BIT51) && !F(AC_PTE_BIT36) 556 && !(F(AC_PTE_NX) && !F(AC_CPU_EFER_NX)); 557 558 if (!pte_valid) { 559 at->expected_fault = 1; 560 if (F(AC_PTE_PRESENT)) { 561 at->expected_error |= PFERR_RESERVED_MASK; 562 } else { 563 at->expected_error &= ~PFERR_PRESENT_MASK; 564 } 565 goto fault; 566 } 567 568 writable &= F(AC_PTE_WRITABLE); 569 user &= F(AC_PTE_USER); 570 executable &= !F(AC_PTE_NX); 571 572 at->expected_pte |= ac_test_permissions(at, flags, writable, user, 573 executable); 574 575 no_pte: 576 fault: 577 if (!at->expected_fault) 578 at->ignore_pde = 0; 579 if (!F(AC_CPU_EFER_NX) && !F(AC_CPU_CR4_SMEP)) 580 at->expected_error &= ~PFERR_FETCH_MASK; 581 } 582 583 static void __ac_set_expected_status(ac_test_t *at, bool flush) 584 { 585 if (flush) 586 invlpg(at->virt); 587 588 if (at->ptep) 589 at->expected_pte = *at->ptep; 590 at->expected_pde = *at->pdep; 591 at->ignore_pde = 0; 592 at->expected_fault = 0; 593 at->expected_error = PFERR_PRESENT_MASK; 594 595 if (at->flags & AC_ACCESS_TWICE_MASK) { 596 ac_emulate_access(at, at->flags & 597 ~AC_ACCESS_WRITE_MASK & 598 ~AC_ACCESS_FETCH_MASK & 599 ~AC_ACCESS_USER_MASK); 600 at->expected_fault = 0; 601 at->expected_error = PFERR_PRESENT_MASK; 602 at->ignore_pde = 0; 603 } 604 605 ac_emulate_access(at, at->flags); 606 } 607 608 static void ac_set_expected_status(ac_test_t *at) 609 { 610 __ac_set_expected_status(at, true); 611 } 612 613 static pt_element_t ac_get_pt(ac_test_t *at, int i, pt_element_t *ptep) 614 { 615 pt_element_t pte; 616 617 pte = *ptep; 618 if (pte && !(pte & PT_PAGE_SIZE_MASK) && 619 (pte & PT_BASE_ADDR_MASK) != at->page_tables[i]) { 620 printf("\nPT collision. VA = 0x%lx, level = %d, index = %ld, found PT = 0x%lx, want PT = 0x%lx\n", 621 (unsigned long)at->virt, i, 622 PT_INDEX((unsigned long)at->virt, i), 623 pte, at->page_tables[i]); 624 abort(); 625 } 626 627 /* 628 * Preserve A/D bits to avoid writing upper level PTEs, 629 * which cannot be unsyc'd when KVM uses shadow paging. 630 */ 631 pte = at->page_tables[i] | (pte & (PT_DIRTY_MASK | PT_ACCESSED_MASK)); 632 return pte; 633 } 634 635 static void ac_test_setup_ptes(ac_test_t *at) 636 { 637 unsigned long parent_pte = shadow_cr3; 638 int flags = at->flags; 639 int i; 640 641 at->ptep = 0; 642 for (i = at->pt_levels; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) { 643 pt_element_t *parent_pt = va(parent_pte & PT_BASE_ADDR_MASK); 644 unsigned index = PT_INDEX((unsigned long)at->virt, i); 645 pt_element_t *ptep = &parent_pt[index]; 646 pt_element_t pte; 647 648 switch (i) { 649 case 5: 650 case 4: 651 pte = ac_get_pt(at, i, ptep); 652 pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; 653 break; 654 case 3: 655 pte = ac_get_pt(at, i, ptep); 656 pte |= PT_PRESENT_MASK | PT_USER_MASK; 657 if (!F(AC_PDPTE_NO_WRITABLE)) 658 pte |= PT_WRITABLE_MASK; 659 break; 660 case 2: 661 if (!F(AC_PDE_PSE)) { 662 pte = ac_get_pt(at, i, ptep); 663 664 /* The protection key is ignored on non-leaf entries. */ 665 if (F(AC_PKU_PKEY)) 666 pte |= 2ull << 59; 667 } else { 668 pte = at->phys & PT_PSE_BASE_ADDR_MASK; 669 pte |= PT_PAGE_SIZE_MASK; 670 if (F(AC_PKU_PKEY)) 671 pte |= 1ull << 59; 672 } 673 if (F(AC_PDE_PRESENT)) 674 pte |= PT_PRESENT_MASK; 675 if (F(AC_PDE_WRITABLE)) 676 pte |= PT_WRITABLE_MASK; 677 if (F(AC_PDE_USER)) 678 pte |= PT_USER_MASK; 679 if (F(AC_PDE_ACCESSED)) 680 pte |= PT_ACCESSED_MASK; 681 if (F(AC_PDE_DIRTY)) 682 pte |= PT_DIRTY_MASK; 683 if (F(AC_PDE_NX)) 684 pte |= PT64_NX_MASK; 685 if (F(AC_PDE_BIT51)) 686 pte |= 1ull << 51; 687 if (F(AC_PDE_BIT36)) 688 pte |= 1ull << 36; 689 if (F(AC_PDE_BIT13)) 690 pte |= 1ull << 13; 691 at->pdep = ptep; 692 break; 693 case 1: 694 pte = at->phys & PT_BASE_ADDR_MASK; 695 if (F(AC_PKU_PKEY)) 696 pte |= 1ull << 59; 697 if (F(AC_PTE_PRESENT)) 698 pte |= PT_PRESENT_MASK; 699 if (F(AC_PTE_WRITABLE)) 700 pte |= PT_WRITABLE_MASK; 701 if (F(AC_PTE_USER)) 702 pte |= PT_USER_MASK; 703 if (F(AC_PTE_ACCESSED)) 704 pte |= PT_ACCESSED_MASK; 705 if (F(AC_PTE_DIRTY)) 706 pte |= PT_DIRTY_MASK; 707 if (F(AC_PTE_NX)) 708 pte |= PT64_NX_MASK; 709 if (F(AC_PTE_BIT51)) 710 pte |= 1ull << 51; 711 if (F(AC_PTE_BIT36)) 712 pte |= 1ull << 36; 713 at->ptep = ptep; 714 break; 715 default: 716 assert(0); 717 } 718 719 if (pte != *ptep) 720 *ptep = pte; 721 722 parent_pte = pte; 723 } 724 ac_set_expected_status(at); 725 } 726 727 static void __dump_pte(pt_element_t *ptep, int level, unsigned long virt) 728 { 729 printf("------L%d I%lu: %lx\n", level, PT_INDEX(virt, level), *ptep); 730 } 731 732 static void dump_mapping(ac_test_t *at) 733 { 734 unsigned long virt = (unsigned long)at->virt; 735 int flags = at->flags; 736 737 printf("Dump mapping: address: %p\n", at->virt); 738 walk_va(at, F(AC_PDE_PSE) ? 2 : 1, virt, __dump_pte, false); 739 } 740 741 static void ac_test_check(ac_test_t *at, _Bool *success_ret, _Bool cond, 742 const char *fmt, ...) 743 { 744 va_list ap; 745 char buf[500]; 746 747 if (!*success_ret) { 748 return; 749 } 750 751 if (!cond) { 752 return; 753 } 754 755 *success_ret = false; 756 757 if (!verbose) { 758 puts("\n"); 759 ac_test_show(at); 760 } 761 762 va_start(ap, fmt); 763 vsnprintf(buf, sizeof(buf), fmt, ap); 764 va_end(ap); 765 printf("FAIL: %s\n", buf); 766 dump_mapping(at); 767 } 768 769 static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore) 770 { 771 pte1 &= ~ignore; 772 pte2 &= ~ignore; 773 return pte1 == pte2; 774 } 775 776 static int ac_test_do_access(ac_test_t *at) 777 { 778 static unsigned unique = 42; 779 int fault = 0; 780 unsigned e; 781 static unsigned char user_stack[4096]; 782 unsigned long rsp; 783 _Bool success = true; 784 int flags = at->flags; 785 786 ++unique; 787 if (!(unique & 65535)) { 788 puts("."); 789 } 790 791 *((unsigned char *)at->phys) = 0xc3; /* ret */ 792 793 unsigned r = unique; 794 set_cr0_wp(F(AC_CPU_CR0_WP)); 795 set_efer_nx(F(AC_CPU_EFER_NX)); 796 set_cr4_pke(F(AC_CPU_CR4_PKE)); 797 if (F(AC_CPU_CR4_PKE)) { 798 /* WD2=AD2=1, WD1=F(AC_PKU_WD), AD1=F(AC_PKU_AD) */ 799 write_pkru(0x30 | (F(AC_PKU_WD) ? 8 : 0) | 800 (F(AC_PKU_AD) ? 4 : 0)); 801 } 802 803 set_cr4_smep(at, F(AC_CPU_CR4_SMEP)); 804 805 if (F(AC_ACCESS_TWICE)) { 806 asm volatile ("mov $fixed2, %%rsi \n\t" 807 "cmp $0, %[fep] \n\t" 808 "jz 1f \n\t" 809 KVM_FEP 810 "1: mov (%[addr]), %[reg] \n\t" 811 "fixed2:" 812 : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e) 813 : [addr]"r"(at->virt), [fep]"r"(F(AC_FEP)) 814 : "rsi"); 815 fault = 0; 816 } 817 818 asm volatile ("mov $fixed1, %%rsi \n\t" 819 "mov %%rsp, %[rsp0] \n\t" 820 "cmp $0, %[user] \n\t" 821 "jz do_access \n\t" 822 "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax \n\t" 823 "pushq %[user_ds] \n\t" 824 "pushq %[user_stack_top] \n\t" 825 "pushfq \n\t" 826 "pushq %[user_cs] \n\t" 827 "pushq $do_access \n\t" 828 "iretq \n" 829 "do_access: \n\t" 830 "cmp $0, %[fetch] \n\t" 831 "jnz 2f \n\t" 832 "cmp $0, %[write] \n\t" 833 "jnz 1f \n\t" 834 "cmp $0, %[fep] \n\t" 835 "jz 0f \n\t" 836 KVM_FEP 837 "0: mov (%[addr]), %[reg] \n\t" 838 "jmp done \n\t" 839 "1: cmp $0, %[fep] \n\t" 840 "jz 0f \n\t" 841 KVM_FEP 842 "0: mov %[reg], (%[addr]) \n\t" 843 "jmp done \n\t" 844 "2: call *%[addr] \n\t" 845 "done: \n" 846 "fixed1: \n" 847 "int %[kernel_entry_vector] \n\t" 848 ".section .text.entry \n\t" 849 "kernel_entry: \n\t" 850 "mov %[rsp0], %%rsp \n\t" 851 "jmp back_to_kernel \n\t" 852 ".section .text \n\t" 853 "back_to_kernel:" 854 : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp), 855 [rsp0]"=m"(tss[0].rsp0) 856 : [addr]"r"(at->virt), 857 [write]"r"(F(AC_ACCESS_WRITE)), 858 [user]"r"(F(AC_ACCESS_USER)), 859 [fetch]"r"(F(AC_ACCESS_FETCH)), 860 [fep]"r"(F(AC_FEP)), 861 [user_ds]"i"(USER_DS), 862 [user_cs]"i"(USER_CS), 863 [user_stack_top]"r"(user_stack + sizeof user_stack), 864 [kernel_entry_vector]"i"(0x20) 865 : "rsi"); 866 867 asm volatile (".section .text.pf \n\t" 868 "page_fault: \n\t" 869 "pop %rbx \n\t" 870 "mov %rsi, (%rsp) \n\t" 871 "movl $1, %eax \n\t" 872 "iretq \n\t" 873 ".section .text"); 874 875 ac_test_check(at, &success, fault && !at->expected_fault, 876 "unexpected fault"); 877 ac_test_check(at, &success, !fault && at->expected_fault, 878 "unexpected access"); 879 ac_test_check(at, &success, fault && e != at->expected_error, 880 "error code %x expected %x", e, at->expected_error); 881 if (at->ptep) 882 ac_test_check(at, &success, *at->ptep != at->expected_pte, 883 "pte %x expected %x", *at->ptep, at->expected_pte); 884 ac_test_check(at, &success, 885 !pt_match(*at->pdep, at->expected_pde, at->ignore_pde), 886 "pde %x expected %x", *at->pdep, at->expected_pde); 887 888 if (success && verbose) { 889 if (at->expected_fault) { 890 printf("PASS (%x)\n", at->expected_error); 891 } else { 892 printf("PASS\n"); 893 } 894 } 895 return success; 896 } 897 898 static void ac_test_show(ac_test_t *at) 899 { 900 char line[5000]; 901 902 *line = 0; 903 strcat(line, "test"); 904 for (int i = 0; i < NR_AC_FLAGS; ++i) 905 if (at->flags & (1 << i)) { 906 strcat(line, " "); 907 strcat(line, ac_names[i]); 908 } 909 910 strcat(line, ": "); 911 printf("%s", line); 912 } 913 914 /* 915 * This test case is used to trigger the bug which is fixed by 916 * commit e09e90a5 in the kvm tree 917 */ 918 static int corrupt_hugepage_trigger(ac_pt_env_t *pt_env) 919 { 920 ac_test_t at1, at2; 921 922 ac_test_init(&at1, 0xffff923400000000ul, pt_env); 923 __ac_test_init(&at2, 0xffffe66600000000ul, pt_env, &at1); 924 925 at2.flags = AC_CPU_CR0_WP_MASK | AC_PDE_PSE_MASK | AC_PDE_PRESENT_MASK; 926 ac_test_setup_ptes(&at2); 927 if (!ac_test_do_access(&at2)) 928 goto err; 929 930 at1.flags = at2.flags | AC_PDE_WRITABLE_MASK; 931 ac_test_setup_ptes(&at1); 932 if (!ac_test_do_access(&at1)) 933 goto err; 934 935 at1.flags |= AC_ACCESS_WRITE_MASK; 936 ac_set_expected_status(&at1); 937 if (!ac_test_do_access(&at1)) 938 goto err; 939 940 at2.flags |= AC_ACCESS_WRITE_MASK; 941 ac_set_expected_status(&at2); 942 if (!ac_test_do_access(&at2)) 943 goto err; 944 945 return 1; 946 947 err: 948 printf("corrupt_hugepage_trigger test fail\n"); 949 return 0; 950 } 951 952 /* 953 * This test case is used to trigger the bug which is fixed by 954 * commit 3ddf6c06e13e in the kvm tree 955 */ 956 static int check_pfec_on_prefetch_pte(ac_pt_env_t *pt_env) 957 { 958 ac_test_t at1, at2; 959 960 ac_test_init(&at1, 0xffff923406001000ul, pt_env); 961 __ac_test_init(&at2, 0xffff923406003000ul, pt_env, &at1); 962 963 at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK; 964 ac_test_setup_ptes(&at1); 965 966 at2.flags = at1.flags | AC_PTE_NX_MASK; 967 ac_test_setup_ptes(&at2); 968 969 if (!ac_test_do_access(&at1)) { 970 printf("%s: prepare fail\n", __FUNCTION__); 971 goto err; 972 } 973 974 if (!ac_test_do_access(&at2)) { 975 printf("%s: check PFEC on prefetch pte path fail\n", 976 __FUNCTION__); 977 goto err; 978 } 979 980 return 1; 981 982 err: 983 return 0; 984 } 985 986 /* 987 * If the write-fault access is from supervisor and CR0.WP is not set on the 988 * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte 989 * and clears U bit. This is the chance that kvm can change pte access from 990 * readonly to writable. 991 * 992 * Unfortunately, the pte access is the access of 'direct' shadow page table, 993 * means direct sp.role.access = pte_access, then we will create a writable 994 * spte entry on the readonly shadow page table. It will cause Dirty bit is 995 * not tracked when two guest ptes point to the same large page. Note, it 996 * does not have other impact except Dirty bit since cr0.wp is encoded into 997 * sp.role. 998 * 999 * Note: to trigger this bug, hugepage should be disabled on host. 1000 */ 1001 static int check_large_pte_dirty_for_nowp(ac_pt_env_t *pt_env) 1002 { 1003 ac_test_t at1, at2; 1004 1005 ac_test_init(&at1, 0xffff923403000000ul, pt_env); 1006 __ac_test_init(&at2, 0xffffe66606000000ul, pt_env, &at1); 1007 1008 at2.flags = AC_PDE_PRESENT_MASK | AC_PDE_PSE_MASK; 1009 ac_test_setup_ptes(&at2); 1010 if (!ac_test_do_access(&at2)) { 1011 printf("%s: read on the first mapping fail.\n", __FUNCTION__); 1012 goto err; 1013 } 1014 1015 at1.flags = at2.flags | AC_ACCESS_WRITE_MASK; 1016 ac_test_setup_ptes(&at1); 1017 if (!ac_test_do_access(&at1)) { 1018 printf("%s: write on the second mapping fail.\n", __FUNCTION__); 1019 goto err; 1020 } 1021 1022 at2.flags |= AC_ACCESS_WRITE_MASK; 1023 ac_set_expected_status(&at2); 1024 if (!ac_test_do_access(&at2)) { 1025 printf("%s: write on the first mapping fail.\n", __FUNCTION__); 1026 goto err; 1027 } 1028 1029 return 1; 1030 1031 err: 1032 return 0; 1033 } 1034 1035 static int check_smep_andnot_wp(ac_pt_env_t *pt_env) 1036 { 1037 ac_test_t at1; 1038 int err_prepare_andnot_wp, err_smep_andnot_wp; 1039 1040 if (!this_cpu_has(X86_FEATURE_SMEP)) { 1041 return 1; 1042 } 1043 1044 ac_test_init(&at1, 0xffff923406001000ul, pt_env); 1045 1046 at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK | 1047 AC_PDE_USER_MASK | AC_PTE_USER_MASK | 1048 AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK | 1049 AC_CPU_CR4_SMEP_MASK | 1050 AC_CPU_CR0_WP_MASK | 1051 AC_ACCESS_WRITE_MASK; 1052 ac_test_setup_ptes(&at1); 1053 1054 /* 1055 * Here we write the ro user page when 1056 * cr0.wp=0, then we execute it and SMEP 1057 * fault should happen. 1058 */ 1059 err_prepare_andnot_wp = ac_test_do_access(&at1); 1060 if (!err_prepare_andnot_wp) { 1061 printf("%s: SMEP prepare fail\n", __FUNCTION__); 1062 goto clean_up; 1063 } 1064 1065 at1.flags &= ~AC_ACCESS_WRITE_MASK; 1066 at1.flags |= AC_ACCESS_FETCH_MASK; 1067 ac_set_expected_status(&at1); 1068 err_smep_andnot_wp = ac_test_do_access(&at1); 1069 1070 clean_up: 1071 set_cr4_smep(&at1, 0); 1072 1073 if (!err_prepare_andnot_wp) 1074 goto err; 1075 if (!err_smep_andnot_wp) { 1076 printf("%s: check SMEP without wp fail\n", __FUNCTION__); 1077 goto err; 1078 } 1079 return 1; 1080 1081 err: 1082 return 0; 1083 } 1084 1085 #define TOGGLE_CR0_WP_TEST_BASE_FLAGS \ 1086 (AC_PDE_PRESENT_MASK | AC_PDE_ACCESSED_MASK | \ 1087 AC_PTE_PRESENT_MASK | AC_PTE_ACCESSED_MASK | \ 1088 AC_ACCESS_WRITE_MASK) 1089 1090 static int do_cr0_wp_access(ac_test_t *at, int flags) 1091 { 1092 const bool cr0_wp = !!(flags & AC_CPU_CR0_WP_MASK); 1093 1094 at->flags = TOGGLE_CR0_WP_TEST_BASE_FLAGS | flags; 1095 __ac_set_expected_status(at, false); 1096 1097 /* 1098 * Under VMX the guest might own the CR0.WP bit, requiring KVM to 1099 * manually keep track of it where needed, e.g. in the guest page 1100 * table walker. 1101 * 1102 * Load CR0.WP with the inverse value of what will be used during 1103 * the access test and toggle EFER.NX to coerce KVM into rebuilding 1104 * the current MMU context based on the soon-to-be-stale CR0.WP. 1105 */ 1106 set_cr0_wp(!cr0_wp); 1107 set_efer_nx(1); 1108 set_efer_nx(0); 1109 1110 if (!ac_test_do_access(at)) { 1111 printf("%s: %ssupervisor write with CR0.WP=%d did not %s\n", 1112 __FUNCTION__, (flags & AC_FEP_MASK) ? "emulated " : "", 1113 cr0_wp, cr0_wp ? "FAULT" : "SUCCEED"); 1114 return 1; 1115 } 1116 1117 return 0; 1118 } 1119 1120 static int check_toggle_cr0_wp(ac_pt_env_t *pt_env) 1121 { 1122 ac_test_t at; 1123 int err = 0; 1124 1125 ac_test_init(&at, 0xffff923042007000ul, pt_env); 1126 at.flags = TOGGLE_CR0_WP_TEST_BASE_FLAGS; 1127 ac_test_setup_ptes(&at); 1128 1129 err += do_cr0_wp_access(&at, 0); 1130 err += do_cr0_wp_access(&at, AC_CPU_CR0_WP_MASK); 1131 if (!(invalid_mask & AC_FEP_MASK)) { 1132 err += do_cr0_wp_access(&at, AC_FEP_MASK); 1133 err += do_cr0_wp_access(&at, AC_FEP_MASK | AC_CPU_CR0_WP_MASK); 1134 } 1135 1136 return err == 0; 1137 } 1138 1139 static int check_effective_sp_permissions(ac_pt_env_t *pt_env) 1140 { 1141 unsigned long ptr1 = 0xffff923480000000; 1142 unsigned long ptr2 = ptr1 + SZ_2M; 1143 unsigned long ptr3 = ptr1 + SZ_1G; 1144 unsigned long ptr4 = ptr3 + SZ_2M; 1145 ac_test_t at1, at2, at3, at4; 1146 int err_read_at1, err_write_at2; 1147 int err_read_at3, err_write_at4; 1148 1149 /* 1150 * pgd[] pud[] pmd[] virtual address pointers 1151 * /->pmd(u--)->pte1(uw-)->page1 <- ptr1 (u--) 1152 * /->pud1(uw-)--->pmd(uw-)->pte2(uw-)->page2 <- ptr2 (uw-) 1153 * pgd-| 1154 * \->pud2(u--)--->pmd(u--)->pte1(uw-)->page1 <- ptr3 (u--) 1155 * \->pmd(uw-)->pte2(uw-)->page2 <- ptr4 (u--) 1156 * pud1 and pud2 point to the same pmd page. 1157 */ 1158 1159 ac_test_init(&at1, ptr1, pt_env); 1160 at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK | 1161 AC_PDE_USER_MASK | AC_PTE_USER_MASK | 1162 AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK | 1163 AC_PTE_WRITABLE_MASK | AC_ACCESS_USER_MASK; 1164 ac_test_setup_ptes(&at1); 1165 1166 __ac_test_init(&at2, ptr2, pt_env, &at1); 1167 at2.flags = at1.flags | AC_PDE_WRITABLE_MASK | AC_PTE_DIRTY_MASK | AC_ACCESS_WRITE_MASK; 1168 ac_test_setup_ptes(&at2); 1169 1170 __ac_test_init(&at3, ptr3, pt_env, &at1); 1171 /* Override the PMD (1-based index) to point at ptr1's PMD. */ 1172 at3.page_tables[3] = at1.page_tables[3]; 1173 at3.flags = AC_PDPTE_NO_WRITABLE_MASK | at1.flags; 1174 ac_test_setup_ptes(&at3); 1175 1176 /* Alias ptr2, only the PMD will differ; manually override the PMD. */ 1177 __ac_test_init(&at4, ptr4, pt_env, &at2); 1178 at4.page_tables[3] = at1.page_tables[3]; 1179 at4.flags = AC_PDPTE_NO_WRITABLE_MASK | at2.flags; 1180 ac_test_setup_ptes(&at4); 1181 1182 err_read_at1 = ac_test_do_access(&at1); 1183 if (!err_read_at1) { 1184 printf("%s: read access at1 fail\n", __FUNCTION__); 1185 return 0; 1186 } 1187 1188 err_write_at2 = ac_test_do_access(&at2); 1189 if (!err_write_at2) { 1190 printf("%s: write access at2 fail\n", __FUNCTION__); 1191 return 0; 1192 } 1193 1194 err_read_at3 = ac_test_do_access(&at3); 1195 if (!err_read_at3) { 1196 printf("%s: read access at3 fail\n", __FUNCTION__); 1197 return 0; 1198 } 1199 1200 err_write_at4 = ac_test_do_access(&at4); 1201 if (!err_write_at4) { 1202 printf("%s: write access at4 should fail\n", __FUNCTION__); 1203 return 0; 1204 } 1205 1206 return 1; 1207 } 1208 1209 static int ac_test_exec(ac_test_t *at, ac_pt_env_t *pt_env) 1210 { 1211 int r; 1212 1213 if (verbose) { 1214 ac_test_show(at); 1215 } 1216 ac_test_setup_ptes(at); 1217 r = ac_test_do_access(at); 1218 return r; 1219 } 1220 1221 typedef int (*ac_test_fn)(ac_pt_env_t *pt_env); 1222 const ac_test_fn ac_test_cases[] = 1223 { 1224 corrupt_hugepage_trigger, 1225 check_pfec_on_prefetch_pte, 1226 check_large_pte_dirty_for_nowp, 1227 check_smep_andnot_wp, 1228 check_toggle_cr0_wp, 1229 check_effective_sp_permissions, 1230 }; 1231 1232 void ac_test_run(int pt_levels, bool force_emulation) 1233 { 1234 ac_test_t at; 1235 ac_pt_env_t pt_env; 1236 int i, tests, successes; 1237 1238 if (force_emulation && !is_fep_available()) { 1239 report_skip("Forced emulation prefix (FEP) not available\n"); 1240 return; 1241 } 1242 1243 printf("run\n"); 1244 tests = successes = 0; 1245 1246 shadow_cr0 = read_cr0(); 1247 shadow_cr4 = read_cr4(); 1248 shadow_cr3 = read_cr3(); 1249 shadow_efer = rdmsr(MSR_EFER); 1250 1251 if (cpuid_maxphyaddr() >= 52) { 1252 invalid_mask |= AC_PDE_BIT51_MASK; 1253 invalid_mask |= AC_PTE_BIT51_MASK; 1254 } 1255 if (cpuid_maxphyaddr() >= 37) { 1256 invalid_mask |= AC_PDE_BIT36_MASK; 1257 invalid_mask |= AC_PTE_BIT36_MASK; 1258 } 1259 1260 if (!force_emulation) 1261 invalid_mask |= AC_FEP_MASK; 1262 1263 ac_env_int(&pt_env, pt_levels); 1264 ac_test_init(&at, 0xffff923400000000ul, &pt_env); 1265 1266 if (this_cpu_has(X86_FEATURE_PKU)) { 1267 set_cr4_pke(1); 1268 set_cr4_pke(0); 1269 /* Now PKRU = 0xFFFFFFFF. */ 1270 } else { 1271 tests++; 1272 if (write_cr4_safe(shadow_cr4 | X86_CR4_PKE) == GP_VECTOR) { 1273 successes++; 1274 invalid_mask |= AC_PKU_AD_MASK; 1275 invalid_mask |= AC_PKU_WD_MASK; 1276 invalid_mask |= AC_PKU_PKEY_MASK; 1277 invalid_mask |= AC_CPU_CR4_PKE_MASK; 1278 printf("CR4.PKE not available, disabling PKE tests\n"); 1279 } else { 1280 printf("Set PKE in CR4 - expect #GP: FAIL!\n"); 1281 set_cr4_pke(0); 1282 } 1283 } 1284 1285 if (!this_cpu_has(X86_FEATURE_SMEP)) { 1286 tests++; 1287 if (set_cr4_smep(&at, 1) == GP_VECTOR) { 1288 successes++; 1289 invalid_mask |= AC_CPU_CR4_SMEP_MASK; 1290 printf("CR4.SMEP not available, disabling SMEP tests\n"); 1291 } else { 1292 printf("Set SMEP in CR4 - expect #GP: FAIL!\n"); 1293 set_cr4_smep(&at, 0); 1294 } 1295 } 1296 1297 /* Toggling LA57 in 64-bit mode (guaranteed for this test) is illegal. */ 1298 if (this_cpu_has(X86_FEATURE_LA57)) { 1299 tests++; 1300 if (write_cr4_safe(shadow_cr4 ^ X86_CR4_LA57) == GP_VECTOR) 1301 successes++; 1302 1303 /* Force a VM-Exit on KVM, which doesn't intercept LA57 itself. */ 1304 tests++; 1305 if (write_cr4_safe(shadow_cr4 ^ (X86_CR4_LA57 | X86_CR4_PSE)) == GP_VECTOR) 1306 successes++; 1307 } 1308 1309 do { 1310 ++tests; 1311 successes += ac_test_exec(&at, &pt_env); 1312 } while (ac_test_bump(&at)); 1313 1314 for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) { 1315 ac_env_int(&pt_env, pt_levels); 1316 1317 ++tests; 1318 successes += ac_test_cases[i](&pt_env); 1319 } 1320 1321 printf("\n%d tests, %d failures\n", tests, tests - successes); 1322 1323 report(successes == tests, "%d-level paging tests%s", pt_levels, 1324 force_emulation ? " (with forced emulation)" : ""); 1325 } 1326