1 2 #include "libcflat.h" 3 #include "desc.h" 4 #include "processor.h" 5 #include "asm/page.h" 6 #include "x86/vm.h" 7 8 #define smp_id() 0 9 10 #define true 1 11 #define false 0 12 13 static _Bool verbose = false; 14 15 typedef unsigned long pt_element_t; 16 static int invalid_mask; 17 static int page_table_levels; 18 19 #define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 36) - 1) & PAGE_MASK)) 20 #define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(1ull << 21)) 21 22 #define CR0_WP_MASK (1UL << 16) 23 #define CR4_SMEP_MASK (1UL << 20) 24 25 #define PFERR_PRESENT_MASK (1U << 0) 26 #define PFERR_WRITE_MASK (1U << 1) 27 #define PFERR_USER_MASK (1U << 2) 28 #define PFERR_RESERVED_MASK (1U << 3) 29 #define PFERR_FETCH_MASK (1U << 4) 30 #define PFERR_PK_MASK (1U << 5) 31 32 #define MSR_EFER 0xc0000080 33 #define EFER_NX_MASK (1ull << 11) 34 35 #define PT_INDEX(address, level) \ 36 ((address) >> (12 + ((level)-1) * 9)) & 511 37 38 /* 39 * page table access check tests 40 */ 41 42 enum { 43 AC_PTE_PRESENT_BIT, 44 AC_PTE_WRITABLE_BIT, 45 AC_PTE_USER_BIT, 46 AC_PTE_ACCESSED_BIT, 47 AC_PTE_DIRTY_BIT, 48 AC_PTE_NX_BIT, 49 AC_PTE_BIT51_BIT, 50 AC_PTE_BIT36_BIT, 51 52 AC_PDE_PRESENT_BIT, 53 AC_PDE_WRITABLE_BIT, 54 AC_PDE_USER_BIT, 55 AC_PDE_ACCESSED_BIT, 56 AC_PDE_DIRTY_BIT, 57 AC_PDE_PSE_BIT, 58 AC_PDE_NX_BIT, 59 AC_PDE_BIT51_BIT, 60 AC_PDE_BIT36_BIT, 61 AC_PDE_BIT13_BIT, 62 63 AC_PKU_AD_BIT, 64 AC_PKU_WD_BIT, 65 AC_PKU_PKEY_BIT, 66 67 AC_ACCESS_USER_BIT, 68 AC_ACCESS_WRITE_BIT, 69 AC_ACCESS_FETCH_BIT, 70 AC_ACCESS_TWICE_BIT, 71 72 AC_CPU_EFER_NX_BIT, 73 AC_CPU_CR0_WP_BIT, 74 AC_CPU_CR4_SMEP_BIT, 75 AC_CPU_CR4_PKE_BIT, 76 77 NR_AC_FLAGS 78 }; 79 80 #define AC_PTE_PRESENT_MASK (1 << AC_PTE_PRESENT_BIT) 81 #define AC_PTE_WRITABLE_MASK (1 << AC_PTE_WRITABLE_BIT) 82 #define AC_PTE_USER_MASK (1 << AC_PTE_USER_BIT) 83 #define AC_PTE_ACCESSED_MASK (1 << AC_PTE_ACCESSED_BIT) 84 #define AC_PTE_DIRTY_MASK (1 << AC_PTE_DIRTY_BIT) 85 #define AC_PTE_NX_MASK (1 << AC_PTE_NX_BIT) 86 #define AC_PTE_BIT51_MASK (1 << AC_PTE_BIT51_BIT) 87 #define AC_PTE_BIT36_MASK (1 << AC_PTE_BIT36_BIT) 88 89 #define AC_PDE_PRESENT_MASK (1 << AC_PDE_PRESENT_BIT) 90 #define AC_PDE_WRITABLE_MASK (1 << AC_PDE_WRITABLE_BIT) 91 #define AC_PDE_USER_MASK (1 << AC_PDE_USER_BIT) 92 #define AC_PDE_ACCESSED_MASK (1 << AC_PDE_ACCESSED_BIT) 93 #define AC_PDE_DIRTY_MASK (1 << AC_PDE_DIRTY_BIT) 94 #define AC_PDE_PSE_MASK (1 << AC_PDE_PSE_BIT) 95 #define AC_PDE_NX_MASK (1 << AC_PDE_NX_BIT) 96 #define AC_PDE_BIT51_MASK (1 << AC_PDE_BIT51_BIT) 97 #define AC_PDE_BIT36_MASK (1 << AC_PDE_BIT36_BIT) 98 #define AC_PDE_BIT13_MASK (1 << AC_PDE_BIT13_BIT) 99 100 #define AC_PKU_AD_MASK (1 << AC_PKU_AD_BIT) 101 #define AC_PKU_WD_MASK (1 << AC_PKU_WD_BIT) 102 #define AC_PKU_PKEY_MASK (1 << AC_PKU_PKEY_BIT) 103 104 #define AC_ACCESS_USER_MASK (1 << AC_ACCESS_USER_BIT) 105 #define AC_ACCESS_WRITE_MASK (1 << AC_ACCESS_WRITE_BIT) 106 #define AC_ACCESS_FETCH_MASK (1 << AC_ACCESS_FETCH_BIT) 107 #define AC_ACCESS_TWICE_MASK (1 << AC_ACCESS_TWICE_BIT) 108 109 #define AC_CPU_EFER_NX_MASK (1 << AC_CPU_EFER_NX_BIT) 110 #define AC_CPU_CR0_WP_MASK (1 << AC_CPU_CR0_WP_BIT) 111 #define AC_CPU_CR4_SMEP_MASK (1 << AC_CPU_CR4_SMEP_BIT) 112 #define AC_CPU_CR4_PKE_MASK (1 << AC_CPU_CR4_PKE_BIT) 113 114 const char *ac_names[] = { 115 [AC_PTE_PRESENT_BIT] = "pte.p", 116 [AC_PTE_ACCESSED_BIT] = "pte.a", 117 [AC_PTE_WRITABLE_BIT] = "pte.rw", 118 [AC_PTE_USER_BIT] = "pte.user", 119 [AC_PTE_DIRTY_BIT] = "pte.d", 120 [AC_PTE_NX_BIT] = "pte.nx", 121 [AC_PTE_BIT51_BIT] = "pte.51", 122 [AC_PTE_BIT36_BIT] = "pte.36", 123 [AC_PDE_PRESENT_BIT] = "pde.p", 124 [AC_PDE_ACCESSED_BIT] = "pde.a", 125 [AC_PDE_WRITABLE_BIT] = "pde.rw", 126 [AC_PDE_USER_BIT] = "pde.user", 127 [AC_PDE_DIRTY_BIT] = "pde.d", 128 [AC_PDE_PSE_BIT] = "pde.pse", 129 [AC_PDE_NX_BIT] = "pde.nx", 130 [AC_PDE_BIT51_BIT] = "pde.51", 131 [AC_PDE_BIT36_BIT] = "pde.36", 132 [AC_PDE_BIT13_BIT] = "pde.13", 133 [AC_PKU_AD_BIT] = "pkru.ad", 134 [AC_PKU_WD_BIT] = "pkru.wd", 135 [AC_PKU_PKEY_BIT] = "pkey=1", 136 [AC_ACCESS_WRITE_BIT] = "write", 137 [AC_ACCESS_USER_BIT] = "user", 138 [AC_ACCESS_FETCH_BIT] = "fetch", 139 [AC_ACCESS_TWICE_BIT] = "twice", 140 [AC_CPU_EFER_NX_BIT] = "efer.nx", 141 [AC_CPU_CR0_WP_BIT] = "cr0.wp", 142 [AC_CPU_CR4_SMEP_BIT] = "cr4.smep", 143 [AC_CPU_CR4_PKE_BIT] = "cr4.pke", 144 }; 145 146 static inline void *va(pt_element_t phys) 147 { 148 return (void *)phys; 149 } 150 151 typedef struct { 152 pt_element_t pt_pool; 153 unsigned pt_pool_size; 154 unsigned pt_pool_current; 155 } ac_pool_t; 156 157 typedef struct { 158 unsigned flags; 159 void *virt; 160 pt_element_t phys; 161 pt_element_t *ptep; 162 pt_element_t expected_pte; 163 pt_element_t *pdep; 164 pt_element_t expected_pde; 165 pt_element_t ignore_pde; 166 int expected_fault; 167 unsigned expected_error; 168 } ac_test_t; 169 170 typedef struct { 171 unsigned short limit; 172 unsigned long linear_addr; 173 } __attribute__((packed)) descriptor_table_t; 174 175 176 static void ac_test_show(ac_test_t *at); 177 178 static unsigned long shadow_cr0; 179 static unsigned long shadow_cr4; 180 static unsigned long long shadow_efer; 181 182 static void set_cr0_wp(int wp) 183 { 184 unsigned long cr0 = shadow_cr0; 185 186 cr0 &= ~CR0_WP_MASK; 187 if (wp) 188 cr0 |= CR0_WP_MASK; 189 if (cr0 != shadow_cr0) { 190 write_cr0(cr0); 191 shadow_cr0 = cr0; 192 } 193 } 194 195 static unsigned set_cr4_smep(int smep) 196 { 197 unsigned long cr4 = shadow_cr4; 198 extern u64 ptl2[]; 199 unsigned r; 200 201 cr4 &= ~CR4_SMEP_MASK; 202 if (smep) 203 cr4 |= CR4_SMEP_MASK; 204 if (cr4 == shadow_cr4) 205 return 0; 206 207 if (smep) 208 ptl2[2] &= ~PT_USER_MASK; 209 r = write_cr4_checking(cr4); 210 if (r || !smep) 211 ptl2[2] |= PT_USER_MASK; 212 if (!r) 213 shadow_cr4 = cr4; 214 return r; 215 } 216 217 static void set_cr4_pke(int pke) 218 { 219 unsigned long cr4 = shadow_cr4; 220 221 cr4 &= ~X86_CR4_PKE; 222 if (pke) 223 cr4 |= X86_CR4_PKE; 224 if (cr4 == shadow_cr4) 225 return; 226 227 /* Check that protection keys do not affect accesses when CR4.PKE=0. */ 228 if ((shadow_cr4 & X86_CR4_PKE) && !pke) 229 write_pkru(0xfffffffc); 230 write_cr4(cr4); 231 shadow_cr4 = cr4; 232 } 233 234 static void set_efer_nx(int nx) 235 { 236 unsigned long long efer = shadow_efer; 237 238 efer &= ~EFER_NX_MASK; 239 if (nx) 240 efer |= EFER_NX_MASK; 241 if (efer != shadow_efer) { 242 wrmsr(MSR_EFER, efer); 243 shadow_efer = efer; 244 } 245 } 246 247 static void ac_env_int(ac_pool_t *pool) 248 { 249 extern char page_fault, kernel_entry; 250 set_idt_entry(14, &page_fault, 0); 251 set_idt_entry(0x20, &kernel_entry, 3); 252 253 pool->pt_pool = 33 * 1024 * 1024; 254 pool->pt_pool_size = 120 * 1024 * 1024 - pool->pt_pool; 255 pool->pt_pool_current = 0; 256 } 257 258 static void ac_test_init(ac_test_t *at, void *virt) 259 { 260 set_efer_nx(1); 261 set_cr0_wp(1); 262 at->flags = 0; 263 at->virt = virt; 264 at->phys = 32 * 1024 * 1024; 265 } 266 267 static int ac_test_bump_one(ac_test_t *at) 268 { 269 at->flags = ((at->flags | invalid_mask) + 1) & ~invalid_mask; 270 return at->flags < (1 << NR_AC_FLAGS); 271 } 272 273 #define F(x) ((flags & x##_MASK) != 0) 274 275 static _Bool ac_test_legal(ac_test_t *at) 276 { 277 int flags = at->flags; 278 279 if (F(AC_ACCESS_FETCH) && F(AC_ACCESS_WRITE)) 280 return false; 281 282 /* 283 * Since we convert current page to kernel page when cr4.smep=1, 284 * we can't switch to user mode. 285 */ 286 if (F(AC_ACCESS_USER) && F(AC_CPU_CR4_SMEP)) 287 return false; 288 289 /* 290 * Only test protection key faults if CR4.PKE=1. 291 */ 292 if (!F(AC_CPU_CR4_PKE) && 293 (F(AC_PKU_AD) || F(AC_PKU_WD))) { 294 return false; 295 } 296 297 /* 298 * pde.bit13 checks handling of reserved bits in largepage PDEs. It is 299 * meaningless if there is a PTE. 300 */ 301 if (!F(AC_PDE_PSE) && F(AC_PDE_BIT13)) 302 return false; 303 304 /* 305 * Shorten the test by avoiding testing too many reserved bit combinations 306 */ 307 if ((F(AC_PDE_BIT51) + F(AC_PDE_BIT36) + F(AC_PDE_BIT13)) > 1) 308 return false; 309 if ((F(AC_PTE_BIT51) + F(AC_PTE_BIT36)) > 1) 310 return false; 311 312 return true; 313 } 314 315 static int ac_test_bump(ac_test_t *at) 316 { 317 int ret; 318 319 ret = ac_test_bump_one(at); 320 while (ret && !ac_test_legal(at)) 321 ret = ac_test_bump_one(at); 322 return ret; 323 } 324 325 static pt_element_t ac_test_alloc_pt(ac_pool_t *pool) 326 { 327 pt_element_t ret = pool->pt_pool + pool->pt_pool_current; 328 pool->pt_pool_current += PAGE_SIZE; 329 return ret; 330 } 331 332 static _Bool ac_test_enough_room(ac_pool_t *pool) 333 { 334 return pool->pt_pool_current + 5 * PAGE_SIZE <= pool->pt_pool_size; 335 } 336 337 static void ac_test_reset_pt_pool(ac_pool_t *pool) 338 { 339 pool->pt_pool_current = 0; 340 } 341 342 static pt_element_t ac_test_permissions(ac_test_t *at, unsigned flags, 343 bool writable, bool user, 344 bool executable) 345 { 346 bool kwritable = !F(AC_CPU_CR0_WP) && !F(AC_ACCESS_USER); 347 pt_element_t expected = 0; 348 349 if (F(AC_ACCESS_USER) && !user) 350 at->expected_fault = 1; 351 352 if (F(AC_ACCESS_WRITE) && !writable && !kwritable) 353 at->expected_fault = 1; 354 355 if (F(AC_ACCESS_FETCH) && !executable) 356 at->expected_fault = 1; 357 358 if (F(AC_ACCESS_FETCH) && user && F(AC_CPU_CR4_SMEP)) 359 at->expected_fault = 1; 360 361 if (user && !F(AC_ACCESS_FETCH) && F(AC_PKU_PKEY) && F(AC_CPU_CR4_PKE)) { 362 if (F(AC_PKU_AD)) { 363 at->expected_fault = 1; 364 at->expected_error |= PFERR_PK_MASK; 365 } else if (F(AC_ACCESS_WRITE) && F(AC_PKU_WD) && !kwritable) { 366 at->expected_fault = 1; 367 at->expected_error |= PFERR_PK_MASK; 368 } 369 } 370 371 if (!at->expected_fault) { 372 expected |= PT_ACCESSED_MASK; 373 if (F(AC_ACCESS_WRITE)) 374 expected |= PT_DIRTY_MASK; 375 } 376 377 return expected; 378 } 379 380 static void ac_emulate_access(ac_test_t *at, unsigned flags) 381 { 382 bool pde_valid, pte_valid; 383 bool user, writable, executable; 384 385 if (F(AC_ACCESS_USER)) 386 at->expected_error |= PFERR_USER_MASK; 387 388 if (F(AC_ACCESS_WRITE)) 389 at->expected_error |= PFERR_WRITE_MASK; 390 391 if (F(AC_ACCESS_FETCH)) 392 at->expected_error |= PFERR_FETCH_MASK; 393 394 if (!F(AC_PDE_ACCESSED)) 395 at->ignore_pde = PT_ACCESSED_MASK; 396 397 pde_valid = F(AC_PDE_PRESENT) 398 && !F(AC_PDE_BIT51) && !F(AC_PDE_BIT36) && !F(AC_PDE_BIT13) 399 && !(F(AC_PDE_NX) && !F(AC_CPU_EFER_NX)); 400 401 if (!pde_valid) { 402 at->expected_fault = 1; 403 if (F(AC_PDE_PRESENT)) { 404 at->expected_error |= PFERR_RESERVED_MASK; 405 } else { 406 at->expected_error &= ~PFERR_PRESENT_MASK; 407 } 408 goto fault; 409 } 410 411 writable = F(AC_PDE_WRITABLE); 412 user = F(AC_PDE_USER); 413 executable = !F(AC_PDE_NX); 414 415 if (F(AC_PDE_PSE)) { 416 at->expected_pde |= ac_test_permissions(at, flags, writable, user, 417 executable); 418 goto no_pte; 419 } 420 421 at->expected_pde |= PT_ACCESSED_MASK; 422 423 pte_valid = F(AC_PTE_PRESENT) 424 && !F(AC_PTE_BIT51) && !F(AC_PTE_BIT36) 425 && !(F(AC_PTE_NX) && !F(AC_CPU_EFER_NX)); 426 427 if (!pte_valid) { 428 at->expected_fault = 1; 429 if (F(AC_PTE_PRESENT)) { 430 at->expected_error |= PFERR_RESERVED_MASK; 431 } else { 432 at->expected_error &= ~PFERR_PRESENT_MASK; 433 } 434 goto fault; 435 } 436 437 writable &= F(AC_PTE_WRITABLE); 438 user &= F(AC_PTE_USER); 439 executable &= !F(AC_PTE_NX); 440 441 at->expected_pte |= ac_test_permissions(at, flags, writable, user, 442 executable); 443 444 no_pte: 445 fault: 446 if (!at->expected_fault) 447 at->ignore_pde = 0; 448 if (!F(AC_CPU_EFER_NX) && !F(AC_CPU_CR4_SMEP)) 449 at->expected_error &= ~PFERR_FETCH_MASK; 450 } 451 452 static void ac_set_expected_status(ac_test_t *at) 453 { 454 invlpg(at->virt); 455 456 if (at->ptep) 457 at->expected_pte = *at->ptep; 458 at->expected_pde = *at->pdep; 459 at->ignore_pde = 0; 460 at->expected_fault = 0; 461 at->expected_error = PFERR_PRESENT_MASK; 462 463 if (at->flags & AC_ACCESS_TWICE_MASK) { 464 ac_emulate_access(at, at->flags & ~AC_ACCESS_WRITE_MASK 465 & ~AC_ACCESS_FETCH_MASK & ~AC_ACCESS_USER_MASK); 466 at->expected_fault = 0; 467 at->expected_error = PFERR_PRESENT_MASK; 468 at->ignore_pde = 0; 469 } 470 471 ac_emulate_access(at, at->flags); 472 } 473 474 static void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, 475 u64 pd_page, u64 pt_page) 476 477 { 478 unsigned long root = read_cr3(); 479 int flags = at->flags; 480 bool skip = true; 481 482 if (!ac_test_enough_room(pool)) 483 ac_test_reset_pt_pool(pool); 484 485 at->ptep = 0; 486 for (int i = page_table_levels; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) { 487 pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); 488 unsigned index = PT_INDEX((unsigned long)at->virt, i); 489 pt_element_t pte = 0; 490 491 /* 492 * Reuse existing page tables along the path to the test code and data 493 * (which is in the bottom 2MB). 494 */ 495 if (skip && i >= 2 && index == 0) { 496 goto next; 497 } 498 skip = false; 499 500 switch (i) { 501 case 5: 502 case 4: 503 case 3: 504 pte = pd_page ? pd_page : ac_test_alloc_pt(pool); 505 pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; 506 break; 507 case 2: 508 if (!F(AC_PDE_PSE)) { 509 pte = pt_page ? pt_page : ac_test_alloc_pt(pool); 510 /* The protection key is ignored on non-leaf entries. */ 511 if (F(AC_PKU_PKEY)) 512 pte |= 2ull << 59; 513 } else { 514 pte = at->phys & PT_PSE_BASE_ADDR_MASK; 515 pte |= PT_PAGE_SIZE_MASK; 516 if (F(AC_PKU_PKEY)) 517 pte |= 1ull << 59; 518 } 519 if (F(AC_PDE_PRESENT)) 520 pte |= PT_PRESENT_MASK; 521 if (F(AC_PDE_WRITABLE)) 522 pte |= PT_WRITABLE_MASK; 523 if (F(AC_PDE_USER)) 524 pte |= PT_USER_MASK; 525 if (F(AC_PDE_ACCESSED)) 526 pte |= PT_ACCESSED_MASK; 527 if (F(AC_PDE_DIRTY)) 528 pte |= PT_DIRTY_MASK; 529 if (F(AC_PDE_NX)) 530 pte |= PT64_NX_MASK; 531 if (F(AC_PDE_BIT51)) 532 pte |= 1ull << 51; 533 if (F(AC_PDE_BIT36)) 534 pte |= 1ull << 36; 535 if (F(AC_PDE_BIT13)) 536 pte |= 1ull << 13; 537 at->pdep = &vroot[index]; 538 break; 539 case 1: 540 pte = at->phys & PT_BASE_ADDR_MASK; 541 if (F(AC_PKU_PKEY)) 542 pte |= 1ull << 59; 543 if (F(AC_PTE_PRESENT)) 544 pte |= PT_PRESENT_MASK; 545 if (F(AC_PTE_WRITABLE)) 546 pte |= PT_WRITABLE_MASK; 547 if (F(AC_PTE_USER)) 548 pte |= PT_USER_MASK; 549 if (F(AC_PTE_ACCESSED)) 550 pte |= PT_ACCESSED_MASK; 551 if (F(AC_PTE_DIRTY)) 552 pte |= PT_DIRTY_MASK; 553 if (F(AC_PTE_NX)) 554 pte |= PT64_NX_MASK; 555 if (F(AC_PTE_BIT51)) 556 pte |= 1ull << 51; 557 if (F(AC_PTE_BIT36)) 558 pte |= 1ull << 36; 559 at->ptep = &vroot[index]; 560 break; 561 } 562 vroot[index] = pte; 563 next: 564 root = vroot[index]; 565 } 566 ac_set_expected_status(at); 567 } 568 569 static void ac_test_setup_pte(ac_test_t *at, ac_pool_t *pool) 570 { 571 __ac_setup_specific_pages(at, pool, 0, 0); 572 } 573 574 static void ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, 575 u64 pd_page, u64 pt_page) 576 { 577 return __ac_setup_specific_pages(at, pool, pd_page, pt_page); 578 } 579 580 static void dump_mapping(ac_test_t *at) 581 { 582 unsigned long root = read_cr3(); 583 int flags = at->flags; 584 int i; 585 586 printf("Dump mapping: address: %p\n", at->virt); 587 for (i = page_table_levels ; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) { 588 pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); 589 unsigned index = PT_INDEX((unsigned long)at->virt, i); 590 pt_element_t pte = vroot[index]; 591 592 printf("------L%d: %lx\n", i, pte); 593 root = vroot[index]; 594 } 595 } 596 597 static void ac_test_check(ac_test_t *at, _Bool *success_ret, _Bool cond, 598 const char *fmt, ...) 599 { 600 va_list ap; 601 char buf[500]; 602 603 if (!*success_ret) { 604 return; 605 } 606 607 if (!cond) { 608 return; 609 } 610 611 *success_ret = false; 612 613 if (!verbose) { 614 puts("\n"); 615 ac_test_show(at); 616 } 617 618 va_start(ap, fmt); 619 vsnprintf(buf, sizeof(buf), fmt, ap); 620 va_end(ap); 621 printf("FAIL: %s\n", buf); 622 dump_mapping(at); 623 } 624 625 static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore) 626 { 627 pte1 &= ~ignore; 628 pte2 &= ~ignore; 629 return pte1 == pte2; 630 } 631 632 static int ac_test_do_access(ac_test_t *at) 633 { 634 static unsigned unique = 42; 635 int fault = 0; 636 unsigned e; 637 static unsigned char user_stack[4096]; 638 unsigned long rsp; 639 _Bool success = true; 640 int flags = at->flags; 641 642 ++unique; 643 if (!(unique & 65535)) { 644 puts("."); 645 } 646 647 *((unsigned char *)at->phys) = 0xc3; /* ret */ 648 649 unsigned r = unique; 650 set_cr0_wp(F(AC_CPU_CR0_WP)); 651 set_efer_nx(F(AC_CPU_EFER_NX)); 652 set_cr4_pke(F(AC_CPU_CR4_PKE)); 653 if (F(AC_CPU_CR4_PKE)) { 654 /* WD2=AD2=1, WD1=F(AC_PKU_WD), AD1=F(AC_PKU_AD) */ 655 write_pkru(0x30 | (F(AC_PKU_WD) ? 8 : 0) | 656 (F(AC_PKU_AD) ? 4 : 0)); 657 } 658 659 set_cr4_smep(F(AC_CPU_CR4_SMEP)); 660 661 if (F(AC_ACCESS_TWICE)) { 662 asm volatile ( 663 "mov $fixed2, %%rsi \n\t" 664 "mov (%[addr]), %[reg] \n\t" 665 "fixed2:" 666 : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e) 667 : [addr]"r"(at->virt) 668 : "rsi" 669 ); 670 fault = 0; 671 } 672 673 asm volatile ("mov $fixed1, %%rsi \n\t" 674 "mov %%rsp, %%rdx \n\t" 675 "cmp $0, %[user] \n\t" 676 "jz do_access \n\t" 677 "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax \n\t" 678 "pushq %[user_ds] \n\t" 679 "pushq %[user_stack_top] \n\t" 680 "pushfq \n\t" 681 "pushq %[user_cs] \n\t" 682 "pushq $do_access \n\t" 683 "iretq \n" 684 "do_access: \n\t" 685 "cmp $0, %[fetch] \n\t" 686 "jnz 2f \n\t" 687 "cmp $0, %[write] \n\t" 688 "jnz 1f \n\t" 689 "mov (%[addr]), %[reg] \n\t" 690 "jmp done \n\t" 691 "1: mov %[reg], (%[addr]) \n\t" 692 "jmp done \n\t" 693 "2: call *%[addr] \n\t" 694 "done: \n" 695 "fixed1: \n" 696 "int %[kernel_entry_vector] \n\t" 697 "back_to_kernel:" 698 : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp) 699 : [addr]"r"(at->virt), 700 [write]"r"(F(AC_ACCESS_WRITE)), 701 [user]"r"(F(AC_ACCESS_USER)), 702 [fetch]"r"(F(AC_ACCESS_FETCH)), 703 [user_ds]"i"(USER_DS), 704 [user_cs]"i"(USER_CS), 705 [user_stack_top]"r"(user_stack + sizeof user_stack), 706 [kernel_entry_vector]"i"(0x20) 707 : "rsi"); 708 709 asm volatile (".section .text.pf \n\t" 710 "page_fault: \n\t" 711 "pop %rbx \n\t" 712 "mov %rsi, (%rsp) \n\t" 713 "movl $1, %eax \n\t" 714 "iretq \n\t" 715 ".section .text"); 716 717 asm volatile (".section .text.entry \n\t" 718 "kernel_entry: \n\t" 719 "mov %rdx, %rsp \n\t" 720 "jmp back_to_kernel \n\t" 721 ".section .text"); 722 723 ac_test_check(at, &success, fault && !at->expected_fault, 724 "unexpected fault"); 725 ac_test_check(at, &success, !fault && at->expected_fault, 726 "unexpected access"); 727 ac_test_check(at, &success, fault && e != at->expected_error, 728 "error code %x expected %x", e, at->expected_error); 729 if (at->ptep) 730 ac_test_check(at, &success, *at->ptep != at->expected_pte, 731 "pte %x expected %x", *at->ptep, at->expected_pte); 732 ac_test_check(at, &success, 733 !pt_match(*at->pdep, at->expected_pde, at->ignore_pde), 734 "pde %x expected %x", *at->pdep, at->expected_pde); 735 736 if (success && verbose) { 737 if (at->expected_fault) { 738 printf("PASS (%x)\n", at->expected_error); 739 } else { 740 printf("PASS\n"); 741 } 742 } 743 return success; 744 } 745 746 static void ac_test_show(ac_test_t *at) 747 { 748 char line[5000]; 749 750 *line = 0; 751 strcat(line, "test"); 752 for (int i = 0; i < NR_AC_FLAGS; ++i) 753 if (at->flags & (1 << i)) { 754 strcat(line, " "); 755 strcat(line, ac_names[i]); 756 } 757 758 strcat(line, ": "); 759 printf("%s", line); 760 } 761 762 /* 763 * This test case is used to triger the bug which is fixed by 764 * commit e09e90a5 in the kvm tree 765 */ 766 static int corrupt_hugepage_triger(ac_pool_t *pool) 767 { 768 ac_test_t at1, at2; 769 770 ac_test_init(&at1, (void *)(0x123400000000)); 771 ac_test_init(&at2, (void *)(0x666600000000)); 772 773 at2.flags = AC_CPU_CR0_WP_MASK | AC_PDE_PSE_MASK | AC_PDE_PRESENT_MASK; 774 ac_test_setup_pte(&at2, pool); 775 if (!ac_test_do_access(&at2)) 776 goto err; 777 778 at1.flags = at2.flags | AC_PDE_WRITABLE_MASK; 779 ac_test_setup_pte(&at1, pool); 780 if (!ac_test_do_access(&at1)) 781 goto err; 782 783 at1.flags |= AC_ACCESS_WRITE_MASK; 784 ac_set_expected_status(&at1); 785 if (!ac_test_do_access(&at1)) 786 goto err; 787 788 at2.flags |= AC_ACCESS_WRITE_MASK; 789 ac_set_expected_status(&at2); 790 if (!ac_test_do_access(&at2)) 791 goto err; 792 793 return 1; 794 795 err: 796 printf("corrupt_hugepage_triger test fail\n"); 797 return 0; 798 } 799 800 /* 801 * This test case is used to triger the bug which is fixed by 802 * commit 3ddf6c06e13e in the kvm tree 803 */ 804 static int check_pfec_on_prefetch_pte(ac_pool_t *pool) 805 { 806 ac_test_t at1, at2; 807 808 ac_test_init(&at1, (void *)(0x123406001000)); 809 ac_test_init(&at2, (void *)(0x123406003000)); 810 811 at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK; 812 ac_setup_specific_pages(&at1, pool, 30 * 1024 * 1024, 30 * 1024 * 1024); 813 814 at2.flags = at1.flags | AC_PTE_NX_MASK; 815 ac_setup_specific_pages(&at2, pool, 30 * 1024 * 1024, 30 * 1024 * 1024); 816 817 if (!ac_test_do_access(&at1)) { 818 printf("%s: prepare fail\n", __FUNCTION__); 819 goto err; 820 } 821 822 if (!ac_test_do_access(&at2)) { 823 printf("%s: check PFEC on prefetch pte path fail\n", 824 __FUNCTION__); 825 goto err; 826 } 827 828 return 1; 829 830 err: 831 return 0; 832 } 833 834 /* 835 * If the write-fault access is from supervisor and CR0.WP is not set on the 836 * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte 837 * and clears U bit. This is the chance that kvm can change pte access from 838 * readonly to writable. 839 * 840 * Unfortunately, the pte access is the access of 'direct' shadow page table, 841 * means direct sp.role.access = pte_access, then we will create a writable 842 * spte entry on the readonly shadow page table. It will cause Dirty bit is 843 * not tracked when two guest ptes point to the same large page. Note, it 844 * does not have other impact except Dirty bit since cr0.wp is encoded into 845 * sp.role. 846 * 847 * Note: to trigger this bug, hugepage should be disabled on host. 848 */ 849 static int check_large_pte_dirty_for_nowp(ac_pool_t *pool) 850 { 851 ac_test_t at1, at2; 852 853 ac_test_init(&at1, (void *)(0x123403000000)); 854 ac_test_init(&at2, (void *)(0x666606000000)); 855 856 at2.flags = AC_PDE_PRESENT_MASK | AC_PDE_PSE_MASK; 857 ac_test_setup_pte(&at2, pool); 858 if (!ac_test_do_access(&at2)) { 859 printf("%s: read on the first mapping fail.\n", __FUNCTION__); 860 goto err; 861 } 862 863 at1.flags = at2.flags | AC_ACCESS_WRITE_MASK; 864 ac_test_setup_pte(&at1, pool); 865 if (!ac_test_do_access(&at1)) { 866 printf("%s: write on the second mapping fail.\n", __FUNCTION__); 867 goto err; 868 } 869 870 at2.flags |= AC_ACCESS_WRITE_MASK; 871 ac_set_expected_status(&at2); 872 if (!ac_test_do_access(&at2)) { 873 printf("%s: write on the first mapping fail.\n", __FUNCTION__); 874 goto err; 875 } 876 877 return 1; 878 879 err: 880 return 0; 881 } 882 883 static int check_smep_andnot_wp(ac_pool_t *pool) 884 { 885 ac_test_t at1; 886 int err_prepare_andnot_wp, err_smep_andnot_wp; 887 888 if (!this_cpu_has(X86_FEATURE_SMEP)) { 889 return 1; 890 } 891 892 ac_test_init(&at1, (void *)(0x123406001000)); 893 894 at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK | 895 AC_PDE_USER_MASK | AC_PTE_USER_MASK | 896 AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK | 897 AC_CPU_CR4_SMEP_MASK | 898 AC_CPU_CR0_WP_MASK | 899 AC_ACCESS_WRITE_MASK; 900 ac_test_setup_pte(&at1, pool); 901 902 /* 903 * Here we write the ro user page when 904 * cr0.wp=0, then we execute it and SMEP 905 * fault should happen. 906 */ 907 err_prepare_andnot_wp = ac_test_do_access(&at1); 908 if (!err_prepare_andnot_wp) { 909 printf("%s: SMEP prepare fail\n", __FUNCTION__); 910 goto clean_up; 911 } 912 913 at1.flags &= ~AC_ACCESS_WRITE_MASK; 914 at1.flags |= AC_ACCESS_FETCH_MASK; 915 ac_set_expected_status(&at1); 916 err_smep_andnot_wp = ac_test_do_access(&at1); 917 918 clean_up: 919 set_cr4_smep(0); 920 921 if (!err_prepare_andnot_wp) 922 goto err; 923 if (!err_smep_andnot_wp) { 924 printf("%s: check SMEP without wp fail\n", __FUNCTION__); 925 goto err; 926 } 927 return 1; 928 929 err: 930 return 0; 931 } 932 933 static int ac_test_exec(ac_test_t *at, ac_pool_t *pool) 934 { 935 int r; 936 937 if (verbose) { 938 ac_test_show(at); 939 } 940 ac_test_setup_pte(at, pool); 941 r = ac_test_do_access(at); 942 return r; 943 } 944 945 typedef int (*ac_test_fn)(ac_pool_t *pool); 946 const ac_test_fn ac_test_cases[] = 947 { 948 corrupt_hugepage_triger, 949 check_pfec_on_prefetch_pte, 950 check_large_pte_dirty_for_nowp, 951 check_smep_andnot_wp 952 }; 953 954 static int ac_test_run(void) 955 { 956 ac_test_t at; 957 ac_pool_t pool; 958 int i, tests, successes; 959 960 printf("run\n"); 961 tests = successes = 0; 962 963 shadow_cr0 = read_cr0(); 964 shadow_cr4 = read_cr4(); 965 shadow_efer = rdmsr(MSR_EFER); 966 967 if (cpuid_maxphyaddr() >= 52) { 968 invalid_mask |= AC_PDE_BIT51_MASK; 969 invalid_mask |= AC_PTE_BIT51_MASK; 970 } 971 if (cpuid_maxphyaddr() >= 37) { 972 invalid_mask |= AC_PDE_BIT36_MASK; 973 invalid_mask |= AC_PTE_BIT36_MASK; 974 } 975 976 if (this_cpu_has(X86_FEATURE_PKU)) { 977 set_cr4_pke(1); 978 set_cr4_pke(0); 979 /* Now PKRU = 0xFFFFFFFF. */ 980 } else { 981 tests++; 982 if (write_cr4_checking(shadow_cr4 | X86_CR4_PKE) == GP_VECTOR) { 983 successes++; 984 invalid_mask |= AC_PKU_AD_MASK; 985 invalid_mask |= AC_PKU_WD_MASK; 986 invalid_mask |= AC_PKU_PKEY_MASK; 987 invalid_mask |= AC_CPU_CR4_PKE_MASK; 988 printf("CR4.PKE not available, disabling PKE tests\n"); 989 } else { 990 printf("Set PKE in CR4 - expect #GP: FAIL!\n"); 991 set_cr4_pke(0); 992 } 993 } 994 995 if (!this_cpu_has(X86_FEATURE_SMEP)) { 996 tests++; 997 if (set_cr4_smep(1) == GP_VECTOR) { 998 successes++; 999 invalid_mask |= AC_CPU_CR4_SMEP_MASK; 1000 printf("CR4.SMEP not available, disabling SMEP tests\n"); 1001 } else { 1002 printf("Set SMEP in CR4 - expect #GP: FAIL!\n"); 1003 set_cr4_smep(0); 1004 } 1005 } 1006 1007 /* Toggling LA57 in 64-bit mode (guaranteed for this test) is illegal. */ 1008 if (this_cpu_has(X86_FEATURE_LA57)) { 1009 tests++; 1010 if (write_cr4_checking(shadow_cr4 ^ X86_CR4_LA57) == GP_VECTOR) 1011 successes++; 1012 1013 /* Force a VM-Exit on KVM, which doesn't intercept LA57 itself. */ 1014 tests++; 1015 if (write_cr4_checking(shadow_cr4 ^ (X86_CR4_LA57 | X86_CR4_PSE)) == GP_VECTOR) 1016 successes++; 1017 } 1018 1019 ac_env_int(&pool); 1020 ac_test_init(&at, (void *)(0x123400000000 + 16 * smp_id())); 1021 do { 1022 ++tests; 1023 successes += ac_test_exec(&at, &pool); 1024 } while (ac_test_bump(&at)); 1025 1026 for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) { 1027 ++tests; 1028 successes += ac_test_cases[i](&pool); 1029 } 1030 1031 printf("\n%d tests, %d failures\n", tests, tests - successes); 1032 1033 return successes == tests; 1034 } 1035 1036 int main(void) 1037 { 1038 int r; 1039 1040 printf("starting test\n\n"); 1041 page_table_levels = 4; 1042 r = ac_test_run(); 1043 1044 if (this_cpu_has(X86_FEATURE_LA57)) { 1045 page_table_levels = 5; 1046 printf("starting 5-level paging test.\n\n"); 1047 setup_5level_page_table(); 1048 r = ac_test_run(); 1049 } 1050 1051 return r ? 0 : 1; 1052 } 1053