1 2 #include "libcflat.h" 3 #include "desc.h" 4 #include "processor.h" 5 #include "asm/page.h" 6 #include "x86/vm.h" 7 8 #define smp_id() 0 9 10 #define true 1 11 #define false 0 12 13 static _Bool verbose = false; 14 15 typedef unsigned long pt_element_t; 16 static int cpuid_7_ebx; 17 static int cpuid_7_ecx; 18 static int invalid_mask; 19 static int page_table_levels; 20 21 #define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 40) - 1) & PAGE_MASK)) 22 #define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(1ull << 21)) 23 24 #define CR0_WP_MASK (1UL << 16) 25 #define CR4_SMEP_MASK (1UL << 20) 26 27 #define PFERR_PRESENT_MASK (1U << 0) 28 #define PFERR_WRITE_MASK (1U << 1) 29 #define PFERR_USER_MASK (1U << 2) 30 #define PFERR_RESERVED_MASK (1U << 3) 31 #define PFERR_FETCH_MASK (1U << 4) 32 #define PFERR_PK_MASK (1U << 5) 33 34 #define MSR_EFER 0xc0000080 35 #define EFER_NX_MASK (1ull << 11) 36 37 #define PT_INDEX(address, level) \ 38 ((address) >> (12 + ((level)-1) * 9)) & 511 39 40 /* 41 * page table access check tests 42 */ 43 44 enum { 45 AC_PTE_PRESENT_BIT, 46 AC_PTE_WRITABLE_BIT, 47 AC_PTE_USER_BIT, 48 AC_PTE_ACCESSED_BIT, 49 AC_PTE_DIRTY_BIT, 50 AC_PTE_NX_BIT, 51 AC_PTE_BIT51_BIT, 52 53 AC_PDE_PRESENT_BIT, 54 AC_PDE_WRITABLE_BIT, 55 AC_PDE_USER_BIT, 56 AC_PDE_ACCESSED_BIT, 57 AC_PDE_DIRTY_BIT, 58 AC_PDE_PSE_BIT, 59 AC_PDE_NX_BIT, 60 AC_PDE_BIT51_BIT, 61 AC_PDE_BIT13_BIT, 62 63 AC_PKU_AD_BIT, 64 AC_PKU_WD_BIT, 65 AC_PKU_PKEY_BIT, 66 67 AC_ACCESS_USER_BIT, 68 AC_ACCESS_WRITE_BIT, 69 AC_ACCESS_FETCH_BIT, 70 AC_ACCESS_TWICE_BIT, 71 72 AC_CPU_EFER_NX_BIT, 73 AC_CPU_CR0_WP_BIT, 74 AC_CPU_CR4_SMEP_BIT, 75 AC_CPU_CR4_PKE_BIT, 76 77 NR_AC_FLAGS 78 }; 79 80 #define AC_PTE_PRESENT_MASK (1 << AC_PTE_PRESENT_BIT) 81 #define AC_PTE_WRITABLE_MASK (1 << AC_PTE_WRITABLE_BIT) 82 #define AC_PTE_USER_MASK (1 << AC_PTE_USER_BIT) 83 #define AC_PTE_ACCESSED_MASK (1 << AC_PTE_ACCESSED_BIT) 84 #define AC_PTE_DIRTY_MASK (1 << AC_PTE_DIRTY_BIT) 85 #define AC_PTE_NX_MASK (1 << AC_PTE_NX_BIT) 86 #define AC_PTE_BIT51_MASK (1 << AC_PTE_BIT51_BIT) 87 88 #define AC_PDE_PRESENT_MASK (1 << AC_PDE_PRESENT_BIT) 89 #define AC_PDE_WRITABLE_MASK (1 << AC_PDE_WRITABLE_BIT) 90 #define AC_PDE_USER_MASK (1 << AC_PDE_USER_BIT) 91 #define AC_PDE_ACCESSED_MASK (1 << AC_PDE_ACCESSED_BIT) 92 #define AC_PDE_DIRTY_MASK (1 << AC_PDE_DIRTY_BIT) 93 #define AC_PDE_PSE_MASK (1 << AC_PDE_PSE_BIT) 94 #define AC_PDE_NX_MASK (1 << AC_PDE_NX_BIT) 95 #define AC_PDE_BIT51_MASK (1 << AC_PDE_BIT51_BIT) 96 #define AC_PDE_BIT13_MASK (1 << AC_PDE_BIT13_BIT) 97 98 #define AC_PKU_AD_MASK (1 << AC_PKU_AD_BIT) 99 #define AC_PKU_WD_MASK (1 << AC_PKU_WD_BIT) 100 #define AC_PKU_PKEY_MASK (1 << AC_PKU_PKEY_BIT) 101 102 #define AC_ACCESS_USER_MASK (1 << AC_ACCESS_USER_BIT) 103 #define AC_ACCESS_WRITE_MASK (1 << AC_ACCESS_WRITE_BIT) 104 #define AC_ACCESS_FETCH_MASK (1 << AC_ACCESS_FETCH_BIT) 105 #define AC_ACCESS_TWICE_MASK (1 << AC_ACCESS_TWICE_BIT) 106 107 #define AC_CPU_EFER_NX_MASK (1 << AC_CPU_EFER_NX_BIT) 108 #define AC_CPU_CR0_WP_MASK (1 << AC_CPU_CR0_WP_BIT) 109 #define AC_CPU_CR4_SMEP_MASK (1 << AC_CPU_CR4_SMEP_BIT) 110 #define AC_CPU_CR4_PKE_MASK (1 << AC_CPU_CR4_PKE_BIT) 111 112 const char *ac_names[] = { 113 [AC_PTE_PRESENT_BIT] = "pte.p", 114 [AC_PTE_ACCESSED_BIT] = "pte.a", 115 [AC_PTE_WRITABLE_BIT] = "pte.rw", 116 [AC_PTE_USER_BIT] = "pte.user", 117 [AC_PTE_DIRTY_BIT] = "pte.d", 118 [AC_PTE_NX_BIT] = "pte.nx", 119 [AC_PTE_BIT51_BIT] = "pte.51", 120 [AC_PDE_PRESENT_BIT] = "pde.p", 121 [AC_PDE_ACCESSED_BIT] = "pde.a", 122 [AC_PDE_WRITABLE_BIT] = "pde.rw", 123 [AC_PDE_USER_BIT] = "pde.user", 124 [AC_PDE_DIRTY_BIT] = "pde.d", 125 [AC_PDE_PSE_BIT] = "pde.pse", 126 [AC_PDE_NX_BIT] = "pde.nx", 127 [AC_PDE_BIT51_BIT] = "pde.51", 128 [AC_PDE_BIT13_BIT] = "pde.13", 129 [AC_PKU_AD_BIT] = "pkru.ad", 130 [AC_PKU_WD_BIT] = "pkru.wd", 131 [AC_PKU_PKEY_BIT] = "pkey=1", 132 [AC_ACCESS_WRITE_BIT] = "write", 133 [AC_ACCESS_USER_BIT] = "user", 134 [AC_ACCESS_FETCH_BIT] = "fetch", 135 [AC_ACCESS_TWICE_BIT] = "twice", 136 [AC_CPU_EFER_NX_BIT] = "efer.nx", 137 [AC_CPU_CR0_WP_BIT] = "cr0.wp", 138 [AC_CPU_CR4_SMEP_BIT] = "cr4.smep", 139 [AC_CPU_CR4_PKE_BIT] = "cr4.pke", 140 }; 141 142 static inline void *va(pt_element_t phys) 143 { 144 return (void *)phys; 145 } 146 147 typedef struct { 148 pt_element_t pt_pool; 149 unsigned pt_pool_size; 150 unsigned pt_pool_current; 151 } ac_pool_t; 152 153 typedef struct { 154 unsigned flags; 155 void *virt; 156 pt_element_t phys; 157 pt_element_t *ptep; 158 pt_element_t expected_pte; 159 pt_element_t *pdep; 160 pt_element_t expected_pde; 161 pt_element_t ignore_pde; 162 int expected_fault; 163 unsigned expected_error; 164 } ac_test_t; 165 166 typedef struct { 167 unsigned short limit; 168 unsigned long linear_addr; 169 } __attribute__((packed)) descriptor_table_t; 170 171 172 static void ac_test_show(ac_test_t *at); 173 174 int write_cr4_checking(unsigned long val) 175 { 176 asm volatile(ASM_TRY("1f") 177 "mov %0,%%cr4\n\t" 178 "1:": : "r" (val)); 179 return exception_vector(); 180 } 181 182 void set_cr0_wp(int wp) 183 { 184 unsigned long cr0 = read_cr0(); 185 unsigned long old_cr0 = cr0; 186 187 cr0 &= ~CR0_WP_MASK; 188 if (wp) 189 cr0 |= CR0_WP_MASK; 190 if (old_cr0 != cr0) 191 write_cr0(cr0); 192 } 193 194 unsigned set_cr4_smep(int smep) 195 { 196 unsigned long cr4 = read_cr4(); 197 unsigned long old_cr4 = cr4; 198 extern u64 ptl2[]; 199 unsigned r; 200 201 cr4 &= ~CR4_SMEP_MASK; 202 if (smep) 203 cr4 |= CR4_SMEP_MASK; 204 if (old_cr4 == cr4) 205 return 0; 206 207 if (smep) 208 ptl2[2] &= ~PT_USER_MASK; 209 r = write_cr4_checking(cr4); 210 if (r || !smep) 211 ptl2[2] |= PT_USER_MASK; 212 return r; 213 } 214 215 void set_cr4_pke(int pke) 216 { 217 unsigned long cr4 = read_cr4(); 218 unsigned long old_cr4 = cr4; 219 220 cr4 &= ~X86_CR4_PKE; 221 if (pke) 222 cr4 |= X86_CR4_PKE; 223 if (old_cr4 == cr4) 224 return; 225 226 /* Check that protection keys do not affect accesses when CR4.PKE=0. */ 227 if ((read_cr4() & X86_CR4_PKE) && !pke) { 228 write_pkru(0xfffffffc); 229 } 230 write_cr4(cr4); 231 } 232 233 void set_efer_nx(int nx) 234 { 235 unsigned long long efer = rdmsr(MSR_EFER); 236 unsigned long long old_efer = efer; 237 238 efer &= ~EFER_NX_MASK; 239 if (nx) 240 efer |= EFER_NX_MASK; 241 if (old_efer != efer) 242 wrmsr(MSR_EFER, efer); 243 } 244 245 static void ac_env_int(ac_pool_t *pool) 246 { 247 extern char page_fault, kernel_entry; 248 set_idt_entry(14, &page_fault, 0); 249 set_idt_entry(0x20, &kernel_entry, 3); 250 251 pool->pt_pool = 33 * 1024 * 1024; 252 pool->pt_pool_size = 120 * 1024 * 1024 - pool->pt_pool; 253 pool->pt_pool_current = 0; 254 } 255 256 void ac_test_init(ac_test_t *at, void *virt) 257 { 258 wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NX_MASK); 259 set_cr0_wp(1); 260 at->flags = 0; 261 at->virt = virt; 262 at->phys = 32 * 1024 * 1024; 263 } 264 265 int ac_test_bump_one(ac_test_t *at) 266 { 267 at->flags = ((at->flags | invalid_mask) + 1) & ~invalid_mask; 268 return at->flags < (1 << NR_AC_FLAGS); 269 } 270 271 #define F(x) ((flags & x##_MASK) != 0) 272 273 _Bool ac_test_legal(ac_test_t *at) 274 { 275 int flags = at->flags; 276 277 if (F(AC_ACCESS_FETCH) && F(AC_ACCESS_WRITE)) 278 return false; 279 280 /* 281 * Since we convert current page to kernel page when cr4.smep=1, 282 * we can't switch to user mode. 283 */ 284 if (F(AC_ACCESS_USER) && F(AC_CPU_CR4_SMEP)) 285 return false; 286 287 /* 288 * Only test protection key faults if CR4.PKE=1. 289 */ 290 if (!F(AC_CPU_CR4_PKE) && 291 (F(AC_PKU_AD) || F(AC_PKU_WD))) { 292 return false; 293 } 294 295 /* 296 * pde.bit13 checks handling of reserved bits in largepage PDEs. It is 297 * meaningless if there is a PTE. 298 */ 299 if (!F(AC_PDE_PSE) && F(AC_PDE_BIT13)) 300 return false; 301 302 return true; 303 } 304 305 int ac_test_bump(ac_test_t *at) 306 { 307 int ret; 308 309 ret = ac_test_bump_one(at); 310 while (ret && !ac_test_legal(at)) 311 ret = ac_test_bump_one(at); 312 return ret; 313 } 314 315 pt_element_t ac_test_alloc_pt(ac_pool_t *pool) 316 { 317 pt_element_t ret = pool->pt_pool + pool->pt_pool_current; 318 pool->pt_pool_current += PAGE_SIZE; 319 return ret; 320 } 321 322 _Bool ac_test_enough_room(ac_pool_t *pool) 323 { 324 return pool->pt_pool_current + 5 * PAGE_SIZE <= pool->pt_pool_size; 325 } 326 327 void ac_test_reset_pt_pool(ac_pool_t *pool) 328 { 329 pool->pt_pool_current = 0; 330 } 331 332 pt_element_t ac_test_permissions(ac_test_t *at, unsigned flags, bool writable, 333 bool user, bool executable) 334 { 335 bool kwritable = !F(AC_CPU_CR0_WP) && !F(AC_ACCESS_USER); 336 pt_element_t expected = 0; 337 338 if (F(AC_ACCESS_USER) && !user) 339 at->expected_fault = 1; 340 341 if (F(AC_ACCESS_WRITE) && !writable && !kwritable) 342 at->expected_fault = 1; 343 344 if (F(AC_ACCESS_FETCH) && !executable) 345 at->expected_fault = 1; 346 347 if (F(AC_ACCESS_FETCH) && user && F(AC_CPU_CR4_SMEP)) 348 at->expected_fault = 1; 349 350 if (user && !F(AC_ACCESS_FETCH) && F(AC_PKU_PKEY) && F(AC_CPU_CR4_PKE)) { 351 if (F(AC_PKU_AD)) { 352 at->expected_fault = 1; 353 at->expected_error |= PFERR_PK_MASK; 354 } else if (F(AC_ACCESS_WRITE) && F(AC_PKU_WD) && !kwritable) { 355 at->expected_fault = 1; 356 at->expected_error |= PFERR_PK_MASK; 357 } 358 } 359 360 if (!at->expected_fault) { 361 expected |= PT_ACCESSED_MASK; 362 if (F(AC_ACCESS_WRITE)) 363 expected |= PT_DIRTY_MASK; 364 } 365 366 return expected; 367 } 368 369 void ac_emulate_access(ac_test_t *at, unsigned flags) 370 { 371 bool pde_valid, pte_valid; 372 bool user, writable, executable; 373 374 if (F(AC_ACCESS_USER)) 375 at->expected_error |= PFERR_USER_MASK; 376 377 if (F(AC_ACCESS_WRITE)) 378 at->expected_error |= PFERR_WRITE_MASK; 379 380 if (F(AC_ACCESS_FETCH)) 381 at->expected_error |= PFERR_FETCH_MASK; 382 383 if (!F(AC_PDE_ACCESSED)) 384 at->ignore_pde = PT_ACCESSED_MASK; 385 386 pde_valid = F(AC_PDE_PRESENT) 387 && !F(AC_PDE_BIT51) && !F(AC_PDE_BIT13) 388 && !(F(AC_PDE_NX) && !F(AC_CPU_EFER_NX)); 389 390 if (!pde_valid) { 391 at->expected_fault = 1; 392 if (F(AC_PDE_PRESENT)) { 393 at->expected_error |= PFERR_RESERVED_MASK; 394 } else { 395 at->expected_error &= ~PFERR_PRESENT_MASK; 396 } 397 goto fault; 398 } 399 400 writable = F(AC_PDE_WRITABLE); 401 user = F(AC_PDE_USER); 402 executable = !F(AC_PDE_NX); 403 404 if (F(AC_PDE_PSE)) { 405 at->expected_pde |= ac_test_permissions(at, flags, writable, user, 406 executable); 407 goto no_pte; 408 } 409 410 at->expected_pde |= PT_ACCESSED_MASK; 411 412 pte_valid = F(AC_PTE_PRESENT) 413 && !F(AC_PTE_BIT51) 414 && !(F(AC_PTE_NX) && !F(AC_CPU_EFER_NX)); 415 416 if (!pte_valid) { 417 at->expected_fault = 1; 418 if (F(AC_PTE_PRESENT)) { 419 at->expected_error |= PFERR_RESERVED_MASK; 420 } else { 421 at->expected_error &= ~PFERR_PRESENT_MASK; 422 } 423 goto fault; 424 } 425 426 writable &= F(AC_PTE_WRITABLE); 427 user &= F(AC_PTE_USER); 428 executable &= !F(AC_PTE_NX); 429 430 at->expected_pte |= ac_test_permissions(at, flags, writable, user, 431 executable); 432 433 no_pte: 434 fault: 435 if (!at->expected_fault) 436 at->ignore_pde = 0; 437 if (!F(AC_CPU_EFER_NX) && !F(AC_CPU_CR4_SMEP)) 438 at->expected_error &= ~PFERR_FETCH_MASK; 439 } 440 441 void ac_set_expected_status(ac_test_t *at) 442 { 443 invlpg(at->virt); 444 445 if (at->ptep) 446 at->expected_pte = *at->ptep; 447 at->expected_pde = *at->pdep; 448 at->ignore_pde = 0; 449 at->expected_fault = 0; 450 at->expected_error = PFERR_PRESENT_MASK; 451 452 if (at->flags & AC_ACCESS_TWICE_MASK) { 453 ac_emulate_access(at, at->flags & ~AC_ACCESS_WRITE_MASK 454 & ~AC_ACCESS_FETCH_MASK & ~AC_ACCESS_USER_MASK); 455 at->expected_fault = 0; 456 at->expected_error = PFERR_PRESENT_MASK; 457 at->ignore_pde = 0; 458 } 459 460 ac_emulate_access(at, at->flags); 461 } 462 463 void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, u64 pd_page, 464 u64 pt_page) 465 466 { 467 unsigned long root = read_cr3(); 468 int flags = at->flags; 469 bool skip = true; 470 471 if (!ac_test_enough_room(pool)) 472 ac_test_reset_pt_pool(pool); 473 474 at->ptep = 0; 475 for (int i = page_table_levels; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) { 476 pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); 477 unsigned index = PT_INDEX((unsigned long)at->virt, i); 478 pt_element_t pte = 0; 479 480 /* 481 * Reuse existing page tables along the path to the test code and data 482 * (which is in the bottom 2MB). 483 */ 484 if (skip && i >= 2 && index == 0) { 485 goto next; 486 } 487 skip = false; 488 489 switch (i) { 490 case 5: 491 case 4: 492 case 3: 493 pte = pd_page ? pd_page : ac_test_alloc_pt(pool); 494 pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; 495 break; 496 case 2: 497 if (!F(AC_PDE_PSE)) { 498 pte = pt_page ? pt_page : ac_test_alloc_pt(pool); 499 /* The protection key is ignored on non-leaf entries. */ 500 if (F(AC_PKU_PKEY)) 501 pte |= 2ull << 59; 502 } else { 503 pte = at->phys & PT_PSE_BASE_ADDR_MASK; 504 pte |= PT_PAGE_SIZE_MASK; 505 if (F(AC_PKU_PKEY)) 506 pte |= 1ull << 59; 507 } 508 if (F(AC_PDE_PRESENT)) 509 pte |= PT_PRESENT_MASK; 510 if (F(AC_PDE_WRITABLE)) 511 pte |= PT_WRITABLE_MASK; 512 if (F(AC_PDE_USER)) 513 pte |= PT_USER_MASK; 514 if (F(AC_PDE_ACCESSED)) 515 pte |= PT_ACCESSED_MASK; 516 if (F(AC_PDE_DIRTY)) 517 pte |= PT_DIRTY_MASK; 518 if (F(AC_PDE_NX)) 519 pte |= PT64_NX_MASK; 520 if (F(AC_PDE_BIT51)) 521 pte |= 1ull << 51; 522 if (F(AC_PDE_BIT13)) 523 pte |= 1ull << 13; 524 at->pdep = &vroot[index]; 525 break; 526 case 1: 527 pte = at->phys & PT_BASE_ADDR_MASK; 528 if (F(AC_PKU_PKEY)) 529 pte |= 1ull << 59; 530 if (F(AC_PTE_PRESENT)) 531 pte |= PT_PRESENT_MASK; 532 if (F(AC_PTE_WRITABLE)) 533 pte |= PT_WRITABLE_MASK; 534 if (F(AC_PTE_USER)) 535 pte |= PT_USER_MASK; 536 if (F(AC_PTE_ACCESSED)) 537 pte |= PT_ACCESSED_MASK; 538 if (F(AC_PTE_DIRTY)) 539 pte |= PT_DIRTY_MASK; 540 if (F(AC_PTE_NX)) 541 pte |= PT64_NX_MASK; 542 if (F(AC_PTE_BIT51)) 543 pte |= 1ull << 51; 544 at->ptep = &vroot[index]; 545 break; 546 } 547 vroot[index] = pte; 548 next: 549 root = vroot[index]; 550 } 551 ac_set_expected_status(at); 552 } 553 554 static void ac_test_setup_pte(ac_test_t *at, ac_pool_t *pool) 555 { 556 __ac_setup_specific_pages(at, pool, 0, 0); 557 } 558 559 static void ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, 560 u64 pd_page, u64 pt_page) 561 { 562 return __ac_setup_specific_pages(at, pool, pd_page, pt_page); 563 } 564 565 static void dump_mapping(ac_test_t *at) 566 { 567 unsigned long root = read_cr3(); 568 int flags = at->flags; 569 int i; 570 571 printf("Dump mapping: address: %p\n", at->virt); 572 for (i = page_table_levels ; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) { 573 pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); 574 unsigned index = PT_INDEX((unsigned long)at->virt, i); 575 pt_element_t pte = vroot[index]; 576 577 printf("------L%d: %lx\n", i, pte); 578 root = vroot[index]; 579 } 580 } 581 582 static void ac_test_check(ac_test_t *at, _Bool *success_ret, _Bool cond, 583 const char *fmt, ...) 584 { 585 va_list ap; 586 char buf[500]; 587 588 if (!*success_ret) { 589 return; 590 } 591 592 if (!cond) { 593 return; 594 } 595 596 *success_ret = false; 597 598 if (!verbose) { 599 puts("\n"); 600 ac_test_show(at); 601 } 602 603 va_start(ap, fmt); 604 vsnprintf(buf, sizeof(buf), fmt, ap); 605 va_end(ap); 606 printf("FAIL: %s\n", buf); 607 dump_mapping(at); 608 } 609 610 static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore) 611 { 612 pte1 &= ~ignore; 613 pte2 &= ~ignore; 614 return pte1 == pte2; 615 } 616 617 int ac_test_do_access(ac_test_t *at) 618 { 619 static unsigned unique = 42; 620 int fault = 0; 621 unsigned e; 622 static unsigned char user_stack[4096]; 623 unsigned long rsp; 624 _Bool success = true; 625 int flags = at->flags; 626 627 ++unique; 628 if (!(unique & 65535)) { 629 puts("."); 630 } 631 632 *((unsigned char *)at->phys) = 0xc3; /* ret */ 633 634 unsigned r = unique; 635 set_cr0_wp(F(AC_CPU_CR0_WP)); 636 set_efer_nx(F(AC_CPU_EFER_NX)); 637 set_cr4_pke(F(AC_CPU_CR4_PKE)); 638 if (F(AC_CPU_CR4_PKE)) { 639 /* WD2=AD2=1, WD1=F(AC_PKU_WD), AD1=F(AC_PKU_AD) */ 640 write_pkru(0x30 | (F(AC_PKU_WD) ? 8 : 0) | 641 (F(AC_PKU_AD) ? 4 : 0)); 642 } 643 644 set_cr4_smep(F(AC_CPU_CR4_SMEP)); 645 646 if (F(AC_ACCESS_TWICE)) { 647 asm volatile ( 648 "mov $fixed2, %%rsi \n\t" 649 "mov (%[addr]), %[reg] \n\t" 650 "fixed2:" 651 : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e) 652 : [addr]"r"(at->virt) 653 : "rsi" 654 ); 655 fault = 0; 656 } 657 658 asm volatile ("mov $fixed1, %%rsi \n\t" 659 "mov %%rsp, %%rdx \n\t" 660 "cmp $0, %[user] \n\t" 661 "jz do_access \n\t" 662 "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax \n\t" 663 "pushq %[user_ds] \n\t" 664 "pushq %[user_stack_top] \n\t" 665 "pushfq \n\t" 666 "pushq %[user_cs] \n\t" 667 "pushq $do_access \n\t" 668 "iretq \n" 669 "do_access: \n\t" 670 "cmp $0, %[fetch] \n\t" 671 "jnz 2f \n\t" 672 "cmp $0, %[write] \n\t" 673 "jnz 1f \n\t" 674 "mov (%[addr]), %[reg] \n\t" 675 "jmp done \n\t" 676 "1: mov %[reg], (%[addr]) \n\t" 677 "jmp done \n\t" 678 "2: call *%[addr] \n\t" 679 "done: \n" 680 "fixed1: \n" 681 "int %[kernel_entry_vector] \n\t" 682 "back_to_kernel:" 683 : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp) 684 : [addr]"r"(at->virt), 685 [write]"r"(F(AC_ACCESS_WRITE)), 686 [user]"r"(F(AC_ACCESS_USER)), 687 [fetch]"r"(F(AC_ACCESS_FETCH)), 688 [user_ds]"i"(USER_DS), 689 [user_cs]"i"(USER_CS), 690 [user_stack_top]"r"(user_stack + sizeof user_stack), 691 [kernel_entry_vector]"i"(0x20) 692 : "rsi"); 693 694 asm volatile (".section .text.pf \n\t" 695 "page_fault: \n\t" 696 "pop %rbx \n\t" 697 "mov %rsi, (%rsp) \n\t" 698 "movl $1, %eax \n\t" 699 "iretq \n\t" 700 ".section .text"); 701 702 asm volatile (".section .text.entry \n\t" 703 "kernel_entry: \n\t" 704 "mov %rdx, %rsp \n\t" 705 "jmp back_to_kernel \n\t" 706 ".section .text"); 707 708 ac_test_check(at, &success, fault && !at->expected_fault, 709 "unexpected fault"); 710 ac_test_check(at, &success, !fault && at->expected_fault, 711 "unexpected access"); 712 ac_test_check(at, &success, fault && e != at->expected_error, 713 "error code %x expected %x", e, at->expected_error); 714 ac_test_check(at, &success, at->ptep && *at->ptep != at->expected_pte, 715 "pte %x expected %x", *at->ptep, at->expected_pte); 716 ac_test_check(at, &success, 717 !pt_match(*at->pdep, at->expected_pde, at->ignore_pde), 718 "pde %x expected %x", *at->pdep, at->expected_pde); 719 720 if (success && verbose) { 721 if (at->expected_fault) { 722 printf("PASS (%x)\n", at->expected_error); 723 } else { 724 printf("PASS\n"); 725 } 726 } 727 return success; 728 } 729 730 static void ac_test_show(ac_test_t *at) 731 { 732 char line[5000]; 733 734 *line = 0; 735 strcat(line, "test"); 736 for (int i = 0; i < NR_AC_FLAGS; ++i) 737 if (at->flags & (1 << i)) { 738 strcat(line, " "); 739 strcat(line, ac_names[i]); 740 } 741 strcat(line, ": "); 742 printf("%s", line); 743 } 744 745 /* 746 * This test case is used to triger the bug which is fixed by 747 * commit e09e90a5 in the kvm tree 748 */ 749 static int corrupt_hugepage_triger(ac_pool_t *pool) 750 { 751 ac_test_t at1, at2; 752 753 ac_test_init(&at1, (void *)(0x123400000000)); 754 ac_test_init(&at2, (void *)(0x666600000000)); 755 756 at2.flags = AC_CPU_CR0_WP_MASK | AC_PDE_PSE_MASK | AC_PDE_PRESENT_MASK; 757 ac_test_setup_pte(&at2, pool); 758 if (!ac_test_do_access(&at2)) 759 goto err; 760 761 at1.flags = at2.flags | AC_PDE_WRITABLE_MASK; 762 ac_test_setup_pte(&at1, pool); 763 if (!ac_test_do_access(&at1)) 764 goto err; 765 766 at1.flags |= AC_ACCESS_WRITE_MASK; 767 ac_set_expected_status(&at1); 768 if (!ac_test_do_access(&at1)) 769 goto err; 770 771 at2.flags |= AC_ACCESS_WRITE_MASK; 772 ac_set_expected_status(&at2); 773 if (!ac_test_do_access(&at2)) 774 goto err; 775 776 return 1; 777 778 err: 779 printf("corrupt_hugepage_triger test fail\n"); 780 return 0; 781 } 782 783 /* 784 * This test case is used to triger the bug which is fixed by 785 * commit 3ddf6c06e13e in the kvm tree 786 */ 787 static int check_pfec_on_prefetch_pte(ac_pool_t *pool) 788 { 789 ac_test_t at1, at2; 790 791 ac_test_init(&at1, (void *)(0x123406001000)); 792 ac_test_init(&at2, (void *)(0x123406003000)); 793 794 at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK; 795 ac_setup_specific_pages(&at1, pool, 30 * 1024 * 1024, 30 * 1024 * 1024); 796 797 at2.flags = at1.flags | AC_PTE_NX_MASK; 798 ac_setup_specific_pages(&at2, pool, 30 * 1024 * 1024, 30 * 1024 * 1024); 799 800 if (!ac_test_do_access(&at1)) { 801 printf("%s: prepare fail\n", __FUNCTION__); 802 goto err; 803 } 804 805 if (!ac_test_do_access(&at2)) { 806 printf("%s: check PFEC on prefetch pte path fail\n", 807 __FUNCTION__); 808 goto err; 809 } 810 811 return 1; 812 813 err: 814 return 0; 815 } 816 817 /* 818 * If the write-fault access is from supervisor and CR0.WP is not set on the 819 * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte 820 * and clears U bit. This is the chance that kvm can change pte access from 821 * readonly to writable. 822 * 823 * Unfortunately, the pte access is the access of 'direct' shadow page table, 824 * means direct sp.role.access = pte_access, then we will create a writable 825 * spte entry on the readonly shadow page table. It will cause Dirty bit is 826 * not tracked when two guest ptes point to the same large page. Note, it 827 * does not have other impact except Dirty bit since cr0.wp is encoded into 828 * sp.role. 829 * 830 * Note: to trigger this bug, hugepage should be disabled on host. 831 */ 832 static int check_large_pte_dirty_for_nowp(ac_pool_t *pool) 833 { 834 ac_test_t at1, at2; 835 836 ac_test_init(&at1, (void *)(0x123403000000)); 837 ac_test_init(&at2, (void *)(0x666606000000)); 838 839 at2.flags = AC_PDE_PRESENT_MASK | AC_PDE_PSE_MASK; 840 ac_test_setup_pte(&at2, pool); 841 if (!ac_test_do_access(&at2)) { 842 printf("%s: read on the first mapping fail.\n", __FUNCTION__); 843 goto err; 844 } 845 846 at1.flags = at2.flags | AC_ACCESS_WRITE_MASK; 847 ac_test_setup_pte(&at1, pool); 848 if (!ac_test_do_access(&at1)) { 849 printf("%s: write on the second mapping fail.\n", __FUNCTION__); 850 goto err; 851 } 852 853 at2.flags |= AC_ACCESS_WRITE_MASK; 854 ac_set_expected_status(&at2); 855 if (!ac_test_do_access(&at2)) { 856 printf("%s: write on the first mapping fail.\n", __FUNCTION__); 857 goto err; 858 } 859 860 return 1; 861 862 err: 863 return 0; 864 } 865 866 static int check_smep_andnot_wp(ac_pool_t *pool) 867 { 868 ac_test_t at1; 869 int err_prepare_andnot_wp, err_smep_andnot_wp; 870 871 if (!(cpuid_7_ebx & (1 << 7))) { 872 return 1; 873 } 874 875 ac_test_init(&at1, (void *)(0x123406001000)); 876 877 at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK | 878 AC_PDE_USER_MASK | AC_PTE_USER_MASK | 879 AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK | 880 AC_CPU_CR4_SMEP_MASK | 881 AC_CPU_CR0_WP_MASK | 882 AC_ACCESS_WRITE_MASK; 883 ac_test_setup_pte(&at1, pool); 884 885 /* 886 * Here we write the ro user page when 887 * cr0.wp=0, then we execute it and SMEP 888 * fault should happen. 889 */ 890 err_prepare_andnot_wp = ac_test_do_access(&at1); 891 if (!err_prepare_andnot_wp) { 892 printf("%s: SMEP prepare fail\n", __FUNCTION__); 893 goto clean_up; 894 } 895 896 at1.flags &= ~AC_ACCESS_WRITE_MASK; 897 at1.flags |= AC_ACCESS_FETCH_MASK; 898 ac_set_expected_status(&at1); 899 err_smep_andnot_wp = ac_test_do_access(&at1); 900 901 clean_up: 902 set_cr4_smep(0); 903 904 if (!err_prepare_andnot_wp) 905 goto err; 906 if (!err_smep_andnot_wp) { 907 printf("%s: check SMEP without wp fail\n", __FUNCTION__); 908 goto err; 909 } 910 return 1; 911 912 err: 913 return 0; 914 } 915 916 int ac_test_exec(ac_test_t *at, ac_pool_t *pool) 917 { 918 int r; 919 920 if (verbose) { 921 ac_test_show(at); 922 } 923 ac_test_setup_pte(at, pool); 924 r = ac_test_do_access(at); 925 return r; 926 } 927 928 typedef int (*ac_test_fn)(ac_pool_t *pool); 929 const ac_test_fn ac_test_cases[] = 930 { 931 corrupt_hugepage_triger, 932 check_pfec_on_prefetch_pte, 933 check_large_pte_dirty_for_nowp, 934 check_smep_andnot_wp 935 }; 936 937 int ac_test_run(void) 938 { 939 ac_test_t at; 940 ac_pool_t pool; 941 int i, tests, successes; 942 943 printf("run\n"); 944 tests = successes = 0; 945 946 if (cpuid_7_ecx & (1 << 3)) { 947 set_cr4_pke(1); 948 set_cr4_pke(0); 949 /* Now PKRU = 0xFFFFFFFF. */ 950 } else { 951 unsigned long cr4 = read_cr4(); 952 tests++; 953 if (write_cr4_checking(cr4 | X86_CR4_PKE) == GP_VECTOR) { 954 successes++; 955 invalid_mask |= AC_PKU_AD_MASK; 956 invalid_mask |= AC_PKU_WD_MASK; 957 invalid_mask |= AC_PKU_PKEY_MASK; 958 invalid_mask |= AC_CPU_CR4_PKE_MASK; 959 printf("CR4.PKE not available, disabling PKE tests\n"); 960 } else { 961 printf("Set PKE in CR4 - expect #GP: FAIL!\n"); 962 set_cr4_pke(0); 963 } 964 } 965 966 if (!(cpuid_7_ebx & (1 << 7))) { 967 tests++; 968 if (set_cr4_smep(1) == GP_VECTOR) { 969 successes++; 970 invalid_mask |= AC_CPU_CR4_SMEP_MASK; 971 printf("CR4.SMEP not available, disabling SMEP tests\n"); 972 } else { 973 printf("Set SMEP in CR4 - expect #GP: FAIL!\n"); 974 set_cr4_smep(0); 975 } 976 } 977 978 ac_env_int(&pool); 979 ac_test_init(&at, (void *)(0x123400000000 + 16 * smp_id())); 980 do { 981 ++tests; 982 successes += ac_test_exec(&at, &pool); 983 } while (ac_test_bump(&at)); 984 985 for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) { 986 ++tests; 987 successes += ac_test_cases[i](&pool); 988 } 989 990 printf("\n%d tests, %d failures\n", tests, tests - successes); 991 992 return successes == tests; 993 } 994 995 int main(void) 996 { 997 int r; 998 999 setup_idt(); 1000 1001 cpuid_7_ebx = cpuid(7).b; 1002 cpuid_7_ecx = cpuid(7).c; 1003 1004 printf("starting test\n\n"); 1005 page_table_levels = 4; 1006 r = ac_test_run(); 1007 1008 if (cpuid_7_ecx & (1 << 16)) { 1009 page_table_levels = 5; 1010 setup_5level_page_table(); 1011 printf("starting 5-level paging test.\n\n"); 1012 r = ac_test_run(); 1013 } 1014 1015 return r ? 0 : 1; 1016 } 1017