1 2 #include "libcflat.h" 3 #include "desc.h" 4 #include "processor.h" 5 #include "asm/page.h" 6 #include "x86/vm.h" 7 8 #define smp_id() 0 9 10 #define true 1 11 #define false 0 12 13 static _Bool verbose = false; 14 15 typedef unsigned long pt_element_t; 16 static int cpuid_7_ebx; 17 static int cpuid_7_ecx; 18 static int invalid_mask; 19 static int page_table_levels; 20 21 #define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 40) - 1) & PAGE_MASK)) 22 #define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(1ull << 21)) 23 24 #define CR0_WP_MASK (1UL << 16) 25 #define CR4_SMEP_MASK (1UL << 20) 26 27 #define PFERR_PRESENT_MASK (1U << 0) 28 #define PFERR_WRITE_MASK (1U << 1) 29 #define PFERR_USER_MASK (1U << 2) 30 #define PFERR_RESERVED_MASK (1U << 3) 31 #define PFERR_FETCH_MASK (1U << 4) 32 #define PFERR_PK_MASK (1U << 5) 33 34 #define MSR_EFER 0xc0000080 35 #define EFER_NX_MASK (1ull << 11) 36 37 #define PT_INDEX(address, level) \ 38 ((address) >> (12 + ((level)-1) * 9)) & 511 39 40 /* 41 * page table access check tests 42 */ 43 44 enum { 45 AC_PTE_PRESENT_BIT, 46 AC_PTE_WRITABLE_BIT, 47 AC_PTE_USER_BIT, 48 AC_PTE_ACCESSED_BIT, 49 AC_PTE_DIRTY_BIT, 50 AC_PTE_NX_BIT, 51 AC_PTE_BIT51_BIT, 52 53 AC_PDE_PRESENT_BIT, 54 AC_PDE_WRITABLE_BIT, 55 AC_PDE_USER_BIT, 56 AC_PDE_ACCESSED_BIT, 57 AC_PDE_DIRTY_BIT, 58 AC_PDE_PSE_BIT, 59 AC_PDE_NX_BIT, 60 AC_PDE_BIT51_BIT, 61 AC_PDE_BIT13_BIT, 62 63 AC_PKU_AD_BIT, 64 AC_PKU_WD_BIT, 65 AC_PKU_PKEY_BIT, 66 67 AC_ACCESS_USER_BIT, 68 AC_ACCESS_WRITE_BIT, 69 AC_ACCESS_FETCH_BIT, 70 AC_ACCESS_TWICE_BIT, 71 72 AC_CPU_EFER_NX_BIT, 73 AC_CPU_CR0_WP_BIT, 74 AC_CPU_CR4_SMEP_BIT, 75 AC_CPU_CR4_PKE_BIT, 76 77 NR_AC_FLAGS 78 }; 79 80 #define AC_PTE_PRESENT_MASK (1 << AC_PTE_PRESENT_BIT) 81 #define AC_PTE_WRITABLE_MASK (1 << AC_PTE_WRITABLE_BIT) 82 #define AC_PTE_USER_MASK (1 << AC_PTE_USER_BIT) 83 #define AC_PTE_ACCESSED_MASK (1 << AC_PTE_ACCESSED_BIT) 84 #define AC_PTE_DIRTY_MASK (1 << AC_PTE_DIRTY_BIT) 85 #define AC_PTE_NX_MASK (1 << AC_PTE_NX_BIT) 86 #define AC_PTE_BIT51_MASK (1 << AC_PTE_BIT51_BIT) 87 88 #define AC_PDE_PRESENT_MASK (1 << AC_PDE_PRESENT_BIT) 89 #define AC_PDE_WRITABLE_MASK (1 << AC_PDE_WRITABLE_BIT) 90 #define AC_PDE_USER_MASK (1 << AC_PDE_USER_BIT) 91 #define AC_PDE_ACCESSED_MASK (1 << AC_PDE_ACCESSED_BIT) 92 #define AC_PDE_DIRTY_MASK (1 << AC_PDE_DIRTY_BIT) 93 #define AC_PDE_PSE_MASK (1 << AC_PDE_PSE_BIT) 94 #define AC_PDE_NX_MASK (1 << AC_PDE_NX_BIT) 95 #define AC_PDE_BIT51_MASK (1 << AC_PDE_BIT51_BIT) 96 #define AC_PDE_BIT13_MASK (1 << AC_PDE_BIT13_BIT) 97 98 #define AC_PKU_AD_MASK (1 << AC_PKU_AD_BIT) 99 #define AC_PKU_WD_MASK (1 << AC_PKU_WD_BIT) 100 #define AC_PKU_PKEY_MASK (1 << AC_PKU_PKEY_BIT) 101 102 #define AC_ACCESS_USER_MASK (1 << AC_ACCESS_USER_BIT) 103 #define AC_ACCESS_WRITE_MASK (1 << AC_ACCESS_WRITE_BIT) 104 #define AC_ACCESS_FETCH_MASK (1 << AC_ACCESS_FETCH_BIT) 105 #define AC_ACCESS_TWICE_MASK (1 << AC_ACCESS_TWICE_BIT) 106 107 #define AC_CPU_EFER_NX_MASK (1 << AC_CPU_EFER_NX_BIT) 108 #define AC_CPU_CR0_WP_MASK (1 << AC_CPU_CR0_WP_BIT) 109 #define AC_CPU_CR4_SMEP_MASK (1 << AC_CPU_CR4_SMEP_BIT) 110 #define AC_CPU_CR4_PKE_MASK (1 << AC_CPU_CR4_PKE_BIT) 111 112 const char *ac_names[] = { 113 [AC_PTE_PRESENT_BIT] = "pte.p", 114 [AC_PTE_ACCESSED_BIT] = "pte.a", 115 [AC_PTE_WRITABLE_BIT] = "pte.rw", 116 [AC_PTE_USER_BIT] = "pte.user", 117 [AC_PTE_DIRTY_BIT] = "pte.d", 118 [AC_PTE_NX_BIT] = "pte.nx", 119 [AC_PTE_BIT51_BIT] = "pte.51", 120 [AC_PDE_PRESENT_BIT] = "pde.p", 121 [AC_PDE_ACCESSED_BIT] = "pde.a", 122 [AC_PDE_WRITABLE_BIT] = "pde.rw", 123 [AC_PDE_USER_BIT] = "pde.user", 124 [AC_PDE_DIRTY_BIT] = "pde.d", 125 [AC_PDE_PSE_BIT] = "pde.pse", 126 [AC_PDE_NX_BIT] = "pde.nx", 127 [AC_PDE_BIT51_BIT] = "pde.51", 128 [AC_PDE_BIT13_BIT] = "pde.13", 129 [AC_PKU_AD_BIT] = "pkru.ad", 130 [AC_PKU_WD_BIT] = "pkru.wd", 131 [AC_PKU_PKEY_BIT] = "pkey=1", 132 [AC_ACCESS_WRITE_BIT] = "write", 133 [AC_ACCESS_USER_BIT] = "user", 134 [AC_ACCESS_FETCH_BIT] = "fetch", 135 [AC_ACCESS_TWICE_BIT] = "twice", 136 [AC_CPU_EFER_NX_BIT] = "efer.nx", 137 [AC_CPU_CR0_WP_BIT] = "cr0.wp", 138 [AC_CPU_CR4_SMEP_BIT] = "cr4.smep", 139 [AC_CPU_CR4_PKE_BIT] = "cr4.pke", 140 }; 141 142 static inline void *va(pt_element_t phys) 143 { 144 return (void *)phys; 145 } 146 147 typedef struct { 148 pt_element_t pt_pool; 149 unsigned pt_pool_size; 150 unsigned pt_pool_current; 151 } ac_pool_t; 152 153 typedef struct { 154 unsigned flags; 155 void *virt; 156 pt_element_t phys; 157 pt_element_t *ptep; 158 pt_element_t expected_pte; 159 pt_element_t *pdep; 160 pt_element_t expected_pde; 161 pt_element_t ignore_pde; 162 int expected_fault; 163 unsigned expected_error; 164 } ac_test_t; 165 166 typedef struct { 167 unsigned short limit; 168 unsigned long linear_addr; 169 } __attribute__((packed)) descriptor_table_t; 170 171 172 static void ac_test_show(ac_test_t *at); 173 174 static int write_cr4_checking(unsigned long val) 175 { 176 asm volatile(ASM_TRY("1f") 177 "mov %0,%%cr4\n\t" 178 "1:": : "r" (val)); 179 return exception_vector(); 180 } 181 182 static void set_cr0_wp(int wp) 183 { 184 unsigned long cr0 = read_cr0(); 185 unsigned long old_cr0 = cr0; 186 187 cr0 &= ~CR0_WP_MASK; 188 if (wp) 189 cr0 |= CR0_WP_MASK; 190 if (old_cr0 != cr0) 191 write_cr0(cr0); 192 } 193 194 static unsigned set_cr4_smep(int smep) 195 { 196 unsigned long cr4 = read_cr4(); 197 unsigned long old_cr4 = cr4; 198 extern u64 ptl2[]; 199 unsigned r; 200 201 cr4 &= ~CR4_SMEP_MASK; 202 if (smep) 203 cr4 |= CR4_SMEP_MASK; 204 if (old_cr4 == cr4) 205 return 0; 206 207 if (smep) 208 ptl2[2] &= ~PT_USER_MASK; 209 r = write_cr4_checking(cr4); 210 if (r || !smep) 211 ptl2[2] |= PT_USER_MASK; 212 return r; 213 } 214 215 static void set_cr4_pke(int pke) 216 { 217 unsigned long cr4 = read_cr4(); 218 unsigned long old_cr4 = cr4; 219 220 cr4 &= ~X86_CR4_PKE; 221 if (pke) 222 cr4 |= X86_CR4_PKE; 223 if (old_cr4 == cr4) 224 return; 225 226 /* Check that protection keys do not affect accesses when CR4.PKE=0. */ 227 if ((read_cr4() & X86_CR4_PKE) && !pke) { 228 write_pkru(0xfffffffc); 229 } 230 write_cr4(cr4); 231 } 232 233 static void set_efer_nx(int nx) 234 { 235 unsigned long long efer = rdmsr(MSR_EFER); 236 unsigned long long old_efer = efer; 237 238 efer &= ~EFER_NX_MASK; 239 if (nx) 240 efer |= EFER_NX_MASK; 241 if (old_efer != efer) 242 wrmsr(MSR_EFER, efer); 243 } 244 245 static void ac_env_int(ac_pool_t *pool) 246 { 247 extern char page_fault, kernel_entry; 248 set_idt_entry(14, &page_fault, 0); 249 set_idt_entry(0x20, &kernel_entry, 3); 250 251 pool->pt_pool = 33 * 1024 * 1024; 252 pool->pt_pool_size = 120 * 1024 * 1024 - pool->pt_pool; 253 pool->pt_pool_current = 0; 254 } 255 256 static void ac_test_init(ac_test_t *at, void *virt) 257 { 258 wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NX_MASK); 259 set_cr0_wp(1); 260 at->flags = 0; 261 at->virt = virt; 262 at->phys = 32 * 1024 * 1024; 263 } 264 265 static int ac_test_bump_one(ac_test_t *at) 266 { 267 at->flags = ((at->flags | invalid_mask) + 1) & ~invalid_mask; 268 return at->flags < (1 << NR_AC_FLAGS); 269 } 270 271 #define F(x) ((flags & x##_MASK) != 0) 272 273 static _Bool ac_test_legal(ac_test_t *at) 274 { 275 int flags = at->flags; 276 277 if (F(AC_ACCESS_FETCH) && F(AC_ACCESS_WRITE)) 278 return false; 279 280 /* 281 * Since we convert current page to kernel page when cr4.smep=1, 282 * we can't switch to user mode. 283 */ 284 if (F(AC_ACCESS_USER) && F(AC_CPU_CR4_SMEP)) 285 return false; 286 287 /* 288 * Only test protection key faults if CR4.PKE=1. 289 */ 290 if (!F(AC_CPU_CR4_PKE) && 291 (F(AC_PKU_AD) || F(AC_PKU_WD))) { 292 return false; 293 } 294 295 /* 296 * pde.bit13 checks handling of reserved bits in largepage PDEs. It is 297 * meaningless if there is a PTE. 298 */ 299 if (!F(AC_PDE_PSE) && F(AC_PDE_BIT13)) 300 return false; 301 302 return true; 303 } 304 305 static int ac_test_bump(ac_test_t *at) 306 { 307 int ret; 308 309 ret = ac_test_bump_one(at); 310 while (ret && !ac_test_legal(at)) 311 ret = ac_test_bump_one(at); 312 return ret; 313 } 314 315 static pt_element_t ac_test_alloc_pt(ac_pool_t *pool) 316 { 317 pt_element_t ret = pool->pt_pool + pool->pt_pool_current; 318 pool->pt_pool_current += PAGE_SIZE; 319 return ret; 320 } 321 322 static _Bool ac_test_enough_room(ac_pool_t *pool) 323 { 324 return pool->pt_pool_current + 5 * PAGE_SIZE <= pool->pt_pool_size; 325 } 326 327 static void ac_test_reset_pt_pool(ac_pool_t *pool) 328 { 329 pool->pt_pool_current = 0; 330 } 331 332 static pt_element_t ac_test_permissions(ac_test_t *at, unsigned flags, 333 bool writable, bool user, 334 bool executable) 335 { 336 bool kwritable = !F(AC_CPU_CR0_WP) && !F(AC_ACCESS_USER); 337 pt_element_t expected = 0; 338 339 if (F(AC_ACCESS_USER) && !user) 340 at->expected_fault = 1; 341 342 if (F(AC_ACCESS_WRITE) && !writable && !kwritable) 343 at->expected_fault = 1; 344 345 if (F(AC_ACCESS_FETCH) && !executable) 346 at->expected_fault = 1; 347 348 if (F(AC_ACCESS_FETCH) && user && F(AC_CPU_CR4_SMEP)) 349 at->expected_fault = 1; 350 351 if (user && !F(AC_ACCESS_FETCH) && F(AC_PKU_PKEY) && F(AC_CPU_CR4_PKE)) { 352 if (F(AC_PKU_AD)) { 353 at->expected_fault = 1; 354 at->expected_error |= PFERR_PK_MASK; 355 } else if (F(AC_ACCESS_WRITE) && F(AC_PKU_WD) && !kwritable) { 356 at->expected_fault = 1; 357 at->expected_error |= PFERR_PK_MASK; 358 } 359 } 360 361 if (!at->expected_fault) { 362 expected |= PT_ACCESSED_MASK; 363 if (F(AC_ACCESS_WRITE)) 364 expected |= PT_DIRTY_MASK; 365 } 366 367 return expected; 368 } 369 370 static void ac_emulate_access(ac_test_t *at, unsigned flags) 371 { 372 bool pde_valid, pte_valid; 373 bool user, writable, executable; 374 375 if (F(AC_ACCESS_USER)) 376 at->expected_error |= PFERR_USER_MASK; 377 378 if (F(AC_ACCESS_WRITE)) 379 at->expected_error |= PFERR_WRITE_MASK; 380 381 if (F(AC_ACCESS_FETCH)) 382 at->expected_error |= PFERR_FETCH_MASK; 383 384 if (!F(AC_PDE_ACCESSED)) 385 at->ignore_pde = PT_ACCESSED_MASK; 386 387 pde_valid = F(AC_PDE_PRESENT) 388 && !F(AC_PDE_BIT51) && !F(AC_PDE_BIT13) 389 && !(F(AC_PDE_NX) && !F(AC_CPU_EFER_NX)); 390 391 if (!pde_valid) { 392 at->expected_fault = 1; 393 if (F(AC_PDE_PRESENT)) { 394 at->expected_error |= PFERR_RESERVED_MASK; 395 } else { 396 at->expected_error &= ~PFERR_PRESENT_MASK; 397 } 398 goto fault; 399 } 400 401 writable = F(AC_PDE_WRITABLE); 402 user = F(AC_PDE_USER); 403 executable = !F(AC_PDE_NX); 404 405 if (F(AC_PDE_PSE)) { 406 at->expected_pde |= ac_test_permissions(at, flags, writable, user, 407 executable); 408 goto no_pte; 409 } 410 411 at->expected_pde |= PT_ACCESSED_MASK; 412 413 pte_valid = F(AC_PTE_PRESENT) 414 && !F(AC_PTE_BIT51) 415 && !(F(AC_PTE_NX) && !F(AC_CPU_EFER_NX)); 416 417 if (!pte_valid) { 418 at->expected_fault = 1; 419 if (F(AC_PTE_PRESENT)) { 420 at->expected_error |= PFERR_RESERVED_MASK; 421 } else { 422 at->expected_error &= ~PFERR_PRESENT_MASK; 423 } 424 goto fault; 425 } 426 427 writable &= F(AC_PTE_WRITABLE); 428 user &= F(AC_PTE_USER); 429 executable &= !F(AC_PTE_NX); 430 431 at->expected_pte |= ac_test_permissions(at, flags, writable, user, 432 executable); 433 434 no_pte: 435 fault: 436 if (!at->expected_fault) 437 at->ignore_pde = 0; 438 if (!F(AC_CPU_EFER_NX) && !F(AC_CPU_CR4_SMEP)) 439 at->expected_error &= ~PFERR_FETCH_MASK; 440 } 441 442 static void ac_set_expected_status(ac_test_t *at) 443 { 444 invlpg(at->virt); 445 446 if (at->ptep) 447 at->expected_pte = *at->ptep; 448 at->expected_pde = *at->pdep; 449 at->ignore_pde = 0; 450 at->expected_fault = 0; 451 at->expected_error = PFERR_PRESENT_MASK; 452 453 if (at->flags & AC_ACCESS_TWICE_MASK) { 454 ac_emulate_access(at, at->flags & ~AC_ACCESS_WRITE_MASK 455 & ~AC_ACCESS_FETCH_MASK & ~AC_ACCESS_USER_MASK); 456 at->expected_fault = 0; 457 at->expected_error = PFERR_PRESENT_MASK; 458 at->ignore_pde = 0; 459 } 460 461 ac_emulate_access(at, at->flags); 462 } 463 464 static void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, 465 u64 pd_page, u64 pt_page) 466 467 { 468 unsigned long root = read_cr3(); 469 int flags = at->flags; 470 bool skip = true; 471 472 if (!ac_test_enough_room(pool)) 473 ac_test_reset_pt_pool(pool); 474 475 at->ptep = 0; 476 for (int i = page_table_levels; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) { 477 pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); 478 unsigned index = PT_INDEX((unsigned long)at->virt, i); 479 pt_element_t pte = 0; 480 481 /* 482 * Reuse existing page tables along the path to the test code and data 483 * (which is in the bottom 2MB). 484 */ 485 if (skip && i >= 2 && index == 0) { 486 goto next; 487 } 488 skip = false; 489 490 switch (i) { 491 case 5: 492 case 4: 493 case 3: 494 pte = pd_page ? pd_page : ac_test_alloc_pt(pool); 495 pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; 496 break; 497 case 2: 498 if (!F(AC_PDE_PSE)) { 499 pte = pt_page ? pt_page : ac_test_alloc_pt(pool); 500 /* The protection key is ignored on non-leaf entries. */ 501 if (F(AC_PKU_PKEY)) 502 pte |= 2ull << 59; 503 } else { 504 pte = at->phys & PT_PSE_BASE_ADDR_MASK; 505 pte |= PT_PAGE_SIZE_MASK; 506 if (F(AC_PKU_PKEY)) 507 pte |= 1ull << 59; 508 } 509 if (F(AC_PDE_PRESENT)) 510 pte |= PT_PRESENT_MASK; 511 if (F(AC_PDE_WRITABLE)) 512 pte |= PT_WRITABLE_MASK; 513 if (F(AC_PDE_USER)) 514 pte |= PT_USER_MASK; 515 if (F(AC_PDE_ACCESSED)) 516 pte |= PT_ACCESSED_MASK; 517 if (F(AC_PDE_DIRTY)) 518 pte |= PT_DIRTY_MASK; 519 if (F(AC_PDE_NX)) 520 pte |= PT64_NX_MASK; 521 if (F(AC_PDE_BIT51)) 522 pte |= 1ull << 51; 523 if (F(AC_PDE_BIT13)) 524 pte |= 1ull << 13; 525 at->pdep = &vroot[index]; 526 break; 527 case 1: 528 pte = at->phys & PT_BASE_ADDR_MASK; 529 if (F(AC_PKU_PKEY)) 530 pte |= 1ull << 59; 531 if (F(AC_PTE_PRESENT)) 532 pte |= PT_PRESENT_MASK; 533 if (F(AC_PTE_WRITABLE)) 534 pte |= PT_WRITABLE_MASK; 535 if (F(AC_PTE_USER)) 536 pte |= PT_USER_MASK; 537 if (F(AC_PTE_ACCESSED)) 538 pte |= PT_ACCESSED_MASK; 539 if (F(AC_PTE_DIRTY)) 540 pte |= PT_DIRTY_MASK; 541 if (F(AC_PTE_NX)) 542 pte |= PT64_NX_MASK; 543 if (F(AC_PTE_BIT51)) 544 pte |= 1ull << 51; 545 at->ptep = &vroot[index]; 546 break; 547 } 548 vroot[index] = pte; 549 next: 550 root = vroot[index]; 551 } 552 ac_set_expected_status(at); 553 } 554 555 static void ac_test_setup_pte(ac_test_t *at, ac_pool_t *pool) 556 { 557 __ac_setup_specific_pages(at, pool, 0, 0); 558 } 559 560 static void ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, 561 u64 pd_page, u64 pt_page) 562 { 563 return __ac_setup_specific_pages(at, pool, pd_page, pt_page); 564 } 565 566 static void dump_mapping(ac_test_t *at) 567 { 568 unsigned long root = read_cr3(); 569 int flags = at->flags; 570 int i; 571 572 printf("Dump mapping: address: %p\n", at->virt); 573 for (i = page_table_levels ; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) { 574 pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); 575 unsigned index = PT_INDEX((unsigned long)at->virt, i); 576 pt_element_t pte = vroot[index]; 577 578 printf("------L%d: %lx\n", i, pte); 579 root = vroot[index]; 580 } 581 } 582 583 static void ac_test_check(ac_test_t *at, _Bool *success_ret, _Bool cond, 584 const char *fmt, ...) 585 { 586 va_list ap; 587 char buf[500]; 588 589 if (!*success_ret) { 590 return; 591 } 592 593 if (!cond) { 594 return; 595 } 596 597 *success_ret = false; 598 599 if (!verbose) { 600 puts("\n"); 601 ac_test_show(at); 602 } 603 604 va_start(ap, fmt); 605 vsnprintf(buf, sizeof(buf), fmt, ap); 606 va_end(ap); 607 printf("FAIL: %s\n", buf); 608 dump_mapping(at); 609 } 610 611 static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore) 612 { 613 pte1 &= ~ignore; 614 pte2 &= ~ignore; 615 return pte1 == pte2; 616 } 617 618 static int ac_test_do_access(ac_test_t *at) 619 { 620 static unsigned unique = 42; 621 int fault = 0; 622 unsigned e; 623 static unsigned char user_stack[4096]; 624 unsigned long rsp; 625 _Bool success = true; 626 int flags = at->flags; 627 628 ++unique; 629 if (!(unique & 65535)) { 630 puts("."); 631 } 632 633 *((unsigned char *)at->phys) = 0xc3; /* ret */ 634 635 unsigned r = unique; 636 set_cr0_wp(F(AC_CPU_CR0_WP)); 637 set_efer_nx(F(AC_CPU_EFER_NX)); 638 set_cr4_pke(F(AC_CPU_CR4_PKE)); 639 if (F(AC_CPU_CR4_PKE)) { 640 /* WD2=AD2=1, WD1=F(AC_PKU_WD), AD1=F(AC_PKU_AD) */ 641 write_pkru(0x30 | (F(AC_PKU_WD) ? 8 : 0) | 642 (F(AC_PKU_AD) ? 4 : 0)); 643 } 644 645 set_cr4_smep(F(AC_CPU_CR4_SMEP)); 646 647 if (F(AC_ACCESS_TWICE)) { 648 asm volatile ( 649 "mov $fixed2, %%rsi \n\t" 650 "mov (%[addr]), %[reg] \n\t" 651 "fixed2:" 652 : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e) 653 : [addr]"r"(at->virt) 654 : "rsi" 655 ); 656 fault = 0; 657 } 658 659 asm volatile ("mov $fixed1, %%rsi \n\t" 660 "mov %%rsp, %%rdx \n\t" 661 "cmp $0, %[user] \n\t" 662 "jz do_access \n\t" 663 "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax \n\t" 664 "pushq %[user_ds] \n\t" 665 "pushq %[user_stack_top] \n\t" 666 "pushfq \n\t" 667 "pushq %[user_cs] \n\t" 668 "pushq $do_access \n\t" 669 "iretq \n" 670 "do_access: \n\t" 671 "cmp $0, %[fetch] \n\t" 672 "jnz 2f \n\t" 673 "cmp $0, %[write] \n\t" 674 "jnz 1f \n\t" 675 "mov (%[addr]), %[reg] \n\t" 676 "jmp done \n\t" 677 "1: mov %[reg], (%[addr]) \n\t" 678 "jmp done \n\t" 679 "2: call *%[addr] \n\t" 680 "done: \n" 681 "fixed1: \n" 682 "int %[kernel_entry_vector] \n\t" 683 "back_to_kernel:" 684 : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp) 685 : [addr]"r"(at->virt), 686 [write]"r"(F(AC_ACCESS_WRITE)), 687 [user]"r"(F(AC_ACCESS_USER)), 688 [fetch]"r"(F(AC_ACCESS_FETCH)), 689 [user_ds]"i"(USER_DS), 690 [user_cs]"i"(USER_CS), 691 [user_stack_top]"r"(user_stack + sizeof user_stack), 692 [kernel_entry_vector]"i"(0x20) 693 : "rsi"); 694 695 asm volatile (".section .text.pf \n\t" 696 "page_fault: \n\t" 697 "pop %rbx \n\t" 698 "mov %rsi, (%rsp) \n\t" 699 "movl $1, %eax \n\t" 700 "iretq \n\t" 701 ".section .text"); 702 703 asm volatile (".section .text.entry \n\t" 704 "kernel_entry: \n\t" 705 "mov %rdx, %rsp \n\t" 706 "jmp back_to_kernel \n\t" 707 ".section .text"); 708 709 ac_test_check(at, &success, fault && !at->expected_fault, 710 "unexpected fault"); 711 ac_test_check(at, &success, !fault && at->expected_fault, 712 "unexpected access"); 713 ac_test_check(at, &success, fault && e != at->expected_error, 714 "error code %x expected %x", e, at->expected_error); 715 ac_test_check(at, &success, at->ptep && *at->ptep != at->expected_pte, 716 "pte %x expected %x", *at->ptep, at->expected_pte); 717 ac_test_check(at, &success, 718 !pt_match(*at->pdep, at->expected_pde, at->ignore_pde), 719 "pde %x expected %x", *at->pdep, at->expected_pde); 720 721 if (success && verbose) { 722 if (at->expected_fault) { 723 printf("PASS (%x)\n", at->expected_error); 724 } else { 725 printf("PASS\n"); 726 } 727 } 728 return success; 729 } 730 731 static void ac_test_show(ac_test_t *at) 732 { 733 char line[5000]; 734 735 *line = 0; 736 strcat(line, "test"); 737 for (int i = 0; i < NR_AC_FLAGS; ++i) 738 if (at->flags & (1 << i)) { 739 strcat(line, " "); 740 strcat(line, ac_names[i]); 741 } 742 strcat(line, ": "); 743 printf("%s", line); 744 } 745 746 /* 747 * This test case is used to triger the bug which is fixed by 748 * commit e09e90a5 in the kvm tree 749 */ 750 static int corrupt_hugepage_triger(ac_pool_t *pool) 751 { 752 ac_test_t at1, at2; 753 754 ac_test_init(&at1, (void *)(0x123400000000)); 755 ac_test_init(&at2, (void *)(0x666600000000)); 756 757 at2.flags = AC_CPU_CR0_WP_MASK | AC_PDE_PSE_MASK | AC_PDE_PRESENT_MASK; 758 ac_test_setup_pte(&at2, pool); 759 if (!ac_test_do_access(&at2)) 760 goto err; 761 762 at1.flags = at2.flags | AC_PDE_WRITABLE_MASK; 763 ac_test_setup_pte(&at1, pool); 764 if (!ac_test_do_access(&at1)) 765 goto err; 766 767 at1.flags |= AC_ACCESS_WRITE_MASK; 768 ac_set_expected_status(&at1); 769 if (!ac_test_do_access(&at1)) 770 goto err; 771 772 at2.flags |= AC_ACCESS_WRITE_MASK; 773 ac_set_expected_status(&at2); 774 if (!ac_test_do_access(&at2)) 775 goto err; 776 777 return 1; 778 779 err: 780 printf("corrupt_hugepage_triger test fail\n"); 781 return 0; 782 } 783 784 /* 785 * This test case is used to triger the bug which is fixed by 786 * commit 3ddf6c06e13e in the kvm tree 787 */ 788 static int check_pfec_on_prefetch_pte(ac_pool_t *pool) 789 { 790 ac_test_t at1, at2; 791 792 ac_test_init(&at1, (void *)(0x123406001000)); 793 ac_test_init(&at2, (void *)(0x123406003000)); 794 795 at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK; 796 ac_setup_specific_pages(&at1, pool, 30 * 1024 * 1024, 30 * 1024 * 1024); 797 798 at2.flags = at1.flags | AC_PTE_NX_MASK; 799 ac_setup_specific_pages(&at2, pool, 30 * 1024 * 1024, 30 * 1024 * 1024); 800 801 if (!ac_test_do_access(&at1)) { 802 printf("%s: prepare fail\n", __FUNCTION__); 803 goto err; 804 } 805 806 if (!ac_test_do_access(&at2)) { 807 printf("%s: check PFEC on prefetch pte path fail\n", 808 __FUNCTION__); 809 goto err; 810 } 811 812 return 1; 813 814 err: 815 return 0; 816 } 817 818 /* 819 * If the write-fault access is from supervisor and CR0.WP is not set on the 820 * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte 821 * and clears U bit. This is the chance that kvm can change pte access from 822 * readonly to writable. 823 * 824 * Unfortunately, the pte access is the access of 'direct' shadow page table, 825 * means direct sp.role.access = pte_access, then we will create a writable 826 * spte entry on the readonly shadow page table. It will cause Dirty bit is 827 * not tracked when two guest ptes point to the same large page. Note, it 828 * does not have other impact except Dirty bit since cr0.wp is encoded into 829 * sp.role. 830 * 831 * Note: to trigger this bug, hugepage should be disabled on host. 832 */ 833 static int check_large_pte_dirty_for_nowp(ac_pool_t *pool) 834 { 835 ac_test_t at1, at2; 836 837 ac_test_init(&at1, (void *)(0x123403000000)); 838 ac_test_init(&at2, (void *)(0x666606000000)); 839 840 at2.flags = AC_PDE_PRESENT_MASK | AC_PDE_PSE_MASK; 841 ac_test_setup_pte(&at2, pool); 842 if (!ac_test_do_access(&at2)) { 843 printf("%s: read on the first mapping fail.\n", __FUNCTION__); 844 goto err; 845 } 846 847 at1.flags = at2.flags | AC_ACCESS_WRITE_MASK; 848 ac_test_setup_pte(&at1, pool); 849 if (!ac_test_do_access(&at1)) { 850 printf("%s: write on the second mapping fail.\n", __FUNCTION__); 851 goto err; 852 } 853 854 at2.flags |= AC_ACCESS_WRITE_MASK; 855 ac_set_expected_status(&at2); 856 if (!ac_test_do_access(&at2)) { 857 printf("%s: write on the first mapping fail.\n", __FUNCTION__); 858 goto err; 859 } 860 861 return 1; 862 863 err: 864 return 0; 865 } 866 867 static int check_smep_andnot_wp(ac_pool_t *pool) 868 { 869 ac_test_t at1; 870 int err_prepare_andnot_wp, err_smep_andnot_wp; 871 872 if (!(cpuid_7_ebx & (1 << 7))) { 873 return 1; 874 } 875 876 ac_test_init(&at1, (void *)(0x123406001000)); 877 878 at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK | 879 AC_PDE_USER_MASK | AC_PTE_USER_MASK | 880 AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK | 881 AC_CPU_CR4_SMEP_MASK | 882 AC_CPU_CR0_WP_MASK | 883 AC_ACCESS_WRITE_MASK; 884 ac_test_setup_pte(&at1, pool); 885 886 /* 887 * Here we write the ro user page when 888 * cr0.wp=0, then we execute it and SMEP 889 * fault should happen. 890 */ 891 err_prepare_andnot_wp = ac_test_do_access(&at1); 892 if (!err_prepare_andnot_wp) { 893 printf("%s: SMEP prepare fail\n", __FUNCTION__); 894 goto clean_up; 895 } 896 897 at1.flags &= ~AC_ACCESS_WRITE_MASK; 898 at1.flags |= AC_ACCESS_FETCH_MASK; 899 ac_set_expected_status(&at1); 900 err_smep_andnot_wp = ac_test_do_access(&at1); 901 902 clean_up: 903 set_cr4_smep(0); 904 905 if (!err_prepare_andnot_wp) 906 goto err; 907 if (!err_smep_andnot_wp) { 908 printf("%s: check SMEP without wp fail\n", __FUNCTION__); 909 goto err; 910 } 911 return 1; 912 913 err: 914 return 0; 915 } 916 917 static int ac_test_exec(ac_test_t *at, ac_pool_t *pool) 918 { 919 int r; 920 921 if (verbose) { 922 ac_test_show(at); 923 } 924 ac_test_setup_pte(at, pool); 925 r = ac_test_do_access(at); 926 return r; 927 } 928 929 typedef int (*ac_test_fn)(ac_pool_t *pool); 930 const ac_test_fn ac_test_cases[] = 931 { 932 corrupt_hugepage_triger, 933 check_pfec_on_prefetch_pte, 934 check_large_pte_dirty_for_nowp, 935 check_smep_andnot_wp 936 }; 937 938 static int ac_test_run(void) 939 { 940 ac_test_t at; 941 ac_pool_t pool; 942 int i, tests, successes; 943 944 printf("run\n"); 945 tests = successes = 0; 946 947 if (cpuid_7_ecx & (1 << 3)) { 948 set_cr4_pke(1); 949 set_cr4_pke(0); 950 /* Now PKRU = 0xFFFFFFFF. */ 951 } else { 952 unsigned long cr4 = read_cr4(); 953 tests++; 954 if (write_cr4_checking(cr4 | X86_CR4_PKE) == GP_VECTOR) { 955 successes++; 956 invalid_mask |= AC_PKU_AD_MASK; 957 invalid_mask |= AC_PKU_WD_MASK; 958 invalid_mask |= AC_PKU_PKEY_MASK; 959 invalid_mask |= AC_CPU_CR4_PKE_MASK; 960 printf("CR4.PKE not available, disabling PKE tests\n"); 961 } else { 962 printf("Set PKE in CR4 - expect #GP: FAIL!\n"); 963 set_cr4_pke(0); 964 } 965 } 966 967 if (!(cpuid_7_ebx & (1 << 7))) { 968 tests++; 969 if (set_cr4_smep(1) == GP_VECTOR) { 970 successes++; 971 invalid_mask |= AC_CPU_CR4_SMEP_MASK; 972 printf("CR4.SMEP not available, disabling SMEP tests\n"); 973 } else { 974 printf("Set SMEP in CR4 - expect #GP: FAIL!\n"); 975 set_cr4_smep(0); 976 } 977 } 978 979 ac_env_int(&pool); 980 ac_test_init(&at, (void *)(0x123400000000 + 16 * smp_id())); 981 do { 982 ++tests; 983 successes += ac_test_exec(&at, &pool); 984 } while (ac_test_bump(&at)); 985 986 for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) { 987 ++tests; 988 successes += ac_test_cases[i](&pool); 989 } 990 991 printf("\n%d tests, %d failures\n", tests, tests - successes); 992 993 return successes == tests; 994 } 995 996 int main(void) 997 { 998 int r; 999 1000 setup_idt(); 1001 1002 cpuid_7_ebx = cpuid(7).b; 1003 cpuid_7_ecx = cpuid(7).c; 1004 1005 printf("starting test\n\n"); 1006 page_table_levels = 4; 1007 r = ac_test_run(); 1008 1009 if (cpuid_7_ecx & (1 << 16)) { 1010 page_table_levels = 5; 1011 setup_5level_page_table(); 1012 printf("starting 5-level paging test.\n\n"); 1013 r = ac_test_run(); 1014 } 1015 1016 return r ? 0 : 1; 1017 } 1018