1 2 #include "libcflat.h" 3 #include "desc.h" 4 #include "processor.h" 5 6 #define smp_id() 0 7 8 #define true 1 9 #define false 0 10 11 static _Bool verbose = false; 12 13 typedef unsigned long pt_element_t; 14 static int cpuid_7_ebx; 15 static int cpuid_7_ecx; 16 17 #define PAGE_SIZE ((pt_element_t)4096) 18 #define PAGE_MASK (~(PAGE_SIZE-1)) 19 20 #define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 40) - 1) & PAGE_MASK)) 21 #define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(1ull << 21)) 22 23 #define PT_PRESENT_MASK ((pt_element_t)1 << 0) 24 #define PT_WRITABLE_MASK ((pt_element_t)1 << 1) 25 #define PT_USER_MASK ((pt_element_t)1 << 2) 26 #define PT_ACCESSED_MASK ((pt_element_t)1 << 5) 27 #define PT_DIRTY_MASK ((pt_element_t)1 << 6) 28 #define PT_PSE_MASK ((pt_element_t)1 << 7) 29 #define PT_NX_MASK ((pt_element_t)1 << 63) 30 31 #define CR0_WP_MASK (1UL << 16) 32 #define CR4_SMEP_MASK (1UL << 20) 33 34 #define PFERR_PRESENT_MASK (1U << 0) 35 #define PFERR_WRITE_MASK (1U << 1) 36 #define PFERR_USER_MASK (1U << 2) 37 #define PFERR_RESERVED_MASK (1U << 3) 38 #define PFERR_FETCH_MASK (1U << 4) 39 #define PFERR_PK_MASK (1U << 5) 40 41 #define MSR_EFER 0xc0000080 42 #define EFER_NX_MASK (1ull << 11) 43 44 #define PT_INDEX(address, level) \ 45 ((address) >> (12 + ((level)-1) * 9)) & 511 46 47 /* 48 * page table access check tests 49 */ 50 51 enum { 52 AC_PTE_PRESENT, 53 AC_PTE_WRITABLE, 54 AC_PTE_USER, 55 AC_PTE_ACCESSED, 56 AC_PTE_DIRTY, 57 AC_PTE_NX, 58 AC_PTE_BIT51, 59 60 AC_PDE_PRESENT, 61 AC_PDE_WRITABLE, 62 AC_PDE_USER, 63 AC_PDE_ACCESSED, 64 AC_PDE_DIRTY, 65 AC_PDE_PSE, 66 AC_PDE_NX, 67 AC_PDE_BIT51, 68 AC_PDE_BIT13, 69 70 AC_PKU_AD, 71 AC_PKU_WD, 72 AC_PKU_PKEY, 73 74 AC_ACCESS_USER, 75 AC_ACCESS_WRITE, 76 AC_ACCESS_FETCH, 77 AC_ACCESS_TWICE, 78 79 AC_CPU_EFER_NX, 80 AC_CPU_CR0_WP, 81 AC_CPU_CR4_SMEP, 82 AC_CPU_CR4_PKE, 83 84 NR_AC_FLAGS 85 }; 86 87 const char *ac_names[] = { 88 [AC_PTE_PRESENT] = "pte.p", 89 [AC_PTE_ACCESSED] = "pte.a", 90 [AC_PTE_WRITABLE] = "pte.rw", 91 [AC_PTE_USER] = "pte.user", 92 [AC_PTE_DIRTY] = "pte.d", 93 [AC_PTE_NX] = "pte.nx", 94 [AC_PTE_BIT51] = "pte.51", 95 [AC_PDE_PRESENT] = "pde.p", 96 [AC_PDE_ACCESSED] = "pde.a", 97 [AC_PDE_WRITABLE] = "pde.rw", 98 [AC_PDE_USER] = "pde.user", 99 [AC_PDE_DIRTY] = "pde.d", 100 [AC_PDE_PSE] = "pde.pse", 101 [AC_PDE_NX] = "pde.nx", 102 [AC_PDE_BIT51] = "pde.51", 103 [AC_PDE_BIT13] = "pde.13", 104 [AC_PKU_AD] = "pkru.ad", 105 [AC_PKU_WD] = "pkru.wd", 106 [AC_PKU_PKEY] = "pkey=1", 107 [AC_ACCESS_WRITE] = "write", 108 [AC_ACCESS_USER] = "user", 109 [AC_ACCESS_FETCH] = "fetch", 110 [AC_ACCESS_TWICE] = "twice", 111 [AC_CPU_EFER_NX] = "efer.nx", 112 [AC_CPU_CR0_WP] = "cr0.wp", 113 [AC_CPU_CR4_SMEP] = "cr4.smep", 114 [AC_CPU_CR4_PKE] = "cr4.pke", 115 }; 116 117 static inline void *va(pt_element_t phys) 118 { 119 return (void *)phys; 120 } 121 122 typedef struct { 123 pt_element_t pt_pool; 124 unsigned pt_pool_size; 125 unsigned pt_pool_current; 126 } ac_pool_t; 127 128 typedef struct { 129 unsigned flags[NR_AC_FLAGS]; 130 void *virt; 131 pt_element_t phys; 132 pt_element_t *ptep; 133 pt_element_t expected_pte; 134 pt_element_t *pdep; 135 pt_element_t expected_pde; 136 pt_element_t ignore_pde; 137 int expected_fault; 138 unsigned expected_error; 139 } ac_test_t; 140 141 typedef struct { 142 unsigned short limit; 143 unsigned long linear_addr; 144 } __attribute__((packed)) descriptor_table_t; 145 146 147 static void ac_test_show(ac_test_t *at); 148 149 int write_cr4_checking(unsigned long val) 150 { 151 asm volatile(ASM_TRY("1f") 152 "mov %0,%%cr4\n\t" 153 "1:": : "r" (val)); 154 return exception_vector(); 155 } 156 157 void set_cr0_wp(int wp) 158 { 159 unsigned long cr0 = read_cr0(); 160 161 cr0 &= ~CR0_WP_MASK; 162 if (wp) 163 cr0 |= CR0_WP_MASK; 164 write_cr0(cr0); 165 } 166 167 void set_cr4_smep(int smep) 168 { 169 unsigned long cr4 = read_cr4(); 170 171 cr4 &= ~CR4_SMEP_MASK; 172 if (smep) 173 cr4 |= CR4_SMEP_MASK; 174 write_cr4(cr4); 175 } 176 177 void set_cr4_pke(int pke) 178 { 179 unsigned long cr4 = read_cr4(); 180 181 /* Check that protection keys do not affect accesses when CR4.PKE=0. */ 182 if ((read_cr4() & X86_CR4_PKE) && !pke) { 183 write_pkru(0xffffffff); 184 } 185 186 cr4 &= ~X86_CR4_PKE; 187 if (pke) 188 cr4 |= X86_CR4_PKE; 189 write_cr4(cr4); 190 } 191 192 void set_efer_nx(int nx) 193 { 194 unsigned long long efer; 195 196 efer = rdmsr(MSR_EFER); 197 efer &= ~EFER_NX_MASK; 198 if (nx) 199 efer |= EFER_NX_MASK; 200 wrmsr(MSR_EFER, efer); 201 } 202 203 static void ac_env_int(ac_pool_t *pool) 204 { 205 setup_idt(); 206 207 extern char page_fault, kernel_entry; 208 set_idt_entry(14, &page_fault, 0); 209 set_idt_entry(0x20, &kernel_entry, 3); 210 211 pool->pt_pool = 33 * 1024 * 1024; 212 pool->pt_pool_size = 120 * 1024 * 1024 - pool->pt_pool; 213 pool->pt_pool_current = 0; 214 } 215 216 void ac_test_init(ac_test_t *at, void *virt) 217 { 218 wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NX_MASK); 219 set_cr0_wp(1); 220 for (int i = 0; i < NR_AC_FLAGS; ++i) 221 at->flags[i] = 0; 222 at->virt = virt; 223 at->phys = 32 * 1024 * 1024; 224 } 225 226 int ac_test_bump_one(ac_test_t *at) 227 { 228 for (int i = 0; i < NR_AC_FLAGS; ++i) 229 if (!at->flags[i]) { 230 at->flags[i] = 1; 231 return 1; 232 } else 233 at->flags[i] = 0; 234 return 0; 235 } 236 237 _Bool ac_test_legal(ac_test_t *at) 238 { 239 if (at->flags[AC_ACCESS_FETCH] && at->flags[AC_ACCESS_WRITE]) 240 return false; 241 242 /* 243 * Since we convert current page to kernel page when cr4.smep=1, 244 * we can't switch to user mode. 245 */ 246 if (at->flags[AC_ACCESS_USER] && at->flags[AC_CPU_CR4_SMEP]) 247 return false; 248 249 /* 250 * Only test protection key faults if CR4.PKE=1. 251 */ 252 if (!at->flags[AC_CPU_CR4_PKE] && 253 (at->flags[AC_PKU_AD] || at->flags[AC_PKU_WD])) { 254 return false; 255 } 256 257 /* 258 * pde.bit13 checks handling of reserved bits in largepage PDEs. It is 259 * meaningless if there is a PTE. 260 */ 261 if (!at->flags[AC_PDE_PSE] && at->flags[AC_PDE_BIT13]) 262 return false; 263 264 return true; 265 } 266 267 int ac_test_bump(ac_test_t *at) 268 { 269 int ret; 270 271 ret = ac_test_bump_one(at); 272 while (ret && !ac_test_legal(at)) 273 ret = ac_test_bump_one(at); 274 return ret; 275 } 276 277 pt_element_t ac_test_alloc_pt(ac_pool_t *pool) 278 { 279 pt_element_t ret = pool->pt_pool + pool->pt_pool_current; 280 pool->pt_pool_current += PAGE_SIZE; 281 return ret; 282 } 283 284 _Bool ac_test_enough_room(ac_pool_t *pool) 285 { 286 return pool->pt_pool_current + 4 * PAGE_SIZE <= pool->pt_pool_size; 287 } 288 289 void ac_test_reset_pt_pool(ac_pool_t *pool) 290 { 291 pool->pt_pool_current = 0; 292 } 293 294 void ac_set_expected_status(ac_test_t *at) 295 { 296 int pde_valid, pte_valid; 297 298 invlpg(at->virt); 299 300 if (at->ptep) 301 at->expected_pte = *at->ptep; 302 at->expected_pde = *at->pdep; 303 at->ignore_pde = 0; 304 at->expected_fault = 0; 305 at->expected_error = PFERR_PRESENT_MASK; 306 307 pde_valid = at->flags[AC_PDE_PRESENT] 308 && !at->flags[AC_PDE_BIT51] && !at->flags[AC_PDE_BIT13] 309 && !(at->flags[AC_PDE_NX] && !at->flags[AC_CPU_EFER_NX]); 310 pte_valid = pde_valid 311 && at->flags[AC_PTE_PRESENT] 312 && !at->flags[AC_PTE_BIT51] 313 && !(at->flags[AC_PTE_NX] && !at->flags[AC_CPU_EFER_NX]); 314 315 if (at->flags[AC_ACCESS_USER]) 316 at->expected_error |= PFERR_USER_MASK; 317 318 if (at->flags[AC_ACCESS_WRITE]) 319 at->expected_error |= PFERR_WRITE_MASK; 320 321 if (at->flags[AC_ACCESS_FETCH]) 322 at->expected_error |= PFERR_FETCH_MASK; 323 324 if (!at->flags[AC_PDE_PRESENT]) { 325 at->expected_fault = 1; 326 at->expected_error &= ~PFERR_PRESENT_MASK; 327 } else if (!pde_valid) { 328 at->expected_fault = 1; 329 at->expected_error |= PFERR_RESERVED_MASK; 330 } 331 332 if (at->flags[AC_ACCESS_USER] && !at->flags[AC_PDE_USER]) 333 at->expected_fault = 1; 334 335 if (at->flags[AC_ACCESS_WRITE] 336 && !at->flags[AC_PDE_WRITABLE] 337 && (at->flags[AC_CPU_CR0_WP] || at->flags[AC_ACCESS_USER])) 338 at->expected_fault = 1; 339 340 if (at->flags[AC_ACCESS_FETCH] && at->flags[AC_PDE_NX]) 341 at->expected_fault = 1; 342 343 if (!at->flags[AC_PDE_ACCESSED]) 344 at->ignore_pde = PT_ACCESSED_MASK; 345 346 if (!pde_valid) 347 goto fault; 348 349 if (!at->expected_fault) 350 at->expected_pde |= PT_ACCESSED_MASK; 351 352 if (at->flags[AC_PDE_PSE]) { 353 /* Even for "twice" accesses, PKEY might cause pde.a=0. */ 354 if (at->flags[AC_PDE_USER] && at->flags[AC_ACCESS_TWICE] && 355 at->flags[AC_PKU_PKEY] && at->flags[AC_CPU_CR4_PKE] && 356 at->flags[AC_PKU_AD]) { 357 pde_valid = false; 358 } 359 360 if (at->flags[AC_ACCESS_FETCH] && at->flags[AC_PDE_USER] 361 && at->flags[AC_CPU_CR4_SMEP]) 362 at->expected_fault = 1; 363 364 if (at->flags[AC_PDE_USER] && !at->flags[AC_ACCESS_FETCH] && 365 at->flags[AC_PKU_PKEY] && at->flags[AC_CPU_CR4_PKE] && 366 !at->expected_fault) { 367 if (at->flags[AC_PKU_AD]) { 368 at->expected_fault = 1; 369 at->expected_error |= PFERR_PK_MASK; 370 } else if (at->flags[AC_ACCESS_WRITE] && at->flags[AC_PKU_WD] && 371 (at->flags[AC_CPU_CR0_WP] || at->flags[AC_ACCESS_USER])) { 372 at->expected_fault = 1; 373 at->expected_error |= PFERR_PK_MASK; 374 } 375 } 376 if (at->flags[AC_ACCESS_WRITE] && !at->expected_fault) 377 at->expected_pde |= PT_DIRTY_MASK; 378 379 goto no_pte; 380 } 381 382 if (!at->flags[AC_PTE_PRESENT]) { 383 at->expected_fault = 1; 384 at->expected_error &= ~PFERR_PRESENT_MASK; 385 } else if (!pte_valid) { 386 at->expected_fault = 1; 387 at->expected_error |= PFERR_RESERVED_MASK; 388 } 389 390 if (at->flags[AC_ACCESS_USER] && !at->flags[AC_PTE_USER]) 391 at->expected_fault = 1; 392 393 if (!pte_valid) 394 goto fault; 395 396 /* Even for "twice" accesses, PKEY might cause pte.a=0. */ 397 if (at->flags[AC_PDE_USER] && at->flags[AC_PTE_USER] && at->flags[AC_ACCESS_TWICE] && 398 at->flags[AC_PKU_PKEY] && at->flags[AC_CPU_CR4_PKE] && 399 at->flags[AC_PKU_AD]) { 400 pte_valid = false; 401 } 402 403 if (at->flags[AC_ACCESS_WRITE] 404 && !at->flags[AC_PTE_WRITABLE] 405 && (at->flags[AC_CPU_CR0_WP] || at->flags[AC_ACCESS_USER])) 406 at->expected_fault = 1; 407 408 if (at->flags[AC_ACCESS_FETCH] 409 && (at->flags[AC_PTE_NX] 410 || (at->flags[AC_CPU_CR4_SMEP] 411 && at->flags[AC_PDE_USER] 412 && at->flags[AC_PTE_USER]))) 413 at->expected_fault = 1; 414 415 if (at->flags[AC_PDE_USER] && at->flags[AC_PTE_USER] && !at->flags[AC_ACCESS_FETCH] && 416 at->flags[AC_PKU_PKEY] && at->flags[AC_CPU_CR4_PKE] && 417 !at->expected_fault) { 418 if (at->flags[AC_PKU_AD]) { 419 at->expected_fault = 1; 420 at->expected_error |= PFERR_PK_MASK; 421 } else if (at->flags[AC_ACCESS_WRITE] && at->flags[AC_PKU_WD] && 422 (at->flags[AC_CPU_CR0_WP] || at->flags[AC_ACCESS_USER])) { 423 at->expected_fault = 1; 424 at->expected_error |= PFERR_PK_MASK; 425 } 426 } 427 428 if (at->expected_fault) 429 goto fault; 430 431 at->expected_pte |= PT_ACCESSED_MASK; 432 if (at->flags[AC_ACCESS_WRITE]) 433 at->expected_pte |= PT_DIRTY_MASK; 434 435 no_pte: 436 fault: 437 if (at->flags[AC_ACCESS_TWICE]) { 438 if (pde_valid) { 439 at->expected_pde |= PT_ACCESSED_MASK; 440 if (pte_valid) 441 at->expected_pte |= PT_ACCESSED_MASK; 442 } 443 } 444 if (!at->expected_fault) 445 at->ignore_pde = 0; 446 if (!at->flags[AC_CPU_EFER_NX] && !at->flags[AC_CPU_CR4_SMEP]) 447 at->expected_error &= ~PFERR_FETCH_MASK; 448 } 449 450 void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, u64 pd_page, 451 u64 pt_page) 452 453 { 454 unsigned long root = read_cr3(); 455 456 if (!ac_test_enough_room(pool)) 457 ac_test_reset_pt_pool(pool); 458 459 at->ptep = 0; 460 for (int i = 4; i >= 1 && (i >= 2 || !at->flags[AC_PDE_PSE]); --i) { 461 pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); 462 unsigned index = PT_INDEX((unsigned long)at->virt, i); 463 pt_element_t pte = 0; 464 switch (i) { 465 case 4: 466 case 3: 467 pte = pd_page ? pd_page : ac_test_alloc_pt(pool); 468 pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; 469 break; 470 case 2: 471 if (!at->flags[AC_PDE_PSE]) { 472 pte = pt_page ? pt_page : ac_test_alloc_pt(pool); 473 /* The protection key is ignored on non-leaf entries. */ 474 if (at->flags[AC_PKU_PKEY]) 475 pte |= 2ull << 59; 476 } else { 477 pte = at->phys & PT_PSE_BASE_ADDR_MASK; 478 pte |= PT_PSE_MASK; 479 if (at->flags[AC_PKU_PKEY]) 480 pte |= 1ull << 59; 481 } 482 if (at->flags[AC_PDE_PRESENT]) 483 pte |= PT_PRESENT_MASK; 484 if (at->flags[AC_PDE_WRITABLE]) 485 pte |= PT_WRITABLE_MASK; 486 if (at->flags[AC_PDE_USER]) 487 pte |= PT_USER_MASK; 488 if (at->flags[AC_PDE_ACCESSED]) 489 pte |= PT_ACCESSED_MASK; 490 if (at->flags[AC_PDE_DIRTY]) 491 pte |= PT_DIRTY_MASK; 492 if (at->flags[AC_PDE_NX]) 493 pte |= PT_NX_MASK; 494 if (at->flags[AC_PDE_BIT51]) 495 pte |= 1ull << 51; 496 if (at->flags[AC_PDE_BIT13]) 497 pte |= 1ull << 13; 498 at->pdep = &vroot[index]; 499 break; 500 case 1: 501 pte = at->phys & PT_BASE_ADDR_MASK; 502 if (at->flags[AC_PKU_PKEY]) 503 pte |= 1ull << 59; 504 if (at->flags[AC_PTE_PRESENT]) 505 pte |= PT_PRESENT_MASK; 506 if (at->flags[AC_PTE_WRITABLE]) 507 pte |= PT_WRITABLE_MASK; 508 if (at->flags[AC_PTE_USER]) 509 pte |= PT_USER_MASK; 510 if (at->flags[AC_PTE_ACCESSED]) 511 pte |= PT_ACCESSED_MASK; 512 if (at->flags[AC_PTE_DIRTY]) 513 pte |= PT_DIRTY_MASK; 514 if (at->flags[AC_PTE_NX]) 515 pte |= PT_NX_MASK; 516 if (at->flags[AC_PTE_BIT51]) 517 pte |= 1ull << 51; 518 at->ptep = &vroot[index]; 519 break; 520 } 521 vroot[index] = pte; 522 root = vroot[index]; 523 } 524 ac_set_expected_status(at); 525 } 526 527 static void ac_test_setup_pte(ac_test_t *at, ac_pool_t *pool) 528 { 529 __ac_setup_specific_pages(at, pool, 0, 0); 530 } 531 532 static void ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, 533 u64 pd_page, u64 pt_page) 534 { 535 return __ac_setup_specific_pages(at, pool, pd_page, pt_page); 536 } 537 538 static void dump_mapping(ac_test_t *at) 539 { 540 unsigned long root = read_cr3(); 541 int i; 542 543 printf("Dump mapping: address: %p\n", at->virt); 544 for (i = 4; i >= 1 && (i >= 2 || !at->flags[AC_PDE_PSE]); --i) { 545 pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); 546 unsigned index = PT_INDEX((unsigned long)at->virt, i); 547 pt_element_t pte = vroot[index]; 548 549 printf("------L%d: %lx\n", i, pte); 550 root = vroot[index]; 551 } 552 } 553 554 static void ac_test_check(ac_test_t *at, _Bool *success_ret, _Bool cond, 555 const char *fmt, ...) 556 { 557 va_list ap; 558 char buf[500]; 559 560 if (!*success_ret) { 561 return; 562 } 563 564 if (!cond) { 565 return; 566 } 567 568 *success_ret = false; 569 570 if (!verbose) { 571 ac_test_show(at); 572 } 573 574 va_start(ap, fmt); 575 vsnprintf(buf, sizeof(buf), fmt, ap); 576 va_end(ap); 577 printf("FAIL: %s\n", buf); 578 dump_mapping(at); 579 } 580 581 static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore) 582 { 583 pte1 &= ~ignore; 584 pte2 &= ~ignore; 585 return pte1 == pte2; 586 } 587 588 int ac_test_do_access(ac_test_t *at) 589 { 590 static unsigned unique = 42; 591 int fault = 0; 592 unsigned e; 593 static unsigned char user_stack[4096]; 594 unsigned long rsp; 595 _Bool success = true; 596 597 ++unique; 598 599 *((unsigned char *)at->phys) = 0xc3; /* ret */ 600 601 unsigned r = unique; 602 set_cr0_wp(at->flags[AC_CPU_CR0_WP]); 603 set_efer_nx(at->flags[AC_CPU_EFER_NX]); 604 if (at->flags[AC_CPU_CR4_PKE] && !(cpuid_7_ecx & (1 << 3))) { 605 unsigned long cr4 = read_cr4(); 606 if (write_cr4_checking(cr4 | X86_CR4_PKE) == GP_VECTOR) 607 goto done; 608 printf("Set PKE in CR4 - expect #GP: FAIL!\n"); 609 return 0; 610 } 611 if (at->flags[AC_CPU_CR4_SMEP] && !(cpuid_7_ebx & (1 << 7))) { 612 unsigned long cr4 = read_cr4(); 613 if (write_cr4_checking(cr4 | CR4_SMEP_MASK) == GP_VECTOR) 614 goto done; 615 printf("Set SMEP in CR4 - expect #GP: FAIL!\n"); 616 return 0; 617 } 618 619 set_cr4_pke(at->flags[AC_CPU_CR4_PKE]); 620 if (at->flags[AC_CPU_CR4_PKE]) { 621 /* WD2=AD2=1, WD1=at->flags[AC_PKU_WD], AD1=at->flags[AC_PKU_AD] */ 622 write_pkru(0x30 | (at->flags[AC_PKU_WD] ? 8 : 0) | 623 (at->flags[AC_PKU_AD] ? 4 : 0)); 624 } 625 626 set_cr4_smep(at->flags[AC_CPU_CR4_SMEP]); 627 628 if (at->flags[AC_ACCESS_TWICE]) { 629 asm volatile ( 630 "mov $fixed2, %%rsi \n\t" 631 "mov (%[addr]), %[reg] \n\t" 632 "fixed2:" 633 : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e) 634 : [addr]"r"(at->virt) 635 : "rsi" 636 ); 637 fault = 0; 638 } 639 640 asm volatile ("mov $fixed1, %%rsi \n\t" 641 "mov %%rsp, %%rdx \n\t" 642 "cmp $0, %[user] \n\t" 643 "jz do_access \n\t" 644 "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax \n\t" 645 "pushq %[user_ds] \n\t" 646 "pushq %[user_stack_top] \n\t" 647 "pushfq \n\t" 648 "pushq %[user_cs] \n\t" 649 "pushq $do_access \n\t" 650 "iretq \n" 651 "do_access: \n\t" 652 "cmp $0, %[fetch] \n\t" 653 "jnz 2f \n\t" 654 "cmp $0, %[write] \n\t" 655 "jnz 1f \n\t" 656 "mov (%[addr]), %[reg] \n\t" 657 "jmp done \n\t" 658 "1: mov %[reg], (%[addr]) \n\t" 659 "jmp done \n\t" 660 "2: call *%[addr] \n\t" 661 "done: \n" 662 "fixed1: \n" 663 "int %[kernel_entry_vector] \n\t" 664 "back_to_kernel:" 665 : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp) 666 : [addr]"r"(at->virt), 667 [write]"r"(at->flags[AC_ACCESS_WRITE]), 668 [user]"r"(at->flags[AC_ACCESS_USER]), 669 [fetch]"r"(at->flags[AC_ACCESS_FETCH]), 670 [user_ds]"i"(USER_DS), 671 [user_cs]"i"(USER_CS), 672 [user_stack_top]"r"(user_stack + sizeof user_stack), 673 [kernel_entry_vector]"i"(0x20) 674 : "rsi"); 675 676 asm volatile (".section .text.pf \n\t" 677 "page_fault: \n\t" 678 "pop %rbx \n\t" 679 "mov %rsi, (%rsp) \n\t" 680 "movl $1, %eax \n\t" 681 "iretq \n\t" 682 ".section .text"); 683 684 asm volatile (".section .text.entry \n\t" 685 "kernel_entry: \n\t" 686 "mov %rdx, %rsp \n\t" 687 "jmp back_to_kernel \n\t" 688 ".section .text"); 689 690 ac_test_check(at, &success, fault && !at->expected_fault, 691 "unexpected fault"); 692 ac_test_check(at, &success, !fault && at->expected_fault, 693 "unexpected access"); 694 ac_test_check(at, &success, fault && e != at->expected_error, 695 "error code %x expected %x", e, at->expected_error); 696 ac_test_check(at, &success, at->ptep && *at->ptep != at->expected_pte, 697 "pte %x expected %x", *at->ptep, at->expected_pte); 698 ac_test_check(at, &success, 699 !pt_match(*at->pdep, at->expected_pde, at->ignore_pde), 700 "pde %x expected %x", *at->pdep, at->expected_pde); 701 702 done: 703 if (success && verbose) { 704 printf("PASS\n"); 705 } 706 return success; 707 } 708 709 static void ac_test_show(ac_test_t *at) 710 { 711 char line[5000]; 712 713 *line = 0; 714 strcat(line, "test"); 715 for (int i = 0; i < NR_AC_FLAGS; ++i) 716 if (at->flags[i]) { 717 strcat(line, " "); 718 strcat(line, ac_names[i]); 719 } 720 strcat(line, ": "); 721 printf("%s", line); 722 } 723 724 /* 725 * This test case is used to triger the bug which is fixed by 726 * commit e09e90a5 in the kvm tree 727 */ 728 static int corrupt_hugepage_triger(ac_pool_t *pool) 729 { 730 ac_test_t at1, at2; 731 732 ac_test_init(&at1, (void *)(0x123400000000)); 733 ac_test_init(&at2, (void *)(0x666600000000)); 734 735 at2.flags[AC_CPU_CR0_WP] = 1; 736 at2.flags[AC_PDE_PSE] = 1; 737 at2.flags[AC_PDE_PRESENT] = 1; 738 ac_test_setup_pte(&at2, pool); 739 if (!ac_test_do_access(&at2)) 740 goto err; 741 742 at1.flags[AC_CPU_CR0_WP] = 1; 743 at1.flags[AC_PDE_PSE] = 1; 744 at1.flags[AC_PDE_WRITABLE] = 1; 745 at1.flags[AC_PDE_PRESENT] = 1; 746 ac_test_setup_pte(&at1, pool); 747 if (!ac_test_do_access(&at1)) 748 goto err; 749 750 at1.flags[AC_ACCESS_WRITE] = 1; 751 ac_set_expected_status(&at1); 752 if (!ac_test_do_access(&at1)) 753 goto err; 754 755 at2.flags[AC_ACCESS_WRITE] = 1; 756 ac_set_expected_status(&at2); 757 if (!ac_test_do_access(&at2)) 758 goto err; 759 760 return 1; 761 762 err: 763 printf("corrupt_hugepage_triger test fail\n"); 764 return 0; 765 } 766 767 /* 768 * This test case is used to triger the bug which is fixed by 769 * commit 3ddf6c06e13e in the kvm tree 770 */ 771 static int check_pfec_on_prefetch_pte(ac_pool_t *pool) 772 { 773 ac_test_t at1, at2; 774 775 ac_test_init(&at1, (void *)(0x123406001000)); 776 ac_test_init(&at2, (void *)(0x123406003000)); 777 778 at1.flags[AC_PDE_PRESENT] = 1; 779 at1.flags[AC_PTE_PRESENT] = 1; 780 ac_setup_specific_pages(&at1, pool, 30 * 1024 * 1024, 30 * 1024 * 1024); 781 782 at2.flags[AC_PDE_PRESENT] = 1; 783 at2.flags[AC_PTE_NX] = 1; 784 at2.flags[AC_PTE_PRESENT] = 1; 785 ac_setup_specific_pages(&at2, pool, 30 * 1024 * 1024, 30 * 1024 * 1024); 786 787 if (!ac_test_do_access(&at1)) { 788 printf("%s: prepare fail\n", __FUNCTION__); 789 goto err; 790 } 791 792 if (!ac_test_do_access(&at2)) { 793 printf("%s: check PFEC on prefetch pte path fail\n", 794 __FUNCTION__); 795 goto err; 796 } 797 798 return 1; 799 800 err: 801 return 0; 802 } 803 804 /* 805 * If the write-fault access is from supervisor and CR0.WP is not set on the 806 * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte 807 * and clears U bit. This is the chance that kvm can change pte access from 808 * readonly to writable. 809 * 810 * Unfortunately, the pte access is the access of 'direct' shadow page table, 811 * means direct sp.role.access = pte_access, then we will create a writable 812 * spte entry on the readonly shadow page table. It will cause Dirty bit is 813 * not tracked when two guest ptes point to the same large page. Note, it 814 * does not have other impact except Dirty bit since cr0.wp is encoded into 815 * sp.role. 816 * 817 * Note: to trigger this bug, hugepage should be disabled on host. 818 */ 819 static int check_large_pte_dirty_for_nowp(ac_pool_t *pool) 820 { 821 ac_test_t at1, at2; 822 823 ac_test_init(&at1, (void *)(0x123403000000)); 824 ac_test_init(&at2, (void *)(0x666606000000)); 825 826 at2.flags[AC_PDE_PRESENT] = 1; 827 at2.flags[AC_PDE_PSE] = 1; 828 829 ac_test_setup_pte(&at2, pool); 830 if (!ac_test_do_access(&at2)) { 831 printf("%s: read on the first mapping fail.\n", __FUNCTION__); 832 goto err; 833 } 834 835 at1.flags[AC_PDE_PRESENT] = 1; 836 at1.flags[AC_PDE_PSE] = 1; 837 at1.flags[AC_ACCESS_WRITE] = 1; 838 839 ac_test_setup_pte(&at1, pool); 840 if (!ac_test_do_access(&at1)) { 841 printf("%s: write on the second mapping fail.\n", __FUNCTION__); 842 goto err; 843 } 844 845 at2.flags[AC_ACCESS_WRITE] = 1; 846 ac_set_expected_status(&at2); 847 if (!ac_test_do_access(&at2)) { 848 printf("%s: write on the first mapping fail.\n", __FUNCTION__); 849 goto err; 850 } 851 852 return 1; 853 854 err: 855 return 0; 856 } 857 858 static int check_smep_andnot_wp(ac_pool_t *pool) 859 { 860 ac_test_t at1; 861 int err_prepare_andnot_wp, err_smep_andnot_wp; 862 extern u64 ptl2[]; 863 864 ac_test_init(&at1, (void *)(0x123406001000)); 865 866 at1.flags[AC_PDE_PRESENT] = 1; 867 at1.flags[AC_PTE_PRESENT] = 1; 868 at1.flags[AC_PDE_USER] = 1; 869 at1.flags[AC_PTE_USER] = 1; 870 at1.flags[AC_PDE_ACCESSED] = 1; 871 at1.flags[AC_PTE_ACCESSED] = 1; 872 at1.flags[AC_CPU_CR4_SMEP] = 1; 873 at1.flags[AC_CPU_CR0_WP] = 0; 874 at1.flags[AC_ACCESS_WRITE] = 1; 875 ac_test_setup_pte(&at1, pool); 876 ptl2[2] -= 0x4; 877 878 /* 879 * Here we write the ro user page when 880 * cr0.wp=0, then we execute it and SMEP 881 * fault should happen. 882 */ 883 err_prepare_andnot_wp = ac_test_do_access(&at1); 884 if (!err_prepare_andnot_wp) { 885 printf("%s: SMEP prepare fail\n", __FUNCTION__); 886 goto clean_up; 887 } 888 889 at1.flags[AC_ACCESS_WRITE] = 0; 890 at1.flags[AC_ACCESS_FETCH] = 1; 891 ac_set_expected_status(&at1); 892 err_smep_andnot_wp = ac_test_do_access(&at1); 893 894 clean_up: 895 set_cr4_smep(0); 896 ptl2[2] += 0x4; 897 898 if (!err_prepare_andnot_wp) 899 goto err; 900 if (!err_smep_andnot_wp) { 901 printf("%s: check SMEP without wp fail\n", __FUNCTION__); 902 goto err; 903 } 904 return 1; 905 906 err: 907 return 0; 908 } 909 910 int ac_test_exec(ac_test_t *at, ac_pool_t *pool) 911 { 912 int r; 913 914 if (verbose) { 915 ac_test_show(at); 916 } 917 ac_test_setup_pte(at, pool); 918 r = ac_test_do_access(at); 919 return r; 920 } 921 922 typedef int (*ac_test_fn)(ac_pool_t *pool); 923 const ac_test_fn ac_test_cases[] = 924 { 925 corrupt_hugepage_triger, 926 check_pfec_on_prefetch_pte, 927 check_large_pte_dirty_for_nowp, 928 check_smep_andnot_wp 929 }; 930 931 int ac_test_run(void) 932 { 933 ac_test_t at; 934 ac_pool_t pool; 935 int i, tests, successes; 936 extern u64 ptl2[]; 937 938 printf("run\n"); 939 tests = successes = 0; 940 ac_env_int(&pool); 941 ac_test_init(&at, (void *)(0x123400000000 + 16 * smp_id())); 942 do { 943 if (at.flags[AC_CPU_CR4_SMEP] && (ptl2[2] & 0x4)) 944 ptl2[2] -= 0x4; 945 if (!at.flags[AC_CPU_CR4_SMEP] && !(ptl2[2] & 0x4)) { 946 set_cr4_smep(0); 947 ptl2[2] += 0x4; 948 } 949 950 ++tests; 951 successes += ac_test_exec(&at, &pool); 952 } while (ac_test_bump(&at)); 953 954 set_cr4_smep(0); 955 ptl2[2] += 0x4; 956 957 for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) { 958 ++tests; 959 successes += ac_test_cases[i](&pool); 960 } 961 962 printf("\n%d tests, %d failures\n", tests, tests - successes); 963 964 return successes == tests; 965 } 966 967 int main() 968 { 969 int r; 970 971 cpuid_7_ebx = cpuid(7).b; 972 cpuid_7_ecx = cpuid(7).c; 973 974 if (cpuid_7_ecx & (1 << 3)) { 975 set_cr4_pke(1); 976 set_cr4_pke(0); 977 /* Now PKRU = 0xFFFFFFFF. */ 978 } 979 980 printf("starting test\n\n"); 981 r = ac_test_run(); 982 return r ? 0 : 1; 983 } 984