1 2 #include "libcflat.h" 3 #include "desc.h" 4 #include "processor.h" 5 #include "asm/page.h" 6 7 #define smp_id() 0 8 9 #define true 1 10 #define false 0 11 12 static _Bool verbose = false; 13 14 typedef unsigned long pt_element_t; 15 static int cpuid_7_ebx; 16 static int cpuid_7_ecx; 17 static int invalid_mask; 18 19 #define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 40) - 1) & PAGE_MASK)) 20 #define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(1ull << 21)) 21 22 #define CR0_WP_MASK (1UL << 16) 23 #define CR4_SMEP_MASK (1UL << 20) 24 25 #define PFERR_PRESENT_MASK (1U << 0) 26 #define PFERR_WRITE_MASK (1U << 1) 27 #define PFERR_USER_MASK (1U << 2) 28 #define PFERR_RESERVED_MASK (1U << 3) 29 #define PFERR_FETCH_MASK (1U << 4) 30 #define PFERR_PK_MASK (1U << 5) 31 32 #define MSR_EFER 0xc0000080 33 #define EFER_NX_MASK (1ull << 11) 34 35 #define PT_INDEX(address, level) \ 36 ((address) >> (12 + ((level)-1) * 9)) & 511 37 38 /* 39 * page table access check tests 40 */ 41 42 enum { 43 AC_PTE_PRESENT_BIT, 44 AC_PTE_WRITABLE_BIT, 45 AC_PTE_USER_BIT, 46 AC_PTE_ACCESSED_BIT, 47 AC_PTE_DIRTY_BIT, 48 AC_PTE_NX_BIT, 49 AC_PTE_BIT51_BIT, 50 51 AC_PDE_PRESENT_BIT, 52 AC_PDE_WRITABLE_BIT, 53 AC_PDE_USER_BIT, 54 AC_PDE_ACCESSED_BIT, 55 AC_PDE_DIRTY_BIT, 56 AC_PDE_PSE_BIT, 57 AC_PDE_NX_BIT, 58 AC_PDE_BIT51_BIT, 59 AC_PDE_BIT13_BIT, 60 61 AC_PKU_AD_BIT, 62 AC_PKU_WD_BIT, 63 AC_PKU_PKEY_BIT, 64 65 AC_ACCESS_USER_BIT, 66 AC_ACCESS_WRITE_BIT, 67 AC_ACCESS_FETCH_BIT, 68 AC_ACCESS_TWICE_BIT, 69 70 AC_CPU_EFER_NX_BIT, 71 AC_CPU_CR0_WP_BIT, 72 AC_CPU_CR4_SMEP_BIT, 73 AC_CPU_CR4_PKE_BIT, 74 75 NR_AC_FLAGS 76 }; 77 78 #define AC_PTE_PRESENT_MASK (1 << AC_PTE_PRESENT_BIT) 79 #define AC_PTE_WRITABLE_MASK (1 << AC_PTE_WRITABLE_BIT) 80 #define AC_PTE_USER_MASK (1 << AC_PTE_USER_BIT) 81 #define AC_PTE_ACCESSED_MASK (1 << AC_PTE_ACCESSED_BIT) 82 #define AC_PTE_DIRTY_MASK (1 << AC_PTE_DIRTY_BIT) 83 #define AC_PTE_NX_MASK (1 << AC_PTE_NX_BIT) 84 #define AC_PTE_BIT51_MASK (1 << AC_PTE_BIT51_BIT) 85 86 #define AC_PDE_PRESENT_MASK (1 << AC_PDE_PRESENT_BIT) 87 #define AC_PDE_WRITABLE_MASK (1 << AC_PDE_WRITABLE_BIT) 88 #define AC_PDE_USER_MASK (1 << AC_PDE_USER_BIT) 89 #define AC_PDE_ACCESSED_MASK (1 << AC_PDE_ACCESSED_BIT) 90 #define AC_PDE_DIRTY_MASK (1 << AC_PDE_DIRTY_BIT) 91 #define AC_PDE_PSE_MASK (1 << AC_PDE_PSE_BIT) 92 #define AC_PDE_NX_MASK (1 << AC_PDE_NX_BIT) 93 #define AC_PDE_BIT51_MASK (1 << AC_PDE_BIT51_BIT) 94 #define AC_PDE_BIT13_MASK (1 << AC_PDE_BIT13_BIT) 95 96 #define AC_PKU_AD_MASK (1 << AC_PKU_AD_BIT) 97 #define AC_PKU_WD_MASK (1 << AC_PKU_WD_BIT) 98 #define AC_PKU_PKEY_MASK (1 << AC_PKU_PKEY_BIT) 99 100 #define AC_ACCESS_USER_MASK (1 << AC_ACCESS_USER_BIT) 101 #define AC_ACCESS_WRITE_MASK (1 << AC_ACCESS_WRITE_BIT) 102 #define AC_ACCESS_FETCH_MASK (1 << AC_ACCESS_FETCH_BIT) 103 #define AC_ACCESS_TWICE_MASK (1 << AC_ACCESS_TWICE_BIT) 104 105 #define AC_CPU_EFER_NX_MASK (1 << AC_CPU_EFER_NX_BIT) 106 #define AC_CPU_CR0_WP_MASK (1 << AC_CPU_CR0_WP_BIT) 107 #define AC_CPU_CR4_SMEP_MASK (1 << AC_CPU_CR4_SMEP_BIT) 108 #define AC_CPU_CR4_PKE_MASK (1 << AC_CPU_CR4_PKE_BIT) 109 110 const char *ac_names[] = { 111 [AC_PTE_PRESENT_BIT] = "pte.p", 112 [AC_PTE_ACCESSED_BIT] = "pte.a", 113 [AC_PTE_WRITABLE_BIT] = "pte.rw", 114 [AC_PTE_USER_BIT] = "pte.user", 115 [AC_PTE_DIRTY_BIT] = "pte.d", 116 [AC_PTE_NX_BIT] = "pte.nx", 117 [AC_PTE_BIT51_BIT] = "pte.51", 118 [AC_PDE_PRESENT_BIT] = "pde.p", 119 [AC_PDE_ACCESSED_BIT] = "pde.a", 120 [AC_PDE_WRITABLE_BIT] = "pde.rw", 121 [AC_PDE_USER_BIT] = "pde.user", 122 [AC_PDE_DIRTY_BIT] = "pde.d", 123 [AC_PDE_PSE_BIT] = "pde.pse", 124 [AC_PDE_NX_BIT] = "pde.nx", 125 [AC_PDE_BIT51_BIT] = "pde.51", 126 [AC_PDE_BIT13_BIT] = "pde.13", 127 [AC_PKU_AD_BIT] = "pkru.ad", 128 [AC_PKU_WD_BIT] = "pkru.wd", 129 [AC_PKU_PKEY_BIT] = "pkey=1", 130 [AC_ACCESS_WRITE_BIT] = "write", 131 [AC_ACCESS_USER_BIT] = "user", 132 [AC_ACCESS_FETCH_BIT] = "fetch", 133 [AC_ACCESS_TWICE_BIT] = "twice", 134 [AC_CPU_EFER_NX_BIT] = "efer.nx", 135 [AC_CPU_CR0_WP_BIT] = "cr0.wp", 136 [AC_CPU_CR4_SMEP_BIT] = "cr4.smep", 137 [AC_CPU_CR4_PKE_BIT] = "cr4.pke", 138 }; 139 140 static inline void *va(pt_element_t phys) 141 { 142 return (void *)phys; 143 } 144 145 typedef struct { 146 pt_element_t pt_pool; 147 unsigned pt_pool_size; 148 unsigned pt_pool_current; 149 } ac_pool_t; 150 151 typedef struct { 152 unsigned flags; 153 void *virt; 154 pt_element_t phys; 155 pt_element_t *ptep; 156 pt_element_t expected_pte; 157 pt_element_t *pdep; 158 pt_element_t expected_pde; 159 pt_element_t ignore_pde; 160 int expected_fault; 161 unsigned expected_error; 162 } ac_test_t; 163 164 typedef struct { 165 unsigned short limit; 166 unsigned long linear_addr; 167 } __attribute__((packed)) descriptor_table_t; 168 169 170 static void ac_test_show(ac_test_t *at); 171 172 int write_cr4_checking(unsigned long val) 173 { 174 asm volatile(ASM_TRY("1f") 175 "mov %0,%%cr4\n\t" 176 "1:": : "r" (val)); 177 return exception_vector(); 178 } 179 180 void set_cr0_wp(int wp) 181 { 182 unsigned long cr0 = read_cr0(); 183 unsigned long old_cr0 = cr0; 184 185 cr0 &= ~CR0_WP_MASK; 186 if (wp) 187 cr0 |= CR0_WP_MASK; 188 if (old_cr0 != cr0) 189 write_cr0(cr0); 190 } 191 192 void set_cr4_smep(int smep) 193 { 194 unsigned long cr4 = read_cr4(); 195 unsigned long old_cr4 = cr4; 196 extern u64 ptl2[]; 197 198 cr4 &= ~CR4_SMEP_MASK; 199 if (smep) 200 cr4 |= CR4_SMEP_MASK; 201 if (old_cr4 == cr4) 202 return; 203 204 if (smep) 205 ptl2[2] &= ~PT_USER_MASK; 206 write_cr4(cr4); 207 if (!smep) 208 ptl2[2] |= PT_USER_MASK; 209 } 210 211 void set_cr4_pke(int pke) 212 { 213 unsigned long cr4 = read_cr4(); 214 unsigned long old_cr4 = cr4; 215 216 cr4 &= ~X86_CR4_PKE; 217 if (pke) 218 cr4 |= X86_CR4_PKE; 219 if (old_cr4 == cr4) 220 return; 221 222 /* Check that protection keys do not affect accesses when CR4.PKE=0. */ 223 if ((read_cr4() & X86_CR4_PKE) && !pke) { 224 write_pkru(0xfffffffc); 225 } 226 write_cr4(cr4); 227 } 228 229 void set_efer_nx(int nx) 230 { 231 unsigned long long efer = rdmsr(MSR_EFER); 232 unsigned long long old_efer = efer; 233 234 efer &= ~EFER_NX_MASK; 235 if (nx) 236 efer |= EFER_NX_MASK; 237 if (old_efer != efer) 238 wrmsr(MSR_EFER, efer); 239 } 240 241 static void ac_env_int(ac_pool_t *pool) 242 { 243 extern char page_fault, kernel_entry; 244 set_idt_entry(14, &page_fault, 0); 245 set_idt_entry(0x20, &kernel_entry, 3); 246 247 pool->pt_pool = 33 * 1024 * 1024; 248 pool->pt_pool_size = 120 * 1024 * 1024 - pool->pt_pool; 249 pool->pt_pool_current = 0; 250 } 251 252 void ac_test_init(ac_test_t *at, void *virt) 253 { 254 wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NX_MASK); 255 set_cr0_wp(1); 256 at->flags = 0; 257 at->virt = virt; 258 at->phys = 32 * 1024 * 1024; 259 } 260 261 int ac_test_bump_one(ac_test_t *at) 262 { 263 at->flags = ((at->flags | invalid_mask) + 1) & ~invalid_mask; 264 return at->flags < (1 << NR_AC_FLAGS); 265 } 266 267 #define F(x) ((flags & x##_MASK) != 0) 268 269 _Bool ac_test_legal(ac_test_t *at) 270 { 271 int flags = at->flags; 272 273 if (F(AC_ACCESS_FETCH) && F(AC_ACCESS_WRITE)) 274 return false; 275 276 /* 277 * Since we convert current page to kernel page when cr4.smep=1, 278 * we can't switch to user mode. 279 */ 280 if (F(AC_ACCESS_USER) && F(AC_CPU_CR4_SMEP)) 281 return false; 282 283 /* 284 * Only test protection key faults if CR4.PKE=1. 285 */ 286 if (!F(AC_CPU_CR4_PKE) && 287 (F(AC_PKU_AD) || F(AC_PKU_WD))) { 288 return false; 289 } 290 291 /* 292 * pde.bit13 checks handling of reserved bits in largepage PDEs. It is 293 * meaningless if there is a PTE. 294 */ 295 if (!F(AC_PDE_PSE) && F(AC_PDE_BIT13)) 296 return false; 297 298 return true; 299 } 300 301 int ac_test_bump(ac_test_t *at) 302 { 303 int ret; 304 305 ret = ac_test_bump_one(at); 306 while (ret && !ac_test_legal(at)) 307 ret = ac_test_bump_one(at); 308 return ret; 309 } 310 311 pt_element_t ac_test_alloc_pt(ac_pool_t *pool) 312 { 313 pt_element_t ret = pool->pt_pool + pool->pt_pool_current; 314 pool->pt_pool_current += PAGE_SIZE; 315 return ret; 316 } 317 318 _Bool ac_test_enough_room(ac_pool_t *pool) 319 { 320 return pool->pt_pool_current + 4 * PAGE_SIZE <= pool->pt_pool_size; 321 } 322 323 void ac_test_reset_pt_pool(ac_pool_t *pool) 324 { 325 pool->pt_pool_current = 0; 326 } 327 328 pt_element_t ac_test_permissions(ac_test_t *at, unsigned flags, bool writable, 329 bool user, bool executable) 330 { 331 bool kwritable = !F(AC_CPU_CR0_WP) && !F(AC_ACCESS_USER); 332 pt_element_t expected = 0; 333 334 if (F(AC_ACCESS_USER) && !user) 335 at->expected_fault = 1; 336 337 if (F(AC_ACCESS_WRITE) && !writable && !kwritable) 338 at->expected_fault = 1; 339 340 if (F(AC_ACCESS_FETCH) && !executable) 341 at->expected_fault = 1; 342 343 if (F(AC_ACCESS_FETCH) && user && F(AC_CPU_CR4_SMEP)) 344 at->expected_fault = 1; 345 346 if (user && !F(AC_ACCESS_FETCH) && F(AC_PKU_PKEY) && F(AC_CPU_CR4_PKE)) { 347 if (F(AC_PKU_AD)) { 348 at->expected_fault = 1; 349 at->expected_error |= PFERR_PK_MASK; 350 } else if (F(AC_ACCESS_WRITE) && F(AC_PKU_WD) && !kwritable) { 351 at->expected_fault = 1; 352 at->expected_error |= PFERR_PK_MASK; 353 } 354 } 355 356 if (!at->expected_fault) { 357 expected |= PT_ACCESSED_MASK; 358 if (F(AC_ACCESS_WRITE)) 359 expected |= PT_DIRTY_MASK; 360 } 361 362 return expected; 363 } 364 365 void ac_emulate_access(ac_test_t *at, unsigned flags) 366 { 367 bool pde_valid, pte_valid; 368 bool user, writable, executable; 369 370 if (F(AC_ACCESS_USER)) 371 at->expected_error |= PFERR_USER_MASK; 372 373 if (F(AC_ACCESS_WRITE)) 374 at->expected_error |= PFERR_WRITE_MASK; 375 376 if (F(AC_ACCESS_FETCH)) 377 at->expected_error |= PFERR_FETCH_MASK; 378 379 if (!F(AC_PDE_ACCESSED)) 380 at->ignore_pde = PT_ACCESSED_MASK; 381 382 pde_valid = F(AC_PDE_PRESENT) 383 && !F(AC_PDE_BIT51) && !F(AC_PDE_BIT13) 384 && !(F(AC_PDE_NX) && !F(AC_CPU_EFER_NX)); 385 386 if (!pde_valid) { 387 at->expected_fault = 1; 388 if (F(AC_PDE_PRESENT)) { 389 at->expected_error |= PFERR_RESERVED_MASK; 390 } else { 391 at->expected_error &= ~PFERR_PRESENT_MASK; 392 } 393 goto fault; 394 } 395 396 writable = F(AC_PDE_WRITABLE); 397 user = F(AC_PDE_USER); 398 executable = !F(AC_PDE_NX); 399 400 if (F(AC_PDE_PSE)) { 401 at->expected_pde |= ac_test_permissions(at, flags, writable, user, 402 executable); 403 goto no_pte; 404 } 405 406 at->expected_pde |= PT_ACCESSED_MASK; 407 408 pte_valid = F(AC_PTE_PRESENT) 409 && !F(AC_PTE_BIT51) 410 && !(F(AC_PTE_NX) && !F(AC_CPU_EFER_NX)); 411 412 if (!pte_valid) { 413 at->expected_fault = 1; 414 if (F(AC_PTE_PRESENT)) { 415 at->expected_error |= PFERR_RESERVED_MASK; 416 } else { 417 at->expected_error &= ~PFERR_PRESENT_MASK; 418 } 419 goto fault; 420 } 421 422 writable &= F(AC_PTE_WRITABLE); 423 user &= F(AC_PTE_USER); 424 executable &= !F(AC_PTE_NX); 425 426 at->expected_pte |= ac_test_permissions(at, flags, writable, user, 427 executable); 428 429 no_pte: 430 fault: 431 if (!at->expected_fault) 432 at->ignore_pde = 0; 433 if (!F(AC_CPU_EFER_NX) && !F(AC_CPU_CR4_SMEP)) 434 at->expected_error &= ~PFERR_FETCH_MASK; 435 } 436 437 void ac_set_expected_status(ac_test_t *at) 438 { 439 invlpg(at->virt); 440 441 if (at->ptep) 442 at->expected_pte = *at->ptep; 443 at->expected_pde = *at->pdep; 444 at->ignore_pde = 0; 445 at->expected_fault = 0; 446 at->expected_error = PFERR_PRESENT_MASK; 447 448 if (at->flags & AC_ACCESS_TWICE_MASK) { 449 ac_emulate_access(at, at->flags & ~AC_ACCESS_WRITE_MASK 450 & ~AC_ACCESS_FETCH_MASK & ~AC_ACCESS_USER_MASK); 451 at->expected_fault = 0; 452 at->expected_error = PFERR_PRESENT_MASK; 453 at->ignore_pde = 0; 454 } 455 456 ac_emulate_access(at, at->flags); 457 } 458 459 void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, u64 pd_page, 460 u64 pt_page) 461 462 { 463 unsigned long root = read_cr3(); 464 int flags = at->flags; 465 466 if (!ac_test_enough_room(pool)) 467 ac_test_reset_pt_pool(pool); 468 469 at->ptep = 0; 470 for (int i = 4; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) { 471 pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); 472 unsigned index = PT_INDEX((unsigned long)at->virt, i); 473 pt_element_t pte = 0; 474 switch (i) { 475 case 4: 476 case 3: 477 pte = pd_page ? pd_page : ac_test_alloc_pt(pool); 478 pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; 479 break; 480 case 2: 481 if (!F(AC_PDE_PSE)) { 482 pte = pt_page ? pt_page : ac_test_alloc_pt(pool); 483 /* The protection key is ignored on non-leaf entries. */ 484 if (F(AC_PKU_PKEY)) 485 pte |= 2ull << 59; 486 } else { 487 pte = at->phys & PT_PSE_BASE_ADDR_MASK; 488 pte |= PT_PAGE_SIZE_MASK; 489 if (F(AC_PKU_PKEY)) 490 pte |= 1ull << 59; 491 } 492 if (F(AC_PDE_PRESENT)) 493 pte |= PT_PRESENT_MASK; 494 if (F(AC_PDE_WRITABLE)) 495 pte |= PT_WRITABLE_MASK; 496 if (F(AC_PDE_USER)) 497 pte |= PT_USER_MASK; 498 if (F(AC_PDE_ACCESSED)) 499 pte |= PT_ACCESSED_MASK; 500 if (F(AC_PDE_DIRTY)) 501 pte |= PT_DIRTY_MASK; 502 if (F(AC_PDE_NX)) 503 pte |= PT64_NX_MASK; 504 if (F(AC_PDE_BIT51)) 505 pte |= 1ull << 51; 506 if (F(AC_PDE_BIT13)) 507 pte |= 1ull << 13; 508 at->pdep = &vroot[index]; 509 break; 510 case 1: 511 pte = at->phys & PT_BASE_ADDR_MASK; 512 if (F(AC_PKU_PKEY)) 513 pte |= 1ull << 59; 514 if (F(AC_PTE_PRESENT)) 515 pte |= PT_PRESENT_MASK; 516 if (F(AC_PTE_WRITABLE)) 517 pte |= PT_WRITABLE_MASK; 518 if (F(AC_PTE_USER)) 519 pte |= PT_USER_MASK; 520 if (F(AC_PTE_ACCESSED)) 521 pte |= PT_ACCESSED_MASK; 522 if (F(AC_PTE_DIRTY)) 523 pte |= PT_DIRTY_MASK; 524 if (F(AC_PTE_NX)) 525 pte |= PT64_NX_MASK; 526 if (F(AC_PTE_BIT51)) 527 pte |= 1ull << 51; 528 at->ptep = &vroot[index]; 529 break; 530 } 531 vroot[index] = pte; 532 root = vroot[index]; 533 } 534 ac_set_expected_status(at); 535 } 536 537 static void ac_test_setup_pte(ac_test_t *at, ac_pool_t *pool) 538 { 539 __ac_setup_specific_pages(at, pool, 0, 0); 540 } 541 542 static void ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, 543 u64 pd_page, u64 pt_page) 544 { 545 return __ac_setup_specific_pages(at, pool, pd_page, pt_page); 546 } 547 548 static void dump_mapping(ac_test_t *at) 549 { 550 unsigned long root = read_cr3(); 551 int flags = at->flags; 552 int i; 553 554 printf("Dump mapping: address: %p\n", at->virt); 555 for (i = 4; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) { 556 pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); 557 unsigned index = PT_INDEX((unsigned long)at->virt, i); 558 pt_element_t pte = vroot[index]; 559 560 printf("------L%d: %lx\n", i, pte); 561 root = vroot[index]; 562 } 563 } 564 565 static void ac_test_check(ac_test_t *at, _Bool *success_ret, _Bool cond, 566 const char *fmt, ...) 567 { 568 va_list ap; 569 char buf[500]; 570 571 if (!*success_ret) { 572 return; 573 } 574 575 if (!cond) { 576 return; 577 } 578 579 *success_ret = false; 580 581 if (!verbose) { 582 puts("\n"); 583 ac_test_show(at); 584 } 585 586 va_start(ap, fmt); 587 vsnprintf(buf, sizeof(buf), fmt, ap); 588 va_end(ap); 589 printf("FAIL: %s\n", buf); 590 dump_mapping(at); 591 } 592 593 static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore) 594 { 595 pte1 &= ~ignore; 596 pte2 &= ~ignore; 597 return pte1 == pte2; 598 } 599 600 int ac_test_do_access(ac_test_t *at) 601 { 602 static unsigned unique = 42; 603 int fault = 0; 604 unsigned e; 605 static unsigned char user_stack[4096]; 606 unsigned long rsp; 607 _Bool success = true; 608 int flags = at->flags; 609 610 ++unique; 611 if (!(unique & 65535)) { 612 puts("."); 613 } 614 615 *((unsigned char *)at->phys) = 0xc3; /* ret */ 616 617 unsigned r = unique; 618 set_cr0_wp(F(AC_CPU_CR0_WP)); 619 set_efer_nx(F(AC_CPU_EFER_NX)); 620 set_cr4_pke(F(AC_CPU_CR4_PKE)); 621 if (F(AC_CPU_CR4_PKE)) { 622 /* WD2=AD2=1, WD1=F(AC_PKU_WD), AD1=F(AC_PKU_AD) */ 623 write_pkru(0x30 | (F(AC_PKU_WD) ? 8 : 0) | 624 (F(AC_PKU_AD) ? 4 : 0)); 625 } 626 627 set_cr4_smep(F(AC_CPU_CR4_SMEP)); 628 629 if (F(AC_ACCESS_TWICE)) { 630 asm volatile ( 631 "mov $fixed2, %%rsi \n\t" 632 "mov (%[addr]), %[reg] \n\t" 633 "fixed2:" 634 : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e) 635 : [addr]"r"(at->virt) 636 : "rsi" 637 ); 638 fault = 0; 639 } 640 641 asm volatile ("mov $fixed1, %%rsi \n\t" 642 "mov %%rsp, %%rdx \n\t" 643 "cmp $0, %[user] \n\t" 644 "jz do_access \n\t" 645 "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax \n\t" 646 "pushq %[user_ds] \n\t" 647 "pushq %[user_stack_top] \n\t" 648 "pushfq \n\t" 649 "pushq %[user_cs] \n\t" 650 "pushq $do_access \n\t" 651 "iretq \n" 652 "do_access: \n\t" 653 "cmp $0, %[fetch] \n\t" 654 "jnz 2f \n\t" 655 "cmp $0, %[write] \n\t" 656 "jnz 1f \n\t" 657 "mov (%[addr]), %[reg] \n\t" 658 "jmp done \n\t" 659 "1: mov %[reg], (%[addr]) \n\t" 660 "jmp done \n\t" 661 "2: call *%[addr] \n\t" 662 "done: \n" 663 "fixed1: \n" 664 "int %[kernel_entry_vector] \n\t" 665 "back_to_kernel:" 666 : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp) 667 : [addr]"r"(at->virt), 668 [write]"r"(F(AC_ACCESS_WRITE)), 669 [user]"r"(F(AC_ACCESS_USER)), 670 [fetch]"r"(F(AC_ACCESS_FETCH)), 671 [user_ds]"i"(USER_DS), 672 [user_cs]"i"(USER_CS), 673 [user_stack_top]"r"(user_stack + sizeof user_stack), 674 [kernel_entry_vector]"i"(0x20) 675 : "rsi"); 676 677 asm volatile (".section .text.pf \n\t" 678 "page_fault: \n\t" 679 "pop %rbx \n\t" 680 "mov %rsi, (%rsp) \n\t" 681 "movl $1, %eax \n\t" 682 "iretq \n\t" 683 ".section .text"); 684 685 asm volatile (".section .text.entry \n\t" 686 "kernel_entry: \n\t" 687 "mov %rdx, %rsp \n\t" 688 "jmp back_to_kernel \n\t" 689 ".section .text"); 690 691 ac_test_check(at, &success, fault && !at->expected_fault, 692 "unexpected fault"); 693 ac_test_check(at, &success, !fault && at->expected_fault, 694 "unexpected access"); 695 ac_test_check(at, &success, fault && e != at->expected_error, 696 "error code %x expected %x", e, at->expected_error); 697 ac_test_check(at, &success, at->ptep && *at->ptep != at->expected_pte, 698 "pte %x expected %x", *at->ptep, at->expected_pte); 699 ac_test_check(at, &success, 700 !pt_match(*at->pdep, at->expected_pde, at->ignore_pde), 701 "pde %x expected %x", *at->pdep, at->expected_pde); 702 703 if (success && verbose) { 704 if (at->expected_fault) { 705 printf("PASS (%x)\n", at->expected_error); 706 } else { 707 printf("PASS\n"); 708 } 709 } 710 return success; 711 } 712 713 static void ac_test_show(ac_test_t *at) 714 { 715 char line[5000]; 716 717 *line = 0; 718 strcat(line, "test"); 719 for (int i = 0; i < NR_AC_FLAGS; ++i) 720 if (at->flags & (1 << i)) { 721 strcat(line, " "); 722 strcat(line, ac_names[i]); 723 } 724 strcat(line, ": "); 725 printf("%s", line); 726 } 727 728 /* 729 * This test case is used to triger the bug which is fixed by 730 * commit e09e90a5 in the kvm tree 731 */ 732 static int corrupt_hugepage_triger(ac_pool_t *pool) 733 { 734 ac_test_t at1, at2; 735 736 ac_test_init(&at1, (void *)(0x123400000000)); 737 ac_test_init(&at2, (void *)(0x666600000000)); 738 739 at2.flags = AC_CPU_CR0_WP_MASK | AC_PDE_PSE_MASK | AC_PDE_PRESENT_MASK; 740 ac_test_setup_pte(&at2, pool); 741 if (!ac_test_do_access(&at2)) 742 goto err; 743 744 at1.flags = at2.flags | AC_PDE_WRITABLE_MASK; 745 ac_test_setup_pte(&at1, pool); 746 if (!ac_test_do_access(&at1)) 747 goto err; 748 749 at1.flags |= AC_ACCESS_WRITE_MASK; 750 ac_set_expected_status(&at1); 751 if (!ac_test_do_access(&at1)) 752 goto err; 753 754 at2.flags |= AC_ACCESS_WRITE_MASK; 755 ac_set_expected_status(&at2); 756 if (!ac_test_do_access(&at2)) 757 goto err; 758 759 return 1; 760 761 err: 762 printf("corrupt_hugepage_triger test fail\n"); 763 return 0; 764 } 765 766 /* 767 * This test case is used to triger the bug which is fixed by 768 * commit 3ddf6c06e13e in the kvm tree 769 */ 770 static int check_pfec_on_prefetch_pte(ac_pool_t *pool) 771 { 772 ac_test_t at1, at2; 773 774 ac_test_init(&at1, (void *)(0x123406001000)); 775 ac_test_init(&at2, (void *)(0x123406003000)); 776 777 at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK; 778 ac_setup_specific_pages(&at1, pool, 30 * 1024 * 1024, 30 * 1024 * 1024); 779 780 at2.flags = at1.flags | AC_PTE_NX_MASK; 781 ac_setup_specific_pages(&at2, pool, 30 * 1024 * 1024, 30 * 1024 * 1024); 782 783 if (!ac_test_do_access(&at1)) { 784 printf("%s: prepare fail\n", __FUNCTION__); 785 goto err; 786 } 787 788 if (!ac_test_do_access(&at2)) { 789 printf("%s: check PFEC on prefetch pte path fail\n", 790 __FUNCTION__); 791 goto err; 792 } 793 794 return 1; 795 796 err: 797 return 0; 798 } 799 800 /* 801 * If the write-fault access is from supervisor and CR0.WP is not set on the 802 * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte 803 * and clears U bit. This is the chance that kvm can change pte access from 804 * readonly to writable. 805 * 806 * Unfortunately, the pte access is the access of 'direct' shadow page table, 807 * means direct sp.role.access = pte_access, then we will create a writable 808 * spte entry on the readonly shadow page table. It will cause Dirty bit is 809 * not tracked when two guest ptes point to the same large page. Note, it 810 * does not have other impact except Dirty bit since cr0.wp is encoded into 811 * sp.role. 812 * 813 * Note: to trigger this bug, hugepage should be disabled on host. 814 */ 815 static int check_large_pte_dirty_for_nowp(ac_pool_t *pool) 816 { 817 ac_test_t at1, at2; 818 819 ac_test_init(&at1, (void *)(0x123403000000)); 820 ac_test_init(&at2, (void *)(0x666606000000)); 821 822 at2.flags = AC_PDE_PRESENT_MASK | AC_PDE_PSE_MASK; 823 ac_test_setup_pte(&at2, pool); 824 if (!ac_test_do_access(&at2)) { 825 printf("%s: read on the first mapping fail.\n", __FUNCTION__); 826 goto err; 827 } 828 829 at1.flags = at2.flags | AC_ACCESS_WRITE_MASK; 830 ac_test_setup_pte(&at1, pool); 831 if (!ac_test_do_access(&at1)) { 832 printf("%s: write on the second mapping fail.\n", __FUNCTION__); 833 goto err; 834 } 835 836 at2.flags |= AC_ACCESS_WRITE_MASK; 837 ac_set_expected_status(&at2); 838 if (!ac_test_do_access(&at2)) { 839 printf("%s: write on the first mapping fail.\n", __FUNCTION__); 840 goto err; 841 } 842 843 return 1; 844 845 err: 846 return 0; 847 } 848 849 static int check_smep_andnot_wp(ac_pool_t *pool) 850 { 851 ac_test_t at1; 852 int err_prepare_andnot_wp, err_smep_andnot_wp; 853 854 if (!(cpuid_7_ebx & (1 << 7))) { 855 return 1; 856 } 857 858 ac_test_init(&at1, (void *)(0x123406001000)); 859 860 at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK | 861 AC_PDE_USER_MASK | AC_PTE_USER_MASK | 862 AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK | 863 AC_CPU_CR4_SMEP_MASK | 864 AC_CPU_CR0_WP_MASK | 865 AC_ACCESS_WRITE_MASK; 866 ac_test_setup_pte(&at1, pool); 867 868 /* 869 * Here we write the ro user page when 870 * cr0.wp=0, then we execute it and SMEP 871 * fault should happen. 872 */ 873 err_prepare_andnot_wp = ac_test_do_access(&at1); 874 if (!err_prepare_andnot_wp) { 875 printf("%s: SMEP prepare fail\n", __FUNCTION__); 876 goto clean_up; 877 } 878 879 at1.flags &= ~AC_ACCESS_WRITE_MASK; 880 at1.flags |= AC_ACCESS_FETCH_MASK; 881 ac_set_expected_status(&at1); 882 err_smep_andnot_wp = ac_test_do_access(&at1); 883 884 clean_up: 885 set_cr4_smep(0); 886 887 if (!err_prepare_andnot_wp) 888 goto err; 889 if (!err_smep_andnot_wp) { 890 printf("%s: check SMEP without wp fail\n", __FUNCTION__); 891 goto err; 892 } 893 return 1; 894 895 err: 896 return 0; 897 } 898 899 int ac_test_exec(ac_test_t *at, ac_pool_t *pool) 900 { 901 int r; 902 903 if (verbose) { 904 ac_test_show(at); 905 } 906 ac_test_setup_pte(at, pool); 907 r = ac_test_do_access(at); 908 return r; 909 } 910 911 typedef int (*ac_test_fn)(ac_pool_t *pool); 912 const ac_test_fn ac_test_cases[] = 913 { 914 corrupt_hugepage_triger, 915 check_pfec_on_prefetch_pte, 916 check_large_pte_dirty_for_nowp, 917 check_smep_andnot_wp 918 }; 919 920 int ac_test_run(void) 921 { 922 ac_test_t at; 923 ac_pool_t pool; 924 int i, tests, successes; 925 926 printf("run\n"); 927 tests = successes = 0; 928 929 if (cpuid_7_ecx & (1 << 3)) { 930 set_cr4_pke(1); 931 set_cr4_pke(0); 932 /* Now PKRU = 0xFFFFFFFF. */ 933 } else { 934 unsigned long cr4 = read_cr4(); 935 tests++; 936 if (write_cr4_checking(cr4 | X86_CR4_PKE) == GP_VECTOR) { 937 successes++; 938 invalid_mask |= AC_PKU_AD_MASK; 939 invalid_mask |= AC_PKU_WD_MASK; 940 invalid_mask |= AC_PKU_PKEY_MASK; 941 invalid_mask |= AC_CPU_CR4_PKE_MASK; 942 printf("CR4.PKE not available, disabling PKE tests\n"); 943 } else { 944 printf("Set PKE in CR4 - expect #GP: FAIL!\n"); 945 set_cr4_pke(0); 946 } 947 } 948 949 if (!(cpuid_7_ebx & (1 << 7))) { 950 unsigned long cr4 = read_cr4(); 951 tests++; 952 if (write_cr4_checking(cr4 | CR4_SMEP_MASK) == GP_VECTOR) { 953 successes++; 954 invalid_mask |= AC_CPU_CR4_SMEP_MASK; 955 printf("CR4.SMEP not available, disabling SMEP tests\n"); 956 } else { 957 printf("Set SMEP in CR4 - expect #GP: FAIL!\n"); 958 set_cr4_smep(0); 959 } 960 } 961 962 ac_env_int(&pool); 963 ac_test_init(&at, (void *)(0x123400000000 + 16 * smp_id())); 964 do { 965 ++tests; 966 successes += ac_test_exec(&at, &pool); 967 } while (ac_test_bump(&at)); 968 969 for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) { 970 ++tests; 971 successes += ac_test_cases[i](&pool); 972 } 973 974 printf("\n%d tests, %d failures\n", tests, tests - successes); 975 976 return successes == tests; 977 } 978 979 int main() 980 { 981 int r; 982 983 setup_idt(); 984 985 cpuid_7_ebx = cpuid(7).b; 986 cpuid_7_ecx = cpuid(7).c; 987 988 printf("starting test\n\n"); 989 r = ac_test_run(); 990 return r ? 0 : 1; 991 } 992