1 2 #include "libcflat.h" 3 #include "desc.h" 4 #include "processor.h" 5 #include "asm/page.h" 6 7 #define smp_id() 0 8 9 #define true 1 10 #define false 0 11 12 static _Bool verbose = false; 13 14 typedef unsigned long pt_element_t; 15 static int cpuid_7_ebx; 16 static int cpuid_7_ecx; 17 static int invalid_mask; 18 19 #define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 40) - 1) & PAGE_MASK)) 20 #define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(1ull << 21)) 21 22 #define PT_PRESENT_MASK ((pt_element_t)1 << 0) 23 #define PT_WRITABLE_MASK ((pt_element_t)1 << 1) 24 #define PT_USER_MASK ((pt_element_t)1 << 2) 25 #define PT_ACCESSED_MASK ((pt_element_t)1 << 5) 26 #define PT_DIRTY_MASK ((pt_element_t)1 << 6) 27 #define PT_PSE_MASK ((pt_element_t)1 << 7) 28 #define PT_NX_MASK ((pt_element_t)1 << 63) 29 30 #define CR0_WP_MASK (1UL << 16) 31 #define CR4_SMEP_MASK (1UL << 20) 32 33 #define PFERR_PRESENT_MASK (1U << 0) 34 #define PFERR_WRITE_MASK (1U << 1) 35 #define PFERR_USER_MASK (1U << 2) 36 #define PFERR_RESERVED_MASK (1U << 3) 37 #define PFERR_FETCH_MASK (1U << 4) 38 #define PFERR_PK_MASK (1U << 5) 39 40 #define MSR_EFER 0xc0000080 41 #define EFER_NX_MASK (1ull << 11) 42 43 #define PT_INDEX(address, level) \ 44 ((address) >> (12 + ((level)-1) * 9)) & 511 45 46 /* 47 * page table access check tests 48 */ 49 50 enum { 51 AC_PTE_PRESENT_BIT, 52 AC_PTE_WRITABLE_BIT, 53 AC_PTE_USER_BIT, 54 AC_PTE_ACCESSED_BIT, 55 AC_PTE_DIRTY_BIT, 56 AC_PTE_NX_BIT, 57 AC_PTE_BIT51_BIT, 58 59 AC_PDE_PRESENT_BIT, 60 AC_PDE_WRITABLE_BIT, 61 AC_PDE_USER_BIT, 62 AC_PDE_ACCESSED_BIT, 63 AC_PDE_DIRTY_BIT, 64 AC_PDE_PSE_BIT, 65 AC_PDE_NX_BIT, 66 AC_PDE_BIT51_BIT, 67 AC_PDE_BIT13_BIT, 68 69 AC_PKU_AD_BIT, 70 AC_PKU_WD_BIT, 71 AC_PKU_PKEY_BIT, 72 73 AC_ACCESS_USER_BIT, 74 AC_ACCESS_WRITE_BIT, 75 AC_ACCESS_FETCH_BIT, 76 AC_ACCESS_TWICE_BIT, 77 78 AC_CPU_EFER_NX_BIT, 79 AC_CPU_CR0_WP_BIT, 80 AC_CPU_CR4_SMEP_BIT, 81 AC_CPU_CR4_PKE_BIT, 82 83 NR_AC_FLAGS 84 }; 85 86 #define AC_PTE_PRESENT_MASK (1 << AC_PTE_PRESENT_BIT) 87 #define AC_PTE_WRITABLE_MASK (1 << AC_PTE_WRITABLE_BIT) 88 #define AC_PTE_USER_MASK (1 << AC_PTE_USER_BIT) 89 #define AC_PTE_ACCESSED_MASK (1 << AC_PTE_ACCESSED_BIT) 90 #define AC_PTE_DIRTY_MASK (1 << AC_PTE_DIRTY_BIT) 91 #define AC_PTE_NX_MASK (1 << AC_PTE_NX_BIT) 92 #define AC_PTE_BIT51_MASK (1 << AC_PTE_BIT51_BIT) 93 94 #define AC_PDE_PRESENT_MASK (1 << AC_PDE_PRESENT_BIT) 95 #define AC_PDE_WRITABLE_MASK (1 << AC_PDE_WRITABLE_BIT) 96 #define AC_PDE_USER_MASK (1 << AC_PDE_USER_BIT) 97 #define AC_PDE_ACCESSED_MASK (1 << AC_PDE_ACCESSED_BIT) 98 #define AC_PDE_DIRTY_MASK (1 << AC_PDE_DIRTY_BIT) 99 #define AC_PDE_PSE_MASK (1 << AC_PDE_PSE_BIT) 100 #define AC_PDE_NX_MASK (1 << AC_PDE_NX_BIT) 101 #define AC_PDE_BIT51_MASK (1 << AC_PDE_BIT51_BIT) 102 #define AC_PDE_BIT13_MASK (1 << AC_PDE_BIT13_BIT) 103 104 #define AC_PKU_AD_MASK (1 << AC_PKU_AD_BIT) 105 #define AC_PKU_WD_MASK (1 << AC_PKU_WD_BIT) 106 #define AC_PKU_PKEY_MASK (1 << AC_PKU_PKEY_BIT) 107 108 #define AC_ACCESS_USER_MASK (1 << AC_ACCESS_USER_BIT) 109 #define AC_ACCESS_WRITE_MASK (1 << AC_ACCESS_WRITE_BIT) 110 #define AC_ACCESS_FETCH_MASK (1 << AC_ACCESS_FETCH_BIT) 111 #define AC_ACCESS_TWICE_MASK (1 << AC_ACCESS_TWICE_BIT) 112 113 #define AC_CPU_EFER_NX_MASK (1 << AC_CPU_EFER_NX_BIT) 114 #define AC_CPU_CR0_WP_MASK (1 << AC_CPU_CR0_WP_BIT) 115 #define AC_CPU_CR4_SMEP_MASK (1 << AC_CPU_CR4_SMEP_BIT) 116 #define AC_CPU_CR4_PKE_MASK (1 << AC_CPU_CR4_PKE_BIT) 117 118 const char *ac_names[] = { 119 [AC_PTE_PRESENT_BIT] = "pte.p", 120 [AC_PTE_ACCESSED_BIT] = "pte.a", 121 [AC_PTE_WRITABLE_BIT] = "pte.rw", 122 [AC_PTE_USER_BIT] = "pte.user", 123 [AC_PTE_DIRTY_BIT] = "pte.d", 124 [AC_PTE_NX_BIT] = "pte.nx", 125 [AC_PTE_BIT51_BIT] = "pte.51", 126 [AC_PDE_PRESENT_BIT] = "pde.p", 127 [AC_PDE_ACCESSED_BIT] = "pde.a", 128 [AC_PDE_WRITABLE_BIT] = "pde.rw", 129 [AC_PDE_USER_BIT] = "pde.user", 130 [AC_PDE_DIRTY_BIT] = "pde.d", 131 [AC_PDE_PSE_BIT] = "pde.pse", 132 [AC_PDE_NX_BIT] = "pde.nx", 133 [AC_PDE_BIT51_BIT] = "pde.51", 134 [AC_PDE_BIT13_BIT] = "pde.13", 135 [AC_PKU_AD_BIT] = "pkru.ad", 136 [AC_PKU_WD_BIT] = "pkru.wd", 137 [AC_PKU_PKEY_BIT] = "pkey=1", 138 [AC_ACCESS_WRITE_BIT] = "write", 139 [AC_ACCESS_USER_BIT] = "user", 140 [AC_ACCESS_FETCH_BIT] = "fetch", 141 [AC_ACCESS_TWICE_BIT] = "twice", 142 [AC_CPU_EFER_NX_BIT] = "efer.nx", 143 [AC_CPU_CR0_WP_BIT] = "cr0.wp", 144 [AC_CPU_CR4_SMEP_BIT] = "cr4.smep", 145 [AC_CPU_CR4_PKE_BIT] = "cr4.pke", 146 }; 147 148 static inline void *va(pt_element_t phys) 149 { 150 return (void *)phys; 151 } 152 153 typedef struct { 154 pt_element_t pt_pool; 155 unsigned pt_pool_size; 156 unsigned pt_pool_current; 157 } ac_pool_t; 158 159 typedef struct { 160 unsigned flags; 161 void *virt; 162 pt_element_t phys; 163 pt_element_t *ptep; 164 pt_element_t expected_pte; 165 pt_element_t *pdep; 166 pt_element_t expected_pde; 167 pt_element_t ignore_pde; 168 int expected_fault; 169 unsigned expected_error; 170 } ac_test_t; 171 172 typedef struct { 173 unsigned short limit; 174 unsigned long linear_addr; 175 } __attribute__((packed)) descriptor_table_t; 176 177 178 static void ac_test_show(ac_test_t *at); 179 180 int write_cr4_checking(unsigned long val) 181 { 182 asm volatile(ASM_TRY("1f") 183 "mov %0,%%cr4\n\t" 184 "1:": : "r" (val)); 185 return exception_vector(); 186 } 187 188 void set_cr0_wp(int wp) 189 { 190 unsigned long cr0 = read_cr0(); 191 unsigned long old_cr0 = cr0; 192 193 cr0 &= ~CR0_WP_MASK; 194 if (wp) 195 cr0 |= CR0_WP_MASK; 196 if (old_cr0 != cr0) 197 write_cr0(cr0); 198 } 199 200 void set_cr4_smep(int smep) 201 { 202 unsigned long cr4 = read_cr4(); 203 unsigned long old_cr4 = cr4; 204 extern u64 ptl2[]; 205 206 cr4 &= ~CR4_SMEP_MASK; 207 if (smep) 208 cr4 |= CR4_SMEP_MASK; 209 if (old_cr4 == cr4) 210 return; 211 212 if (smep) 213 ptl2[2] &= ~PT_USER_MASK; 214 write_cr4(cr4); 215 if (!smep) 216 ptl2[2] |= PT_USER_MASK; 217 } 218 219 void set_cr4_pke(int pke) 220 { 221 unsigned long cr4 = read_cr4(); 222 unsigned long old_cr4 = cr4; 223 224 cr4 &= ~X86_CR4_PKE; 225 if (pke) 226 cr4 |= X86_CR4_PKE; 227 if (old_cr4 == cr4) 228 return; 229 230 /* Check that protection keys do not affect accesses when CR4.PKE=0. */ 231 if ((read_cr4() & X86_CR4_PKE) && !pke) { 232 write_pkru(0xfffffffc); 233 } 234 write_cr4(cr4); 235 } 236 237 void set_efer_nx(int nx) 238 { 239 unsigned long long efer = rdmsr(MSR_EFER); 240 unsigned long long old_efer = efer; 241 242 efer &= ~EFER_NX_MASK; 243 if (nx) 244 efer |= EFER_NX_MASK; 245 if (old_efer != efer) 246 wrmsr(MSR_EFER, efer); 247 } 248 249 static void ac_env_int(ac_pool_t *pool) 250 { 251 extern char page_fault, kernel_entry; 252 set_idt_entry(14, &page_fault, 0); 253 set_idt_entry(0x20, &kernel_entry, 3); 254 255 pool->pt_pool = 33 * 1024 * 1024; 256 pool->pt_pool_size = 120 * 1024 * 1024 - pool->pt_pool; 257 pool->pt_pool_current = 0; 258 } 259 260 void ac_test_init(ac_test_t *at, void *virt) 261 { 262 wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NX_MASK); 263 set_cr0_wp(1); 264 at->flags = 0; 265 at->virt = virt; 266 at->phys = 32 * 1024 * 1024; 267 } 268 269 int ac_test_bump_one(ac_test_t *at) 270 { 271 at->flags = ((at->flags | invalid_mask) + 1) & ~invalid_mask; 272 return at->flags < (1 << NR_AC_FLAGS); 273 } 274 275 #define F(x) ((flags & x##_MASK) != 0) 276 277 _Bool ac_test_legal(ac_test_t *at) 278 { 279 int flags = at->flags; 280 281 if (F(AC_ACCESS_FETCH) && F(AC_ACCESS_WRITE)) 282 return false; 283 284 /* 285 * Since we convert current page to kernel page when cr4.smep=1, 286 * we can't switch to user mode. 287 */ 288 if (F(AC_ACCESS_USER) && F(AC_CPU_CR4_SMEP)) 289 return false; 290 291 /* 292 * Only test protection key faults if CR4.PKE=1. 293 */ 294 if (!F(AC_CPU_CR4_PKE) && 295 (F(AC_PKU_AD) || F(AC_PKU_WD))) { 296 return false; 297 } 298 299 /* 300 * pde.bit13 checks handling of reserved bits in largepage PDEs. It is 301 * meaningless if there is a PTE. 302 */ 303 if (!F(AC_PDE_PSE) && F(AC_PDE_BIT13)) 304 return false; 305 306 return true; 307 } 308 309 int ac_test_bump(ac_test_t *at) 310 { 311 int ret; 312 313 ret = ac_test_bump_one(at); 314 while (ret && !ac_test_legal(at)) 315 ret = ac_test_bump_one(at); 316 return ret; 317 } 318 319 pt_element_t ac_test_alloc_pt(ac_pool_t *pool) 320 { 321 pt_element_t ret = pool->pt_pool + pool->pt_pool_current; 322 pool->pt_pool_current += PAGE_SIZE; 323 return ret; 324 } 325 326 _Bool ac_test_enough_room(ac_pool_t *pool) 327 { 328 return pool->pt_pool_current + 4 * PAGE_SIZE <= pool->pt_pool_size; 329 } 330 331 void ac_test_reset_pt_pool(ac_pool_t *pool) 332 { 333 pool->pt_pool_current = 0; 334 } 335 336 pt_element_t ac_test_permissions(ac_test_t *at, unsigned flags, bool writable, 337 bool user, bool executable) 338 { 339 bool kwritable = !F(AC_CPU_CR0_WP) && !F(AC_ACCESS_USER); 340 pt_element_t expected = 0; 341 342 if (F(AC_ACCESS_USER) && !user) 343 at->expected_fault = 1; 344 345 if (F(AC_ACCESS_WRITE) && !writable && !kwritable) 346 at->expected_fault = 1; 347 348 if (F(AC_ACCESS_FETCH) && !executable) 349 at->expected_fault = 1; 350 351 if (F(AC_ACCESS_FETCH) && user && F(AC_CPU_CR4_SMEP)) 352 at->expected_fault = 1; 353 354 if (user && !F(AC_ACCESS_FETCH) && F(AC_PKU_PKEY) && F(AC_CPU_CR4_PKE)) { 355 if (F(AC_PKU_AD)) { 356 at->expected_fault = 1; 357 at->expected_error |= PFERR_PK_MASK; 358 } else if (F(AC_ACCESS_WRITE) && F(AC_PKU_WD) && !kwritable) { 359 at->expected_fault = 1; 360 at->expected_error |= PFERR_PK_MASK; 361 } 362 } 363 364 if (!at->expected_fault) { 365 expected |= PT_ACCESSED_MASK; 366 if (F(AC_ACCESS_WRITE)) 367 expected |= PT_DIRTY_MASK; 368 } 369 370 return expected; 371 } 372 373 void ac_emulate_access(ac_test_t *at, unsigned flags) 374 { 375 bool pde_valid, pte_valid; 376 bool user, writable, executable; 377 378 if (F(AC_ACCESS_USER)) 379 at->expected_error |= PFERR_USER_MASK; 380 381 if (F(AC_ACCESS_WRITE)) 382 at->expected_error |= PFERR_WRITE_MASK; 383 384 if (F(AC_ACCESS_FETCH)) 385 at->expected_error |= PFERR_FETCH_MASK; 386 387 if (!F(AC_PDE_ACCESSED)) 388 at->ignore_pde = PT_ACCESSED_MASK; 389 390 pde_valid = F(AC_PDE_PRESENT) 391 && !F(AC_PDE_BIT51) && !F(AC_PDE_BIT13) 392 && !(F(AC_PDE_NX) && !F(AC_CPU_EFER_NX)); 393 394 if (!pde_valid) { 395 at->expected_fault = 1; 396 if (F(AC_PDE_PRESENT)) { 397 at->expected_error |= PFERR_RESERVED_MASK; 398 } else { 399 at->expected_error &= ~PFERR_PRESENT_MASK; 400 } 401 goto fault; 402 } 403 404 writable = F(AC_PDE_WRITABLE); 405 user = F(AC_PDE_USER); 406 executable = !F(AC_PDE_NX); 407 408 if (F(AC_PDE_PSE)) { 409 at->expected_pde |= ac_test_permissions(at, flags, writable, user, 410 executable); 411 goto no_pte; 412 } 413 414 at->expected_pde |= PT_ACCESSED_MASK; 415 416 pte_valid = F(AC_PTE_PRESENT) 417 && !F(AC_PTE_BIT51) 418 && !(F(AC_PTE_NX) && !F(AC_CPU_EFER_NX)); 419 420 if (!pte_valid) { 421 at->expected_fault = 1; 422 if (F(AC_PTE_PRESENT)) { 423 at->expected_error |= PFERR_RESERVED_MASK; 424 } else { 425 at->expected_error &= ~PFERR_PRESENT_MASK; 426 } 427 goto fault; 428 } 429 430 writable &= F(AC_PTE_WRITABLE); 431 user &= F(AC_PTE_USER); 432 executable &= !F(AC_PTE_NX); 433 434 at->expected_pte |= ac_test_permissions(at, flags, writable, user, 435 executable); 436 437 no_pte: 438 fault: 439 if (!at->expected_fault) 440 at->ignore_pde = 0; 441 if (!F(AC_CPU_EFER_NX) && !F(AC_CPU_CR4_SMEP)) 442 at->expected_error &= ~PFERR_FETCH_MASK; 443 } 444 445 void ac_set_expected_status(ac_test_t *at) 446 { 447 invlpg(at->virt); 448 449 if (at->ptep) 450 at->expected_pte = *at->ptep; 451 at->expected_pde = *at->pdep; 452 at->ignore_pde = 0; 453 at->expected_fault = 0; 454 at->expected_error = PFERR_PRESENT_MASK; 455 456 if (at->flags & AC_ACCESS_TWICE_MASK) { 457 ac_emulate_access(at, at->flags & ~AC_ACCESS_WRITE_MASK 458 & ~AC_ACCESS_FETCH_MASK & ~AC_ACCESS_USER_MASK); 459 at->expected_fault = 0; 460 at->expected_error = PFERR_PRESENT_MASK; 461 at->ignore_pde = 0; 462 } 463 464 ac_emulate_access(at, at->flags); 465 } 466 467 void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, u64 pd_page, 468 u64 pt_page) 469 470 { 471 unsigned long root = read_cr3(); 472 int flags = at->flags; 473 474 if (!ac_test_enough_room(pool)) 475 ac_test_reset_pt_pool(pool); 476 477 at->ptep = 0; 478 for (int i = 4; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) { 479 pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); 480 unsigned index = PT_INDEX((unsigned long)at->virt, i); 481 pt_element_t pte = 0; 482 switch (i) { 483 case 4: 484 case 3: 485 pte = pd_page ? pd_page : ac_test_alloc_pt(pool); 486 pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; 487 break; 488 case 2: 489 if (!F(AC_PDE_PSE)) { 490 pte = pt_page ? pt_page : ac_test_alloc_pt(pool); 491 /* The protection key is ignored on non-leaf entries. */ 492 if (F(AC_PKU_PKEY)) 493 pte |= 2ull << 59; 494 } else { 495 pte = at->phys & PT_PSE_BASE_ADDR_MASK; 496 pte |= PT_PSE_MASK; 497 if (F(AC_PKU_PKEY)) 498 pte |= 1ull << 59; 499 } 500 if (F(AC_PDE_PRESENT)) 501 pte |= PT_PRESENT_MASK; 502 if (F(AC_PDE_WRITABLE)) 503 pte |= PT_WRITABLE_MASK; 504 if (F(AC_PDE_USER)) 505 pte |= PT_USER_MASK; 506 if (F(AC_PDE_ACCESSED)) 507 pte |= PT_ACCESSED_MASK; 508 if (F(AC_PDE_DIRTY)) 509 pte |= PT_DIRTY_MASK; 510 if (F(AC_PDE_NX)) 511 pte |= PT_NX_MASK; 512 if (F(AC_PDE_BIT51)) 513 pte |= 1ull << 51; 514 if (F(AC_PDE_BIT13)) 515 pte |= 1ull << 13; 516 at->pdep = &vroot[index]; 517 break; 518 case 1: 519 pte = at->phys & PT_BASE_ADDR_MASK; 520 if (F(AC_PKU_PKEY)) 521 pte |= 1ull << 59; 522 if (F(AC_PTE_PRESENT)) 523 pte |= PT_PRESENT_MASK; 524 if (F(AC_PTE_WRITABLE)) 525 pte |= PT_WRITABLE_MASK; 526 if (F(AC_PTE_USER)) 527 pte |= PT_USER_MASK; 528 if (F(AC_PTE_ACCESSED)) 529 pte |= PT_ACCESSED_MASK; 530 if (F(AC_PTE_DIRTY)) 531 pte |= PT_DIRTY_MASK; 532 if (F(AC_PTE_NX)) 533 pte |= PT_NX_MASK; 534 if (F(AC_PTE_BIT51)) 535 pte |= 1ull << 51; 536 at->ptep = &vroot[index]; 537 break; 538 } 539 vroot[index] = pte; 540 root = vroot[index]; 541 } 542 ac_set_expected_status(at); 543 } 544 545 static void ac_test_setup_pte(ac_test_t *at, ac_pool_t *pool) 546 { 547 __ac_setup_specific_pages(at, pool, 0, 0); 548 } 549 550 static void ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, 551 u64 pd_page, u64 pt_page) 552 { 553 return __ac_setup_specific_pages(at, pool, pd_page, pt_page); 554 } 555 556 static void dump_mapping(ac_test_t *at) 557 { 558 unsigned long root = read_cr3(); 559 int flags = at->flags; 560 int i; 561 562 printf("Dump mapping: address: %p\n", at->virt); 563 for (i = 4; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) { 564 pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); 565 unsigned index = PT_INDEX((unsigned long)at->virt, i); 566 pt_element_t pte = vroot[index]; 567 568 printf("------L%d: %lx\n", i, pte); 569 root = vroot[index]; 570 } 571 } 572 573 static void ac_test_check(ac_test_t *at, _Bool *success_ret, _Bool cond, 574 const char *fmt, ...) 575 { 576 va_list ap; 577 char buf[500]; 578 579 if (!*success_ret) { 580 return; 581 } 582 583 if (!cond) { 584 return; 585 } 586 587 *success_ret = false; 588 589 if (!verbose) { 590 puts("\n"); 591 ac_test_show(at); 592 } 593 594 va_start(ap, fmt); 595 vsnprintf(buf, sizeof(buf), fmt, ap); 596 va_end(ap); 597 printf("FAIL: %s\n", buf); 598 dump_mapping(at); 599 } 600 601 static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore) 602 { 603 pte1 &= ~ignore; 604 pte2 &= ~ignore; 605 return pte1 == pte2; 606 } 607 608 int ac_test_do_access(ac_test_t *at) 609 { 610 static unsigned unique = 42; 611 int fault = 0; 612 unsigned e; 613 static unsigned char user_stack[4096]; 614 unsigned long rsp; 615 _Bool success = true; 616 int flags = at->flags; 617 618 ++unique; 619 if (!(unique & 65535)) { 620 puts("."); 621 } 622 623 *((unsigned char *)at->phys) = 0xc3; /* ret */ 624 625 unsigned r = unique; 626 set_cr0_wp(F(AC_CPU_CR0_WP)); 627 set_efer_nx(F(AC_CPU_EFER_NX)); 628 set_cr4_pke(F(AC_CPU_CR4_PKE)); 629 if (F(AC_CPU_CR4_PKE)) { 630 /* WD2=AD2=1, WD1=F(AC_PKU_WD), AD1=F(AC_PKU_AD) */ 631 write_pkru(0x30 | (F(AC_PKU_WD) ? 8 : 0) | 632 (F(AC_PKU_AD) ? 4 : 0)); 633 } 634 635 set_cr4_smep(F(AC_CPU_CR4_SMEP)); 636 637 if (F(AC_ACCESS_TWICE)) { 638 asm volatile ( 639 "mov $fixed2, %%rsi \n\t" 640 "mov (%[addr]), %[reg] \n\t" 641 "fixed2:" 642 : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e) 643 : [addr]"r"(at->virt) 644 : "rsi" 645 ); 646 fault = 0; 647 } 648 649 asm volatile ("mov $fixed1, %%rsi \n\t" 650 "mov %%rsp, %%rdx \n\t" 651 "cmp $0, %[user] \n\t" 652 "jz do_access \n\t" 653 "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax \n\t" 654 "pushq %[user_ds] \n\t" 655 "pushq %[user_stack_top] \n\t" 656 "pushfq \n\t" 657 "pushq %[user_cs] \n\t" 658 "pushq $do_access \n\t" 659 "iretq \n" 660 "do_access: \n\t" 661 "cmp $0, %[fetch] \n\t" 662 "jnz 2f \n\t" 663 "cmp $0, %[write] \n\t" 664 "jnz 1f \n\t" 665 "mov (%[addr]), %[reg] \n\t" 666 "jmp done \n\t" 667 "1: mov %[reg], (%[addr]) \n\t" 668 "jmp done \n\t" 669 "2: call *%[addr] \n\t" 670 "done: \n" 671 "fixed1: \n" 672 "int %[kernel_entry_vector] \n\t" 673 "back_to_kernel:" 674 : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp) 675 : [addr]"r"(at->virt), 676 [write]"r"(F(AC_ACCESS_WRITE)), 677 [user]"r"(F(AC_ACCESS_USER)), 678 [fetch]"r"(F(AC_ACCESS_FETCH)), 679 [user_ds]"i"(USER_DS), 680 [user_cs]"i"(USER_CS), 681 [user_stack_top]"r"(user_stack + sizeof user_stack), 682 [kernel_entry_vector]"i"(0x20) 683 : "rsi"); 684 685 asm volatile (".section .text.pf \n\t" 686 "page_fault: \n\t" 687 "pop %rbx \n\t" 688 "mov %rsi, (%rsp) \n\t" 689 "movl $1, %eax \n\t" 690 "iretq \n\t" 691 ".section .text"); 692 693 asm volatile (".section .text.entry \n\t" 694 "kernel_entry: \n\t" 695 "mov %rdx, %rsp \n\t" 696 "jmp back_to_kernel \n\t" 697 ".section .text"); 698 699 ac_test_check(at, &success, fault && !at->expected_fault, 700 "unexpected fault"); 701 ac_test_check(at, &success, !fault && at->expected_fault, 702 "unexpected access"); 703 ac_test_check(at, &success, fault && e != at->expected_error, 704 "error code %x expected %x", e, at->expected_error); 705 ac_test_check(at, &success, at->ptep && *at->ptep != at->expected_pte, 706 "pte %x expected %x", *at->ptep, at->expected_pte); 707 ac_test_check(at, &success, 708 !pt_match(*at->pdep, at->expected_pde, at->ignore_pde), 709 "pde %x expected %x", *at->pdep, at->expected_pde); 710 711 if (success && verbose) { 712 if (at->expected_fault) { 713 printf("PASS (%x)\n", at->expected_error); 714 } else { 715 printf("PASS\n"); 716 } 717 } 718 return success; 719 } 720 721 static void ac_test_show(ac_test_t *at) 722 { 723 char line[5000]; 724 725 *line = 0; 726 strcat(line, "test"); 727 for (int i = 0; i < NR_AC_FLAGS; ++i) 728 if (at->flags & (1 << i)) { 729 strcat(line, " "); 730 strcat(line, ac_names[i]); 731 } 732 strcat(line, ": "); 733 printf("%s", line); 734 } 735 736 /* 737 * This test case is used to triger the bug which is fixed by 738 * commit e09e90a5 in the kvm tree 739 */ 740 static int corrupt_hugepage_triger(ac_pool_t *pool) 741 { 742 ac_test_t at1, at2; 743 744 ac_test_init(&at1, (void *)(0x123400000000)); 745 ac_test_init(&at2, (void *)(0x666600000000)); 746 747 at2.flags = AC_CPU_CR0_WP_MASK | AC_PDE_PSE_MASK | AC_PDE_PRESENT_MASK; 748 ac_test_setup_pte(&at2, pool); 749 if (!ac_test_do_access(&at2)) 750 goto err; 751 752 at1.flags = at2.flags | AC_PDE_WRITABLE_MASK; 753 ac_test_setup_pte(&at1, pool); 754 if (!ac_test_do_access(&at1)) 755 goto err; 756 757 at1.flags |= AC_ACCESS_WRITE_MASK; 758 ac_set_expected_status(&at1); 759 if (!ac_test_do_access(&at1)) 760 goto err; 761 762 at2.flags |= AC_ACCESS_WRITE_MASK; 763 ac_set_expected_status(&at2); 764 if (!ac_test_do_access(&at2)) 765 goto err; 766 767 return 1; 768 769 err: 770 printf("corrupt_hugepage_triger test fail\n"); 771 return 0; 772 } 773 774 /* 775 * This test case is used to triger the bug which is fixed by 776 * commit 3ddf6c06e13e in the kvm tree 777 */ 778 static int check_pfec_on_prefetch_pte(ac_pool_t *pool) 779 { 780 ac_test_t at1, at2; 781 782 ac_test_init(&at1, (void *)(0x123406001000)); 783 ac_test_init(&at2, (void *)(0x123406003000)); 784 785 at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK; 786 ac_setup_specific_pages(&at1, pool, 30 * 1024 * 1024, 30 * 1024 * 1024); 787 788 at2.flags = at1.flags | AC_PTE_NX_MASK; 789 ac_setup_specific_pages(&at2, pool, 30 * 1024 * 1024, 30 * 1024 * 1024); 790 791 if (!ac_test_do_access(&at1)) { 792 printf("%s: prepare fail\n", __FUNCTION__); 793 goto err; 794 } 795 796 if (!ac_test_do_access(&at2)) { 797 printf("%s: check PFEC on prefetch pte path fail\n", 798 __FUNCTION__); 799 goto err; 800 } 801 802 return 1; 803 804 err: 805 return 0; 806 } 807 808 /* 809 * If the write-fault access is from supervisor and CR0.WP is not set on the 810 * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte 811 * and clears U bit. This is the chance that kvm can change pte access from 812 * readonly to writable. 813 * 814 * Unfortunately, the pte access is the access of 'direct' shadow page table, 815 * means direct sp.role.access = pte_access, then we will create a writable 816 * spte entry on the readonly shadow page table. It will cause Dirty bit is 817 * not tracked when two guest ptes point to the same large page. Note, it 818 * does not have other impact except Dirty bit since cr0.wp is encoded into 819 * sp.role. 820 * 821 * Note: to trigger this bug, hugepage should be disabled on host. 822 */ 823 static int check_large_pte_dirty_for_nowp(ac_pool_t *pool) 824 { 825 ac_test_t at1, at2; 826 827 ac_test_init(&at1, (void *)(0x123403000000)); 828 ac_test_init(&at2, (void *)(0x666606000000)); 829 830 at2.flags = AC_PDE_PRESENT_MASK | AC_PDE_PSE_MASK; 831 ac_test_setup_pte(&at2, pool); 832 if (!ac_test_do_access(&at2)) { 833 printf("%s: read on the first mapping fail.\n", __FUNCTION__); 834 goto err; 835 } 836 837 at1.flags = at2.flags | AC_ACCESS_WRITE_MASK; 838 ac_test_setup_pte(&at1, pool); 839 if (!ac_test_do_access(&at1)) { 840 printf("%s: write on the second mapping fail.\n", __FUNCTION__); 841 goto err; 842 } 843 844 at2.flags |= AC_ACCESS_WRITE_MASK; 845 ac_set_expected_status(&at2); 846 if (!ac_test_do_access(&at2)) { 847 printf("%s: write on the first mapping fail.\n", __FUNCTION__); 848 goto err; 849 } 850 851 return 1; 852 853 err: 854 return 0; 855 } 856 857 static int check_smep_andnot_wp(ac_pool_t *pool) 858 { 859 ac_test_t at1; 860 int err_prepare_andnot_wp, err_smep_andnot_wp; 861 862 if (!(cpuid_7_ebx & (1 << 7))) { 863 return 1; 864 } 865 866 ac_test_init(&at1, (void *)(0x123406001000)); 867 868 at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK | 869 AC_PDE_USER_MASK | AC_PTE_USER_MASK | 870 AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK | 871 AC_CPU_CR4_SMEP_MASK | 872 AC_CPU_CR0_WP_MASK | 873 AC_ACCESS_WRITE_MASK; 874 ac_test_setup_pte(&at1, pool); 875 876 /* 877 * Here we write the ro user page when 878 * cr0.wp=0, then we execute it and SMEP 879 * fault should happen. 880 */ 881 err_prepare_andnot_wp = ac_test_do_access(&at1); 882 if (!err_prepare_andnot_wp) { 883 printf("%s: SMEP prepare fail\n", __FUNCTION__); 884 goto clean_up; 885 } 886 887 at1.flags &= ~AC_ACCESS_WRITE_MASK; 888 at1.flags |= AC_ACCESS_FETCH_MASK; 889 ac_set_expected_status(&at1); 890 err_smep_andnot_wp = ac_test_do_access(&at1); 891 892 clean_up: 893 set_cr4_smep(0); 894 895 if (!err_prepare_andnot_wp) 896 goto err; 897 if (!err_smep_andnot_wp) { 898 printf("%s: check SMEP without wp fail\n", __FUNCTION__); 899 goto err; 900 } 901 return 1; 902 903 err: 904 return 0; 905 } 906 907 int ac_test_exec(ac_test_t *at, ac_pool_t *pool) 908 { 909 int r; 910 911 if (verbose) { 912 ac_test_show(at); 913 } 914 ac_test_setup_pte(at, pool); 915 r = ac_test_do_access(at); 916 return r; 917 } 918 919 typedef int (*ac_test_fn)(ac_pool_t *pool); 920 const ac_test_fn ac_test_cases[] = 921 { 922 corrupt_hugepage_triger, 923 check_pfec_on_prefetch_pte, 924 check_large_pte_dirty_for_nowp, 925 check_smep_andnot_wp 926 }; 927 928 int ac_test_run(void) 929 { 930 ac_test_t at; 931 ac_pool_t pool; 932 int i, tests, successes; 933 934 printf("run\n"); 935 tests = successes = 0; 936 937 if (cpuid_7_ecx & (1 << 3)) { 938 set_cr4_pke(1); 939 set_cr4_pke(0); 940 /* Now PKRU = 0xFFFFFFFF. */ 941 } else { 942 unsigned long cr4 = read_cr4(); 943 tests++; 944 if (write_cr4_checking(cr4 | X86_CR4_PKE) == GP_VECTOR) { 945 successes++; 946 invalid_mask |= AC_PKU_AD_MASK; 947 invalid_mask |= AC_PKU_WD_MASK; 948 invalid_mask |= AC_PKU_PKEY_MASK; 949 invalid_mask |= AC_CPU_CR4_PKE_MASK; 950 printf("CR4.PKE not available, disabling PKE tests\n"); 951 } else { 952 printf("Set PKE in CR4 - expect #GP: FAIL!\n"); 953 set_cr4_pke(0); 954 } 955 } 956 957 if (!(cpuid_7_ebx & (1 << 7))) { 958 unsigned long cr4 = read_cr4(); 959 tests++; 960 if (write_cr4_checking(cr4 | CR4_SMEP_MASK) == GP_VECTOR) { 961 successes++; 962 invalid_mask |= AC_CPU_CR4_SMEP_MASK; 963 printf("CR4.SMEP not available, disabling SMEP tests\n"); 964 } else { 965 printf("Set SMEP in CR4 - expect #GP: FAIL!\n"); 966 set_cr4_smep(0); 967 } 968 } 969 970 ac_env_int(&pool); 971 ac_test_init(&at, (void *)(0x123400000000 + 16 * smp_id())); 972 do { 973 ++tests; 974 successes += ac_test_exec(&at, &pool); 975 } while (ac_test_bump(&at)); 976 977 for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) { 978 ++tests; 979 successes += ac_test_cases[i](&pool); 980 } 981 982 printf("\n%d tests, %d failures\n", tests, tests - successes); 983 984 return successes == tests; 985 } 986 987 int main() 988 { 989 int r; 990 991 setup_idt(); 992 993 cpuid_7_ebx = cpuid(7).b; 994 cpuid_7_ecx = cpuid(7).c; 995 996 printf("starting test\n\n"); 997 r = ac_test_run(); 998 return r ? 0 : 1; 999 } 1000