1 2 #include "libcflat.h" 3 #include "desc.h" 4 #include "processor.h" 5 6 #define smp_id() 0 7 8 #define true 1 9 #define false 0 10 11 static _Bool verbose = false; 12 13 typedef unsigned long pt_element_t; 14 static int cpuid_7_ebx; 15 static int cpuid_7_ecx; 16 static int invalid_mask; 17 18 #define PAGE_SIZE ((pt_element_t)4096) 19 #define PAGE_MASK (~(PAGE_SIZE-1)) 20 21 #define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 40) - 1) & PAGE_MASK)) 22 #define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(1ull << 21)) 23 24 #define PT_PRESENT_MASK ((pt_element_t)1 << 0) 25 #define PT_WRITABLE_MASK ((pt_element_t)1 << 1) 26 #define PT_USER_MASK ((pt_element_t)1 << 2) 27 #define PT_ACCESSED_MASK ((pt_element_t)1 << 5) 28 #define PT_DIRTY_MASK ((pt_element_t)1 << 6) 29 #define PT_PSE_MASK ((pt_element_t)1 << 7) 30 #define PT_NX_MASK ((pt_element_t)1 << 63) 31 32 #define CR0_WP_MASK (1UL << 16) 33 #define CR4_SMEP_MASK (1UL << 20) 34 35 #define PFERR_PRESENT_MASK (1U << 0) 36 #define PFERR_WRITE_MASK (1U << 1) 37 #define PFERR_USER_MASK (1U << 2) 38 #define PFERR_RESERVED_MASK (1U << 3) 39 #define PFERR_FETCH_MASK (1U << 4) 40 #define PFERR_PK_MASK (1U << 5) 41 42 #define MSR_EFER 0xc0000080 43 #define EFER_NX_MASK (1ull << 11) 44 45 #define PT_INDEX(address, level) \ 46 ((address) >> (12 + ((level)-1) * 9)) & 511 47 48 /* 49 * page table access check tests 50 */ 51 52 enum { 53 AC_PTE_PRESENT_BIT, 54 AC_PTE_WRITABLE_BIT, 55 AC_PTE_USER_BIT, 56 AC_PTE_ACCESSED_BIT, 57 AC_PTE_DIRTY_BIT, 58 AC_PTE_NX_BIT, 59 AC_PTE_BIT51_BIT, 60 61 AC_PDE_PRESENT_BIT, 62 AC_PDE_WRITABLE_BIT, 63 AC_PDE_USER_BIT, 64 AC_PDE_ACCESSED_BIT, 65 AC_PDE_DIRTY_BIT, 66 AC_PDE_PSE_BIT, 67 AC_PDE_NX_BIT, 68 AC_PDE_BIT51_BIT, 69 AC_PDE_BIT13_BIT, 70 71 AC_PKU_AD_BIT, 72 AC_PKU_WD_BIT, 73 AC_PKU_PKEY_BIT, 74 75 AC_ACCESS_USER_BIT, 76 AC_ACCESS_WRITE_BIT, 77 AC_ACCESS_FETCH_BIT, 78 AC_ACCESS_TWICE_BIT, 79 80 AC_CPU_EFER_NX_BIT, 81 AC_CPU_CR0_WP_BIT, 82 AC_CPU_CR4_SMEP_BIT, 83 AC_CPU_CR4_PKE_BIT, 84 85 NR_AC_FLAGS 86 }; 87 88 #define AC_PTE_PRESENT_MASK (1 << AC_PTE_PRESENT_BIT) 89 #define AC_PTE_WRITABLE_MASK (1 << AC_PTE_WRITABLE_BIT) 90 #define AC_PTE_USER_MASK (1 << AC_PTE_USER_BIT) 91 #define AC_PTE_ACCESSED_MASK (1 << AC_PTE_ACCESSED_BIT) 92 #define AC_PTE_DIRTY_MASK (1 << AC_PTE_DIRTY_BIT) 93 #define AC_PTE_NX_MASK (1 << AC_PTE_NX_BIT) 94 #define AC_PTE_BIT51_MASK (1 << AC_PTE_BIT51_BIT) 95 96 #define AC_PDE_PRESENT_MASK (1 << AC_PDE_PRESENT_BIT) 97 #define AC_PDE_WRITABLE_MASK (1 << AC_PDE_WRITABLE_BIT) 98 #define AC_PDE_USER_MASK (1 << AC_PDE_USER_BIT) 99 #define AC_PDE_ACCESSED_MASK (1 << AC_PDE_ACCESSED_BIT) 100 #define AC_PDE_DIRTY_MASK (1 << AC_PDE_DIRTY_BIT) 101 #define AC_PDE_PSE_MASK (1 << AC_PDE_PSE_BIT) 102 #define AC_PDE_NX_MASK (1 << AC_PDE_NX_BIT) 103 #define AC_PDE_BIT51_MASK (1 << AC_PDE_BIT51_BIT) 104 #define AC_PDE_BIT13_MASK (1 << AC_PDE_BIT13_BIT) 105 106 #define AC_PKU_AD_MASK (1 << AC_PKU_AD_BIT) 107 #define AC_PKU_WD_MASK (1 << AC_PKU_WD_BIT) 108 #define AC_PKU_PKEY_MASK (1 << AC_PKU_PKEY_BIT) 109 110 #define AC_ACCESS_USER_MASK (1 << AC_ACCESS_USER_BIT) 111 #define AC_ACCESS_WRITE_MASK (1 << AC_ACCESS_WRITE_BIT) 112 #define AC_ACCESS_FETCH_MASK (1 << AC_ACCESS_FETCH_BIT) 113 #define AC_ACCESS_TWICE_MASK (1 << AC_ACCESS_TWICE_BIT) 114 115 #define AC_CPU_EFER_NX_MASK (1 << AC_CPU_EFER_NX_BIT) 116 #define AC_CPU_CR0_WP_MASK (1 << AC_CPU_CR0_WP_BIT) 117 #define AC_CPU_CR4_SMEP_MASK (1 << AC_CPU_CR4_SMEP_BIT) 118 #define AC_CPU_CR4_PKE_MASK (1 << AC_CPU_CR4_PKE_BIT) 119 120 const char *ac_names[] = { 121 [AC_PTE_PRESENT_BIT] = "pte.p", 122 [AC_PTE_ACCESSED_BIT] = "pte.a", 123 [AC_PTE_WRITABLE_BIT] = "pte.rw", 124 [AC_PTE_USER_BIT] = "pte.user", 125 [AC_PTE_DIRTY_BIT] = "pte.d", 126 [AC_PTE_NX_BIT] = "pte.nx", 127 [AC_PTE_BIT51_BIT] = "pte.51", 128 [AC_PDE_PRESENT_BIT] = "pde.p", 129 [AC_PDE_ACCESSED_BIT] = "pde.a", 130 [AC_PDE_WRITABLE_BIT] = "pde.rw", 131 [AC_PDE_USER_BIT] = "pde.user", 132 [AC_PDE_DIRTY_BIT] = "pde.d", 133 [AC_PDE_PSE_BIT] = "pde.pse", 134 [AC_PDE_NX_BIT] = "pde.nx", 135 [AC_PDE_BIT51_BIT] = "pde.51", 136 [AC_PDE_BIT13_BIT] = "pde.13", 137 [AC_PKU_AD_BIT] = "pkru.ad", 138 [AC_PKU_WD_BIT] = "pkru.wd", 139 [AC_PKU_PKEY_BIT] = "pkey=1", 140 [AC_ACCESS_WRITE_BIT] = "write", 141 [AC_ACCESS_USER_BIT] = "user", 142 [AC_ACCESS_FETCH_BIT] = "fetch", 143 [AC_ACCESS_TWICE_BIT] = "twice", 144 [AC_CPU_EFER_NX_BIT] = "efer.nx", 145 [AC_CPU_CR0_WP_BIT] = "cr0.wp", 146 [AC_CPU_CR4_SMEP_BIT] = "cr4.smep", 147 [AC_CPU_CR4_PKE_BIT] = "cr4.pke", 148 }; 149 150 static inline void *va(pt_element_t phys) 151 { 152 return (void *)phys; 153 } 154 155 typedef struct { 156 pt_element_t pt_pool; 157 unsigned pt_pool_size; 158 unsigned pt_pool_current; 159 } ac_pool_t; 160 161 typedef struct { 162 unsigned flags; 163 void *virt; 164 pt_element_t phys; 165 pt_element_t *ptep; 166 pt_element_t expected_pte; 167 pt_element_t *pdep; 168 pt_element_t expected_pde; 169 pt_element_t ignore_pde; 170 int expected_fault; 171 unsigned expected_error; 172 } ac_test_t; 173 174 typedef struct { 175 unsigned short limit; 176 unsigned long linear_addr; 177 } __attribute__((packed)) descriptor_table_t; 178 179 180 static void ac_test_show(ac_test_t *at); 181 182 int write_cr4_checking(unsigned long val) 183 { 184 asm volatile(ASM_TRY("1f") 185 "mov %0,%%cr4\n\t" 186 "1:": : "r" (val)); 187 return exception_vector(); 188 } 189 190 void set_cr0_wp(int wp) 191 { 192 unsigned long cr0 = read_cr0(); 193 unsigned long old_cr0 = cr0; 194 195 cr0 &= ~CR0_WP_MASK; 196 if (wp) 197 cr0 |= CR0_WP_MASK; 198 if (old_cr0 != cr0) 199 write_cr0(cr0); 200 } 201 202 void set_cr4_smep(int smep) 203 { 204 unsigned long cr4 = read_cr4(); 205 unsigned long old_cr4 = cr4; 206 extern u64 ptl2[]; 207 208 cr4 &= ~CR4_SMEP_MASK; 209 if (smep) 210 cr4 |= CR4_SMEP_MASK; 211 if (old_cr4 == cr4) 212 return; 213 214 if (smep) 215 ptl2[2] &= ~PT_USER_MASK; 216 write_cr4(cr4); 217 if (!smep) 218 ptl2[2] |= PT_USER_MASK; 219 } 220 221 void set_cr4_pke(int pke) 222 { 223 unsigned long cr4 = read_cr4(); 224 unsigned long old_cr4 = cr4; 225 226 cr4 &= ~X86_CR4_PKE; 227 if (pke) 228 cr4 |= X86_CR4_PKE; 229 if (old_cr4 == cr4) 230 return; 231 232 /* Check that protection keys do not affect accesses when CR4.PKE=0. */ 233 if ((read_cr4() & X86_CR4_PKE) && !pke) { 234 write_pkru(0xfffffffc); 235 } 236 write_cr4(cr4); 237 } 238 239 void set_efer_nx(int nx) 240 { 241 unsigned long long efer = rdmsr(MSR_EFER); 242 unsigned long long old_efer = efer; 243 244 efer &= ~EFER_NX_MASK; 245 if (nx) 246 efer |= EFER_NX_MASK; 247 if (old_efer != efer) 248 wrmsr(MSR_EFER, efer); 249 } 250 251 static void ac_env_int(ac_pool_t *pool) 252 { 253 extern char page_fault, kernel_entry; 254 set_idt_entry(14, &page_fault, 0); 255 set_idt_entry(0x20, &kernel_entry, 3); 256 257 pool->pt_pool = 33 * 1024 * 1024; 258 pool->pt_pool_size = 120 * 1024 * 1024 - pool->pt_pool; 259 pool->pt_pool_current = 0; 260 } 261 262 void ac_test_init(ac_test_t *at, void *virt) 263 { 264 wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NX_MASK); 265 set_cr0_wp(1); 266 at->flags = 0; 267 at->virt = virt; 268 at->phys = 32 * 1024 * 1024; 269 } 270 271 int ac_test_bump_one(ac_test_t *at) 272 { 273 at->flags = ((at->flags | invalid_mask) + 1) & ~invalid_mask; 274 return at->flags < (1 << NR_AC_FLAGS); 275 } 276 277 #define F(x) ((flags & x##_MASK) != 0) 278 279 _Bool ac_test_legal(ac_test_t *at) 280 { 281 int flags = at->flags; 282 283 if (F(AC_ACCESS_FETCH) && F(AC_ACCESS_WRITE)) 284 return false; 285 286 /* 287 * Since we convert current page to kernel page when cr4.smep=1, 288 * we can't switch to user mode. 289 */ 290 if (F(AC_ACCESS_USER) && F(AC_CPU_CR4_SMEP)) 291 return false; 292 293 /* 294 * Only test protection key faults if CR4.PKE=1. 295 */ 296 if (!F(AC_CPU_CR4_PKE) && 297 (F(AC_PKU_AD) || F(AC_PKU_WD))) { 298 return false; 299 } 300 301 /* 302 * pde.bit13 checks handling of reserved bits in largepage PDEs. It is 303 * meaningless if there is a PTE. 304 */ 305 if (!F(AC_PDE_PSE) && F(AC_PDE_BIT13)) 306 return false; 307 308 return true; 309 } 310 311 int ac_test_bump(ac_test_t *at) 312 { 313 int ret; 314 315 ret = ac_test_bump_one(at); 316 while (ret && !ac_test_legal(at)) 317 ret = ac_test_bump_one(at); 318 return ret; 319 } 320 321 pt_element_t ac_test_alloc_pt(ac_pool_t *pool) 322 { 323 pt_element_t ret = pool->pt_pool + pool->pt_pool_current; 324 pool->pt_pool_current += PAGE_SIZE; 325 return ret; 326 } 327 328 _Bool ac_test_enough_room(ac_pool_t *pool) 329 { 330 return pool->pt_pool_current + 4 * PAGE_SIZE <= pool->pt_pool_size; 331 } 332 333 void ac_test_reset_pt_pool(ac_pool_t *pool) 334 { 335 pool->pt_pool_current = 0; 336 } 337 338 pt_element_t ac_test_permissions(ac_test_t *at, unsigned flags, bool writable, 339 bool user, bool executable) 340 { 341 bool kwritable = !F(AC_CPU_CR0_WP) && !F(AC_ACCESS_USER); 342 pt_element_t expected = 0; 343 344 if (F(AC_ACCESS_USER) && !user) 345 at->expected_fault = 1; 346 347 if (F(AC_ACCESS_WRITE) && !writable && !kwritable) 348 at->expected_fault = 1; 349 350 if (F(AC_ACCESS_FETCH) && !executable) 351 at->expected_fault = 1; 352 353 if (F(AC_ACCESS_FETCH) && user && F(AC_CPU_CR4_SMEP)) 354 at->expected_fault = 1; 355 356 if (user && !F(AC_ACCESS_FETCH) && F(AC_PKU_PKEY) && F(AC_CPU_CR4_PKE)) { 357 if (F(AC_PKU_AD)) { 358 at->expected_fault = 1; 359 at->expected_error |= PFERR_PK_MASK; 360 } else if (F(AC_ACCESS_WRITE) && F(AC_PKU_WD) && !kwritable) { 361 at->expected_fault = 1; 362 at->expected_error |= PFERR_PK_MASK; 363 } 364 } 365 366 if (!at->expected_fault) { 367 expected |= PT_ACCESSED_MASK; 368 if (F(AC_ACCESS_WRITE)) 369 expected |= PT_DIRTY_MASK; 370 } 371 372 return expected; 373 } 374 375 void ac_emulate_access(ac_test_t *at, unsigned flags) 376 { 377 bool pde_valid, pte_valid; 378 bool user, writable, executable; 379 380 if (F(AC_ACCESS_USER)) 381 at->expected_error |= PFERR_USER_MASK; 382 383 if (F(AC_ACCESS_WRITE)) 384 at->expected_error |= PFERR_WRITE_MASK; 385 386 if (F(AC_ACCESS_FETCH)) 387 at->expected_error |= PFERR_FETCH_MASK; 388 389 if (!F(AC_PDE_ACCESSED)) 390 at->ignore_pde = PT_ACCESSED_MASK; 391 392 pde_valid = F(AC_PDE_PRESENT) 393 && !F(AC_PDE_BIT51) && !F(AC_PDE_BIT13) 394 && !(F(AC_PDE_NX) && !F(AC_CPU_EFER_NX)); 395 396 if (!pde_valid) { 397 at->expected_fault = 1; 398 if (F(AC_PDE_PRESENT)) { 399 at->expected_error |= PFERR_RESERVED_MASK; 400 } else { 401 at->expected_error &= ~PFERR_PRESENT_MASK; 402 } 403 goto fault; 404 } 405 406 writable = F(AC_PDE_WRITABLE); 407 user = F(AC_PDE_USER); 408 executable = !F(AC_PDE_NX); 409 410 if (F(AC_PDE_PSE)) { 411 at->expected_pde |= ac_test_permissions(at, flags, writable, user, 412 executable); 413 goto no_pte; 414 } 415 416 at->expected_pde |= PT_ACCESSED_MASK; 417 418 pte_valid = F(AC_PTE_PRESENT) 419 && !F(AC_PTE_BIT51) 420 && !(F(AC_PTE_NX) && !F(AC_CPU_EFER_NX)); 421 422 if (!pte_valid) { 423 at->expected_fault = 1; 424 if (F(AC_PTE_PRESENT)) { 425 at->expected_error |= PFERR_RESERVED_MASK; 426 } else { 427 at->expected_error &= ~PFERR_PRESENT_MASK; 428 } 429 goto fault; 430 } 431 432 writable &= F(AC_PTE_WRITABLE); 433 user &= F(AC_PTE_USER); 434 executable &= !F(AC_PTE_NX); 435 436 at->expected_pte |= ac_test_permissions(at, flags, writable, user, 437 executable); 438 439 no_pte: 440 fault: 441 if (!at->expected_fault) 442 at->ignore_pde = 0; 443 if (!F(AC_CPU_EFER_NX) && !F(AC_CPU_CR4_SMEP)) 444 at->expected_error &= ~PFERR_FETCH_MASK; 445 } 446 447 void ac_set_expected_status(ac_test_t *at) 448 { 449 invlpg(at->virt); 450 451 if (at->ptep) 452 at->expected_pte = *at->ptep; 453 at->expected_pde = *at->pdep; 454 at->ignore_pde = 0; 455 at->expected_fault = 0; 456 at->expected_error = PFERR_PRESENT_MASK; 457 458 if (at->flags & AC_ACCESS_TWICE_MASK) { 459 ac_emulate_access(at, at->flags & ~AC_ACCESS_WRITE_MASK 460 & ~AC_ACCESS_FETCH_MASK & ~AC_ACCESS_USER_MASK); 461 at->expected_fault = 0; 462 at->expected_error = PFERR_PRESENT_MASK; 463 at->ignore_pde = 0; 464 } 465 466 ac_emulate_access(at, at->flags); 467 } 468 469 void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, u64 pd_page, 470 u64 pt_page) 471 472 { 473 unsigned long root = read_cr3(); 474 int flags = at->flags; 475 476 if (!ac_test_enough_room(pool)) 477 ac_test_reset_pt_pool(pool); 478 479 at->ptep = 0; 480 for (int i = 4; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) { 481 pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); 482 unsigned index = PT_INDEX((unsigned long)at->virt, i); 483 pt_element_t pte = 0; 484 switch (i) { 485 case 4: 486 case 3: 487 pte = pd_page ? pd_page : ac_test_alloc_pt(pool); 488 pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; 489 break; 490 case 2: 491 if (!F(AC_PDE_PSE)) { 492 pte = pt_page ? pt_page : ac_test_alloc_pt(pool); 493 /* The protection key is ignored on non-leaf entries. */ 494 if (F(AC_PKU_PKEY)) 495 pte |= 2ull << 59; 496 } else { 497 pte = at->phys & PT_PSE_BASE_ADDR_MASK; 498 pte |= PT_PSE_MASK; 499 if (F(AC_PKU_PKEY)) 500 pte |= 1ull << 59; 501 } 502 if (F(AC_PDE_PRESENT)) 503 pte |= PT_PRESENT_MASK; 504 if (F(AC_PDE_WRITABLE)) 505 pte |= PT_WRITABLE_MASK; 506 if (F(AC_PDE_USER)) 507 pte |= PT_USER_MASK; 508 if (F(AC_PDE_ACCESSED)) 509 pte |= PT_ACCESSED_MASK; 510 if (F(AC_PDE_DIRTY)) 511 pte |= PT_DIRTY_MASK; 512 if (F(AC_PDE_NX)) 513 pte |= PT_NX_MASK; 514 if (F(AC_PDE_BIT51)) 515 pte |= 1ull << 51; 516 if (F(AC_PDE_BIT13)) 517 pte |= 1ull << 13; 518 at->pdep = &vroot[index]; 519 break; 520 case 1: 521 pte = at->phys & PT_BASE_ADDR_MASK; 522 if (F(AC_PKU_PKEY)) 523 pte |= 1ull << 59; 524 if (F(AC_PTE_PRESENT)) 525 pte |= PT_PRESENT_MASK; 526 if (F(AC_PTE_WRITABLE)) 527 pte |= PT_WRITABLE_MASK; 528 if (F(AC_PTE_USER)) 529 pte |= PT_USER_MASK; 530 if (F(AC_PTE_ACCESSED)) 531 pte |= PT_ACCESSED_MASK; 532 if (F(AC_PTE_DIRTY)) 533 pte |= PT_DIRTY_MASK; 534 if (F(AC_PTE_NX)) 535 pte |= PT_NX_MASK; 536 if (F(AC_PTE_BIT51)) 537 pte |= 1ull << 51; 538 at->ptep = &vroot[index]; 539 break; 540 } 541 vroot[index] = pte; 542 root = vroot[index]; 543 } 544 ac_set_expected_status(at); 545 } 546 547 static void ac_test_setup_pte(ac_test_t *at, ac_pool_t *pool) 548 { 549 __ac_setup_specific_pages(at, pool, 0, 0); 550 } 551 552 static void ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, 553 u64 pd_page, u64 pt_page) 554 { 555 return __ac_setup_specific_pages(at, pool, pd_page, pt_page); 556 } 557 558 static void dump_mapping(ac_test_t *at) 559 { 560 unsigned long root = read_cr3(); 561 int flags = at->flags; 562 int i; 563 564 printf("Dump mapping: address: %p\n", at->virt); 565 for (i = 4; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) { 566 pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); 567 unsigned index = PT_INDEX((unsigned long)at->virt, i); 568 pt_element_t pte = vroot[index]; 569 570 printf("------L%d: %lx\n", i, pte); 571 root = vroot[index]; 572 } 573 } 574 575 static void ac_test_check(ac_test_t *at, _Bool *success_ret, _Bool cond, 576 const char *fmt, ...) 577 { 578 va_list ap; 579 char buf[500]; 580 581 if (!*success_ret) { 582 return; 583 } 584 585 if (!cond) { 586 return; 587 } 588 589 *success_ret = false; 590 591 if (!verbose) { 592 puts("\n"); 593 ac_test_show(at); 594 } 595 596 va_start(ap, fmt); 597 vsnprintf(buf, sizeof(buf), fmt, ap); 598 va_end(ap); 599 printf("FAIL: %s\n", buf); 600 dump_mapping(at); 601 } 602 603 static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore) 604 { 605 pte1 &= ~ignore; 606 pte2 &= ~ignore; 607 return pte1 == pte2; 608 } 609 610 int ac_test_do_access(ac_test_t *at) 611 { 612 static unsigned unique = 42; 613 int fault = 0; 614 unsigned e; 615 static unsigned char user_stack[4096]; 616 unsigned long rsp; 617 _Bool success = true; 618 int flags = at->flags; 619 620 ++unique; 621 if (!(unique & 65535)) { 622 puts("."); 623 } 624 625 *((unsigned char *)at->phys) = 0xc3; /* ret */ 626 627 unsigned r = unique; 628 set_cr0_wp(F(AC_CPU_CR0_WP)); 629 set_efer_nx(F(AC_CPU_EFER_NX)); 630 set_cr4_pke(F(AC_CPU_CR4_PKE)); 631 if (F(AC_CPU_CR4_PKE)) { 632 /* WD2=AD2=1, WD1=F(AC_PKU_WD), AD1=F(AC_PKU_AD) */ 633 write_pkru(0x30 | (F(AC_PKU_WD) ? 8 : 0) | 634 (F(AC_PKU_AD) ? 4 : 0)); 635 } 636 637 set_cr4_smep(F(AC_CPU_CR4_SMEP)); 638 639 if (F(AC_ACCESS_TWICE)) { 640 asm volatile ( 641 "mov $fixed2, %%rsi \n\t" 642 "mov (%[addr]), %[reg] \n\t" 643 "fixed2:" 644 : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e) 645 : [addr]"r"(at->virt) 646 : "rsi" 647 ); 648 fault = 0; 649 } 650 651 asm volatile ("mov $fixed1, %%rsi \n\t" 652 "mov %%rsp, %%rdx \n\t" 653 "cmp $0, %[user] \n\t" 654 "jz do_access \n\t" 655 "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax \n\t" 656 "pushq %[user_ds] \n\t" 657 "pushq %[user_stack_top] \n\t" 658 "pushfq \n\t" 659 "pushq %[user_cs] \n\t" 660 "pushq $do_access \n\t" 661 "iretq \n" 662 "do_access: \n\t" 663 "cmp $0, %[fetch] \n\t" 664 "jnz 2f \n\t" 665 "cmp $0, %[write] \n\t" 666 "jnz 1f \n\t" 667 "mov (%[addr]), %[reg] \n\t" 668 "jmp done \n\t" 669 "1: mov %[reg], (%[addr]) \n\t" 670 "jmp done \n\t" 671 "2: call *%[addr] \n\t" 672 "done: \n" 673 "fixed1: \n" 674 "int %[kernel_entry_vector] \n\t" 675 "back_to_kernel:" 676 : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp) 677 : [addr]"r"(at->virt), 678 [write]"r"(F(AC_ACCESS_WRITE)), 679 [user]"r"(F(AC_ACCESS_USER)), 680 [fetch]"r"(F(AC_ACCESS_FETCH)), 681 [user_ds]"i"(USER_DS), 682 [user_cs]"i"(USER_CS), 683 [user_stack_top]"r"(user_stack + sizeof user_stack), 684 [kernel_entry_vector]"i"(0x20) 685 : "rsi"); 686 687 asm volatile (".section .text.pf \n\t" 688 "page_fault: \n\t" 689 "pop %rbx \n\t" 690 "mov %rsi, (%rsp) \n\t" 691 "movl $1, %eax \n\t" 692 "iretq \n\t" 693 ".section .text"); 694 695 asm volatile (".section .text.entry \n\t" 696 "kernel_entry: \n\t" 697 "mov %rdx, %rsp \n\t" 698 "jmp back_to_kernel \n\t" 699 ".section .text"); 700 701 ac_test_check(at, &success, fault && !at->expected_fault, 702 "unexpected fault"); 703 ac_test_check(at, &success, !fault && at->expected_fault, 704 "unexpected access"); 705 ac_test_check(at, &success, fault && e != at->expected_error, 706 "error code %x expected %x", e, at->expected_error); 707 ac_test_check(at, &success, at->ptep && *at->ptep != at->expected_pte, 708 "pte %x expected %x", *at->ptep, at->expected_pte); 709 ac_test_check(at, &success, 710 !pt_match(*at->pdep, at->expected_pde, at->ignore_pde), 711 "pde %x expected %x", *at->pdep, at->expected_pde); 712 713 if (success && verbose) { 714 if (at->expected_fault) { 715 printf("PASS (%x)\n", at->expected_error); 716 } else { 717 printf("PASS\n"); 718 } 719 } 720 return success; 721 } 722 723 static void ac_test_show(ac_test_t *at) 724 { 725 char line[5000]; 726 727 *line = 0; 728 strcat(line, "test"); 729 for (int i = 0; i < NR_AC_FLAGS; ++i) 730 if (at->flags & (1 << i)) { 731 strcat(line, " "); 732 strcat(line, ac_names[i]); 733 } 734 strcat(line, ": "); 735 printf("%s", line); 736 } 737 738 /* 739 * This test case is used to triger the bug which is fixed by 740 * commit e09e90a5 in the kvm tree 741 */ 742 static int corrupt_hugepage_triger(ac_pool_t *pool) 743 { 744 ac_test_t at1, at2; 745 746 ac_test_init(&at1, (void *)(0x123400000000)); 747 ac_test_init(&at2, (void *)(0x666600000000)); 748 749 at2.flags = AC_CPU_CR0_WP_MASK | AC_PDE_PSE_MASK | AC_PDE_PRESENT_MASK; 750 ac_test_setup_pte(&at2, pool); 751 if (!ac_test_do_access(&at2)) 752 goto err; 753 754 at1.flags = at2.flags | AC_PDE_WRITABLE_MASK; 755 ac_test_setup_pte(&at1, pool); 756 if (!ac_test_do_access(&at1)) 757 goto err; 758 759 at1.flags |= AC_ACCESS_WRITE_MASK; 760 ac_set_expected_status(&at1); 761 if (!ac_test_do_access(&at1)) 762 goto err; 763 764 at2.flags |= AC_ACCESS_WRITE_MASK; 765 ac_set_expected_status(&at2); 766 if (!ac_test_do_access(&at2)) 767 goto err; 768 769 return 1; 770 771 err: 772 printf("corrupt_hugepage_triger test fail\n"); 773 return 0; 774 } 775 776 /* 777 * This test case is used to triger the bug which is fixed by 778 * commit 3ddf6c06e13e in the kvm tree 779 */ 780 static int check_pfec_on_prefetch_pte(ac_pool_t *pool) 781 { 782 ac_test_t at1, at2; 783 784 ac_test_init(&at1, (void *)(0x123406001000)); 785 ac_test_init(&at2, (void *)(0x123406003000)); 786 787 at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK; 788 ac_setup_specific_pages(&at1, pool, 30 * 1024 * 1024, 30 * 1024 * 1024); 789 790 at2.flags = at1.flags | AC_PTE_NX_MASK; 791 ac_setup_specific_pages(&at2, pool, 30 * 1024 * 1024, 30 * 1024 * 1024); 792 793 if (!ac_test_do_access(&at1)) { 794 printf("%s: prepare fail\n", __FUNCTION__); 795 goto err; 796 } 797 798 if (!ac_test_do_access(&at2)) { 799 printf("%s: check PFEC on prefetch pte path fail\n", 800 __FUNCTION__); 801 goto err; 802 } 803 804 return 1; 805 806 err: 807 return 0; 808 } 809 810 /* 811 * If the write-fault access is from supervisor and CR0.WP is not set on the 812 * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte 813 * and clears U bit. This is the chance that kvm can change pte access from 814 * readonly to writable. 815 * 816 * Unfortunately, the pte access is the access of 'direct' shadow page table, 817 * means direct sp.role.access = pte_access, then we will create a writable 818 * spte entry on the readonly shadow page table. It will cause Dirty bit is 819 * not tracked when two guest ptes point to the same large page. Note, it 820 * does not have other impact except Dirty bit since cr0.wp is encoded into 821 * sp.role. 822 * 823 * Note: to trigger this bug, hugepage should be disabled on host. 824 */ 825 static int check_large_pte_dirty_for_nowp(ac_pool_t *pool) 826 { 827 ac_test_t at1, at2; 828 829 ac_test_init(&at1, (void *)(0x123403000000)); 830 ac_test_init(&at2, (void *)(0x666606000000)); 831 832 at2.flags = AC_PDE_PRESENT_MASK | AC_PDE_PSE_MASK; 833 ac_test_setup_pte(&at2, pool); 834 if (!ac_test_do_access(&at2)) { 835 printf("%s: read on the first mapping fail.\n", __FUNCTION__); 836 goto err; 837 } 838 839 at1.flags = at2.flags | AC_ACCESS_WRITE_MASK; 840 ac_test_setup_pte(&at1, pool); 841 if (!ac_test_do_access(&at1)) { 842 printf("%s: write on the second mapping fail.\n", __FUNCTION__); 843 goto err; 844 } 845 846 at2.flags |= AC_ACCESS_WRITE_MASK; 847 ac_set_expected_status(&at2); 848 if (!ac_test_do_access(&at2)) { 849 printf("%s: write on the first mapping fail.\n", __FUNCTION__); 850 goto err; 851 } 852 853 return 1; 854 855 err: 856 return 0; 857 } 858 859 static int check_smep_andnot_wp(ac_pool_t *pool) 860 { 861 ac_test_t at1; 862 int err_prepare_andnot_wp, err_smep_andnot_wp; 863 864 if (!(cpuid_7_ebx & (1 << 7))) { 865 return 1; 866 } 867 868 ac_test_init(&at1, (void *)(0x123406001000)); 869 870 at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK | 871 AC_PDE_USER_MASK | AC_PTE_USER_MASK | 872 AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK | 873 AC_CPU_CR4_SMEP_MASK | 874 AC_CPU_CR0_WP_MASK | 875 AC_ACCESS_WRITE_MASK; 876 ac_test_setup_pte(&at1, pool); 877 878 /* 879 * Here we write the ro user page when 880 * cr0.wp=0, then we execute it and SMEP 881 * fault should happen. 882 */ 883 err_prepare_andnot_wp = ac_test_do_access(&at1); 884 if (!err_prepare_andnot_wp) { 885 printf("%s: SMEP prepare fail\n", __FUNCTION__); 886 goto clean_up; 887 } 888 889 at1.flags &= ~AC_ACCESS_WRITE_MASK; 890 at1.flags |= AC_ACCESS_FETCH_MASK; 891 ac_set_expected_status(&at1); 892 err_smep_andnot_wp = ac_test_do_access(&at1); 893 894 clean_up: 895 set_cr4_smep(0); 896 897 if (!err_prepare_andnot_wp) 898 goto err; 899 if (!err_smep_andnot_wp) { 900 printf("%s: check SMEP without wp fail\n", __FUNCTION__); 901 goto err; 902 } 903 return 1; 904 905 err: 906 return 0; 907 } 908 909 int ac_test_exec(ac_test_t *at, ac_pool_t *pool) 910 { 911 int r; 912 913 if (verbose) { 914 ac_test_show(at); 915 } 916 ac_test_setup_pte(at, pool); 917 r = ac_test_do_access(at); 918 return r; 919 } 920 921 typedef int (*ac_test_fn)(ac_pool_t *pool); 922 const ac_test_fn ac_test_cases[] = 923 { 924 corrupt_hugepage_triger, 925 check_pfec_on_prefetch_pte, 926 check_large_pte_dirty_for_nowp, 927 check_smep_andnot_wp 928 }; 929 930 int ac_test_run(void) 931 { 932 ac_test_t at; 933 ac_pool_t pool; 934 int i, tests, successes; 935 936 printf("run\n"); 937 tests = successes = 0; 938 939 if (cpuid_7_ecx & (1 << 3)) { 940 set_cr4_pke(1); 941 set_cr4_pke(0); 942 /* Now PKRU = 0xFFFFFFFF. */ 943 } else { 944 unsigned long cr4 = read_cr4(); 945 tests++; 946 if (write_cr4_checking(cr4 | X86_CR4_PKE) == GP_VECTOR) { 947 successes++; 948 invalid_mask |= AC_PKU_AD_MASK; 949 invalid_mask |= AC_PKU_WD_MASK; 950 invalid_mask |= AC_PKU_PKEY_MASK; 951 invalid_mask |= AC_CPU_CR4_PKE_MASK; 952 printf("CR4.PKE not available, disabling PKE tests\n"); 953 } else { 954 printf("Set PKE in CR4 - expect #GP: FAIL!\n"); 955 set_cr4_pke(0); 956 } 957 } 958 959 if (!(cpuid_7_ebx & (1 << 7))) { 960 unsigned long cr4 = read_cr4(); 961 tests++; 962 if (write_cr4_checking(cr4 | CR4_SMEP_MASK) == GP_VECTOR) { 963 successes++; 964 invalid_mask |= AC_CPU_CR4_SMEP_MASK; 965 printf("CR4.SMEP not available, disabling SMEP tests\n"); 966 } else { 967 printf("Set SMEP in CR4 - expect #GP: FAIL!\n"); 968 set_cr4_smep(0); 969 } 970 } 971 972 ac_env_int(&pool); 973 ac_test_init(&at, (void *)(0x123400000000 + 16 * smp_id())); 974 do { 975 ++tests; 976 successes += ac_test_exec(&at, &pool); 977 } while (ac_test_bump(&at)); 978 979 for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) { 980 ++tests; 981 successes += ac_test_cases[i](&pool); 982 } 983 984 printf("\n%d tests, %d failures\n", tests, tests - successes); 985 986 return successes == tests; 987 } 988 989 int main() 990 { 991 int r; 992 993 setup_idt(); 994 995 cpuid_7_ebx = cpuid(7).b; 996 cpuid_7_ecx = cpuid(7).c; 997 998 printf("starting test\n\n"); 999 r = ac_test_run(); 1000 return r ? 0 : 1; 1001 } 1002