1 2 #include "libcflat.h" 3 #include "desc.h" 4 #include "processor.h" 5 6 #define smp_id() 0 7 8 #define true 1 9 #define false 0 10 11 static _Bool verbose = false; 12 13 typedef unsigned long pt_element_t; 14 15 #define PAGE_SIZE ((pt_element_t)4096) 16 #define PAGE_MASK (~(PAGE_SIZE-1)) 17 18 #define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 40) - 1) & PAGE_MASK)) 19 #define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(1ull << 21)) 20 21 #define PT_PRESENT_MASK ((pt_element_t)1 << 0) 22 #define PT_WRITABLE_MASK ((pt_element_t)1 << 1) 23 #define PT_USER_MASK ((pt_element_t)1 << 2) 24 #define PT_ACCESSED_MASK ((pt_element_t)1 << 5) 25 #define PT_DIRTY_MASK ((pt_element_t)1 << 6) 26 #define PT_PSE_MASK ((pt_element_t)1 << 7) 27 #define PT_NX_MASK ((pt_element_t)1 << 63) 28 29 #define CR0_WP_MASK (1UL << 16) 30 #define CR4_SMEP_MASK (1UL << 20) 31 32 #define PFERR_PRESENT_MASK (1U << 0) 33 #define PFERR_WRITE_MASK (1U << 1) 34 #define PFERR_USER_MASK (1U << 2) 35 #define PFERR_RESERVED_MASK (1U << 3) 36 #define PFERR_FETCH_MASK (1U << 4) 37 38 #define MSR_EFER 0xc0000080 39 #define EFER_NX_MASK (1ull << 11) 40 41 #define PT_INDEX(address, level) \ 42 ((address) >> (12 + ((level)-1) * 9)) & 511 43 44 /* 45 * page table access check tests 46 */ 47 48 enum { 49 AC_PTE_PRESENT, 50 AC_PTE_WRITABLE, 51 AC_PTE_USER, 52 AC_PTE_ACCESSED, 53 AC_PTE_DIRTY, 54 AC_PTE_NX, 55 AC_PTE_BIT51, 56 57 AC_PDE_PRESENT, 58 AC_PDE_WRITABLE, 59 AC_PDE_USER, 60 AC_PDE_ACCESSED, 61 AC_PDE_DIRTY, 62 AC_PDE_PSE, 63 AC_PDE_NX, 64 AC_PDE_BIT51, 65 AC_PDE_BIT13, 66 67 AC_ACCESS_USER, 68 AC_ACCESS_WRITE, 69 AC_ACCESS_FETCH, 70 AC_ACCESS_TWICE, 71 // AC_ACCESS_PTE, 72 73 AC_CPU_EFER_NX, 74 AC_CPU_CR0_WP, 75 AC_CPU_CR4_SMEP, 76 77 NR_AC_FLAGS 78 }; 79 80 const char *ac_names[] = { 81 [AC_PTE_PRESENT] = "pte.p", 82 [AC_PTE_ACCESSED] = "pte.a", 83 [AC_PTE_WRITABLE] = "pte.rw", 84 [AC_PTE_USER] = "pte.user", 85 [AC_PTE_DIRTY] = "pte.d", 86 [AC_PTE_NX] = "pte.nx", 87 [AC_PTE_BIT51] = "pte.51", 88 [AC_PDE_PRESENT] = "pde.p", 89 [AC_PDE_ACCESSED] = "pde.a", 90 [AC_PDE_WRITABLE] = "pde.rw", 91 [AC_PDE_USER] = "pde.user", 92 [AC_PDE_DIRTY] = "pde.d", 93 [AC_PDE_PSE] = "pde.pse", 94 [AC_PDE_NX] = "pde.nx", 95 [AC_PDE_BIT51] = "pde.51", 96 [AC_PDE_BIT13] = "pde.13", 97 [AC_ACCESS_WRITE] = "write", 98 [AC_ACCESS_USER] = "user", 99 [AC_ACCESS_FETCH] = "fetch", 100 [AC_ACCESS_TWICE] = "twice", 101 [AC_CPU_EFER_NX] = "efer.nx", 102 [AC_CPU_CR0_WP] = "cr0.wp", 103 [AC_CPU_CR4_SMEP] = "cr4.smep", 104 }; 105 106 static inline void *va(pt_element_t phys) 107 { 108 return (void *)phys; 109 } 110 111 typedef struct { 112 pt_element_t pt_pool; 113 unsigned pt_pool_size; 114 unsigned pt_pool_current; 115 } ac_pool_t; 116 117 typedef struct { 118 unsigned flags[NR_AC_FLAGS]; 119 void *virt; 120 pt_element_t phys; 121 pt_element_t *ptep; 122 pt_element_t expected_pte; 123 pt_element_t *pdep; 124 pt_element_t expected_pde; 125 pt_element_t ignore_pde; 126 int expected_fault; 127 unsigned expected_error; 128 } ac_test_t; 129 130 typedef struct { 131 unsigned short limit; 132 unsigned long linear_addr; 133 } __attribute__((packed)) descriptor_table_t; 134 135 136 static void ac_test_show(ac_test_t *at); 137 138 int write_cr4_checking(unsigned long val) 139 { 140 asm volatile(ASM_TRY("1f") 141 "mov %0,%%cr4\n\t" 142 "1:": : "r" (val)); 143 return exception_vector(); 144 } 145 146 void set_cr0_wp(int wp) 147 { 148 unsigned long cr0 = read_cr0(); 149 150 cr0 &= ~CR0_WP_MASK; 151 if (wp) 152 cr0 |= CR0_WP_MASK; 153 write_cr0(cr0); 154 } 155 156 void set_cr4_smep(int smep) 157 { 158 unsigned long cr4 = read_cr4(); 159 160 cr4 &= ~CR4_SMEP_MASK; 161 if (smep) 162 cr4 |= CR4_SMEP_MASK; 163 write_cr4(cr4); 164 } 165 166 void set_efer_nx(int nx) 167 { 168 unsigned long long efer; 169 170 efer = rdmsr(MSR_EFER); 171 efer &= ~EFER_NX_MASK; 172 if (nx) 173 efer |= EFER_NX_MASK; 174 wrmsr(MSR_EFER, efer); 175 } 176 177 static void ac_env_int(ac_pool_t *pool) 178 { 179 setup_idt(); 180 181 extern char page_fault, kernel_entry; 182 set_idt_entry(14, &page_fault, 0); 183 set_idt_entry(0x20, &kernel_entry, 3); 184 185 pool->pt_pool = 33 * 1024 * 1024; 186 pool->pt_pool_size = 120 * 1024 * 1024 - pool->pt_pool; 187 pool->pt_pool_current = 0; 188 } 189 190 void ac_test_init(ac_test_t *at, void *virt) 191 { 192 wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NX_MASK); 193 set_cr0_wp(1); 194 for (int i = 0; i < NR_AC_FLAGS; ++i) 195 at->flags[i] = 0; 196 at->virt = virt; 197 at->phys = 32 * 1024 * 1024; 198 } 199 200 int ac_test_bump_one(ac_test_t *at) 201 { 202 for (int i = 0; i < NR_AC_FLAGS; ++i) 203 if (!at->flags[i]) { 204 at->flags[i] = 1; 205 return 1; 206 } else 207 at->flags[i] = 0; 208 return 0; 209 } 210 211 _Bool ac_test_legal(ac_test_t *at) 212 { 213 if (at->flags[AC_ACCESS_FETCH] && at->flags[AC_ACCESS_WRITE]) 214 return false; 215 216 /* 217 * Since we convert current page to kernel page when cr4.smep=1, 218 * we can't switch to user mode. 219 */ 220 if (at->flags[AC_ACCESS_USER] && at->flags[AC_CPU_CR4_SMEP]) 221 return false; 222 223 /* 224 * pde.bit13 checks handling of reserved bits in largepage PDEs. It is 225 * meaningless if there is a PTE. 226 */ 227 if (!at->flags[AC_PDE_PSE] && at->flags[AC_PDE_BIT13]) 228 return false; 229 230 return true; 231 } 232 233 int ac_test_bump(ac_test_t *at) 234 { 235 int ret; 236 237 ret = ac_test_bump_one(at); 238 while (ret && !ac_test_legal(at)) 239 ret = ac_test_bump_one(at); 240 return ret; 241 } 242 243 pt_element_t ac_test_alloc_pt(ac_pool_t *pool) 244 { 245 pt_element_t ret = pool->pt_pool + pool->pt_pool_current; 246 pool->pt_pool_current += PAGE_SIZE; 247 return ret; 248 } 249 250 _Bool ac_test_enough_room(ac_pool_t *pool) 251 { 252 return pool->pt_pool_current + 4 * PAGE_SIZE <= pool->pt_pool_size; 253 } 254 255 void ac_test_reset_pt_pool(ac_pool_t *pool) 256 { 257 pool->pt_pool_current = 0; 258 } 259 260 void ac_set_expected_status(ac_test_t *at) 261 { 262 int pde_valid, pte_valid; 263 264 invlpg(at->virt); 265 266 if (at->ptep) 267 at->expected_pte = *at->ptep; 268 at->expected_pde = *at->pdep; 269 at->ignore_pde = 0; 270 at->expected_fault = 0; 271 at->expected_error = PFERR_PRESENT_MASK; 272 273 pde_valid = at->flags[AC_PDE_PRESENT] 274 && !at->flags[AC_PDE_BIT51] && !at->flags[AC_PDE_BIT13] 275 && !(at->flags[AC_PDE_NX] && !at->flags[AC_CPU_EFER_NX]); 276 pte_valid = pde_valid 277 && at->flags[AC_PTE_PRESENT] 278 && !at->flags[AC_PTE_BIT51] 279 && !(at->flags[AC_PTE_NX] && !at->flags[AC_CPU_EFER_NX]); 280 if (at->flags[AC_ACCESS_TWICE]) { 281 if (pde_valid) { 282 at->expected_pde |= PT_ACCESSED_MASK; 283 if (pte_valid) 284 at->expected_pte |= PT_ACCESSED_MASK; 285 } 286 } 287 288 if (at->flags[AC_ACCESS_USER]) 289 at->expected_error |= PFERR_USER_MASK; 290 291 if (at->flags[AC_ACCESS_WRITE]) 292 at->expected_error |= PFERR_WRITE_MASK; 293 294 if (at->flags[AC_ACCESS_FETCH]) 295 at->expected_error |= PFERR_FETCH_MASK; 296 297 if (!at->flags[AC_PDE_PRESENT]) { 298 at->expected_fault = 1; 299 at->expected_error &= ~PFERR_PRESENT_MASK; 300 } else if (!pde_valid) { 301 at->expected_fault = 1; 302 at->expected_error |= PFERR_RESERVED_MASK; 303 } 304 305 if (at->flags[AC_ACCESS_USER] && !at->flags[AC_PDE_USER]) 306 at->expected_fault = 1; 307 308 if (at->flags[AC_ACCESS_WRITE] 309 && !at->flags[AC_PDE_WRITABLE] 310 && (at->flags[AC_CPU_CR0_WP] || at->flags[AC_ACCESS_USER])) 311 at->expected_fault = 1; 312 313 if (at->flags[AC_ACCESS_FETCH] && at->flags[AC_PDE_NX]) 314 at->expected_fault = 1; 315 316 if (!at->flags[AC_PDE_ACCESSED]) 317 at->ignore_pde = PT_ACCESSED_MASK; 318 319 if (!pde_valid) 320 goto fault; 321 322 if (!at->expected_fault) 323 at->expected_pde |= PT_ACCESSED_MASK; 324 325 if (at->flags[AC_PDE_PSE]) { 326 if (at->flags[AC_ACCESS_WRITE] && !at->expected_fault) 327 at->expected_pde |= PT_DIRTY_MASK; 328 if (at->flags[AC_ACCESS_FETCH] && at->flags[AC_PDE_USER] 329 && at->flags[AC_CPU_CR4_SMEP]) 330 at->expected_fault = 1; 331 goto no_pte; 332 } 333 334 if (!at->flags[AC_PTE_PRESENT]) { 335 at->expected_fault = 1; 336 at->expected_error &= ~PFERR_PRESENT_MASK; 337 } else if (!pte_valid) { 338 at->expected_fault = 1; 339 at->expected_error |= PFERR_RESERVED_MASK; 340 } 341 342 if (at->flags[AC_ACCESS_USER] && !at->flags[AC_PTE_USER]) 343 at->expected_fault = 1; 344 345 if (at->flags[AC_ACCESS_WRITE] 346 && !at->flags[AC_PTE_WRITABLE] 347 && (at->flags[AC_CPU_CR0_WP] || at->flags[AC_ACCESS_USER])) 348 at->expected_fault = 1; 349 350 if (at->flags[AC_ACCESS_FETCH] 351 && (at->flags[AC_PTE_NX] 352 || (at->flags[AC_CPU_CR4_SMEP] 353 && at->flags[AC_PDE_USER] 354 && at->flags[AC_PTE_USER]))) 355 at->expected_fault = 1; 356 357 if (at->expected_fault) 358 goto fault; 359 360 at->expected_pte |= PT_ACCESSED_MASK; 361 if (at->flags[AC_ACCESS_WRITE]) 362 at->expected_pte |= PT_DIRTY_MASK; 363 364 no_pte: 365 fault: 366 if (!at->expected_fault) 367 at->ignore_pde = 0; 368 if (!at->flags[AC_CPU_EFER_NX] && !at->flags[AC_CPU_CR4_SMEP]) 369 at->expected_error &= ~PFERR_FETCH_MASK; 370 } 371 372 void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, u64 pd_page, 373 u64 pt_page) 374 375 { 376 unsigned long root = read_cr3(); 377 378 if (!ac_test_enough_room(pool)) 379 ac_test_reset_pt_pool(pool); 380 381 at->ptep = 0; 382 for (int i = 4; i >= 1 && (i >= 2 || !at->flags[AC_PDE_PSE]); --i) { 383 pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); 384 unsigned index = PT_INDEX((unsigned long)at->virt, i); 385 pt_element_t pte = 0; 386 switch (i) { 387 case 4: 388 case 3: 389 pte = pd_page ? pd_page : ac_test_alloc_pt(pool); 390 pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; 391 break; 392 case 2: 393 if (!at->flags[AC_PDE_PSE]) 394 pte = pt_page ? pt_page : ac_test_alloc_pt(pool); 395 else { 396 pte = at->phys & PT_PSE_BASE_ADDR_MASK; 397 pte |= PT_PSE_MASK; 398 } 399 if (at->flags[AC_PDE_PRESENT]) 400 pte |= PT_PRESENT_MASK; 401 if (at->flags[AC_PDE_WRITABLE]) 402 pte |= PT_WRITABLE_MASK; 403 if (at->flags[AC_PDE_USER]) 404 pte |= PT_USER_MASK; 405 if (at->flags[AC_PDE_ACCESSED]) 406 pte |= PT_ACCESSED_MASK; 407 if (at->flags[AC_PDE_DIRTY]) 408 pte |= PT_DIRTY_MASK; 409 if (at->flags[AC_PDE_NX]) 410 pte |= PT_NX_MASK; 411 if (at->flags[AC_PDE_BIT51]) 412 pte |= 1ull << 51; 413 if (at->flags[AC_PDE_BIT13]) 414 pte |= 1ull << 13; 415 at->pdep = &vroot[index]; 416 break; 417 case 1: 418 pte = at->phys & PT_BASE_ADDR_MASK; 419 if (at->flags[AC_PTE_PRESENT]) 420 pte |= PT_PRESENT_MASK; 421 if (at->flags[AC_PTE_WRITABLE]) 422 pte |= PT_WRITABLE_MASK; 423 if (at->flags[AC_PTE_USER]) 424 pte |= PT_USER_MASK; 425 if (at->flags[AC_PTE_ACCESSED]) 426 pte |= PT_ACCESSED_MASK; 427 if (at->flags[AC_PTE_DIRTY]) 428 pte |= PT_DIRTY_MASK; 429 if (at->flags[AC_PTE_NX]) 430 pte |= PT_NX_MASK; 431 if (at->flags[AC_PTE_BIT51]) 432 pte |= 1ull << 51; 433 at->ptep = &vroot[index]; 434 break; 435 } 436 vroot[index] = pte; 437 root = vroot[index]; 438 } 439 ac_set_expected_status(at); 440 } 441 442 static void ac_test_setup_pte(ac_test_t *at, ac_pool_t *pool) 443 { 444 __ac_setup_specific_pages(at, pool, 0, 0); 445 } 446 447 static void ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, 448 u64 pd_page, u64 pt_page) 449 { 450 return __ac_setup_specific_pages(at, pool, pd_page, pt_page); 451 } 452 453 static void dump_mapping(ac_test_t *at) 454 { 455 unsigned long root = read_cr3(); 456 int i; 457 458 printf("Dump mapping: address: %llx\n", at->virt); 459 for (i = 4; i >= 1 && (i >= 2 || !at->flags[AC_PDE_PSE]); --i) { 460 pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); 461 unsigned index = PT_INDEX((unsigned long)at->virt, i); 462 pt_element_t pte = vroot[index]; 463 464 printf("------L%d: %llx\n", i, pte); 465 root = vroot[index]; 466 } 467 } 468 469 static void ac_test_check(ac_test_t *at, _Bool *success_ret, _Bool cond, 470 const char *fmt, ...) 471 { 472 va_list ap; 473 char buf[500]; 474 475 if (!*success_ret) { 476 return; 477 } 478 479 if (!cond) { 480 return; 481 } 482 483 *success_ret = false; 484 485 if (!verbose) { 486 ac_test_show(at); 487 } 488 489 va_start(ap, fmt); 490 vsnprintf(buf, sizeof(buf), fmt, ap); 491 va_end(ap); 492 printf("FAIL: %s\n", buf); 493 dump_mapping(at); 494 } 495 496 static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore) 497 { 498 pte1 &= ~ignore; 499 pte2 &= ~ignore; 500 return pte1 == pte2; 501 } 502 503 int ac_test_do_access(ac_test_t *at) 504 { 505 static unsigned unique = 42; 506 int fault = 0; 507 unsigned e; 508 static unsigned char user_stack[4096]; 509 unsigned long rsp; 510 _Bool success = true; 511 512 ++unique; 513 514 *((unsigned char *)at->phys) = 0xc3; /* ret */ 515 516 unsigned r = unique; 517 set_cr0_wp(at->flags[AC_CPU_CR0_WP]); 518 set_efer_nx(at->flags[AC_CPU_EFER_NX]); 519 if (at->flags[AC_CPU_CR4_SMEP] && !(cpuid(7).b & (1 << 7))) { 520 unsigned long cr4 = read_cr4(); 521 if (write_cr4_checking(cr4 | CR4_SMEP_MASK) == GP_VECTOR) 522 goto done; 523 printf("Set SMEP in CR4 - expect #GP: FAIL!\n"); 524 return 0; 525 } 526 set_cr4_smep(at->flags[AC_CPU_CR4_SMEP]); 527 528 if (at->flags[AC_ACCESS_TWICE]) { 529 asm volatile ( 530 "mov $fixed2, %%rsi \n\t" 531 "mov (%[addr]), %[reg] \n\t" 532 "fixed2:" 533 : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e) 534 : [addr]"r"(at->virt) 535 : "rsi" 536 ); 537 fault = 0; 538 } 539 540 asm volatile ("mov $fixed1, %%rsi \n\t" 541 "mov %%rsp, %%rdx \n\t" 542 "cmp $0, %[user] \n\t" 543 "jz do_access \n\t" 544 "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax \n\t" 545 "pushq %[user_ds] \n\t" 546 "pushq %[user_stack_top] \n\t" 547 "pushfq \n\t" 548 "pushq %[user_cs] \n\t" 549 "pushq $do_access \n\t" 550 "iretq \n" 551 "do_access: \n\t" 552 "cmp $0, %[fetch] \n\t" 553 "jnz 2f \n\t" 554 "cmp $0, %[write] \n\t" 555 "jnz 1f \n\t" 556 "mov (%[addr]), %[reg] \n\t" 557 "jmp done \n\t" 558 "1: mov %[reg], (%[addr]) \n\t" 559 "jmp done \n\t" 560 "2: call *%[addr] \n\t" 561 "done: \n" 562 "fixed1: \n" 563 "int %[kernel_entry_vector] \n\t" 564 "back_to_kernel:" 565 : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp) 566 : [addr]"r"(at->virt), 567 [write]"r"(at->flags[AC_ACCESS_WRITE]), 568 [user]"r"(at->flags[AC_ACCESS_USER]), 569 [fetch]"r"(at->flags[AC_ACCESS_FETCH]), 570 [user_ds]"i"(USER_DS), 571 [user_cs]"i"(USER_CS), 572 [user_stack_top]"r"(user_stack + sizeof user_stack), 573 [kernel_entry_vector]"i"(0x20) 574 : "rsi"); 575 576 asm volatile (".section .text.pf \n\t" 577 "page_fault: \n\t" 578 "pop %rbx \n\t" 579 "mov %rsi, (%rsp) \n\t" 580 "movl $1, %eax \n\t" 581 "iretq \n\t" 582 ".section .text"); 583 584 asm volatile (".section .text.entry \n\t" 585 "kernel_entry: \n\t" 586 "mov %rdx, %rsp \n\t" 587 "jmp back_to_kernel \n\t" 588 ".section .text"); 589 590 ac_test_check(at, &success, fault && !at->expected_fault, 591 "unexpected fault"); 592 ac_test_check(at, &success, !fault && at->expected_fault, 593 "unexpected access"); 594 ac_test_check(at, &success, fault && e != at->expected_error, 595 "error code %x expected %x", e, at->expected_error); 596 ac_test_check(at, &success, at->ptep && *at->ptep != at->expected_pte, 597 "pte %x expected %x", *at->ptep, at->expected_pte); 598 ac_test_check(at, &success, 599 !pt_match(*at->pdep, at->expected_pde, at->ignore_pde), 600 "pde %x expected %x", *at->pdep, at->expected_pde); 601 602 done: 603 if (success && verbose) { 604 printf("PASS\n"); 605 } 606 return success; 607 } 608 609 static void ac_test_show(ac_test_t *at) 610 { 611 char line[5000]; 612 613 *line = 0; 614 strcat(line, "test"); 615 for (int i = 0; i < NR_AC_FLAGS; ++i) 616 if (at->flags[i]) { 617 strcat(line, " "); 618 strcat(line, ac_names[i]); 619 } 620 strcat(line, ": "); 621 printf("%s", line); 622 } 623 624 /* 625 * This test case is used to triger the bug which is fixed by 626 * commit e09e90a5 in the kvm tree 627 */ 628 static int corrupt_hugepage_triger(ac_pool_t *pool) 629 { 630 ac_test_t at1, at2; 631 632 ac_test_init(&at1, (void *)(0x123400000000)); 633 ac_test_init(&at2, (void *)(0x666600000000)); 634 635 at2.flags[AC_CPU_CR0_WP] = 1; 636 at2.flags[AC_PDE_PSE] = 1; 637 at2.flags[AC_PDE_PRESENT] = 1; 638 ac_test_setup_pte(&at2, pool); 639 if (!ac_test_do_access(&at2)) 640 goto err; 641 642 at1.flags[AC_CPU_CR0_WP] = 1; 643 at1.flags[AC_PDE_PSE] = 1; 644 at1.flags[AC_PDE_WRITABLE] = 1; 645 at1.flags[AC_PDE_PRESENT] = 1; 646 ac_test_setup_pte(&at1, pool); 647 if (!ac_test_do_access(&at1)) 648 goto err; 649 650 at1.flags[AC_ACCESS_WRITE] = 1; 651 ac_set_expected_status(&at1); 652 if (!ac_test_do_access(&at1)) 653 goto err; 654 655 at2.flags[AC_ACCESS_WRITE] = 1; 656 ac_set_expected_status(&at2); 657 if (!ac_test_do_access(&at2)) 658 goto err; 659 660 return 1; 661 662 err: 663 printf("corrupt_hugepage_triger test fail\n"); 664 return 0; 665 } 666 667 /* 668 * This test case is used to triger the bug which is fixed by 669 * commit 3ddf6c06e13e in the kvm tree 670 */ 671 static int check_pfec_on_prefetch_pte(ac_pool_t *pool) 672 { 673 ac_test_t at1, at2; 674 675 ac_test_init(&at1, (void *)(0x123406001000)); 676 ac_test_init(&at2, (void *)(0x123406003000)); 677 678 at1.flags[AC_PDE_PRESENT] = 1; 679 at1.flags[AC_PTE_PRESENT] = 1; 680 ac_setup_specific_pages(&at1, pool, 30 * 1024 * 1024, 30 * 1024 * 1024); 681 682 at2.flags[AC_PDE_PRESENT] = 1; 683 at2.flags[AC_PTE_NX] = 1; 684 at2.flags[AC_PTE_PRESENT] = 1; 685 ac_setup_specific_pages(&at2, pool, 30 * 1024 * 1024, 30 * 1024 * 1024); 686 687 if (!ac_test_do_access(&at1)) { 688 printf("%s: prepare fail\n", __FUNCTION__); 689 goto err; 690 } 691 692 if (!ac_test_do_access(&at2)) { 693 printf("%s: check PFEC on prefetch pte path fail\n", 694 __FUNCTION__); 695 goto err; 696 } 697 698 return 1; 699 700 err: 701 return 0; 702 } 703 704 /* 705 * If the write-fault access is from supervisor and CR0.WP is not set on the 706 * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte 707 * and clears U bit. This is the chance that kvm can change pte access from 708 * readonly to writable. 709 * 710 * Unfortunately, the pte access is the access of 'direct' shadow page table, 711 * means direct sp.role.access = pte_access, then we will create a writable 712 * spte entry on the readonly shadow page table. It will cause Dirty bit is 713 * not tracked when two guest ptes point to the same large page. Note, it 714 * does not have other impact except Dirty bit since cr0.wp is encoded into 715 * sp.role. 716 * 717 * Note: to trigger this bug, hugepage should be disabled on host. 718 */ 719 static int check_large_pte_dirty_for_nowp(ac_pool_t *pool) 720 { 721 ac_test_t at1, at2; 722 723 ac_test_init(&at1, (void *)(0x123403000000)); 724 ac_test_init(&at2, (void *)(0x666606000000)); 725 726 at2.flags[AC_PDE_PRESENT] = 1; 727 at2.flags[AC_PDE_PSE] = 1; 728 729 ac_test_setup_pte(&at2, pool); 730 if (!ac_test_do_access(&at2)) { 731 printf("%s: read on the first mapping fail.\n", __FUNCTION__); 732 goto err; 733 } 734 735 at1.flags[AC_PDE_PRESENT] = 1; 736 at1.flags[AC_PDE_PSE] = 1; 737 at1.flags[AC_ACCESS_WRITE] = 1; 738 739 ac_test_setup_pte(&at1, pool); 740 if (!ac_test_do_access(&at1)) { 741 printf("%s: write on the second mapping fail.\n", __FUNCTION__); 742 goto err; 743 } 744 745 at2.flags[AC_ACCESS_WRITE] = 1; 746 ac_set_expected_status(&at2); 747 if (!ac_test_do_access(&at2)) { 748 printf("%s: write on the first mapping fail.\n", __FUNCTION__); 749 goto err; 750 } 751 752 return 1; 753 754 err: 755 return 0; 756 } 757 758 static int check_smep_andnot_wp(ac_pool_t *pool) 759 { 760 ac_test_t at1; 761 int err_prepare_andnot_wp, err_smep_andnot_wp; 762 extern u64 ptl2[]; 763 764 ac_test_init(&at1, (void *)(0x123406001000)); 765 766 at1.flags[AC_PDE_PRESENT] = 1; 767 at1.flags[AC_PTE_PRESENT] = 1; 768 at1.flags[AC_PDE_USER] = 1; 769 at1.flags[AC_PTE_USER] = 1; 770 at1.flags[AC_PDE_ACCESSED] = 1; 771 at1.flags[AC_PTE_ACCESSED] = 1; 772 at1.flags[AC_CPU_CR4_SMEP] = 1; 773 at1.flags[AC_CPU_CR0_WP] = 0; 774 at1.flags[AC_ACCESS_WRITE] = 1; 775 ac_test_setup_pte(&at1, pool); 776 ptl2[2] -= 0x4; 777 778 /* 779 * Here we write the ro user page when 780 * cr0.wp=0, then we execute it and SMEP 781 * fault should happen. 782 */ 783 err_prepare_andnot_wp = ac_test_do_access(&at1); 784 if (!err_prepare_andnot_wp) { 785 printf("%s: SMEP prepare fail\n", __FUNCTION__); 786 goto clean_up; 787 } 788 789 at1.flags[AC_ACCESS_WRITE] = 0; 790 at1.flags[AC_ACCESS_FETCH] = 1; 791 ac_set_expected_status(&at1); 792 err_smep_andnot_wp = ac_test_do_access(&at1); 793 794 clean_up: 795 set_cr4_smep(0); 796 ptl2[2] += 0x4; 797 798 if (!err_prepare_andnot_wp) 799 goto err; 800 if (!err_smep_andnot_wp) { 801 printf("%s: check SMEP without wp fail\n", __FUNCTION__); 802 goto err; 803 } 804 return 1; 805 806 err: 807 return 0; 808 } 809 810 int ac_test_exec(ac_test_t *at, ac_pool_t *pool) 811 { 812 int r; 813 814 if (verbose) { 815 ac_test_show(at); 816 } 817 ac_test_setup_pte(at, pool); 818 r = ac_test_do_access(at); 819 return r; 820 } 821 822 typedef int (*ac_test_fn)(ac_pool_t *pool); 823 const ac_test_fn ac_test_cases[] = 824 { 825 corrupt_hugepage_triger, 826 check_pfec_on_prefetch_pte, 827 check_large_pte_dirty_for_nowp, 828 check_smep_andnot_wp 829 }; 830 831 int ac_test_run(void) 832 { 833 ac_test_t at; 834 ac_pool_t pool; 835 int i, tests, successes; 836 extern u64 ptl2[]; 837 838 printf("run\n"); 839 tests = successes = 0; 840 ac_env_int(&pool); 841 ac_test_init(&at, (void *)(0x123400000000 + 16 * smp_id())); 842 do { 843 if (at.flags[AC_CPU_CR4_SMEP] && (ptl2[2] & 0x4)) 844 ptl2[2] -= 0x4; 845 if (!at.flags[AC_CPU_CR4_SMEP] && !(ptl2[2] & 0x4)) { 846 set_cr4_smep(0); 847 ptl2[2] += 0x4; 848 } 849 850 ++tests; 851 successes += ac_test_exec(&at, &pool); 852 } while (ac_test_bump(&at)); 853 854 set_cr4_smep(0); 855 ptl2[2] += 0x4; 856 857 for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) { 858 ++tests; 859 successes += ac_test_cases[i](&pool); 860 } 861 862 printf("\n%d tests, %d failures\n", tests, tests - successes); 863 864 return successes == tests; 865 } 866 867 int main() 868 { 869 int r; 870 871 printf("starting test\n\n"); 872 r = ac_test_run(); 873 return r ? 0 : 1; 874 } 875