xref: /kvm-unit-tests/x86/access.c (revision a322d4c597bb7a4de7985e7b51b80504f7e4fdda)
1 
2 #include "libcflat.h"
3 #include "desc.h"
4 #include "processor.h"
5 
6 #define smp_id() 0
7 
8 #define true 1
9 #define false 0
10 
11 static _Bool verbose = false;
12 
13 typedef unsigned long pt_element_t;
14 
15 #define PAGE_SIZE ((pt_element_t)4096)
16 #define PAGE_MASK (~(PAGE_SIZE-1))
17 
18 #define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 40) - 1) & PAGE_MASK))
19 #define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(1ull << 21))
20 
21 #define PT_PRESENT_MASK    ((pt_element_t)1 << 0)
22 #define PT_WRITABLE_MASK   ((pt_element_t)1 << 1)
23 #define PT_USER_MASK       ((pt_element_t)1 << 2)
24 #define PT_ACCESSED_MASK   ((pt_element_t)1 << 5)
25 #define PT_DIRTY_MASK      ((pt_element_t)1 << 6)
26 #define PT_PSE_MASK        ((pt_element_t)1 << 7)
27 #define PT_NX_MASK         ((pt_element_t)1 << 63)
28 
29 #define CR0_WP_MASK (1UL << 16)
30 #define CR4_SMEP_MASK (1UL << 20)
31 
32 #define PFERR_PRESENT_MASK (1U << 0)
33 #define PFERR_WRITE_MASK (1U << 1)
34 #define PFERR_USER_MASK (1U << 2)
35 #define PFERR_RESERVED_MASK (1U << 3)
36 #define PFERR_FETCH_MASK (1U << 4)
37 
38 #define MSR_EFER 0xc0000080
39 #define EFER_NX_MASK		(1ull << 11)
40 
/*
 * Index of the paging-structure entry for @address at @level
 * (1 = PTE, 2 = PDE, ...).  Each 4-level paging level decodes 9 bits of
 * the virtual address starting at bit 12.
 *
 * The whole expansion is parenthesized: the original leaked a trailing
 * `& 511`, so e.g. `PT_INDEX(a, 1) == n` parsed as `(a >> 12) & (511 == n)`.
 */
#define PT_INDEX(address, level)       \
       (((address) >> (12 + ((level)-1) * 9)) & 511)
43 
44 /*
45  * page table access check tests
46  */
47 
/*
 * One flag per controllable input of a test case: pte/pde bits to set,
 * the kind of access to perform, and CPU control state.  A test is one
 * combination of these flags; ac_test_bump() enumerates all legal
 * combinations like an odometer.
 */
enum {
    /* page-table entry (level 1) bits */
    AC_PTE_PRESENT,
    AC_PTE_WRITABLE,
    AC_PTE_USER,
    AC_PTE_ACCESSED,
    AC_PTE_DIRTY,
    AC_PTE_NX,
    AC_PTE_BIT51,               /* reserved bit 51 */

    /* page-directory entry (level 2) bits */
    AC_PDE_PRESENT,
    AC_PDE_WRITABLE,
    AC_PDE_USER,
    AC_PDE_ACCESSED,
    AC_PDE_DIRTY,
    AC_PDE_PSE,                 /* 2M largepage */
    AC_PDE_NX,
    AC_PDE_BIT51,               /* reserved bit 51 */
    AC_PDE_BIT13,               /* reserved bit 13 of a largepage pde */

    /* kind of access performed */
    AC_ACCESS_USER,
    AC_ACCESS_WRITE,
    AC_ACCESS_FETCH,
    AC_ACCESS_TWICE,            /* do a priming read first */
    // AC_ACCESS_PTE,

    /* CPU state */
    AC_CPU_EFER_NX,
    AC_CPU_CR0_WP,
    AC_CPU_CR4_SMEP,

    NR_AC_FLAGS
};
79 
/* Human-readable name for each AC_* flag, used by ac_test_show(). */
const char *ac_names[] = {
    [AC_PTE_PRESENT] = "pte.p",
    [AC_PTE_ACCESSED] = "pte.a",
    [AC_PTE_WRITABLE] = "pte.rw",
    [AC_PTE_USER] = "pte.user",
    [AC_PTE_DIRTY] = "pte.d",
    [AC_PTE_NX] = "pte.nx",
    [AC_PTE_BIT51] = "pte.51",
    [AC_PDE_PRESENT] = "pde.p",
    [AC_PDE_ACCESSED] = "pde.a",
    [AC_PDE_WRITABLE] = "pde.rw",
    [AC_PDE_USER] = "pde.user",
    [AC_PDE_DIRTY] = "pde.d",
    [AC_PDE_PSE] = "pde.pse",
    [AC_PDE_NX] = "pde.nx",
    [AC_PDE_BIT51] = "pde.51",
    [AC_PDE_BIT13] = "pde.13",
    [AC_ACCESS_WRITE] = "write",
    [AC_ACCESS_USER] = "user",
    [AC_ACCESS_FETCH] = "fetch",
    [AC_ACCESS_TWICE] = "twice",
    [AC_CPU_EFER_NX] = "efer.nx",
    [AC_CPU_CR0_WP] = "cr0.wp",
    [AC_CPU_CR4_SMEP] = "cr4.smep",
};
105 
106 static inline void *va(pt_element_t phys)
107 {
108     return (void *)phys;
109 }
110 
/* Bump allocator handing out physical pages for page tables. */
typedef struct {
    pt_element_t pt_pool;       /* base physical address of the pool */
    unsigned pt_pool_size;      /* pool size in bytes */
    unsigned pt_pool_current;   /* byte offset of the next free page */
} ac_pool_t;
116 
/* One test case: its inputs (flags, addresses) and expected outcome. */
typedef struct {
    unsigned flags[NR_AC_FLAGS]; /* 0/1 per AC_* flag */
    void *virt;                  /* virtual address to access */
    pt_element_t phys;           /* physical page backing the mapping */
    pt_element_t *ptep;          /* installed pte, NULL for largepage tests */
    pt_element_t expected_pte;   /* pte value expected after the access */
    pt_element_t *pdep;          /* installed pde */
    pt_element_t expected_pde;   /* pde value expected after the access */
    pt_element_t ignore_pde;     /* pde bits excluded from the comparison */
    int expected_fault;          /* nonzero if a #PF is expected */
    unsigned expected_error;     /* expected #PF error code */
} ac_test_t;
129 
/* Memory operand layout used by lgdt/lidt/sgdt/sidt (limit + base). */
typedef struct {
    unsigned short limit;
    unsigned long linear_addr;
} __attribute__((packed)) descriptor_table_t;
134 
135 
136 static void ac_test_show(ac_test_t *at);
137 
/*
 * Write @val to CR4, trapping any resulting exception via ASM_TRY, and
 * return the vector of the exception the write raised (per the ASM_TRY /
 * exception_vector() convention in desc.h — presumably 0 when no
 * exception occurred; confirm against desc.h).
 */
int write_cr4_checking(unsigned long val)
{
    asm volatile(ASM_TRY("1f")
            "mov %0,%%cr4\n\t"
            "1:": : "r" (val));
    return exception_vector();
}
145 
146 void set_cr0_wp(int wp)
147 {
148     unsigned long cr0 = read_cr0();
149 
150     cr0 &= ~CR0_WP_MASK;
151     if (wp)
152 	cr0 |= CR0_WP_MASK;
153     write_cr0(cr0);
154 }
155 
156 void set_cr4_smep(int smep)
157 {
158     unsigned long cr4 = read_cr4();
159 
160     cr4 &= ~CR4_SMEP_MASK;
161     if (smep)
162 	cr4 |= CR4_SMEP_MASK;
163     write_cr4(cr4);
164 }
165 
166 void set_efer_nx(int nx)
167 {
168     unsigned long long efer;
169 
170     efer = rdmsr(MSR_EFER);
171     efer &= ~EFER_NX_MASK;
172     if (nx)
173 	efer |= EFER_NX_MASK;
174     wrmsr(MSR_EFER, efer);
175 }
176 
/*
 * One-time environment setup: install the #PF handler and the ring-3
 * re-entry gate (both defined in asm inside ac_test_do_access()), and
 * carve the 33MB..120MB physical range out as the page-table page pool.
 */
static void ac_env_int(ac_pool_t *pool)
{
    setup_idt();

    extern char page_fault, kernel_entry;
    set_idt_entry(14, &page_fault, 0);          /* #PF, DPL 0 */
    set_idt_entry(0x20, &kernel_entry, 3);      /* int 0x20 from user mode */

    pool->pt_pool = 33 * 1024 * 1024;
    pool->pt_pool_size = 120 * 1024 * 1024 - pool->pt_pool;
    pool->pt_pool_current = 0;
}
189 
190 void ac_test_init(ac_test_t *at, void *virt)
191 {
192     wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NX_MASK);
193     set_cr0_wp(1);
194     for (int i = 0; i < NR_AC_FLAGS; ++i)
195 	at->flags[i] = 0;
196     at->virt = virt;
197     at->phys = 32 * 1024 * 1024;
198 }
199 
200 int ac_test_bump_one(ac_test_t *at)
201 {
202     for (int i = 0; i < NR_AC_FLAGS; ++i)
203 	if (!at->flags[i]) {
204 	    at->flags[i] = 1;
205 	    return 1;
206 	} else
207 	    at->flags[i] = 0;
208     return 0;
209 }
210 
211 _Bool ac_test_legal(ac_test_t *at)
212 {
213     if (at->flags[AC_ACCESS_FETCH] && at->flags[AC_ACCESS_WRITE])
214 	return false;
215 
216     /*
217      * Since we convert current page to kernel page when cr4.smep=1,
218      * we can't switch to user mode.
219      */
220     if (at->flags[AC_ACCESS_USER] && at->flags[AC_CPU_CR4_SMEP])
221 	return false;
222 
223     /*
224      * pde.bit13 checks handling of reserved bits in largepage PDEs.  It is
225      * meaningless if there is a PTE.
226      */
227     if (!at->flags[AC_PDE_PSE] && at->flags[AC_PDE_BIT13])
228         return false;
229 
230     return true;
231 }
232 
233 int ac_test_bump(ac_test_t *at)
234 {
235     int ret;
236 
237     ret = ac_test_bump_one(at);
238     while (ret && !ac_test_legal(at))
239 	ret = ac_test_bump_one(at);
240     return ret;
241 }
242 
243 pt_element_t ac_test_alloc_pt(ac_pool_t *pool)
244 {
245     pt_element_t ret = pool->pt_pool + pool->pt_pool_current;
246     pool->pt_pool_current += PAGE_SIZE;
247     return ret;
248 }
249 
250 _Bool ac_test_enough_room(ac_pool_t *pool)
251 {
252     return pool->pt_pool_current + 4 * PAGE_SIZE <= pool->pt_pool_size;
253 }
254 
/* Recycle the page-table pool from the beginning. */
void ac_test_reset_pt_pool(ac_pool_t *pool)
{
    pool->pt_pool_current = 0;
}
259 
/*
 * Derive the expected outcome of the access described by at->flags:
 * whether a #PF should occur (at->expected_fault), the expected error
 * code (at->expected_error), and the A/D-bit state the pte/pde should
 * hold afterwards (at->expected_pte / at->expected_pde).  Must run after
 * the tables are written and before ac_test_do_access().
 */
void ac_set_expected_status(ac_test_t *at)
{
    int pde_valid, pte_valid;

    invlpg(at->virt);

    /* Start from the entries as installed; A/D bits are ORed in below. */
    if (at->ptep)
	at->expected_pte = *at->ptep;
    at->expected_pde = *at->pdep;
    at->ignore_pde = 0;
    at->expected_fault = 0;
    at->expected_error = PFERR_PRESENT_MASK;

    /*
     * An entry is usable by the page walker if it is present, has no
     * reserved bits set (bit 51; bit 13 in a largepage pde; NX while
     * EFER.NX is clear).
     */
    pde_valid = at->flags[AC_PDE_PRESENT]
        && !at->flags[AC_PDE_BIT51] && !at->flags[AC_PDE_BIT13]
        && !(at->flags[AC_PDE_NX] && !at->flags[AC_CPU_EFER_NX]);
    pte_valid = pde_valid
        && at->flags[AC_PTE_PRESENT]
        && !at->flags[AC_PTE_BIT51]
        && !(at->flags[AC_PTE_NX] && !at->flags[AC_CPU_EFER_NX]);
    if (at->flags[AC_ACCESS_TWICE]) {
	/* The priming read has already set the accessed bits. */
	if (pde_valid) {
	    at->expected_pde |= PT_ACCESSED_MASK;
	    if (pte_valid)
		at->expected_pte |= PT_ACCESSED_MASK;
	}
    }

    /* Error-code bits describing the access itself. */
    if (at->flags[AC_ACCESS_USER])
	at->expected_error |= PFERR_USER_MASK;

    if (at->flags[AC_ACCESS_WRITE])
	at->expected_error |= PFERR_WRITE_MASK;

    if (at->flags[AC_ACCESS_FETCH])
	at->expected_error |= PFERR_FETCH_MASK;

    /* pde-level faults: a not-present pde clears P in the error code. */
    if (!at->flags[AC_PDE_PRESENT]) {
	at->expected_fault = 1;
	at->expected_error &= ~PFERR_PRESENT_MASK;
    } else if (!pde_valid) {
        at->expected_fault = 1;
        at->expected_error |= PFERR_RESERVED_MASK;
    }

    if (at->flags[AC_ACCESS_USER] && !at->flags[AC_PDE_USER])
	at->expected_fault = 1;

    /* Supervisor writes ignore a read-only entry unless CR0.WP is set. */
    if (at->flags[AC_ACCESS_WRITE]
	&& !at->flags[AC_PDE_WRITABLE]
	&& (at->flags[AC_CPU_CR0_WP] || at->flags[AC_ACCESS_USER]))
	at->expected_fault = 1;

    if (at->flags[AC_ACCESS_FETCH] && at->flags[AC_PDE_NX])
	at->expected_fault = 1;

    /*
     * With pde.a initially clear, the A bit may or may not have been set
     * by the (possibly faulting) walk, so exclude it from the comparison
     * unless the access is expected to succeed (see below).
     */
    if (!at->flags[AC_PDE_ACCESSED])
        at->ignore_pde = PT_ACCESSED_MASK;

    if (!pde_valid)
	goto fault;

    if (!at->expected_fault)
        at->expected_pde |= PT_ACCESSED_MASK;

    if (at->flags[AC_PDE_PSE]) {
	/* Largepage: the pde is the final entry, no pte-level checks. */
	if (at->flags[AC_ACCESS_WRITE] && !at->expected_fault)
	    at->expected_pde |= PT_DIRTY_MASK;
	if (at->flags[AC_ACCESS_FETCH] && at->flags[AC_PDE_USER]
	    && at->flags[AC_CPU_CR4_SMEP])
	    at->expected_fault = 1;
	goto no_pte;
    }

    /* pte-level faults, mirroring the pde logic above. */
    if (!at->flags[AC_PTE_PRESENT]) {
	at->expected_fault = 1;
	at->expected_error &= ~PFERR_PRESENT_MASK;
    } else if (!pte_valid) {
        at->expected_fault = 1;
        at->expected_error |= PFERR_RESERVED_MASK;
    }

    if (at->flags[AC_ACCESS_USER] && !at->flags[AC_PTE_USER])
	at->expected_fault = 1;

    if (at->flags[AC_ACCESS_WRITE]
	&& !at->flags[AC_PTE_WRITABLE]
	&& (at->flags[AC_CPU_CR0_WP] || at->flags[AC_ACCESS_USER]))
	at->expected_fault = 1;

    /* SMEP faults fetches from user pages (user at both pde and pte). */
    if (at->flags[AC_ACCESS_FETCH]
	&& (at->flags[AC_PTE_NX]
	    || (at->flags[AC_CPU_CR4_SMEP]
		&& at->flags[AC_PDE_USER]
		&& at->flags[AC_PTE_USER])))
	at->expected_fault = 1;

    if (at->expected_fault)
	goto fault;

    /* Successful access: A (and D on write) must be set at the pte. */
    at->expected_pte |= PT_ACCESSED_MASK;
    if (at->flags[AC_ACCESS_WRITE])
	at->expected_pte |= PT_DIRTY_MASK;

no_pte:
fault:
    if (!at->expected_fault)
        at->ignore_pde = 0;
    /* Without NX and SMEP the fetch bit is not reported in the error code. */
    if (!at->flags[AC_CPU_EFER_NX] && !at->flags[AC_CPU_CR4_SMEP])
        at->expected_error &= ~PFERR_FETCH_MASK;
}
371 
/*
 * Build the 4-level page-table path for at->virt according to at->flags.
 * Levels 4 and 3 point to @pd_page when nonzero (otherwise a freshly
 * allocated pool page); level 2 points to @pt_page when nonzero, unless
 * AC_PDE_PSE turns it into a 2M largepage mapping at->phys directly.
 * Records at->pdep/at->ptep and finishes by computing the expected
 * outcome via ac_set_expected_status().
 */
void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, u64 pd_page,
			       u64 pt_page)

{
    unsigned long root = read_cr3();

    if (!ac_test_enough_room(pool))
	ac_test_reset_pt_pool(pool);

    at->ptep = 0;
    /* Walk down from PML4 (4) to PTE (1); stop at 2 for largepages. */
    for (int i = 4; i >= 1 && (i >= 2 || !at->flags[AC_PDE_PSE]); --i) {
	pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK);
	unsigned index = PT_INDEX((unsigned long)at->virt, i);
	pt_element_t pte = 0;
	switch (i) {
	case 4:
	case 3:
	    /* Upper levels: always present, writable, user. */
	    pte = pd_page ? pd_page : ac_test_alloc_pt(pool);
	    pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
	    break;
	case 2:
	    /* pde: either points to a pt page or maps a 2M largepage. */
	    if (!at->flags[AC_PDE_PSE])
		pte = pt_page ? pt_page : ac_test_alloc_pt(pool);
	    else {
		pte = at->phys & PT_PSE_BASE_ADDR_MASK;
		pte |= PT_PSE_MASK;
	    }
	    if (at->flags[AC_PDE_PRESENT])
		pte |= PT_PRESENT_MASK;
	    if (at->flags[AC_PDE_WRITABLE])
		pte |= PT_WRITABLE_MASK;
	    if (at->flags[AC_PDE_USER])
		pte |= PT_USER_MASK;
	    if (at->flags[AC_PDE_ACCESSED])
		pte |= PT_ACCESSED_MASK;
	    if (at->flags[AC_PDE_DIRTY])
		pte |= PT_DIRTY_MASK;
	    if (at->flags[AC_PDE_NX])
		pte |= PT_NX_MASK;
	    if (at->flags[AC_PDE_BIT51])
		pte |= 1ull << 51;
	    if (at->flags[AC_PDE_BIT13])
		pte |= 1ull << 13;
	    at->pdep = &vroot[index];
	    break;
	case 1:
	    /* pte: maps at->phys with the requested attribute bits. */
	    pte = at->phys & PT_BASE_ADDR_MASK;
	    if (at->flags[AC_PTE_PRESENT])
		pte |= PT_PRESENT_MASK;
	    if (at->flags[AC_PTE_WRITABLE])
		pte |= PT_WRITABLE_MASK;
	    if (at->flags[AC_PTE_USER])
		pte |= PT_USER_MASK;
	    if (at->flags[AC_PTE_ACCESSED])
		pte |= PT_ACCESSED_MASK;
	    if (at->flags[AC_PTE_DIRTY])
		pte |= PT_DIRTY_MASK;
	    if (at->flags[AC_PTE_NX])
		pte |= PT_NX_MASK;
	    if (at->flags[AC_PTE_BIT51])
		pte |= 1ull << 51;
	    at->ptep = &vroot[index];
	    break;
	}
	vroot[index] = pte;
	root = vroot[index];
    }
    ac_set_expected_status(at);
}
441 
/* Map at->virt using freshly allocated page-table pages at every level. */
static void ac_test_setup_pte(ac_test_t *at, ac_pool_t *pool)
{
	__ac_setup_specific_pages(at, pool, 0, 0);
}
446 
447 static void ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool,
448 				    u64 pd_page, u64 pt_page)
449 {
450 	return __ac_setup_specific_pages(at, pool, pd_page, pt_page);
451 }
452 
453 static void dump_mapping(ac_test_t *at)
454 {
455 	unsigned long root = read_cr3();
456 	int i;
457 
458 	printf("Dump mapping: address: %llx\n", at->virt);
459 	for (i = 4; i >= 1 && (i >= 2 || !at->flags[AC_PDE_PSE]); --i) {
460 		pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK);
461 		unsigned index = PT_INDEX((unsigned long)at->virt, i);
462 		pt_element_t pte = vroot[index];
463 
464 		printf("------L%d: %llx\n", i, pte);
465 		root = vroot[index];
466 	}
467 }
468 
469 static void ac_test_check(ac_test_t *at, _Bool *success_ret, _Bool cond,
470                           const char *fmt, ...)
471 {
472     va_list ap;
473     char buf[500];
474 
475     if (!*success_ret) {
476         return;
477     }
478 
479     if (!cond) {
480         return;
481     }
482 
483     *success_ret = false;
484 
485     if (!verbose) {
486         ac_test_show(at);
487     }
488 
489     va_start(ap, fmt);
490     vsnprintf(buf, sizeof(buf), fmt, ap);
491     va_end(ap);
492     printf("FAIL: %s\n", buf);
493     dump_mapping(at);
494 }
495 
496 static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore)
497 {
498     pte1 &= ~ignore;
499     pte2 &= ~ignore;
500     return pte1 == pte2;
501 }
502 
/*
 * Perform the access described by at->flags against at->virt and compare
 * the outcome (fault or not, error code, resulting pte/pde) against the
 * expectations computed by ac_set_expected_status().  Returns nonzero on
 * success.  Depends on the #PF and kernel-entry handlers defined in the
 * trailing asm sections below (installed into the IDT by ac_env_int()).
 */
int ac_test_do_access(ac_test_t *at)
{
    static unsigned unique = 42;
    int fault = 0;
    unsigned e;
    static unsigned char user_stack[4096];
    unsigned long rsp;
    _Bool success = true;

    ++unique;

    /* Plant a lone 'ret' at the target so fetch tests can call into it. */
    *((unsigned char *)at->phys) = 0xc3; /* ret */

    unsigned r = unique;
    set_cr0_wp(at->flags[AC_CPU_CR0_WP]);
    set_efer_nx(at->flags[AC_CPU_EFER_NX]);
    /* CPUID.7.EBX bit 7 advertises SMEP; without it CR4.SMEP must #GP. */
    if (at->flags[AC_CPU_CR4_SMEP] && !(cpuid(7).b & (1 << 7))) {
	unsigned long cr4 = read_cr4();
	if (write_cr4_checking(cr4 | CR4_SMEP_MASK) == GP_VECTOR)
		goto done;
	printf("Set SMEP in CR4 - expect #GP: FAIL!\n");
	return 0;
    }
    set_cr4_smep(at->flags[AC_CPU_CR4_SMEP]);

    /*
     * Optional priming read: may set A bits and/or fault; the fault
     * result is deliberately discarded.  %rsi holds the resume address
     * used by the #PF handler below.
     */
    if (at->flags[AC_ACCESS_TWICE]) {
	asm volatile (
	    "mov $fixed2, %%rsi \n\t"
	    "mov (%[addr]), %[reg] \n\t"
	    "fixed2:"
	    : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e)
	    : [addr]"r"(at->virt)
	    : "rsi"
	    );
	fault = 0;
    }

    /*
     * The access proper.  %rsi = fixup address for the #PF handler;
     * %rax = fault flag (set to 1 by the handler); %rbx = error code.
     * For user accesses an iretq frame is built to drop to ring 3, and
     * int 0x20 (kernel_entry below) returns to ring 0 afterwards.
     */
    asm volatile ("mov $fixed1, %%rsi \n\t"
		  "mov %%rsp, %%rdx \n\t"
		  "cmp $0, %[user] \n\t"
		  "jz do_access \n\t"
		  "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax  \n\t"
		  "pushq %[user_ds] \n\t"
		  "pushq %[user_stack_top] \n\t"
		  "pushfq \n\t"
		  "pushq %[user_cs] \n\t"
		  "pushq $do_access \n\t"
		  "iretq \n"
		  "do_access: \n\t"
		  "cmp $0, %[fetch] \n\t"
		  "jnz 2f \n\t"
		  "cmp $0, %[write] \n\t"
		  "jnz 1f \n\t"
		  "mov (%[addr]), %[reg] \n\t"
		  "jmp done \n\t"
		  "1: mov %[reg], (%[addr]) \n\t"
		  "jmp done \n\t"
		  "2: call *%[addr] \n\t"
		  "done: \n"
		  "fixed1: \n"
		  "int %[kernel_entry_vector] \n\t"
		  "back_to_kernel:"
		  : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp)
		  : [addr]"r"(at->virt),
		    [write]"r"(at->flags[AC_ACCESS_WRITE]),
		    [user]"r"(at->flags[AC_ACCESS_USER]),
		    [fetch]"r"(at->flags[AC_ACCESS_FETCH]),
		    [user_ds]"i"(USER_DS),
		    [user_cs]"i"(USER_CS),
		    [user_stack_top]"r"(user_stack + sizeof user_stack),
		    [kernel_entry_vector]"i"(0x20)
		  : "rsi");

    /*
     * #PF handler: pop the error code into %rbx, replace the saved rip
     * with the fixup address from %rsi, flag the fault in %eax.
     */
    asm volatile (".section .text.pf \n\t"
		  "page_fault: \n\t"
		  "pop %rbx \n\t"
		  "mov %rsi, (%rsp) \n\t"
		  "movl $1, %eax \n\t"
		  "iretq \n\t"
		  ".section .text");

    /* int 0x20 handler: restore the kernel stack saved in %rdx, resume. */
    asm volatile (".section .text.entry \n\t"
		  "kernel_entry: \n\t"
		  "mov %rdx, %rsp \n\t"
		  "jmp back_to_kernel \n\t"
		  ".section .text");

    /*
     * NOTE(review): the "%x" conversions below receive pt_element_t
     * (unsigned long) arguments — works with this environment's printf on
     * x86-64 but is a format/type mismatch; consider "%llx" with casts.
     */
    ac_test_check(at, &success, fault && !at->expected_fault,
                  "unexpected fault");
    ac_test_check(at, &success, !fault && at->expected_fault,
                  "unexpected access");
    ac_test_check(at, &success, fault && e != at->expected_error,
                  "error code %x expected %x", e, at->expected_error);
    ac_test_check(at, &success, at->ptep && *at->ptep != at->expected_pte,
                  "pte %x expected %x", *at->ptep, at->expected_pte);
    ac_test_check(at, &success,
                  !pt_match(*at->pdep, at->expected_pde, at->ignore_pde),
                  "pde %x expected %x", *at->pdep, at->expected_pde);

done:
    if (success && verbose) {
        printf("PASS\n");
    }
    return success;
}
608 
609 static void ac_test_show(ac_test_t *at)
610 {
611     char line[5000];
612 
613     *line = 0;
614     strcat(line, "test");
615     for (int i = 0; i < NR_AC_FLAGS; ++i)
616 	if (at->flags[i]) {
617 	    strcat(line, " ");
618 	    strcat(line, ac_names[i]);
619 	}
620     strcat(line, ": ");
621     printf("%s", line);
622 }
623 
/*
 * This test case is used to trigger the bug which is fixed by
 * commit e09e90a5 in the kvm tree: two largepage mappings, read first,
 * then written, with cr0.wp=1.
 */
static int corrupt_hugepage_triger(ac_pool_t *pool)
{
    ac_test_t at1, at2;

    ac_test_init(&at1, (void *)(0x123400000000));
    ac_test_init(&at2, (void *)(0x666600000000));

    /* Read a read-only largepage... */
    at2.flags[AC_CPU_CR0_WP] = 1;
    at2.flags[AC_PDE_PSE] = 1;
    at2.flags[AC_PDE_PRESENT] = 1;
    ac_test_setup_pte(&at2, pool);
    if (!ac_test_do_access(&at2))
        goto err;

    /* ...then read a writable largepage... */
    at1.flags[AC_CPU_CR0_WP] = 1;
    at1.flags[AC_PDE_PSE] = 1;
    at1.flags[AC_PDE_WRITABLE] = 1;
    at1.flags[AC_PDE_PRESENT] = 1;
    ac_test_setup_pte(&at1, pool);
    if (!ac_test_do_access(&at1))
        goto err;

    /* ...then write both, reusing the already-installed mappings. */
    at1.flags[AC_ACCESS_WRITE] = 1;
    ac_set_expected_status(&at1);
    if (!ac_test_do_access(&at1))
        goto err;

    at2.flags[AC_ACCESS_WRITE] = 1;
    ac_set_expected_status(&at2);
    if (!ac_test_do_access(&at2))
        goto err;

    return 1;

err:
    printf("corrupt_hugepage_triger test fail\n");
    return 0;
}
666 
/*
 * This test case is used to trigger the bug which is fixed by
 * commit 3ddf6c06e13e in the kvm tree: both mappings share the same
 * page-table pages, so accessing at1 may prefetch at2's NX pte; the
 * subsequent fetch-independent access to at2 must still report the
 * correct page-fault error code.
 */
static int check_pfec_on_prefetch_pte(ac_pool_t *pool)
{
	ac_test_t at1, at2;

	ac_test_init(&at1, (void *)(0x123406001000));
	ac_test_init(&at2, (void *)(0x123406003000));

	at1.flags[AC_PDE_PRESENT] = 1;
	at1.flags[AC_PTE_PRESENT] = 1;
	/* Same pd/pt physical pages for both mappings (30MB). */
	ac_setup_specific_pages(&at1, pool, 30 * 1024 * 1024, 30 * 1024 * 1024);

	at2.flags[AC_PDE_PRESENT] = 1;
	at2.flags[AC_PTE_NX] = 1;
	at2.flags[AC_PTE_PRESENT] = 1;
	ac_setup_specific_pages(&at2, pool, 30 * 1024 * 1024, 30 * 1024 * 1024);

	if (!ac_test_do_access(&at1)) {
		printf("%s: prepare fail\n", __FUNCTION__);
		goto err;
	}

	if (!ac_test_do_access(&at2)) {
		printf("%s: check PFEC on prefetch pte path fail\n",
			__FUNCTION__);
		goto err;
	}

	return 1;

err:
    return 0;
}
703 
/*
 * If the write-fault access is from supervisor and CR0.WP is not set on the
 * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte
 * and clears U bit. This is the chance that kvm can change pte access from
 * readonly to writable.
 *
 * Unfortunately, the pte access is the access of 'direct' shadow page table,
 * means direct sp.role.access = pte_access, then we will create a writable
 * spte entry on the readonly shadow page table. It will cause Dirty bit is
 * not tracked when two guest ptes point to the same large page. Note, it
 * does not have other impact except Dirty bit since cr0.wp is encoded into
 * sp.role.
 *
 * Note: to trigger this bug, hugepage should be disabled on host.
 */
static int check_large_pte_dirty_for_nowp(ac_pool_t *pool)
{
	ac_test_t at1, at2;

	ac_test_init(&at1, (void *)(0x123403000000));
	ac_test_init(&at2, (void *)(0x666606000000));

	/* Read through the first largepage mapping... */
	at2.flags[AC_PDE_PRESENT] = 1;
	at2.flags[AC_PDE_PSE] = 1;

	ac_test_setup_pte(&at2, pool);
	if (!ac_test_do_access(&at2)) {
		printf("%s: read on the first mapping fail.\n", __FUNCTION__);
		goto err;
	}

	/* ...write through a second largepage mapping of the same page... */
	at1.flags[AC_PDE_PRESENT] = 1;
	at1.flags[AC_PDE_PSE] = 1;
	at1.flags[AC_ACCESS_WRITE] = 1;

	ac_test_setup_pte(&at1, pool);
	if (!ac_test_do_access(&at1)) {
		printf("%s: write on the second mapping fail.\n", __FUNCTION__);
		goto err;
	}

	/* ...then write through the first: the D bit must be tracked. */
	at2.flags[AC_ACCESS_WRITE] = 1;
	ac_set_expected_status(&at2);
	if (!ac_test_do_access(&at2)) {
		printf("%s: write on the first mapping fail.\n", __FUNCTION__);
		goto err;
	}

	return 1;

err:
	return 0;
}
757 
/*
 * Check that SMEP faults a fetch from a user page even when CR0.WP=0
 * previously let a supervisor write "upgrade" the read-only user pte.
 */
static int check_smep_andnot_wp(ac_pool_t *pool)
{
	ac_test_t at1;
	int err_prepare_andnot_wp, err_smep_andnot_wp;
	/*
	 * ptl2 is defined by the startup code; entry 2 appears to be the
	 * largepage covering the test's own code, and bit 2 its user bit
	 * (cleared so SMEP does not fault kernel execution) — confirm
	 * against the boot page tables.
	 */
	extern u64 ptl2[];

	ac_test_init(&at1, (void *)(0x123406001000));

	at1.flags[AC_PDE_PRESENT] = 1;
	at1.flags[AC_PTE_PRESENT] = 1;
	at1.flags[AC_PDE_USER] = 1;
	at1.flags[AC_PTE_USER] = 1;
	at1.flags[AC_PDE_ACCESSED] = 1;
	at1.flags[AC_PTE_ACCESSED] = 1;
	at1.flags[AC_CPU_CR4_SMEP] = 1;
	at1.flags[AC_CPU_CR0_WP] = 0;
	at1.flags[AC_ACCESS_WRITE] = 1;
	ac_test_setup_pte(&at1, pool);
	ptl2[2] -= 0x4;

	/*
	 * Here we write the ro user page when
	 * cr0.wp=0, then we execute it and SMEP
	 * fault should happen.
	 */
	err_prepare_andnot_wp = ac_test_do_access(&at1);
	if (!err_prepare_andnot_wp) {
		printf("%s: SMEP prepare fail\n", __FUNCTION__);
		goto clean_up;
	}

	at1.flags[AC_ACCESS_WRITE] = 0;
	at1.flags[AC_ACCESS_FETCH] = 1;
	ac_set_expected_status(&at1);
	err_smep_andnot_wp = ac_test_do_access(&at1);

clean_up:
	/* Restore SMEP-off state and the user bit cleared above. */
	set_cr4_smep(0);
	ptl2[2] += 0x4;

	if (!err_prepare_andnot_wp)
		goto err;
	if (!err_smep_andnot_wp) {
		printf("%s: check SMEP without wp fail\n", __FUNCTION__);
		goto err;
	}
	return 1;

err:
	return 0;
}
809 
810 int ac_test_exec(ac_test_t *at, ac_pool_t *pool)
811 {
812     int r;
813 
814     if (verbose) {
815         ac_test_show(at);
816     }
817     ac_test_setup_pte(at, pool);
818     r = ac_test_do_access(at);
819     return r;
820 }
821 
typedef int (*ac_test_fn)(ac_pool_t *pool);
/* Targeted regression cases, run after the exhaustive flag sweep. */
const ac_test_fn ac_test_cases[] =
{
	corrupt_hugepage_triger,
	check_pfec_on_prefetch_pte,
	check_large_pte_dirty_for_nowp,
	check_smep_andnot_wp
};
830 
/*
 * Top-level driver: exhaustively run every legal AC_* flag combination,
 * then the targeted regression cases.  Returns nonzero when every test
 * passed.
 */
int ac_test_run(void)
{
    ac_test_t at;
    ac_pool_t pool;
    int i, tests, successes;
    /*
     * ptl2[2] bit 2 looks like the user bit of the boot largepage
     * covering the test code (see check_smep_andnot_wp) — cleared while
     * SMEP tests run so kernel fetches do not fault; confirm against the
     * startup page tables.
     */
    extern u64 ptl2[];

    printf("run\n");
    tests = successes = 0;
    ac_env_int(&pool);
    ac_test_init(&at, (void *)(0x123400000000 + 16 * smp_id()));
    do {
	if (at.flags[AC_CPU_CR4_SMEP] && (ptl2[2] & 0x4))
		ptl2[2] -= 0x4;
	if (!at.flags[AC_CPU_CR4_SMEP] && !(ptl2[2] & 0x4)) {
		set_cr4_smep(0);
		ptl2[2] += 0x4;
	}

	++tests;
	successes += ac_test_exec(&at, &pool);
    } while (ac_test_bump(&at));

    /* Restore SMEP-off state and the user bit for the follow-on cases. */
    set_cr4_smep(0);
    ptl2[2] += 0x4;

    for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) {
	++tests;
	successes += ac_test_cases[i](&pool);
    }

    printf("\n%d tests, %d failures\n", tests, tests - successes);

    return successes == tests;
}
866 
/*
 * Entry point: run the full access-test matrix plus the regression
 * cases.  Exit status 0 on complete success, 1 on any failure.
 */
int main(void)
{
    printf("starting test\n\n");

    return ac_test_run() ? 0 : 1;
}
875