xref: /kvm-unit-tests/x86/access.c (revision 9a008986caac35c1120b8552eb610b7793fe613a)
1 
2 #include "libcflat.h"
3 #include "desc.h"
4 #include "processor.h"
5 #include "asm/page.h"
6 
/* This test only ever runs as CPU 0; used by ac_test_run() to offset the test address. */
#define smp_id() 0

#define true 1
#define false 0

/* When set, every test combination is printed along with its PASS line. */
static _Bool verbose = false;

/* One page-table entry at any level of the walk. */
typedef unsigned long pt_element_t;
/* CPUID leaf 7 EBX/ECX, filled in by main(); bit 7 of EBX and bit 3 of ECX are tested. */
static int cpuid_7_ebx;
static int cpuid_7_ecx;
/* AC_*_MASK bits that ac_test_bump_one() must never set (features found unavailable). */
static int invalid_mask;

/* Physical-address bits of an entry: bits 12..39. */
#define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 40) - 1) & PAGE_MASK))
/*
 * Address mask used for large-page PDEs.
 * NOTE(review): this only clears bit 21 on top of PT_BASE_ADDR_MASK; confirm
 * that is the intended set of bits.
 */
#define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(1ull << 21))

#define CR0_WP_MASK (1UL << 16)
#define CR4_SMEP_MASK (1UL << 20)

/* Page-fault error code bits, compared against the code popped by the #PF stub. */
#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)
#define PFERR_RESERVED_MASK (1U << 3)
#define PFERR_FETCH_MASK (1U << 4)
#define PFERR_PK_MASK (1U << 5)

#define MSR_EFER 0xc0000080
#define EFER_NX_MASK		(1ull << 11)
34 
/*
 * Index of the entry that translates @address in the page table at @level
 * (1 = PTE level, 4 = top level): 9 bits per level above the 12-bit page
 * offset.  The whole expansion is parenthesized so the macro can be used
 * inside larger expressions (comparisons, arithmetic) without precedence
 * surprises.
 */
#define PT_INDEX(address, level)       \
       (((address) >> (12 + ((level)-1) * 9)) & 511)
37 
38 /*
39  * page table access check tests
40  */
41 
/*
 * Bit positions of the test-combination flags held in ac_test_t.flags.
 * Each bit selects one property of the page-table entries, of the access
 * being performed, or of the CPU control state for a test case.
 */
enum {
    /* Bits applied to the level-1 entry (PTE). */
    AC_PTE_PRESENT_BIT,
    AC_PTE_WRITABLE_BIT,
    AC_PTE_USER_BIT,
    AC_PTE_ACCESSED_BIT,
    AC_PTE_DIRTY_BIT,
    AC_PTE_NX_BIT,
    AC_PTE_BIT51_BIT,

    /* Bits applied to the level-2 entry (PDE). */
    AC_PDE_PRESENT_BIT,
    AC_PDE_WRITABLE_BIT,
    AC_PDE_USER_BIT,
    AC_PDE_ACCESSED_BIT,
    AC_PDE_DIRTY_BIT,
    AC_PDE_PSE_BIT,
    AC_PDE_NX_BIT,
    AC_PDE_BIT51_BIT,
    AC_PDE_BIT13_BIT,

    /* Protection-key state: PKRU access/write disable and key selection. */
    AC_PKU_AD_BIT,
    AC_PKU_WD_BIT,
    AC_PKU_PKEY_BIT,

    /* Kind of access performed on the test address. */
    AC_ACCESS_USER_BIT,
    AC_ACCESS_WRITE_BIT,
    AC_ACCESS_FETCH_BIT,
    AC_ACCESS_TWICE_BIT,

    /* CPU control bits programmed before the access. */
    AC_CPU_EFER_NX_BIT,
    AC_CPU_CR0_WP_BIT,
    AC_CPU_CR4_SMEP_BIT,
    AC_CPU_CR4_PKE_BIT,

    /* Number of flag bits; flags values range over [0, 1 << NR_AC_FLAGS). */
    NR_AC_FLAGS
};
77 
/*
 * Single-bit masks matching the AC_*_BIT positions above.  The F() macro
 * builds these names by appending _MASK to its argument.
 */
#define AC_PTE_PRESENT_MASK   (1 << AC_PTE_PRESENT_BIT)
#define AC_PTE_WRITABLE_MASK  (1 << AC_PTE_WRITABLE_BIT)
#define AC_PTE_USER_MASK      (1 << AC_PTE_USER_BIT)
#define AC_PTE_ACCESSED_MASK  (1 << AC_PTE_ACCESSED_BIT)
#define AC_PTE_DIRTY_MASK     (1 << AC_PTE_DIRTY_BIT)
#define AC_PTE_NX_MASK        (1 << AC_PTE_NX_BIT)
#define AC_PTE_BIT51_MASK     (1 << AC_PTE_BIT51_BIT)

#define AC_PDE_PRESENT_MASK   (1 << AC_PDE_PRESENT_BIT)
#define AC_PDE_WRITABLE_MASK  (1 << AC_PDE_WRITABLE_BIT)
#define AC_PDE_USER_MASK      (1 << AC_PDE_USER_BIT)
#define AC_PDE_ACCESSED_MASK  (1 << AC_PDE_ACCESSED_BIT)
#define AC_PDE_DIRTY_MASK     (1 << AC_PDE_DIRTY_BIT)
#define AC_PDE_PSE_MASK       (1 << AC_PDE_PSE_BIT)
#define AC_PDE_NX_MASK        (1 << AC_PDE_NX_BIT)
#define AC_PDE_BIT51_MASK     (1 << AC_PDE_BIT51_BIT)
#define AC_PDE_BIT13_MASK     (1 << AC_PDE_BIT13_BIT)

#define AC_PKU_AD_MASK        (1 << AC_PKU_AD_BIT)
#define AC_PKU_WD_MASK        (1 << AC_PKU_WD_BIT)
#define AC_PKU_PKEY_MASK      (1 << AC_PKU_PKEY_BIT)

#define AC_ACCESS_USER_MASK   (1 << AC_ACCESS_USER_BIT)
#define AC_ACCESS_WRITE_MASK  (1 << AC_ACCESS_WRITE_BIT)
#define AC_ACCESS_FETCH_MASK  (1 << AC_ACCESS_FETCH_BIT)
#define AC_ACCESS_TWICE_MASK  (1 << AC_ACCESS_TWICE_BIT)

#define AC_CPU_EFER_NX_MASK   (1 << AC_CPU_EFER_NX_BIT)
#define AC_CPU_CR0_WP_MASK    (1 << AC_CPU_CR0_WP_BIT)
#define AC_CPU_CR4_SMEP_MASK  (1 << AC_CPU_CR4_SMEP_BIT)
#define AC_CPU_CR4_PKE_MASK   (1 << AC_CPU_CR4_PKE_BIT)
109 
110 const char *ac_names[] = {
111     [AC_PTE_PRESENT_BIT] = "pte.p",
112     [AC_PTE_ACCESSED_BIT] = "pte.a",
113     [AC_PTE_WRITABLE_BIT] = "pte.rw",
114     [AC_PTE_USER_BIT] = "pte.user",
115     [AC_PTE_DIRTY_BIT] = "pte.d",
116     [AC_PTE_NX_BIT] = "pte.nx",
117     [AC_PTE_BIT51_BIT] = "pte.51",
118     [AC_PDE_PRESENT_BIT] = "pde.p",
119     [AC_PDE_ACCESSED_BIT] = "pde.a",
120     [AC_PDE_WRITABLE_BIT] = "pde.rw",
121     [AC_PDE_USER_BIT] = "pde.user",
122     [AC_PDE_DIRTY_BIT] = "pde.d",
123     [AC_PDE_PSE_BIT] = "pde.pse",
124     [AC_PDE_NX_BIT] = "pde.nx",
125     [AC_PDE_BIT51_BIT] = "pde.51",
126     [AC_PDE_BIT13_BIT] = "pde.13",
127     [AC_PKU_AD_BIT] = "pkru.ad",
128     [AC_PKU_WD_BIT] = "pkru.wd",
129     [AC_PKU_PKEY_BIT] = "pkey=1",
130     [AC_ACCESS_WRITE_BIT] = "write",
131     [AC_ACCESS_USER_BIT] = "user",
132     [AC_ACCESS_FETCH_BIT] = "fetch",
133     [AC_ACCESS_TWICE_BIT] = "twice",
134     [AC_CPU_EFER_NX_BIT] = "efer.nx",
135     [AC_CPU_CR0_WP_BIT] = "cr0.wp",
136     [AC_CPU_CR4_SMEP_BIT] = "cr4.smep",
137     [AC_CPU_CR4_PKE_BIT] = "cr4.pke",
138 };
139 
140 static inline void *va(pt_element_t phys)
141 {
142     return (void *)phys;
143 }
144 
/* Bump allocator state for page-table pages handed out by ac_test_alloc_pt(). */
typedef struct {
    pt_element_t pt_pool;        /* physical base address of the pool */
    unsigned pt_pool_size;       /* pool size in bytes */
    unsigned pt_pool_current;    /* byte offset of the next free page */
} ac_pool_t;

/* One test case: the flag combination, its mapping, and the predicted outcome. */
typedef struct {
    unsigned flags;              /* combination of AC_*_MASK bits */
    void *virt;                  /* virtual address that gets accessed */
    pt_element_t phys;           /* physical address the mapping points to */
    pt_element_t *ptep;          /* level-1 entry, or 0 when a large PDE is used */
    pt_element_t expected_pte;   /* predicted PTE value after the access */
    pt_element_t *pdep;          /* level-2 entry */
    pt_element_t expected_pde;   /* predicted PDE value after the access */
    pt_element_t ignore_pde;     /* PDE bits excluded from the comparison */
    int expected_fault;          /* non-zero when a page fault is predicted */
    unsigned expected_error;     /* predicted page-fault error code */
} ac_test_t;

/* Packed limit/base pair. NOTE(review): not referenced in the visible code. */
typedef struct {
    unsigned short limit;
    unsigned long linear_addr;
} __attribute__((packed)) descriptor_table_t;
168 
169 
170 static void ac_test_show(ac_test_t *at);
171 
172 int write_cr4_checking(unsigned long val)
173 {
174     asm volatile(ASM_TRY("1f")
175             "mov %0,%%cr4\n\t"
176             "1:": : "r" (val));
177     return exception_vector();
178 }
179 
180 void set_cr0_wp(int wp)
181 {
182     unsigned long cr0 = read_cr0();
183     unsigned long old_cr0 = cr0;
184 
185     cr0 &= ~CR0_WP_MASK;
186     if (wp)
187 	cr0 |= CR0_WP_MASK;
188     if (old_cr0 != cr0)
189         write_cr0(cr0);
190 }
191 
192 void set_cr4_smep(int smep)
193 {
194     unsigned long cr4 = read_cr4();
195     unsigned long old_cr4 = cr4;
196     extern u64 ptl2[];
197 
198     cr4 &= ~CR4_SMEP_MASK;
199     if (smep)
200 	cr4 |= CR4_SMEP_MASK;
201     if (old_cr4 == cr4)
202         return;
203 
204     if (smep)
205         ptl2[2] &= ~PT_USER_MASK;
206     write_cr4(cr4);
207     if (!smep)
208         ptl2[2] |= PT_USER_MASK;
209 }
210 
211 void set_cr4_pke(int pke)
212 {
213     unsigned long cr4 = read_cr4();
214     unsigned long old_cr4 = cr4;
215 
216     cr4 &= ~X86_CR4_PKE;
217     if (pke)
218 	cr4 |= X86_CR4_PKE;
219     if (old_cr4 == cr4)
220         return;
221 
222     /* Check that protection keys do not affect accesses when CR4.PKE=0.  */
223     if ((read_cr4() & X86_CR4_PKE) && !pke) {
224         write_pkru(0xfffffffc);
225     }
226     write_cr4(cr4);
227 }
228 
229 void set_efer_nx(int nx)
230 {
231     unsigned long long efer = rdmsr(MSR_EFER);
232     unsigned long long old_efer = efer;
233 
234     efer &= ~EFER_NX_MASK;
235     if (nx)
236 	efer |= EFER_NX_MASK;
237     if (old_efer != efer)
238         wrmsr(MSR_EFER, efer);
239 }
240 
241 static void ac_env_int(ac_pool_t *pool)
242 {
243     extern char page_fault, kernel_entry;
244     set_idt_entry(14, &page_fault, 0);
245     set_idt_entry(0x20, &kernel_entry, 3);
246 
247     pool->pt_pool = 33 * 1024 * 1024;
248     pool->pt_pool_size = 120 * 1024 * 1024 - pool->pt_pool;
249     pool->pt_pool_current = 0;
250 }
251 
252 void ac_test_init(ac_test_t *at, void *virt)
253 {
254     wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NX_MASK);
255     set_cr0_wp(1);
256     at->flags = 0;
257     at->virt = virt;
258     at->phys = 32 * 1024 * 1024;
259 }
260 
261 int ac_test_bump_one(ac_test_t *at)
262 {
263     at->flags = ((at->flags | invalid_mask) + 1) & ~invalid_mask;
264     return at->flags < (1 << NR_AC_FLAGS);
265 }
266 
/*
 * F(NAME) tests bit NAME_MASK in a variable called `flags`, which must be
 * in scope wherever the macro is used.
 */
#define F(x)  ((flags & x##_MASK) != 0)
268 
269 _Bool ac_test_legal(ac_test_t *at)
270 {
271     int flags = at->flags;
272 
273     if (F(AC_ACCESS_FETCH) && F(AC_ACCESS_WRITE))
274 	return false;
275 
276     /*
277      * Since we convert current page to kernel page when cr4.smep=1,
278      * we can't switch to user mode.
279      */
280     if (F(AC_ACCESS_USER) && F(AC_CPU_CR4_SMEP))
281 	return false;
282 
283     /*
284      * Only test protection key faults if CR4.PKE=1.
285      */
286     if (!F(AC_CPU_CR4_PKE) &&
287         (F(AC_PKU_AD) || F(AC_PKU_WD))) {
288 	return false;
289     }
290 
291     /*
292      * pde.bit13 checks handling of reserved bits in largepage PDEs.  It is
293      * meaningless if there is a PTE.
294      */
295     if (!F(AC_PDE_PSE) && F(AC_PDE_BIT13))
296         return false;
297 
298     return true;
299 }
300 
301 int ac_test_bump(ac_test_t *at)
302 {
303     int ret;
304 
305     ret = ac_test_bump_one(at);
306     while (ret && !ac_test_legal(at))
307 	ret = ac_test_bump_one(at);
308     return ret;
309 }
310 
311 pt_element_t ac_test_alloc_pt(ac_pool_t *pool)
312 {
313     pt_element_t ret = pool->pt_pool + pool->pt_pool_current;
314     pool->pt_pool_current += PAGE_SIZE;
315     return ret;
316 }
317 
318 _Bool ac_test_enough_room(ac_pool_t *pool)
319 {
320     return pool->pt_pool_current + 4 * PAGE_SIZE <= pool->pt_pool_size;
321 }
322 
323 void ac_test_reset_pt_pool(ac_pool_t *pool)
324 {
325     pool->pt_pool_current = 0;
326 }
327 
328 pt_element_t ac_test_permissions(ac_test_t *at, unsigned flags, bool writable,
329                                  bool user, bool executable)
330 {
331     bool kwritable = !F(AC_CPU_CR0_WP) && !F(AC_ACCESS_USER);
332     pt_element_t expected = 0;
333 
334     if (F(AC_ACCESS_USER) && !user)
335 	at->expected_fault = 1;
336 
337     if (F(AC_ACCESS_WRITE) && !writable && !kwritable)
338 	at->expected_fault = 1;
339 
340     if (F(AC_ACCESS_FETCH) && !executable)
341 	at->expected_fault = 1;
342 
343     if (F(AC_ACCESS_FETCH) && user && F(AC_CPU_CR4_SMEP))
344         at->expected_fault = 1;
345 
346     if (user && !F(AC_ACCESS_FETCH) && F(AC_PKU_PKEY) && F(AC_CPU_CR4_PKE)) {
347         if (F(AC_PKU_AD)) {
348             at->expected_fault = 1;
349             at->expected_error |= PFERR_PK_MASK;
350         } else if (F(AC_ACCESS_WRITE) && F(AC_PKU_WD) && !kwritable) {
351             at->expected_fault = 1;
352             at->expected_error |= PFERR_PK_MASK;
353         }
354     }
355 
356     if (!at->expected_fault) {
357         expected |= PT_ACCESSED_MASK;
358         if (F(AC_ACCESS_WRITE))
359             expected |= PT_DIRTY_MASK;
360     }
361 
362     return expected;
363 }
364 
365 void ac_emulate_access(ac_test_t *at, unsigned flags)
366 {
367     bool pde_valid, pte_valid;
368     bool user, writable, executable;
369 
370     if (F(AC_ACCESS_USER))
371 	at->expected_error |= PFERR_USER_MASK;
372 
373     if (F(AC_ACCESS_WRITE))
374 	at->expected_error |= PFERR_WRITE_MASK;
375 
376     if (F(AC_ACCESS_FETCH))
377 	at->expected_error |= PFERR_FETCH_MASK;
378 
379     if (!F(AC_PDE_ACCESSED))
380         at->ignore_pde = PT_ACCESSED_MASK;
381 
382     pde_valid = F(AC_PDE_PRESENT)
383         && !F(AC_PDE_BIT51) && !F(AC_PDE_BIT13)
384         && !(F(AC_PDE_NX) && !F(AC_CPU_EFER_NX));
385 
386     if (!pde_valid) {
387         at->expected_fault = 1;
388 	if (F(AC_PDE_PRESENT)) {
389             at->expected_error |= PFERR_RESERVED_MASK;
390         } else {
391             at->expected_error &= ~PFERR_PRESENT_MASK;
392         }
393 	goto fault;
394     }
395 
396     writable = F(AC_PDE_WRITABLE);
397     user = F(AC_PDE_USER);
398     executable = !F(AC_PDE_NX);
399 
400     if (F(AC_PDE_PSE)) {
401         at->expected_pde |= ac_test_permissions(at, flags, writable, user,
402                                                 executable);
403 	goto no_pte;
404     }
405 
406     at->expected_pde |= PT_ACCESSED_MASK;
407 
408     pte_valid = F(AC_PTE_PRESENT)
409         && !F(AC_PTE_BIT51)
410         && !(F(AC_PTE_NX) && !F(AC_CPU_EFER_NX));
411 
412     if (!pte_valid) {
413         at->expected_fault = 1;
414 	if (F(AC_PTE_PRESENT)) {
415             at->expected_error |= PFERR_RESERVED_MASK;
416         } else {
417             at->expected_error &= ~PFERR_PRESENT_MASK;
418         }
419 	goto fault;
420     }
421 
422     writable &= F(AC_PTE_WRITABLE);
423     user &= F(AC_PTE_USER);
424     executable &= !F(AC_PTE_NX);
425 
426     at->expected_pte |= ac_test_permissions(at, flags, writable, user,
427                                             executable);
428 
429 no_pte:
430 fault:
431     if (!at->expected_fault)
432         at->ignore_pde = 0;
433     if (!F(AC_CPU_EFER_NX) && !F(AC_CPU_CR4_SMEP))
434         at->expected_error &= ~PFERR_FETCH_MASK;
435 }
436 
437 void ac_set_expected_status(ac_test_t *at)
438 {
439     invlpg(at->virt);
440 
441     if (at->ptep)
442 	at->expected_pte = *at->ptep;
443     at->expected_pde = *at->pdep;
444     at->ignore_pde = 0;
445     at->expected_fault = 0;
446     at->expected_error = PFERR_PRESENT_MASK;
447 
448     if (at->flags & AC_ACCESS_TWICE_MASK) {
449         ac_emulate_access(at, at->flags & ~AC_ACCESS_WRITE_MASK
450                           & ~AC_ACCESS_FETCH_MASK & ~AC_ACCESS_USER_MASK);
451         at->expected_fault = 0;
452 	at->expected_error = PFERR_PRESENT_MASK;
453         at->ignore_pde = 0;
454     }
455 
456     ac_emulate_access(at, at->flags);
457 }
458 
459 void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, u64 pd_page,
460 			       u64 pt_page)
461 
462 {
463     unsigned long root = read_cr3();
464     int flags = at->flags;
465 
466     if (!ac_test_enough_room(pool))
467 	ac_test_reset_pt_pool(pool);
468 
469     at->ptep = 0;
470     for (int i = 4; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) {
471 	pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK);
472 	unsigned index = PT_INDEX((unsigned long)at->virt, i);
473 	pt_element_t pte = 0;
474 	switch (i) {
475 	case 4:
476 	case 3:
477 	    pte = pd_page ? pd_page : ac_test_alloc_pt(pool);
478 	    pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
479 	    break;
480 	case 2:
481 	    if (!F(AC_PDE_PSE)) {
482 		pte = pt_page ? pt_page : ac_test_alloc_pt(pool);
483 		/* The protection key is ignored on non-leaf entries.  */
484                 if (F(AC_PKU_PKEY))
485                     pte |= 2ull << 59;
486 	    } else {
487 		pte = at->phys & PT_PSE_BASE_ADDR_MASK;
488 		pte |= PT_PAGE_SIZE_MASK;
489                 if (F(AC_PKU_PKEY))
490                     pte |= 1ull << 59;
491 	    }
492 	    if (F(AC_PDE_PRESENT))
493 		pte |= PT_PRESENT_MASK;
494 	    if (F(AC_PDE_WRITABLE))
495 		pte |= PT_WRITABLE_MASK;
496 	    if (F(AC_PDE_USER))
497 		pte |= PT_USER_MASK;
498 	    if (F(AC_PDE_ACCESSED))
499 		pte |= PT_ACCESSED_MASK;
500 	    if (F(AC_PDE_DIRTY))
501 		pte |= PT_DIRTY_MASK;
502 	    if (F(AC_PDE_NX))
503 		pte |= PT64_NX_MASK;
504 	    if (F(AC_PDE_BIT51))
505 		pte |= 1ull << 51;
506 	    if (F(AC_PDE_BIT13))
507 		pte |= 1ull << 13;
508 	    at->pdep = &vroot[index];
509 	    break;
510 	case 1:
511 	    pte = at->phys & PT_BASE_ADDR_MASK;
512 	    if (F(AC_PKU_PKEY))
513 		pte |= 1ull << 59;
514 	    if (F(AC_PTE_PRESENT))
515 		pte |= PT_PRESENT_MASK;
516 	    if (F(AC_PTE_WRITABLE))
517 		pte |= PT_WRITABLE_MASK;
518 	    if (F(AC_PTE_USER))
519 		pte |= PT_USER_MASK;
520 	    if (F(AC_PTE_ACCESSED))
521 		pte |= PT_ACCESSED_MASK;
522 	    if (F(AC_PTE_DIRTY))
523 		pte |= PT_DIRTY_MASK;
524 	    if (F(AC_PTE_NX))
525 		pte |= PT64_NX_MASK;
526 	    if (F(AC_PTE_BIT51))
527 		pte |= 1ull << 51;
528 	    at->ptep = &vroot[index];
529 	    break;
530 	}
531 	vroot[index] = pte;
532 	root = vroot[index];
533     }
534     ac_set_expected_status(at);
535 }
536 
537 static void ac_test_setup_pte(ac_test_t *at, ac_pool_t *pool)
538 {
539 	__ac_setup_specific_pages(at, pool, 0, 0);
540 }
541 
542 static void ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool,
543 				    u64 pd_page, u64 pt_page)
544 {
545 	return __ac_setup_specific_pages(at, pool, pd_page, pt_page);
546 }
547 
548 static void dump_mapping(ac_test_t *at)
549 {
550 	unsigned long root = read_cr3();
551         int flags = at->flags;
552 	int i;
553 
554 	printf("Dump mapping: address: %p\n", at->virt);
555 	for (i = 4; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) {
556 		pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK);
557 		unsigned index = PT_INDEX((unsigned long)at->virt, i);
558 		pt_element_t pte = vroot[index];
559 
560 		printf("------L%d: %lx\n", i, pte);
561 		root = vroot[index];
562 	}
563 }
564 
565 static void ac_test_check(ac_test_t *at, _Bool *success_ret, _Bool cond,
566                           const char *fmt, ...)
567 {
568     va_list ap;
569     char buf[500];
570 
571     if (!*success_ret) {
572         return;
573     }
574 
575     if (!cond) {
576         return;
577     }
578 
579     *success_ret = false;
580 
581     if (!verbose) {
582         puts("\n");
583         ac_test_show(at);
584     }
585 
586     va_start(ap, fmt);
587     vsnprintf(buf, sizeof(buf), fmt, ap);
588     va_end(ap);
589     printf("FAIL: %s\n", buf);
590     dump_mapping(at);
591 }
592 
593 static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore)
594 {
595     pte1 &= ~ignore;
596     pte2 &= ~ignore;
597     return pte1 == pte2;
598 }
599 
600 int ac_test_do_access(ac_test_t *at)
601 {
602     static unsigned unique = 42;
603     int fault = 0;
604     unsigned e;
605     static unsigned char user_stack[4096];
606     unsigned long rsp;
607     _Bool success = true;
608     int flags = at->flags;
609 
610     ++unique;
611     if (!(unique & 65535)) {
612         puts(".");
613     }
614 
615     *((unsigned char *)at->phys) = 0xc3; /* ret */
616 
617     unsigned r = unique;
618     set_cr0_wp(F(AC_CPU_CR0_WP));
619     set_efer_nx(F(AC_CPU_EFER_NX));
620     set_cr4_pke(F(AC_CPU_CR4_PKE));
621     if (F(AC_CPU_CR4_PKE)) {
622         /* WD2=AD2=1, WD1=F(AC_PKU_WD), AD1=F(AC_PKU_AD) */
623         write_pkru(0x30 | (F(AC_PKU_WD) ? 8 : 0) |
624                    (F(AC_PKU_AD) ? 4 : 0));
625     }
626 
627     set_cr4_smep(F(AC_CPU_CR4_SMEP));
628 
629     if (F(AC_ACCESS_TWICE)) {
630 	asm volatile (
631 	    "mov $fixed2, %%rsi \n\t"
632 	    "mov (%[addr]), %[reg] \n\t"
633 	    "fixed2:"
634 	    : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e)
635 	    : [addr]"r"(at->virt)
636 	    : "rsi"
637 	    );
638 	fault = 0;
639     }
640 
641     asm volatile ("mov $fixed1, %%rsi \n\t"
642 		  "mov %%rsp, %%rdx \n\t"
643 		  "cmp $0, %[user] \n\t"
644 		  "jz do_access \n\t"
645 		  "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax  \n\t"
646 		  "pushq %[user_ds] \n\t"
647 		  "pushq %[user_stack_top] \n\t"
648 		  "pushfq \n\t"
649 		  "pushq %[user_cs] \n\t"
650 		  "pushq $do_access \n\t"
651 		  "iretq \n"
652 		  "do_access: \n\t"
653 		  "cmp $0, %[fetch] \n\t"
654 		  "jnz 2f \n\t"
655 		  "cmp $0, %[write] \n\t"
656 		  "jnz 1f \n\t"
657 		  "mov (%[addr]), %[reg] \n\t"
658 		  "jmp done \n\t"
659 		  "1: mov %[reg], (%[addr]) \n\t"
660 		  "jmp done \n\t"
661 		  "2: call *%[addr] \n\t"
662 		  "done: \n"
663 		  "fixed1: \n"
664 		  "int %[kernel_entry_vector] \n\t"
665 		  "back_to_kernel:"
666 		  : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp)
667 		  : [addr]"r"(at->virt),
668 		    [write]"r"(F(AC_ACCESS_WRITE)),
669 		    [user]"r"(F(AC_ACCESS_USER)),
670 		    [fetch]"r"(F(AC_ACCESS_FETCH)),
671 		    [user_ds]"i"(USER_DS),
672 		    [user_cs]"i"(USER_CS),
673 		    [user_stack_top]"r"(user_stack + sizeof user_stack),
674 		    [kernel_entry_vector]"i"(0x20)
675 		  : "rsi");
676 
677     asm volatile (".section .text.pf \n\t"
678 		  "page_fault: \n\t"
679 		  "pop %rbx \n\t"
680 		  "mov %rsi, (%rsp) \n\t"
681 		  "movl $1, %eax \n\t"
682 		  "iretq \n\t"
683 		  ".section .text");
684 
685     asm volatile (".section .text.entry \n\t"
686 		  "kernel_entry: \n\t"
687 		  "mov %rdx, %rsp \n\t"
688 		  "jmp back_to_kernel \n\t"
689 		  ".section .text");
690 
691     ac_test_check(at, &success, fault && !at->expected_fault,
692                   "unexpected fault");
693     ac_test_check(at, &success, !fault && at->expected_fault,
694                   "unexpected access");
695     ac_test_check(at, &success, fault && e != at->expected_error,
696                   "error code %x expected %x", e, at->expected_error);
697     ac_test_check(at, &success, at->ptep && *at->ptep != at->expected_pte,
698                   "pte %x expected %x", *at->ptep, at->expected_pte);
699     ac_test_check(at, &success,
700                   !pt_match(*at->pdep, at->expected_pde, at->ignore_pde),
701                   "pde %x expected %x", *at->pdep, at->expected_pde);
702 
703     if (success && verbose) {
704 	if (at->expected_fault) {
705             printf("PASS (%x)\n", at->expected_error);
706 	} else {
707             printf("PASS\n");
708 	}
709     }
710     return success;
711 }
712 
713 static void ac_test_show(ac_test_t *at)
714 {
715     char line[5000];
716 
717     *line = 0;
718     strcat(line, "test");
719     for (int i = 0; i < NR_AC_FLAGS; ++i)
720 	if (at->flags & (1 << i)) {
721 	    strcat(line, " ");
722 	    strcat(line, ac_names[i]);
723 	}
724     strcat(line, ": ");
725     printf("%s", line);
726 }
727 
728 /*
729  * This test case is used to triger the bug which is fixed by
730  * commit e09e90a5 in the kvm tree
731  */
732 static int corrupt_hugepage_triger(ac_pool_t *pool)
733 {
734     ac_test_t at1, at2;
735 
736     ac_test_init(&at1, (void *)(0x123400000000));
737     ac_test_init(&at2, (void *)(0x666600000000));
738 
739     at2.flags = AC_CPU_CR0_WP_MASK | AC_PDE_PSE_MASK | AC_PDE_PRESENT_MASK;
740     ac_test_setup_pte(&at2, pool);
741     if (!ac_test_do_access(&at2))
742         goto err;
743 
744     at1.flags = at2.flags | AC_PDE_WRITABLE_MASK;
745     ac_test_setup_pte(&at1, pool);
746     if (!ac_test_do_access(&at1))
747         goto err;
748 
749     at1.flags |= AC_ACCESS_WRITE_MASK;
750     ac_set_expected_status(&at1);
751     if (!ac_test_do_access(&at1))
752         goto err;
753 
754     at2.flags |= AC_ACCESS_WRITE_MASK;
755     ac_set_expected_status(&at2);
756     if (!ac_test_do_access(&at2))
757         goto err;
758 
759     return 1;
760 
761 err:
762     printf("corrupt_hugepage_triger test fail\n");
763     return 0;
764 }
765 
766 /*
767  * This test case is used to triger the bug which is fixed by
768  * commit 3ddf6c06e13e in the kvm tree
769  */
770 static int check_pfec_on_prefetch_pte(ac_pool_t *pool)
771 {
772 	ac_test_t at1, at2;
773 
774 	ac_test_init(&at1, (void *)(0x123406001000));
775 	ac_test_init(&at2, (void *)(0x123406003000));
776 
777 	at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK;
778 	ac_setup_specific_pages(&at1, pool, 30 * 1024 * 1024, 30 * 1024 * 1024);
779 
780         at2.flags = at1.flags | AC_PTE_NX_MASK;
781 	ac_setup_specific_pages(&at2, pool, 30 * 1024 * 1024, 30 * 1024 * 1024);
782 
783 	if (!ac_test_do_access(&at1)) {
784 		printf("%s: prepare fail\n", __FUNCTION__);
785 		goto err;
786 	}
787 
788 	if (!ac_test_do_access(&at2)) {
789 		printf("%s: check PFEC on prefetch pte path fail\n",
790 			__FUNCTION__);
791 		goto err;
792 	}
793 
794 	return 1;
795 
796 err:
797     return 0;
798 }
799 
800 /*
801  * If the write-fault access is from supervisor and CR0.WP is not set on the
802  * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte
803  * and clears U bit. This is the chance that kvm can change pte access from
804  * readonly to writable.
805  *
806  * Unfortunately, the pte access is the access of 'direct' shadow page table,
807  * means direct sp.role.access = pte_access, then we will create a writable
808  * spte entry on the readonly shadow page table. It will cause Dirty bit is
809  * not tracked when two guest ptes point to the same large page. Note, it
810  * does not have other impact except Dirty bit since cr0.wp is encoded into
811  * sp.role.
812  *
813  * Note: to trigger this bug, hugepage should be disabled on host.
814  */
815 static int check_large_pte_dirty_for_nowp(ac_pool_t *pool)
816 {
817 	ac_test_t at1, at2;
818 
819 	ac_test_init(&at1, (void *)(0x123403000000));
820 	ac_test_init(&at2, (void *)(0x666606000000));
821 
822         at2.flags = AC_PDE_PRESENT_MASK | AC_PDE_PSE_MASK;
823 	ac_test_setup_pte(&at2, pool);
824 	if (!ac_test_do_access(&at2)) {
825 		printf("%s: read on the first mapping fail.\n", __FUNCTION__);
826 		goto err;
827 	}
828 
829         at1.flags = at2.flags | AC_ACCESS_WRITE_MASK;
830 	ac_test_setup_pte(&at1, pool);
831 	if (!ac_test_do_access(&at1)) {
832 		printf("%s: write on the second mapping fail.\n", __FUNCTION__);
833 		goto err;
834 	}
835 
836 	at2.flags |= AC_ACCESS_WRITE_MASK;
837 	ac_set_expected_status(&at2);
838 	if (!ac_test_do_access(&at2)) {
839 		printf("%s: write on the first mapping fail.\n", __FUNCTION__);
840 		goto err;
841 	}
842 
843 	return 1;
844 
845 err:
846 	return 0;
847 }
848 
849 static int check_smep_andnot_wp(ac_pool_t *pool)
850 {
851 	ac_test_t at1;
852 	int err_prepare_andnot_wp, err_smep_andnot_wp;
853 
854 	if (!(cpuid_7_ebx & (1 << 7))) {
855 	    return 1;
856 	}
857 
858 	ac_test_init(&at1, (void *)(0x123406001000));
859 
860 	at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK |
861             AC_PDE_USER_MASK | AC_PTE_USER_MASK |
862             AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK |
863             AC_CPU_CR4_SMEP_MASK |
864             AC_CPU_CR0_WP_MASK |
865             AC_ACCESS_WRITE_MASK;
866 	ac_test_setup_pte(&at1, pool);
867 
868 	/*
869 	 * Here we write the ro user page when
870 	 * cr0.wp=0, then we execute it and SMEP
871 	 * fault should happen.
872 	 */
873 	err_prepare_andnot_wp = ac_test_do_access(&at1);
874 	if (!err_prepare_andnot_wp) {
875 		printf("%s: SMEP prepare fail\n", __FUNCTION__);
876 		goto clean_up;
877 	}
878 
879         at1.flags &= ~AC_ACCESS_WRITE_MASK;
880         at1.flags |= AC_ACCESS_FETCH_MASK;
881         ac_set_expected_status(&at1);
882         err_smep_andnot_wp = ac_test_do_access(&at1);
883 
884 clean_up:
885 	set_cr4_smep(0);
886 
887 	if (!err_prepare_andnot_wp)
888 		goto err;
889 	if (!err_smep_andnot_wp) {
890 		printf("%s: check SMEP without wp fail\n", __FUNCTION__);
891 		goto err;
892 	}
893 	return 1;
894 
895 err:
896 	return 0;
897 }
898 
899 int ac_test_exec(ac_test_t *at, ac_pool_t *pool)
900 {
901     int r;
902 
903     if (verbose) {
904         ac_test_show(at);
905     }
906     ac_test_setup_pte(at, pool);
907     r = ac_test_do_access(at);
908     return r;
909 }
910 
911 typedef int (*ac_test_fn)(ac_pool_t *pool);
912 const ac_test_fn ac_test_cases[] =
913 {
914 	corrupt_hugepage_triger,
915 	check_pfec_on_prefetch_pte,
916 	check_large_pte_dirty_for_nowp,
917 	check_smep_andnot_wp
918 };
919 
920 int ac_test_run(void)
921 {
922     ac_test_t at;
923     ac_pool_t pool;
924     int i, tests, successes;
925 
926     printf("run\n");
927     tests = successes = 0;
928 
929     if (cpuid_7_ecx & (1 << 3)) {
930         set_cr4_pke(1);
931         set_cr4_pke(0);
932         /* Now PKRU = 0xFFFFFFFF.  */
933     } else {
934 	unsigned long cr4 = read_cr4();
935 	tests++;
936 	if (write_cr4_checking(cr4 | X86_CR4_PKE) == GP_VECTOR) {
937             successes++;
938             invalid_mask |= AC_PKU_AD_MASK;
939             invalid_mask |= AC_PKU_WD_MASK;
940             invalid_mask |= AC_PKU_PKEY_MASK;
941             invalid_mask |= AC_CPU_CR4_PKE_MASK;
942             printf("CR4.PKE not available, disabling PKE tests\n");
943 	} else {
944             printf("Set PKE in CR4 - expect #GP: FAIL!\n");
945             set_cr4_pke(0);
946 	}
947     }
948 
949     if (!(cpuid_7_ebx & (1 << 7))) {
950 	unsigned long cr4 = read_cr4();
951 	tests++;
952 	if (write_cr4_checking(cr4 | CR4_SMEP_MASK) == GP_VECTOR) {
953             successes++;
954             invalid_mask |= AC_CPU_CR4_SMEP_MASK;
955             printf("CR4.SMEP not available, disabling SMEP tests\n");
956 	} else {
957             printf("Set SMEP in CR4 - expect #GP: FAIL!\n");
958             set_cr4_smep(0);
959 	}
960     }
961 
962     ac_env_int(&pool);
963     ac_test_init(&at, (void *)(0x123400000000 + 16 * smp_id()));
964     do {
965 	++tests;
966 	successes += ac_test_exec(&at, &pool);
967     } while (ac_test_bump(&at));
968 
969     for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) {
970 	++tests;
971 	successes += ac_test_cases[i](&pool);
972     }
973 
974     printf("\n%d tests, %d failures\n", tests, tests - successes);
975 
976     return successes == tests;
977 }
978 
979 int main()
980 {
981     int r;
982 
983     setup_idt();
984 
985     cpuid_7_ebx = cpuid(7).b;
986     cpuid_7_ecx = cpuid(7).c;
987 
988     printf("starting test\n\n");
989     r = ac_test_run();
990     return r ? 0 : 1;
991 }
992