xref: /kvm-unit-tests/x86/access.c (revision be704aff683c54fc108deaafacc7cb89ad0648d9)
1 
2 #include "libcflat.h"
3 #include "desc.h"
4 #include "processor.h"
5 #include "asm/page.h"
6 #include "x86/vm.h"
7 
/* Single-CPU test: smp_id() is hardwired to 0. */
#define smp_id() 0

#define true 1
#define false 0

static _Bool verbose = false;	/* print every test configuration as it runs */
14 
typedef unsigned long pt_element_t;	/* one 64-bit page-table entry */
static int invalid_mask;		/* AC_* flag bits excluded from iteration */
static int page_table_levels;		/* 4, or 5 when LA57 is tested */

/* Physical-address field of a PTE (addresses below 1ull << 36). */
#define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 36) - 1) & PAGE_MASK))
/* Same for a 2MB PDE: bit 21 is part of the page offset, not the address. */
#define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(1ull << 21))

#define CR0_WP_MASK (1UL << 16)
#define CR4_SMEP_MASK (1UL << 20)

/* #PF error-code bits, as pushed by the CPU on a page fault. */
#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)
#define PFERR_RESERVED_MASK (1U << 3)
#define PFERR_FETCH_MASK (1U << 4)
#define PFERR_PK_MASK (1U << 5)

#define MSR_EFER 0xc0000080
#define EFER_NX_MASK		(1ull << 11)

/* Index into the page table at 'level' (1 = PTE level) for an address. */
#define PT_INDEX(address, level)       \
       ((address) >> (12 + ((level)-1) * 9)) & 511
37 
38 /*
39  * page table access check tests
40  */
41 
/*
 * One bit per knob in a test configuration: how the PTE and PDE are
 * built, the protection-key setup, the kind of access performed, and
 * the CPU control-register state.  ac_test_bump() iterates over all
 * legal combinations of these bits.
 */
enum {
    /* leaf PTE contents */
    AC_PTE_PRESENT_BIT,
    AC_PTE_WRITABLE_BIT,
    AC_PTE_USER_BIT,
    AC_PTE_ACCESSED_BIT,
    AC_PTE_DIRTY_BIT,
    AC_PTE_NX_BIT,
    AC_PTE_BIT51_BIT,		/* reserved bit 51 */
    AC_PTE_BIT36_BIT,		/* reserved bit 36 (when MAXPHYADDR < 37) */

    /* PDE contents (leaf when PSE is set) */
    AC_PDE_PRESENT_BIT,
    AC_PDE_WRITABLE_BIT,
    AC_PDE_USER_BIT,
    AC_PDE_ACCESSED_BIT,
    AC_PDE_DIRTY_BIT,
    AC_PDE_PSE_BIT,		/* 2MB large page */
    AC_PDE_NX_BIT,
    AC_PDE_BIT51_BIT,
    AC_PDE_BIT36_BIT,
    AC_PDE_BIT13_BIT,		/* reserved in a large-page PDE */

    /* PKRU / protection-key setup */
    AC_PKU_AD_BIT,
    AC_PKU_WD_BIT,
    AC_PKU_PKEY_BIT,		/* tag the leaf entry with pkey = 1 */

    /* the access itself */
    AC_ACCESS_USER_BIT,
    AC_ACCESS_WRITE_BIT,
    AC_ACCESS_FETCH_BIT,
    AC_ACCESS_TWICE_BIT,	/* do a plain read first, then the access */

    /* CPU control state */
    AC_CPU_EFER_NX_BIT,
    AC_CPU_CR0_WP_BIT,
    AC_CPU_CR4_SMEP_BIT,
    AC_CPU_CR4_PKE_BIT,

    NR_AC_FLAGS
};

#define AC_PTE_PRESENT_MASK   (1 << AC_PTE_PRESENT_BIT)
#define AC_PTE_WRITABLE_MASK  (1 << AC_PTE_WRITABLE_BIT)
#define AC_PTE_USER_MASK      (1 << AC_PTE_USER_BIT)
#define AC_PTE_ACCESSED_MASK  (1 << AC_PTE_ACCESSED_BIT)
#define AC_PTE_DIRTY_MASK     (1 << AC_PTE_DIRTY_BIT)
#define AC_PTE_NX_MASK        (1 << AC_PTE_NX_BIT)
#define AC_PTE_BIT51_MASK     (1 << AC_PTE_BIT51_BIT)
#define AC_PTE_BIT36_MASK     (1 << AC_PTE_BIT36_BIT)

#define AC_PDE_PRESENT_MASK   (1 << AC_PDE_PRESENT_BIT)
#define AC_PDE_WRITABLE_MASK  (1 << AC_PDE_WRITABLE_BIT)
#define AC_PDE_USER_MASK      (1 << AC_PDE_USER_BIT)
#define AC_PDE_ACCESSED_MASK  (1 << AC_PDE_ACCESSED_BIT)
#define AC_PDE_DIRTY_MASK     (1 << AC_PDE_DIRTY_BIT)
#define AC_PDE_PSE_MASK       (1 << AC_PDE_PSE_BIT)
#define AC_PDE_NX_MASK        (1 << AC_PDE_NX_BIT)
#define AC_PDE_BIT51_MASK     (1 << AC_PDE_BIT51_BIT)
#define AC_PDE_BIT36_MASK     (1 << AC_PDE_BIT36_BIT)
#define AC_PDE_BIT13_MASK     (1 << AC_PDE_BIT13_BIT)

#define AC_PKU_AD_MASK        (1 << AC_PKU_AD_BIT)
#define AC_PKU_WD_MASK        (1 << AC_PKU_WD_BIT)
#define AC_PKU_PKEY_MASK      (1 << AC_PKU_PKEY_BIT)

#define AC_ACCESS_USER_MASK   (1 << AC_ACCESS_USER_BIT)
#define AC_ACCESS_WRITE_MASK  (1 << AC_ACCESS_WRITE_BIT)
#define AC_ACCESS_FETCH_MASK  (1 << AC_ACCESS_FETCH_BIT)
#define AC_ACCESS_TWICE_MASK  (1 << AC_ACCESS_TWICE_BIT)

#define AC_CPU_EFER_NX_MASK   (1 << AC_CPU_EFER_NX_BIT)
#define AC_CPU_CR0_WP_MASK    (1 << AC_CPU_CR0_WP_BIT)
#define AC_CPU_CR4_SMEP_MASK  (1 << AC_CPU_CR4_SMEP_BIT)
#define AC_CPU_CR4_PKE_MASK   (1 << AC_CPU_CR4_PKE_BIT)
113 
/* Human-readable name for each AC_* bit, used by ac_test_show(). */
const char *ac_names[] = {
    [AC_PTE_PRESENT_BIT] = "pte.p",
    [AC_PTE_ACCESSED_BIT] = "pte.a",
    [AC_PTE_WRITABLE_BIT] = "pte.rw",
    [AC_PTE_USER_BIT] = "pte.user",
    [AC_PTE_DIRTY_BIT] = "pte.d",
    [AC_PTE_NX_BIT] = "pte.nx",
    [AC_PTE_BIT51_BIT] = "pte.51",
    [AC_PTE_BIT36_BIT] = "pte.36",
    [AC_PDE_PRESENT_BIT] = "pde.p",
    [AC_PDE_ACCESSED_BIT] = "pde.a",
    [AC_PDE_WRITABLE_BIT] = "pde.rw",
    [AC_PDE_USER_BIT] = "pde.user",
    [AC_PDE_DIRTY_BIT] = "pde.d",
    [AC_PDE_PSE_BIT] = "pde.pse",
    [AC_PDE_NX_BIT] = "pde.nx",
    [AC_PDE_BIT51_BIT] = "pde.51",
    [AC_PDE_BIT36_BIT] = "pde.36",
    [AC_PDE_BIT13_BIT] = "pde.13",
    [AC_PKU_AD_BIT] = "pkru.ad",
    [AC_PKU_WD_BIT] = "pkru.wd",
    [AC_PKU_PKEY_BIT] = "pkey=1",
    [AC_ACCESS_WRITE_BIT] = "write",
    [AC_ACCESS_USER_BIT] = "user",
    [AC_ACCESS_FETCH_BIT] = "fetch",
    [AC_ACCESS_TWICE_BIT] = "twice",
    [AC_CPU_EFER_NX_BIT] = "efer.nx",
    [AC_CPU_CR0_WP_BIT] = "cr0.wp",
    [AC_CPU_CR4_SMEP_BIT] = "cr4.smep",
    [AC_CPU_CR4_PKE_BIT] = "cr4.pke",
};
145 
146 static inline void *va(pt_element_t phys)
147 {
148     return (void *)phys;
149 }
150 
/* Bump allocator handing out physical pages for test page tables. */
typedef struct {
    pt_element_t pt_pool;	/* base physical address of the pool */
    unsigned pt_pool_size;	/* pool size in bytes */
    unsigned pt_pool_current;	/* offset of the next free page */
} ac_pool_t;

/* One test configuration plus its precomputed expectations. */
typedef struct {
    unsigned flags;		/* combination of AC_*_MASK bits */
    void *virt;			/* virtual address being accessed */
    pt_element_t phys;		/* backing physical address */
    pt_element_t *ptep;		/* leaf PTE, or 0 for large-page tests */
    pt_element_t expected_pte;	/* expected PTE value after the access */
    pt_element_t *pdep;
    pt_element_t expected_pde;
    pt_element_t ignore_pde;	/* PDE bits excluded from comparison */
    int expected_fault;		/* nonzero if a #PF is expected */
    unsigned expected_error;	/* expected #PF error code */
} ac_test_t;

/* lgdt/lidt operand layout.  NOTE(review): appears unused here — confirm. */
typedef struct {
    unsigned short limit;
    unsigned long linear_addr;
} __attribute__((packed)) descriptor_table_t;
174 
175 
static void ac_test_show(ac_test_t *at);

/*
 * Cached copies of CR0/CR4/EFER, used to skip redundant (and possibly
 * faulting) writes to the real registers.
 */
static unsigned long shadow_cr0;
static unsigned long shadow_cr4;
static unsigned long long shadow_efer;
181 
182 static void set_cr0_wp(int wp)
183 {
184     unsigned long cr0 = shadow_cr0;
185 
186     cr0 &= ~CR0_WP_MASK;
187     if (wp)
188 	cr0 |= CR0_WP_MASK;
189     if (cr0 != shadow_cr0) {
190         write_cr0(cr0);
191         shadow_cr0 = cr0;
192     }
193 }
194 
/*
 * Set or clear CR4.SMEP via write_cr4_checking(); returns the exception
 * vector raised by the write (0 on success).
 *
 * The running test code lives in a user-accessible 2MB mapping (ptl2[2]);
 * with SMEP on, executing it at CPL0 would fault.  So the USER bit is
 * stripped from that mapping before enabling SMEP and restored once SMEP
 * is being disabled or the CR4 write failed.
 */
static unsigned set_cr4_smep(int smep)
{
    unsigned long cr4 = shadow_cr4;
    extern u64 ptl2[];
    unsigned r;

    cr4 &= ~CR4_SMEP_MASK;
    if (smep)
	cr4 |= CR4_SMEP_MASK;
    if (cr4 == shadow_cr4)
        return 0;

    if (smep)
        ptl2[2] &= ~PT_USER_MASK;
    r = write_cr4_checking(cr4);
    if (r || !smep)
        ptl2[2] |= PT_USER_MASK;
    if (!r)
        shadow_cr4 = cr4;	/* track CR4 only if the write succeeded */
    return r;
}
216 
/*
 * Set or clear CR4.PKE.  Before turning PKE off, PKRU is loaded with a
 * deny-everything value (all keys AD/WD except key 0's access bit) so a
 * subsequent successful access proves keys are ignored when CR4.PKE=0.
 */
static void set_cr4_pke(int pke)
{
    unsigned long cr4 = shadow_cr4;

    cr4 &= ~X86_CR4_PKE;
    if (pke)
	cr4 |= X86_CR4_PKE;
    if (cr4 == shadow_cr4)
        return;

    /* Check that protection keys do not affect accesses when CR4.PKE=0.  */
    if ((shadow_cr4 & X86_CR4_PKE) && !pke)
        write_pkru(0xfffffffc);
    write_cr4(cr4);
    shadow_cr4 = cr4;
}
233 
234 static void set_efer_nx(int nx)
235 {
236     unsigned long long efer = shadow_efer;
237 
238     efer &= ~EFER_NX_MASK;
239     if (nx)
240 	efer |= EFER_NX_MASK;
241     if (efer != shadow_efer) {
242         wrmsr(MSR_EFER, efer);
243         shadow_efer = efer;
244     }
245 }
246 
/*
 * One-time environment setup: install the #PF handler, make vector 0x20
 * callable from ring 3 (used to return to the kernel after a user-mode
 * access), and reserve physical memory [33MB, 120MB) as the page-table
 * allocation pool.
 */
static void ac_env_int(ac_pool_t *pool)
{
    extern char page_fault, kernel_entry;
    set_idt_entry(14, &page_fault, 0);
    set_idt_entry(0x20, &kernel_entry, 3);

    pool->pt_pool = 33 * 1024 * 1024;
    pool->pt_pool_size = 120 * 1024 * 1024 - pool->pt_pool;
    pool->pt_pool_current = 0;
}
257 
258 static void ac_test_init(ac_test_t *at, void *virt)
259 {
260     set_efer_nx(1);
261     set_cr0_wp(1);
262     at->flags = 0;
263     at->virt = virt;
264     at->phys = 32 * 1024 * 1024;
265 }
266 
267 static int ac_test_bump_one(ac_test_t *at)
268 {
269     at->flags = ((at->flags | invalid_mask) + 1) & ~invalid_mask;
270     return at->flags < (1 << NR_AC_FLAGS);
271 }
272 
273 #define F(x)  ((flags & x##_MASK) != 0)
274 
275 static _Bool ac_test_legal(ac_test_t *at)
276 {
277     int flags = at->flags;
278 
279     if (F(AC_ACCESS_FETCH) && F(AC_ACCESS_WRITE))
280 	return false;
281 
282     /*
283      * Since we convert current page to kernel page when cr4.smep=1,
284      * we can't switch to user mode.
285      */
286     if (F(AC_ACCESS_USER) && F(AC_CPU_CR4_SMEP))
287 	return false;
288 
289     /*
290      * Only test protection key faults if CR4.PKE=1.
291      */
292     if (!F(AC_CPU_CR4_PKE) &&
293         (F(AC_PKU_AD) || F(AC_PKU_WD))) {
294 	return false;
295     }
296 
297     /*
298      * pde.bit13 checks handling of reserved bits in largepage PDEs.  It is
299      * meaningless if there is a PTE.
300      */
301     if (!F(AC_PDE_PSE) && F(AC_PDE_BIT13))
302         return false;
303 
304     /*
305      * Shorten the test by avoiding testing too many reserved bit combinations
306      */
307     if ((F(AC_PDE_BIT51) + F(AC_PDE_BIT36) + F(AC_PDE_BIT13)) > 1)
308         return false;
309     if ((F(AC_PTE_BIT51) + F(AC_PTE_BIT36)) > 1)
310         return false;
311 
312     return true;
313 }
314 
315 static int ac_test_bump(ac_test_t *at)
316 {
317     int ret;
318 
319     ret = ac_test_bump_one(at);
320     while (ret && !ac_test_legal(at))
321 	ret = ac_test_bump_one(at);
322     return ret;
323 }
324 
325 static pt_element_t ac_test_alloc_pt(ac_pool_t *pool)
326 {
327     pt_element_t ret = pool->pt_pool + pool->pt_pool_current;
328     pool->pt_pool_current += PAGE_SIZE;
329     return ret;
330 }
331 
332 static _Bool ac_test_enough_room(ac_pool_t *pool)
333 {
334     return pool->pt_pool_current + 5 * PAGE_SIZE <= pool->pt_pool_size;
335 }
336 
/* Recycle the entire pool; stale tables are simply overwritten later. */
static void ac_test_reset_pt_pool(ac_pool_t *pool)
{
    pool->pt_pool_current = 0;
}
341 
/*
 * Decide whether the access described by 'flags' is permitted given the
 * effective leaf permissions (writable/user/executable), updating
 * at->expected_fault and at->expected_error accordingly.  Returns the
 * Accessed/Dirty bits the walk is expected to set in the leaf entry.
 */
static pt_element_t ac_test_permissions(ac_test_t *at, unsigned flags,
                                        bool writable, bool user,
                                        bool executable)
{
    /* With CR0.WP=0, supervisor writes ignore the R/W bit. */
    bool kwritable = !F(AC_CPU_CR0_WP) && !F(AC_ACCESS_USER);
    pt_element_t expected = 0;

    if (F(AC_ACCESS_USER) && !user)
	at->expected_fault = 1;

    if (F(AC_ACCESS_WRITE) && !writable && !kwritable)
	at->expected_fault = 1;

    if (F(AC_ACCESS_FETCH) && !executable)
	at->expected_fault = 1;

    /* SMEP forbids supervisor fetches from user-accessible pages. */
    if (F(AC_ACCESS_FETCH) && user && F(AC_CPU_CR4_SMEP))
        at->expected_fault = 1;

    /* Protection keys apply only to data accesses to user pages. */
    if (user && !F(AC_ACCESS_FETCH) && F(AC_PKU_PKEY) && F(AC_CPU_CR4_PKE)) {
        if (F(AC_PKU_AD)) {
            at->expected_fault = 1;
            at->expected_error |= PFERR_PK_MASK;
        } else if (F(AC_ACCESS_WRITE) && F(AC_PKU_WD) && !kwritable) {
            at->expected_fault = 1;
            at->expected_error |= PFERR_PK_MASK;
        }
    }

    /* A successful access sets Accessed, plus Dirty on writes. */
    if (!at->expected_fault) {
        expected |= PT_ACCESSED_MASK;
        if (F(AC_ACCESS_WRITE))
            expected |= PT_DIRTY_MASK;
    }

    return expected;
}
379 
/*
 * Emulate one page walk for an access with the given flags (which may
 * differ from at->flags for the first half of a "twice" test), and
 * accumulate the expected fault, #PF error code and A/D-bit updates.
 */
static void ac_emulate_access(ac_test_t *at, unsigned flags)
{
    bool pde_valid, pte_valid;
    bool user, writable, executable;

    if (F(AC_ACCESS_USER))
	at->expected_error |= PFERR_USER_MASK;

    if (F(AC_ACCESS_WRITE))
	at->expected_error |= PFERR_WRITE_MASK;

    if (F(AC_ACCESS_FETCH))
	at->expected_error |= PFERR_FETCH_MASK;

    /*
     * If pde.a starts clear, don't compare it on a faulting access; the
     * mask is reset below when the access is expected to succeed.
     */
    if (!F(AC_PDE_ACCESSED))
        at->ignore_pde = PT_ACCESSED_MASK;

    /* Reserved-bit-valid PDE: no bits 51/36/13, and NX only with EFER.NX. */
    pde_valid = F(AC_PDE_PRESENT)
        && !F(AC_PDE_BIT51) && !F(AC_PDE_BIT36) && !F(AC_PDE_BIT13)
        && !(F(AC_PDE_NX) && !F(AC_CPU_EFER_NX));

    if (!pde_valid) {
        at->expected_fault = 1;
	if (F(AC_PDE_PRESENT)) {
            at->expected_error |= PFERR_RESERVED_MASK;
        } else {
            at->expected_error &= ~PFERR_PRESENT_MASK;
        }
	goto fault;
    }

    writable = F(AC_PDE_WRITABLE);
    user = F(AC_PDE_USER);
    executable = !F(AC_PDE_NX);

    if (F(AC_PDE_PSE)) {
        /* Large page: the PDE is the leaf entry. */
        at->expected_pde |= ac_test_permissions(at, flags, writable, user,
                                                executable);
	goto no_pte;
    }

    at->expected_pde |= PT_ACCESSED_MASK;

    pte_valid = F(AC_PTE_PRESENT)
        && !F(AC_PTE_BIT51) && !F(AC_PTE_BIT36)
        && !(F(AC_PTE_NX) && !F(AC_CPU_EFER_NX));

    if (!pte_valid) {
        at->expected_fault = 1;
	if (F(AC_PTE_PRESENT)) {
            at->expected_error |= PFERR_RESERVED_MASK;
        } else {
            at->expected_error &= ~PFERR_PRESENT_MASK;
        }
	goto fault;
    }

    /* Effective permissions are the intersection of PDE and PTE. */
    writable &= F(AC_PTE_WRITABLE);
    user &= F(AC_PTE_USER);
    executable &= !F(AC_PTE_NX);

    at->expected_pte |= ac_test_permissions(at, flags, writable, user,
                                            executable);

no_pte:
fault:
    if (!at->expected_fault)
        at->ignore_pde = 0;
    /* Without NX and SMEP the fetch bit is never reported in the PFEC. */
    if (!F(AC_CPU_EFER_NX) && !F(AC_CPU_CR4_SMEP))
        at->expected_error &= ~PFERR_FETCH_MASK;
}
451 
/*
 * Recompute all expectations (fault, error code, final PTE/PDE value)
 * for the current at->flags.  For "twice" tests the first access is a
 * plain supervisor read, so it is emulated first to get the A/D state
 * the real run will start from.
 */
static void ac_set_expected_status(ac_test_t *at)
{
    invlpg(at->virt);

    if (at->ptep)
	at->expected_pte = *at->ptep;
    at->expected_pde = *at->pdep;
    at->ignore_pde = 0;
    at->expected_fault = 0;
    at->expected_error = PFERR_PRESENT_MASK;

    if (at->flags & AC_ACCESS_TWICE_MASK) {
        ac_emulate_access(at, at->flags & ~AC_ACCESS_WRITE_MASK
                          & ~AC_ACCESS_FETCH_MASK & ~AC_ACCESS_USER_MASK);
        at->expected_fault = 0;
	at->expected_error = PFERR_PRESENT_MASK;
        at->ignore_pde = 0;
    }

    ac_emulate_access(at, at->flags);
}
473 
/*
 * Build the page-table path for at->virt according to at->flags.  When
 * pd_page/pt_page are nonzero they are used instead of fresh pool pages,
 * letting two tests share intermediate tables.  Finishes by computing
 * the expected outcome of the access.
 */
static void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool,
				      u64 pd_page, u64 pt_page)

{
    unsigned long root = read_cr3();
    int flags = at->flags;
    bool skip = true;

    if (!ac_test_enough_room(pool))
	ac_test_reset_pt_pool(pool);

    at->ptep = 0;
    for (int i = page_table_levels; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) {
	pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK);
	unsigned index = PT_INDEX((unsigned long)at->virt, i);
	pt_element_t pte = 0;

	/*
	 * Reuse existing page tables along the path to the test code and data
	 * (which is in the bottom 2MB).
	 */
	if (skip && i >= 2 && index == 0) {
	    goto next;
	}
	skip = false;

	switch (i) {
	case 5:
	case 4:
	case 3:
	    /* Upper levels: always present, writable and user. */
	    pte = pd_page ? pd_page : ac_test_alloc_pt(pool);
	    pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
	    break;
	case 2:
	    if (!F(AC_PDE_PSE)) {
		pte = pt_page ? pt_page : ac_test_alloc_pt(pool);
		/* The protection key is ignored on non-leaf entries.  */
                if (F(AC_PKU_PKEY))
                    pte |= 2ull << 59;
	    } else {
		/* Large page: the PDE itself is the leaf; use pkey 1. */
		pte = at->phys & PT_PSE_BASE_ADDR_MASK;
		pte |= PT_PAGE_SIZE_MASK;
                if (F(AC_PKU_PKEY))
                    pte |= 1ull << 59;
	    }
	    if (F(AC_PDE_PRESENT))
		pte |= PT_PRESENT_MASK;
	    if (F(AC_PDE_WRITABLE))
		pte |= PT_WRITABLE_MASK;
	    if (F(AC_PDE_USER))
		pte |= PT_USER_MASK;
	    if (F(AC_PDE_ACCESSED))
		pte |= PT_ACCESSED_MASK;
	    if (F(AC_PDE_DIRTY))
		pte |= PT_DIRTY_MASK;
	    if (F(AC_PDE_NX))
		pte |= PT64_NX_MASK;
	    if (F(AC_PDE_BIT51))
		pte |= 1ull << 51;
	    if (F(AC_PDE_BIT36))
                pte |= 1ull << 36;
	    if (F(AC_PDE_BIT13))
		pte |= 1ull << 13;
	    at->pdep = &vroot[index];
	    break;
	case 1:
	    pte = at->phys & PT_BASE_ADDR_MASK;
	    if (F(AC_PKU_PKEY))
		pte |= 1ull << 59;
	    if (F(AC_PTE_PRESENT))
		pte |= PT_PRESENT_MASK;
	    if (F(AC_PTE_WRITABLE))
		pte |= PT_WRITABLE_MASK;
	    if (F(AC_PTE_USER))
		pte |= PT_USER_MASK;
	    if (F(AC_PTE_ACCESSED))
		pte |= PT_ACCESSED_MASK;
	    if (F(AC_PTE_DIRTY))
		pte |= PT_DIRTY_MASK;
	    if (F(AC_PTE_NX))
		pte |= PT64_NX_MASK;
	    if (F(AC_PTE_BIT51))
		pte |= 1ull << 51;
	    if (F(AC_PTE_BIT36))
                pte |= 1ull << 36;
	    at->ptep = &vroot[index];
	    break;
	}
	vroot[index] = pte;
 next:
	root = vroot[index];
    }
    ac_set_expected_status(at);
}
568 
/* Build the page tables using freshly allocated pd/pt pages. */
static void ac_test_setup_pte(ac_test_t *at, ac_pool_t *pool)
{
	__ac_setup_specific_pages(at, pool, 0, 0);
}
573 
574 static void ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool,
575 				    u64 pd_page, u64 pt_page)
576 {
577 	return __ac_setup_specific_pages(at, pool, pd_page, pt_page);
578 }
579 
580 static void dump_mapping(ac_test_t *at)
581 {
582 	unsigned long root = read_cr3();
583         int flags = at->flags;
584 	int i;
585 
586 	printf("Dump mapping: address: %p\n", at->virt);
587 	for (i = page_table_levels ; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) {
588 		pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK);
589 		unsigned index = PT_INDEX((unsigned long)at->virt, i);
590 		pt_element_t pte = vroot[index];
591 
592 		printf("------L%d: %lx\n", i, pte);
593 		root = vroot[index];
594 	}
595 }
596 
/*
 * If 'cond' indicates a failure, print the test configuration (unless
 * verbose mode already did), the formatted failure message and the page
 * tables, and latch *success_ret to false.  Only the first failure per
 * test is reported.
 */
static void ac_test_check(ac_test_t *at, _Bool *success_ret, _Bool cond,
                          const char *fmt, ...)
{
    va_list ap;
    char buf[500];

    if (!*success_ret) {
        return;
    }

    if (!cond) {
        return;
    }

    *success_ret = false;

    if (!verbose) {
        puts("\n");
        ac_test_show(at);
    }

    va_start(ap, fmt);
    vsnprintf(buf, sizeof(buf), fmt, ap);
    va_end(ap);
    printf("FAIL: %s\n", buf);
    dump_mapping(at);
}
624 
625 static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore)
626 {
627     pte1 &= ~ignore;
628     pte2 &= ~ignore;
629     return pte1 == pte2;
630 }
631 
632 static int ac_test_do_access(ac_test_t *at)
633 {
634     static unsigned unique = 42;
635     int fault = 0;
636     unsigned e;
637     static unsigned char user_stack[4096];
638     unsigned long rsp;
639     _Bool success = true;
640     int flags = at->flags;
641 
642     ++unique;
643     if (!(unique & 65535)) {
644         puts(".");
645     }
646 
647     *((unsigned char *)at->phys) = 0xc3; /* ret */
648 
649     unsigned r = unique;
650     set_cr0_wp(F(AC_CPU_CR0_WP));
651     set_efer_nx(F(AC_CPU_EFER_NX));
652     set_cr4_pke(F(AC_CPU_CR4_PKE));
653     if (F(AC_CPU_CR4_PKE)) {
654         /* WD2=AD2=1, WD1=F(AC_PKU_WD), AD1=F(AC_PKU_AD) */
655         write_pkru(0x30 | (F(AC_PKU_WD) ? 8 : 0) |
656                    (F(AC_PKU_AD) ? 4 : 0));
657     }
658 
659     set_cr4_smep(F(AC_CPU_CR4_SMEP));
660 
661     if (F(AC_ACCESS_TWICE)) {
662 	asm volatile (
663 	    "mov $fixed2, %%rsi \n\t"
664 	    "mov (%[addr]), %[reg] \n\t"
665 	    "fixed2:"
666 	    : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e)
667 	    : [addr]"r"(at->virt)
668 	    : "rsi"
669 	    );
670 	fault = 0;
671     }
672 
673     asm volatile ("mov $fixed1, %%rsi \n\t"
674 		  "mov %%rsp, %%rdx \n\t"
675 		  "cmp $0, %[user] \n\t"
676 		  "jz do_access \n\t"
677 		  "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax  \n\t"
678 		  "pushq %[user_ds] \n\t"
679 		  "pushq %[user_stack_top] \n\t"
680 		  "pushfq \n\t"
681 		  "pushq %[user_cs] \n\t"
682 		  "pushq $do_access \n\t"
683 		  "iretq \n"
684 		  "do_access: \n\t"
685 		  "cmp $0, %[fetch] \n\t"
686 		  "jnz 2f \n\t"
687 		  "cmp $0, %[write] \n\t"
688 		  "jnz 1f \n\t"
689 		  "mov (%[addr]), %[reg] \n\t"
690 		  "jmp done \n\t"
691 		  "1: mov %[reg], (%[addr]) \n\t"
692 		  "jmp done \n\t"
693 		  "2: call *%[addr] \n\t"
694 		  "done: \n"
695 		  "fixed1: \n"
696 		  "int %[kernel_entry_vector] \n\t"
697 		  "back_to_kernel:"
698 		  : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp)
699 		  : [addr]"r"(at->virt),
700 		    [write]"r"(F(AC_ACCESS_WRITE)),
701 		    [user]"r"(F(AC_ACCESS_USER)),
702 		    [fetch]"r"(F(AC_ACCESS_FETCH)),
703 		    [user_ds]"i"(USER_DS),
704 		    [user_cs]"i"(USER_CS),
705 		    [user_stack_top]"r"(user_stack + sizeof user_stack),
706 		    [kernel_entry_vector]"i"(0x20)
707 		  : "rsi");
708 
709     asm volatile (".section .text.pf \n\t"
710 		  "page_fault: \n\t"
711 		  "pop %rbx \n\t"
712 		  "mov %rsi, (%rsp) \n\t"
713 		  "movl $1, %eax \n\t"
714 		  "iretq \n\t"
715 		  ".section .text");
716 
717     asm volatile (".section .text.entry \n\t"
718 		  "kernel_entry: \n\t"
719 		  "mov %rdx, %rsp \n\t"
720 		  "jmp back_to_kernel \n\t"
721 		  ".section .text");
722 
723     ac_test_check(at, &success, fault && !at->expected_fault,
724                   "unexpected fault");
725     ac_test_check(at, &success, !fault && at->expected_fault,
726                   "unexpected access");
727     ac_test_check(at, &success, fault && e != at->expected_error,
728                   "error code %x expected %x", e, at->expected_error);
729     if (at->ptep)
730         ac_test_check(at, &success, *at->ptep != at->expected_pte,
731                       "pte %x expected %x", *at->ptep, at->expected_pte);
732     ac_test_check(at, &success,
733                   !pt_match(*at->pdep, at->expected_pde, at->ignore_pde),
734                   "pde %x expected %x", *at->pdep, at->expected_pde);
735 
736     if (success && verbose) {
737 	if (at->expected_fault) {
738             printf("PASS (%x)\n", at->expected_error);
739 	} else {
740             printf("PASS\n");
741 	}
742     }
743     return success;
744 }
745 
746 static void ac_test_show(ac_test_t *at)
747 {
748     char line[5000];
749 
750     *line = 0;
751     strcat(line, "test");
752     for (int i = 0; i < NR_AC_FLAGS; ++i)
753 	if (at->flags & (1 << i)) {
754 	    strcat(line, " ");
755 	    strcat(line, ac_names[i]);
756 	}
757 
758     strcat(line, ": ");
759     printf("%s", line);
760 }
761 
762 /*
 * This test case is used to trigger the bug which is fixed by
764  * commit e09e90a5 in the kvm tree
765  */
static int corrupt_hugepage_triger(ac_pool_t *pool)
{
    ac_test_t at1, at2;

    ac_test_init(&at1, (void *)(0x123400000000));
    ac_test_init(&at2, (void *)(0x666600000000));

    /* Read a read-only huge page... */
    at2.flags = AC_CPU_CR0_WP_MASK | AC_PDE_PSE_MASK | AC_PDE_PRESENT_MASK;
    ac_test_setup_pte(&at2, pool);
    if (!ac_test_do_access(&at2))
        goto err;

    /* ...then a writable huge page backed by the same physical page. */
    at1.flags = at2.flags | AC_PDE_WRITABLE_MASK;
    ac_test_setup_pte(&at1, pool);
    if (!ac_test_do_access(&at1))
        goto err;

    /* Write through the writable mapping... */
    at1.flags |= AC_ACCESS_WRITE_MASK;
    ac_set_expected_status(&at1);
    if (!ac_test_do_access(&at1))
        goto err;

    /* ...and verify a write via the read-only one still faults. */
    at2.flags |= AC_ACCESS_WRITE_MASK;
    ac_set_expected_status(&at2);
    if (!ac_test_do_access(&at2))
        goto err;

    return 1;

err:
    printf("corrupt_hugepage_triger test fail\n");
    return 0;
}
799 
800 /*
 * This test case is used to trigger the bug which is fixed by
802  * commit 3ddf6c06e13e in the kvm tree
803  */
static int check_pfec_on_prefetch_pte(ac_pool_t *pool)
{
	ac_test_t at1, at2;

	ac_test_init(&at1, (void *)(0x123406001000));
	ac_test_init(&at2, (void *)(0x123406003000));

	/* Two PTEs in the same shared page table; the second is NX. */
	at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK;
	ac_setup_specific_pages(&at1, pool, 30 * 1024 * 1024, 30 * 1024 * 1024);

        at2.flags = at1.flags | AC_PTE_NX_MASK;
	ac_setup_specific_pages(&at2, pool, 30 * 1024 * 1024, 30 * 1024 * 1024);

	if (!ac_test_do_access(&at1)) {
		printf("%s: prepare fail\n", __FUNCTION__);
		goto err;
	}

	/* The prefetched NX pte must still yield the right error code. */
	if (!ac_test_do_access(&at2)) {
		printf("%s: check PFEC on prefetch pte path fail\n",
			__FUNCTION__);
		goto err;
	}

	return 1;

err:
    return 0;
}
833 
834 /*
835  * If the write-fault access is from supervisor and CR0.WP is not set on the
836  * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte
837  * and clears U bit. This is the chance that kvm can change pte access from
838  * readonly to writable.
839  *
840  * Unfortunately, the pte access is the access of 'direct' shadow page table,
841  * means direct sp.role.access = pte_access, then we will create a writable
 * spte entry on the readonly shadow page table. It causes the Dirty bit
 * not to be tracked when two guest ptes point to the same large page. Note, it
844  * does not have other impact except Dirty bit since cr0.wp is encoded into
845  * sp.role.
846  *
847  * Note: to trigger this bug, hugepage should be disabled on host.
848  */
static int check_large_pte_dirty_for_nowp(ac_pool_t *pool)
{
	ac_test_t at1, at2;

	ac_test_init(&at1, (void *)(0x123403000000));
	ac_test_init(&at2, (void *)(0x666606000000));

	/* Read through a read-only huge mapping (CR0.WP clear). */
        at2.flags = AC_PDE_PRESENT_MASK | AC_PDE_PSE_MASK;
	ac_test_setup_pte(&at2, pool);
	if (!ac_test_do_access(&at2)) {
		printf("%s: read on the first mapping fail.\n", __FUNCTION__);
		goto err;
	}

	/* Supervisor write through a second mapping of the same page. */
        at1.flags = at2.flags | AC_ACCESS_WRITE_MASK;
	ac_test_setup_pte(&at1, pool);
	if (!ac_test_do_access(&at1)) {
		printf("%s: write on the second mapping fail.\n", __FUNCTION__);
		goto err;
	}

	/* Write via the first mapping: Dirty must still be tracked. */
	at2.flags |= AC_ACCESS_WRITE_MASK;
	ac_set_expected_status(&at2);
	if (!ac_test_do_access(&at2)) {
		printf("%s: write on the first mapping fail.\n", __FUNCTION__);
		goto err;
	}

	return 1;

err:
	return 0;
}
882 
/* SMEP must fault a supervisor fetch from a user page even with CR0.WP=0. */
static int check_smep_andnot_wp(ac_pool_t *pool)
{
	ac_test_t at1;
	int err_prepare_andnot_wp, err_smep_andnot_wp;

	if (!this_cpu_has(X86_FEATURE_SMEP)) {
	    return 1;
	}

	ac_test_init(&at1, (void *)(0x123406001000));

	at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK |
            AC_PDE_USER_MASK | AC_PTE_USER_MASK |
            AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK |
            AC_CPU_CR4_SMEP_MASK |
            AC_CPU_CR0_WP_MASK |
            AC_ACCESS_WRITE_MASK;
	ac_test_setup_pte(&at1, pool);

	/*
	 * Here we write the ro user page when
	 * cr0.wp=0, then we execute it and SMEP
	 * fault should happen.
	 */
	err_prepare_andnot_wp = ac_test_do_access(&at1);
	if (!err_prepare_andnot_wp) {
		printf("%s: SMEP prepare fail\n", __FUNCTION__);
		goto clean_up;
	}

        at1.flags &= ~AC_ACCESS_WRITE_MASK;
        at1.flags |= AC_ACCESS_FETCH_MASK;
        ac_set_expected_status(&at1);
        err_smep_andnot_wp = ac_test_do_access(&at1);

clean_up:
	set_cr4_smep(0);

	if (!err_prepare_andnot_wp)
		goto err;
	if (!err_smep_andnot_wp) {
		printf("%s: check SMEP without wp fail\n", __FUNCTION__);
		goto err;
	}
	return 1;

err:
	return 0;
}
932 
933 static int ac_test_exec(ac_test_t *at, ac_pool_t *pool)
934 {
935     int r;
936 
937     if (verbose) {
938         ac_test_show(at);
939     }
940     ac_test_setup_pte(at, pool);
941     r = ac_test_do_access(at);
942     return r;
943 }
944 
typedef int (*ac_test_fn)(ac_pool_t *pool);

/* Hand-written regression tests run after the exhaustive sweep. */
const ac_test_fn ac_test_cases[] =
{
	corrupt_hugepage_triger,
	check_pfec_on_prefetch_pte,
	check_large_pte_dirty_for_nowp,
	check_smep_andnot_wp
};
953 
/*
 * Run the full suite at the current page_table_levels: probe and mask
 * unavailable features, sweep every legal AC_* combination, then run the
 * hand-written regression cases.  Returns 1 if every test passed.
 */
static int ac_test_run(void)
{
    ac_test_t at;
    ac_pool_t pool;
    int i, tests, successes;

    printf("run\n");
    tests = successes = 0;

    shadow_cr0 = read_cr0();
    shadow_cr4 = read_cr4();
    shadow_efer = rdmsr(MSR_EFER);

    /* Bits that are valid physical-address bits cannot act as reserved. */
    if (cpuid_maxphyaddr() >= 52) {
        invalid_mask |= AC_PDE_BIT51_MASK;
        invalid_mask |= AC_PTE_BIT51_MASK;
    }
    if (cpuid_maxphyaddr() >= 37) {
        invalid_mask |= AC_PDE_BIT36_MASK;
        invalid_mask |= AC_PTE_BIT36_MASK;
    }

    if (this_cpu_has(X86_FEATURE_PKU)) {
        set_cr4_pke(1);
        set_cr4_pke(0);
        /* Now PKRU = 0xFFFFFFFF.  */
    } else {
	/* Setting CR4.PKE without the feature must raise #GP. */
	tests++;
	if (write_cr4_checking(shadow_cr4 | X86_CR4_PKE) == GP_VECTOR) {
            successes++;
            invalid_mask |= AC_PKU_AD_MASK;
            invalid_mask |= AC_PKU_WD_MASK;
            invalid_mask |= AC_PKU_PKEY_MASK;
            invalid_mask |= AC_CPU_CR4_PKE_MASK;
            printf("CR4.PKE not available, disabling PKE tests\n");
	} else {
            printf("Set PKE in CR4 - expect #GP: FAIL!\n");
            set_cr4_pke(0);
	}
    }

    if (!this_cpu_has(X86_FEATURE_SMEP)) {
	/* Likewise for CR4.SMEP. */
	tests++;
	if (set_cr4_smep(1) == GP_VECTOR) {
            successes++;
            invalid_mask |= AC_CPU_CR4_SMEP_MASK;
            printf("CR4.SMEP not available, disabling SMEP tests\n");
	} else {
            printf("Set SMEP in CR4 - expect #GP: FAIL!\n");
            set_cr4_smep(0);
	}
    }

    /* Toggling LA57 in 64-bit mode (guaranteed for this test) is illegal. */
    if (this_cpu_has(X86_FEATURE_LA57)) {
        tests++;
        if (write_cr4_checking(shadow_cr4 ^ X86_CR4_LA57) == GP_VECTOR)
            successes++;

        /* Force a VM-Exit on KVM, which doesn't intercept LA57 itself. */
        tests++;
        if (write_cr4_checking(shadow_cr4 ^ (X86_CR4_LA57 | X86_CR4_PSE)) == GP_VECTOR)
            successes++;
    }

    /* Exhaustive sweep over every legal flag combination. */
    ac_env_int(&pool);
    ac_test_init(&at, (void *)(0x123400000000 + 16 * smp_id()));
    do {
	++tests;
	successes += ac_test_exec(&at, &pool);
    } while (ac_test_bump(&at));

    for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) {
	++tests;
	successes += ac_test_cases[i](&pool);
    }

    printf("\n%d tests, %d failures\n", tests, tests - successes);

    return successes == tests;
}
1035 
1036 int main(void)
1037 {
1038     int r;
1039 
1040     printf("starting test\n\n");
1041     page_table_levels = 4;
1042     r = ac_test_run();
1043 
1044     if (this_cpu_has(X86_FEATURE_LA57)) {
1045         page_table_levels = 5;
1046         printf("starting 5-level paging test.\n\n");
1047         setup_5level_page_table();
1048         r = ac_test_run();
1049     }
1050 
1051     return r ? 0 : 1;
1052 }
1053