xref: /kvm-unit-tests/x86/access.c (revision 33a6576c6e1bfd98fd35402c9f64dfd7537dd2ff)
1 
2 #include "libcflat.h"
3 #include "desc.h"
4 #include "processor.h"
5 #include "asm/page.h"
6 #include "x86/vm.h"
7 
/* This test only ever runs on one CPU, so the CPU id is the constant 0. */
#define smp_id() 0

/* Local boolean constants used throughout this file. */
#define false 0
#define true 1

/* One page-table entry, at any paging level. */
typedef unsigned long pt_element_t;

/* When set, every test case is printed before it runs and PASS is reported. */
static _Bool verbose = false;

/* AC_*_MASK bits that are held at zero while enumerating flag combinations. */
static int invalid_mask;

/* Number of paging levels walked when building mappings (set by main()). */
static int page_table_levels;
18 
/* Physical-address field of a page-table entry: bits below 40, page aligned. */
#define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 40) - 1) & PAGE_MASK))
/*
 * Physical-address field of a large-page (2MB) PDE.  The frame address of a
 * 2MB page starts at bit 21, so every bit below bit 21 is masked off (the
 * previous definition cleared only bit 21 itself).
 */
#define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~((1ull << 21) - 1))

/* Control-register bits toggled by the tests. */
#define CR0_WP_MASK (1UL << 16)
#define CR4_SMEP_MASK (1UL << 20)

/* Page-fault error code bits. */
#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)
#define PFERR_RESERVED_MASK (1U << 3)
#define PFERR_FETCH_MASK (1U << 4)
#define PFERR_PK_MASK (1U << 5)

/* EFER MSR index and its no-execute enable bit. */
#define MSR_EFER 0xc0000080
#define EFER_NX_MASK		(1ull << 11)
34 
/*
 * Index of `address` within the page table at `level` (1 = lowest level):
 * 9 index bits per level above a 12-bit page offset.  The whole expansion is
 * parenthesized so the macro can be used safely inside larger expressions
 * (e.g. `PT_INDEX(a, l) == 0`).
 */
#define PT_INDEX(address, level)       \
       (((address) >> (12 + ((level) - 1) * 9)) & 511)
37 
38 /*
39  * page table access check tests
40  */
41 
/*
 * Bit positions of the individual test knobs.  A test case is described by an
 * integer whose bit N is set when knob N is enabled; NR_AC_FLAGS is the number
 * of knobs.
 */
enum {
    /* Contents of the last-level page-table entry. */
    AC_PTE_PRESENT_BIT,
    AC_PTE_WRITABLE_BIT,
    AC_PTE_USER_BIT,
    AC_PTE_ACCESSED_BIT,
    AC_PTE_DIRTY_BIT,
    AC_PTE_NX_BIT,
    AC_PTE_BIT51_BIT,

    /* Contents of the page-directory entry. */
    AC_PDE_PRESENT_BIT,
    AC_PDE_WRITABLE_BIT,
    AC_PDE_USER_BIT,
    AC_PDE_ACCESSED_BIT,
    AC_PDE_DIRTY_BIT,
    AC_PDE_PSE_BIT,
    AC_PDE_NX_BIT,
    AC_PDE_BIT51_BIT,
    AC_PDE_BIT13_BIT,

    /* Protection-key configuration. */
    AC_PKU_AD_BIT,
    AC_PKU_WD_BIT,
    AC_PKU_PKEY_BIT,

    /* Kind of access performed. */
    AC_ACCESS_USER_BIT,
    AC_ACCESS_WRITE_BIT,
    AC_ACCESS_FETCH_BIT,
    AC_ACCESS_TWICE_BIT,

    /* CPU control state. */
    AC_CPU_EFER_NX_BIT,
    AC_CPU_CR0_WP_BIT,
    AC_CPU_CR4_SMEP_BIT,
    AC_CPU_CR4_PKE_BIT,

    NR_AC_FLAGS
};

/* Single-bit masks matching the bit positions above. */
#define AC_PTE_PRESENT_MASK   (1 << AC_PTE_PRESENT_BIT)
#define AC_PTE_WRITABLE_MASK  (1 << AC_PTE_WRITABLE_BIT)
#define AC_PTE_USER_MASK      (1 << AC_PTE_USER_BIT)
#define AC_PTE_ACCESSED_MASK  (1 << AC_PTE_ACCESSED_BIT)
#define AC_PTE_DIRTY_MASK     (1 << AC_PTE_DIRTY_BIT)
#define AC_PTE_NX_MASK        (1 << AC_PTE_NX_BIT)
#define AC_PTE_BIT51_MASK     (1 << AC_PTE_BIT51_BIT)

#define AC_PDE_PRESENT_MASK   (1 << AC_PDE_PRESENT_BIT)
#define AC_PDE_WRITABLE_MASK  (1 << AC_PDE_WRITABLE_BIT)
#define AC_PDE_USER_MASK      (1 << AC_PDE_USER_BIT)
#define AC_PDE_ACCESSED_MASK  (1 << AC_PDE_ACCESSED_BIT)
#define AC_PDE_DIRTY_MASK     (1 << AC_PDE_DIRTY_BIT)
#define AC_PDE_PSE_MASK       (1 << AC_PDE_PSE_BIT)
#define AC_PDE_NX_MASK        (1 << AC_PDE_NX_BIT)
#define AC_PDE_BIT51_MASK     (1 << AC_PDE_BIT51_BIT)
#define AC_PDE_BIT13_MASK     (1 << AC_PDE_BIT13_BIT)

#define AC_PKU_AD_MASK        (1 << AC_PKU_AD_BIT)
#define AC_PKU_WD_MASK        (1 << AC_PKU_WD_BIT)
#define AC_PKU_PKEY_MASK      (1 << AC_PKU_PKEY_BIT)

#define AC_ACCESS_USER_MASK   (1 << AC_ACCESS_USER_BIT)
#define AC_ACCESS_WRITE_MASK  (1 << AC_ACCESS_WRITE_BIT)
#define AC_ACCESS_FETCH_MASK  (1 << AC_ACCESS_FETCH_BIT)
#define AC_ACCESS_TWICE_MASK  (1 << AC_ACCESS_TWICE_BIT)

#define AC_CPU_EFER_NX_MASK   (1 << AC_CPU_EFER_NX_BIT)
#define AC_CPU_CR0_WP_MASK    (1 << AC_CPU_CR0_WP_BIT)
#define AC_CPU_CR4_SMEP_MASK  (1 << AC_CPU_CR4_SMEP_BIT)
#define AC_CPU_CR4_PKE_MASK   (1 << AC_CPU_CR4_PKE_BIT)

/* Human-readable name of each knob, indexed by bit position. */
const char *ac_names[] = {
    [AC_PTE_PRESENT_BIT]  = "pte.p",
    [AC_PTE_WRITABLE_BIT] = "pte.rw",
    [AC_PTE_USER_BIT]     = "pte.user",
    [AC_PTE_ACCESSED_BIT] = "pte.a",
    [AC_PTE_DIRTY_BIT]    = "pte.d",
    [AC_PTE_NX_BIT]       = "pte.nx",
    [AC_PTE_BIT51_BIT]    = "pte.51",

    [AC_PDE_PRESENT_BIT]  = "pde.p",
    [AC_PDE_WRITABLE_BIT] = "pde.rw",
    [AC_PDE_USER_BIT]     = "pde.user",
    [AC_PDE_ACCESSED_BIT] = "pde.a",
    [AC_PDE_DIRTY_BIT]    = "pde.d",
    [AC_PDE_PSE_BIT]      = "pde.pse",
    [AC_PDE_NX_BIT]       = "pde.nx",
    [AC_PDE_BIT51_BIT]    = "pde.51",
    [AC_PDE_BIT13_BIT]    = "pde.13",

    [AC_PKU_AD_BIT]       = "pkru.ad",
    [AC_PKU_WD_BIT]       = "pkru.wd",
    [AC_PKU_PKEY_BIT]     = "pkey=1",

    [AC_ACCESS_USER_BIT]  = "user",
    [AC_ACCESS_WRITE_BIT] = "write",
    [AC_ACCESS_FETCH_BIT] = "fetch",
    [AC_ACCESS_TWICE_BIT] = "twice",

    [AC_CPU_EFER_NX_BIT]  = "efer.nx",
    [AC_CPU_CR0_WP_BIT]   = "cr0.wp",
    [AC_CPU_CR4_SMEP_BIT] = "cr4.smep",
    [AC_CPU_CR4_PKE_BIT]  = "cr4.pke",
};
139 
140 static inline void *va(pt_element_t phys)
141 {
142     return (void *)phys;
143 }
144 
145 typedef struct {
146     pt_element_t pt_pool;
147     unsigned pt_pool_size;
148     unsigned pt_pool_current;
149 } ac_pool_t;
150 
151 typedef struct {
152     unsigned flags;
153     void *virt;
154     pt_element_t phys;
155     pt_element_t *ptep;
156     pt_element_t expected_pte;
157     pt_element_t *pdep;
158     pt_element_t expected_pde;
159     pt_element_t ignore_pde;
160     int expected_fault;
161     unsigned expected_error;
162 } ac_test_t;
163 
164 typedef struct {
165     unsigned short limit;
166     unsigned long linear_addr;
167 } __attribute__((packed)) descriptor_table_t;
168 
169 
170 static void ac_test_show(ac_test_t *at);
171 
172 static unsigned long shadow_cr0;
173 static unsigned long shadow_cr4;
174 static unsigned long long shadow_efer;
175 
176 static void set_cr0_wp(int wp)
177 {
178     unsigned long cr0 = shadow_cr0;
179 
180     cr0 &= ~CR0_WP_MASK;
181     if (wp)
182 	cr0 |= CR0_WP_MASK;
183     if (cr0 != shadow_cr0) {
184         write_cr0(cr0);
185         shadow_cr0 = cr0;
186     }
187 }
188 
189 static unsigned set_cr4_smep(int smep)
190 {
191     unsigned long cr4 = shadow_cr4;
192     extern u64 ptl2[];
193     unsigned r;
194 
195     cr4 &= ~CR4_SMEP_MASK;
196     if (smep)
197 	cr4 |= CR4_SMEP_MASK;
198     if (cr4 == shadow_cr4)
199         return 0;
200 
201     if (smep)
202         ptl2[2] &= ~PT_USER_MASK;
203     r = write_cr4_checking(cr4);
204     if (r || !smep)
205         ptl2[2] |= PT_USER_MASK;
206     if (!r)
207         shadow_cr4 = cr4;
208     return r;
209 }
210 
211 static void set_cr4_pke(int pke)
212 {
213     unsigned long cr4 = shadow_cr4;
214 
215     cr4 &= ~X86_CR4_PKE;
216     if (pke)
217 	cr4 |= X86_CR4_PKE;
218     if (cr4 == shadow_cr4)
219         return;
220 
221     /* Check that protection keys do not affect accesses when CR4.PKE=0.  */
222     if ((shadow_cr4 & X86_CR4_PKE) && !pke)
223         write_pkru(0xfffffffc);
224     write_cr4(cr4);
225     shadow_cr4 = cr4;
226 }
227 
228 static void set_efer_nx(int nx)
229 {
230     unsigned long long efer = shadow_efer;
231 
232     efer &= ~EFER_NX_MASK;
233     if (nx)
234 	efer |= EFER_NX_MASK;
235     if (efer != shadow_efer) {
236         wrmsr(MSR_EFER, efer);
237         shadow_efer = efer;
238     }
239 }
240 
241 static void ac_env_int(ac_pool_t *pool)
242 {
243     extern char page_fault, kernel_entry;
244     set_idt_entry(14, &page_fault, 0);
245     set_idt_entry(0x20, &kernel_entry, 3);
246 
247     pool->pt_pool = 33 * 1024 * 1024;
248     pool->pt_pool_size = 120 * 1024 * 1024 - pool->pt_pool;
249     pool->pt_pool_current = 0;
250 }
251 
252 static void ac_test_init(ac_test_t *at, void *virt)
253 {
254     set_efer_nx(1);
255     set_cr0_wp(1);
256     at->flags = 0;
257     at->virt = virt;
258     at->phys = 32 * 1024 * 1024;
259 }
260 
261 static int ac_test_bump_one(ac_test_t *at)
262 {
263     at->flags = ((at->flags | invalid_mask) + 1) & ~invalid_mask;
264     return at->flags < (1 << NR_AC_FLAGS);
265 }
266 
267 #define F(x)  ((flags & x##_MASK) != 0)
268 
269 static _Bool ac_test_legal(ac_test_t *at)
270 {
271     int flags = at->flags;
272 
273     if (F(AC_ACCESS_FETCH) && F(AC_ACCESS_WRITE))
274 	return false;
275 
276     /*
277      * Since we convert current page to kernel page when cr4.smep=1,
278      * we can't switch to user mode.
279      */
280     if (F(AC_ACCESS_USER) && F(AC_CPU_CR4_SMEP))
281 	return false;
282 
283     /*
284      * Only test protection key faults if CR4.PKE=1.
285      */
286     if (!F(AC_CPU_CR4_PKE) &&
287         (F(AC_PKU_AD) || F(AC_PKU_WD))) {
288 	return false;
289     }
290 
291     /*
292      * pde.bit13 checks handling of reserved bits in largepage PDEs.  It is
293      * meaningless if there is a PTE.
294      */
295     if (!F(AC_PDE_PSE) && F(AC_PDE_BIT13))
296         return false;
297 
298     return true;
299 }
300 
301 static int ac_test_bump(ac_test_t *at)
302 {
303     int ret;
304 
305     ret = ac_test_bump_one(at);
306     while (ret && !ac_test_legal(at))
307 	ret = ac_test_bump_one(at);
308     return ret;
309 }
310 
311 static pt_element_t ac_test_alloc_pt(ac_pool_t *pool)
312 {
313     pt_element_t ret = pool->pt_pool + pool->pt_pool_current;
314     pool->pt_pool_current += PAGE_SIZE;
315     return ret;
316 }
317 
318 static _Bool ac_test_enough_room(ac_pool_t *pool)
319 {
320     return pool->pt_pool_current + 5 * PAGE_SIZE <= pool->pt_pool_size;
321 }
322 
323 static void ac_test_reset_pt_pool(ac_pool_t *pool)
324 {
325     pool->pt_pool_current = 0;
326 }
327 
328 static pt_element_t ac_test_permissions(ac_test_t *at, unsigned flags,
329                                         bool writable, bool user,
330                                         bool executable)
331 {
332     bool kwritable = !F(AC_CPU_CR0_WP) && !F(AC_ACCESS_USER);
333     pt_element_t expected = 0;
334 
335     if (F(AC_ACCESS_USER) && !user)
336 	at->expected_fault = 1;
337 
338     if (F(AC_ACCESS_WRITE) && !writable && !kwritable)
339 	at->expected_fault = 1;
340 
341     if (F(AC_ACCESS_FETCH) && !executable)
342 	at->expected_fault = 1;
343 
344     if (F(AC_ACCESS_FETCH) && user && F(AC_CPU_CR4_SMEP))
345         at->expected_fault = 1;
346 
347     if (user && !F(AC_ACCESS_FETCH) && F(AC_PKU_PKEY) && F(AC_CPU_CR4_PKE)) {
348         if (F(AC_PKU_AD)) {
349             at->expected_fault = 1;
350             at->expected_error |= PFERR_PK_MASK;
351         } else if (F(AC_ACCESS_WRITE) && F(AC_PKU_WD) && !kwritable) {
352             at->expected_fault = 1;
353             at->expected_error |= PFERR_PK_MASK;
354         }
355     }
356 
357     if (!at->expected_fault) {
358         expected |= PT_ACCESSED_MASK;
359         if (F(AC_ACCESS_WRITE))
360             expected |= PT_DIRTY_MASK;
361     }
362 
363     return expected;
364 }
365 
366 static void ac_emulate_access(ac_test_t *at, unsigned flags)
367 {
368     bool pde_valid, pte_valid;
369     bool user, writable, executable;
370 
371     if (F(AC_ACCESS_USER))
372 	at->expected_error |= PFERR_USER_MASK;
373 
374     if (F(AC_ACCESS_WRITE))
375 	at->expected_error |= PFERR_WRITE_MASK;
376 
377     if (F(AC_ACCESS_FETCH))
378 	at->expected_error |= PFERR_FETCH_MASK;
379 
380     if (!F(AC_PDE_ACCESSED))
381         at->ignore_pde = PT_ACCESSED_MASK;
382 
383     pde_valid = F(AC_PDE_PRESENT)
384         && !F(AC_PDE_BIT51) && !F(AC_PDE_BIT13)
385         && !(F(AC_PDE_NX) && !F(AC_CPU_EFER_NX));
386 
387     if (!pde_valid) {
388         at->expected_fault = 1;
389 	if (F(AC_PDE_PRESENT)) {
390             at->expected_error |= PFERR_RESERVED_MASK;
391         } else {
392             at->expected_error &= ~PFERR_PRESENT_MASK;
393         }
394 	goto fault;
395     }
396 
397     writable = F(AC_PDE_WRITABLE);
398     user = F(AC_PDE_USER);
399     executable = !F(AC_PDE_NX);
400 
401     if (F(AC_PDE_PSE)) {
402         at->expected_pde |= ac_test_permissions(at, flags, writable, user,
403                                                 executable);
404 	goto no_pte;
405     }
406 
407     at->expected_pde |= PT_ACCESSED_MASK;
408 
409     pte_valid = F(AC_PTE_PRESENT)
410         && !F(AC_PTE_BIT51)
411         && !(F(AC_PTE_NX) && !F(AC_CPU_EFER_NX));
412 
413     if (!pte_valid) {
414         at->expected_fault = 1;
415 	if (F(AC_PTE_PRESENT)) {
416             at->expected_error |= PFERR_RESERVED_MASK;
417         } else {
418             at->expected_error &= ~PFERR_PRESENT_MASK;
419         }
420 	goto fault;
421     }
422 
423     writable &= F(AC_PTE_WRITABLE);
424     user &= F(AC_PTE_USER);
425     executable &= !F(AC_PTE_NX);
426 
427     at->expected_pte |= ac_test_permissions(at, flags, writable, user,
428                                             executable);
429 
430 no_pte:
431 fault:
432     if (!at->expected_fault)
433         at->ignore_pde = 0;
434     if (!F(AC_CPU_EFER_NX) && !F(AC_CPU_CR4_SMEP))
435         at->expected_error &= ~PFERR_FETCH_MASK;
436 }
437 
438 static void ac_set_expected_status(ac_test_t *at)
439 {
440     invlpg(at->virt);
441 
442     if (at->ptep)
443 	at->expected_pte = *at->ptep;
444     at->expected_pde = *at->pdep;
445     at->ignore_pde = 0;
446     at->expected_fault = 0;
447     at->expected_error = PFERR_PRESENT_MASK;
448 
449     if (at->flags & AC_ACCESS_TWICE_MASK) {
450         ac_emulate_access(at, at->flags & ~AC_ACCESS_WRITE_MASK
451                           & ~AC_ACCESS_FETCH_MASK & ~AC_ACCESS_USER_MASK);
452         at->expected_fault = 0;
453 	at->expected_error = PFERR_PRESENT_MASK;
454         at->ignore_pde = 0;
455     }
456 
457     ac_emulate_access(at, at->flags);
458 }
459 
460 static void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool,
461 				      u64 pd_page, u64 pt_page)
462 
463 {
464     unsigned long root = read_cr3();
465     int flags = at->flags;
466     bool skip = true;
467 
468     if (!ac_test_enough_room(pool))
469 	ac_test_reset_pt_pool(pool);
470 
471     at->ptep = 0;
472     for (int i = page_table_levels; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) {
473 	pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK);
474 	unsigned index = PT_INDEX((unsigned long)at->virt, i);
475 	pt_element_t pte = 0;
476 
477 	/*
478 	 * Reuse existing page tables along the path to the test code and data
479 	 * (which is in the bottom 2MB).
480 	 */
481 	if (skip && i >= 2 && index == 0) {
482 	    goto next;
483 	}
484 	skip = false;
485 
486 	switch (i) {
487 	case 5:
488 	case 4:
489 	case 3:
490 	    pte = pd_page ? pd_page : ac_test_alloc_pt(pool);
491 	    pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
492 	    break;
493 	case 2:
494 	    if (!F(AC_PDE_PSE)) {
495 		pte = pt_page ? pt_page : ac_test_alloc_pt(pool);
496 		/* The protection key is ignored on non-leaf entries.  */
497                 if (F(AC_PKU_PKEY))
498                     pte |= 2ull << 59;
499 	    } else {
500 		pte = at->phys & PT_PSE_BASE_ADDR_MASK;
501 		pte |= PT_PAGE_SIZE_MASK;
502                 if (F(AC_PKU_PKEY))
503                     pte |= 1ull << 59;
504 	    }
505 	    if (F(AC_PDE_PRESENT))
506 		pte |= PT_PRESENT_MASK;
507 	    if (F(AC_PDE_WRITABLE))
508 		pte |= PT_WRITABLE_MASK;
509 	    if (F(AC_PDE_USER))
510 		pte |= PT_USER_MASK;
511 	    if (F(AC_PDE_ACCESSED))
512 		pte |= PT_ACCESSED_MASK;
513 	    if (F(AC_PDE_DIRTY))
514 		pte |= PT_DIRTY_MASK;
515 	    if (F(AC_PDE_NX))
516 		pte |= PT64_NX_MASK;
517 	    if (F(AC_PDE_BIT51))
518 		pte |= 1ull << 51;
519 	    if (F(AC_PDE_BIT13))
520 		pte |= 1ull << 13;
521 	    at->pdep = &vroot[index];
522 	    break;
523 	case 1:
524 	    pte = at->phys & PT_BASE_ADDR_MASK;
525 	    if (F(AC_PKU_PKEY))
526 		pte |= 1ull << 59;
527 	    if (F(AC_PTE_PRESENT))
528 		pte |= PT_PRESENT_MASK;
529 	    if (F(AC_PTE_WRITABLE))
530 		pte |= PT_WRITABLE_MASK;
531 	    if (F(AC_PTE_USER))
532 		pte |= PT_USER_MASK;
533 	    if (F(AC_PTE_ACCESSED))
534 		pte |= PT_ACCESSED_MASK;
535 	    if (F(AC_PTE_DIRTY))
536 		pte |= PT_DIRTY_MASK;
537 	    if (F(AC_PTE_NX))
538 		pte |= PT64_NX_MASK;
539 	    if (F(AC_PTE_BIT51))
540 		pte |= 1ull << 51;
541 	    at->ptep = &vroot[index];
542 	    break;
543 	}
544 	vroot[index] = pte;
545  next:
546 	root = vroot[index];
547     }
548     ac_set_expected_status(at);
549 }
550 
551 static void ac_test_setup_pte(ac_test_t *at, ac_pool_t *pool)
552 {
553 	__ac_setup_specific_pages(at, pool, 0, 0);
554 }
555 
556 static void ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool,
557 				    u64 pd_page, u64 pt_page)
558 {
559 	return __ac_setup_specific_pages(at, pool, pd_page, pt_page);
560 }
561 
562 static void dump_mapping(ac_test_t *at)
563 {
564 	unsigned long root = read_cr3();
565         int flags = at->flags;
566 	int i;
567 
568 	printf("Dump mapping: address: %p\n", at->virt);
569 	for (i = page_table_levels ; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) {
570 		pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK);
571 		unsigned index = PT_INDEX((unsigned long)at->virt, i);
572 		pt_element_t pte = vroot[index];
573 
574 		printf("------L%d: %lx\n", i, pte);
575 		root = vroot[index];
576 	}
577 }
578 
579 static void ac_test_check(ac_test_t *at, _Bool *success_ret, _Bool cond,
580                           const char *fmt, ...)
581 {
582     va_list ap;
583     char buf[500];
584 
585     if (!*success_ret) {
586         return;
587     }
588 
589     if (!cond) {
590         return;
591     }
592 
593     *success_ret = false;
594 
595     if (!verbose) {
596         puts("\n");
597         ac_test_show(at);
598     }
599 
600     va_start(ap, fmt);
601     vsnprintf(buf, sizeof(buf), fmt, ap);
602     va_end(ap);
603     printf("FAIL: %s\n", buf);
604     dump_mapping(at);
605 }
606 
607 static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore)
608 {
609     pte1 &= ~ignore;
610     pte2 &= ~ignore;
611     return pte1 == pte2;
612 }
613 
614 static int ac_test_do_access(ac_test_t *at)
615 {
616     static unsigned unique = 42;
617     int fault = 0;
618     unsigned e;
619     static unsigned char user_stack[4096];
620     unsigned long rsp;
621     _Bool success = true;
622     int flags = at->flags;
623 
624     ++unique;
625     if (!(unique & 65535)) {
626         puts(".");
627     }
628 
629     *((unsigned char *)at->phys) = 0xc3; /* ret */
630 
631     unsigned r = unique;
632     set_cr0_wp(F(AC_CPU_CR0_WP));
633     set_efer_nx(F(AC_CPU_EFER_NX));
634     set_cr4_pke(F(AC_CPU_CR4_PKE));
635     if (F(AC_CPU_CR4_PKE)) {
636         /* WD2=AD2=1, WD1=F(AC_PKU_WD), AD1=F(AC_PKU_AD) */
637         write_pkru(0x30 | (F(AC_PKU_WD) ? 8 : 0) |
638                    (F(AC_PKU_AD) ? 4 : 0));
639     }
640 
641     set_cr4_smep(F(AC_CPU_CR4_SMEP));
642 
643     if (F(AC_ACCESS_TWICE)) {
644 	asm volatile (
645 	    "mov $fixed2, %%rsi \n\t"
646 	    "mov (%[addr]), %[reg] \n\t"
647 	    "fixed2:"
648 	    : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e)
649 	    : [addr]"r"(at->virt)
650 	    : "rsi"
651 	    );
652 	fault = 0;
653     }
654 
655     asm volatile ("mov $fixed1, %%rsi \n\t"
656 		  "mov %%rsp, %%rdx \n\t"
657 		  "cmp $0, %[user] \n\t"
658 		  "jz do_access \n\t"
659 		  "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax  \n\t"
660 		  "pushq %[user_ds] \n\t"
661 		  "pushq %[user_stack_top] \n\t"
662 		  "pushfq \n\t"
663 		  "pushq %[user_cs] \n\t"
664 		  "pushq $do_access \n\t"
665 		  "iretq \n"
666 		  "do_access: \n\t"
667 		  "cmp $0, %[fetch] \n\t"
668 		  "jnz 2f \n\t"
669 		  "cmp $0, %[write] \n\t"
670 		  "jnz 1f \n\t"
671 		  "mov (%[addr]), %[reg] \n\t"
672 		  "jmp done \n\t"
673 		  "1: mov %[reg], (%[addr]) \n\t"
674 		  "jmp done \n\t"
675 		  "2: call *%[addr] \n\t"
676 		  "done: \n"
677 		  "fixed1: \n"
678 		  "int %[kernel_entry_vector] \n\t"
679 		  "back_to_kernel:"
680 		  : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp)
681 		  : [addr]"r"(at->virt),
682 		    [write]"r"(F(AC_ACCESS_WRITE)),
683 		    [user]"r"(F(AC_ACCESS_USER)),
684 		    [fetch]"r"(F(AC_ACCESS_FETCH)),
685 		    [user_ds]"i"(USER_DS),
686 		    [user_cs]"i"(USER_CS),
687 		    [user_stack_top]"r"(user_stack + sizeof user_stack),
688 		    [kernel_entry_vector]"i"(0x20)
689 		  : "rsi");
690 
691     asm volatile (".section .text.pf \n\t"
692 		  "page_fault: \n\t"
693 		  "pop %rbx \n\t"
694 		  "mov %rsi, (%rsp) \n\t"
695 		  "movl $1, %eax \n\t"
696 		  "iretq \n\t"
697 		  ".section .text");
698 
699     asm volatile (".section .text.entry \n\t"
700 		  "kernel_entry: \n\t"
701 		  "mov %rdx, %rsp \n\t"
702 		  "jmp back_to_kernel \n\t"
703 		  ".section .text");
704 
705     ac_test_check(at, &success, fault && !at->expected_fault,
706                   "unexpected fault");
707     ac_test_check(at, &success, !fault && at->expected_fault,
708                   "unexpected access");
709     ac_test_check(at, &success, fault && e != at->expected_error,
710                   "error code %x expected %x", e, at->expected_error);
711     if (at->ptep)
712         ac_test_check(at, &success, *at->ptep != at->expected_pte,
713                       "pte %x expected %x", *at->ptep, at->expected_pte);
714     ac_test_check(at, &success,
715                   !pt_match(*at->pdep, at->expected_pde, at->ignore_pde),
716                   "pde %x expected %x", *at->pdep, at->expected_pde);
717 
718     if (success && verbose) {
719 	if (at->expected_fault) {
720             printf("PASS (%x)\n", at->expected_error);
721 	} else {
722             printf("PASS\n");
723 	}
724     }
725     return success;
726 }
727 
728 static void ac_test_show(ac_test_t *at)
729 {
730     char line[5000];
731 
732     *line = 0;
733     strcat(line, "test");
734     for (int i = 0; i < NR_AC_FLAGS; ++i)
735 	if (at->flags & (1 << i)) {
736 	    strcat(line, " ");
737 	    strcat(line, ac_names[i]);
738 	}
739     strcat(line, ": ");
740     printf("%s", line);
741 }
742 
743 /*
744  * This test case is used to triger the bug which is fixed by
745  * commit e09e90a5 in the kvm tree
746  */
747 static int corrupt_hugepage_triger(ac_pool_t *pool)
748 {
749     ac_test_t at1, at2;
750 
751     ac_test_init(&at1, (void *)(0x123400000000));
752     ac_test_init(&at2, (void *)(0x666600000000));
753 
754     at2.flags = AC_CPU_CR0_WP_MASK | AC_PDE_PSE_MASK | AC_PDE_PRESENT_MASK;
755     ac_test_setup_pte(&at2, pool);
756     if (!ac_test_do_access(&at2))
757         goto err;
758 
759     at1.flags = at2.flags | AC_PDE_WRITABLE_MASK;
760     ac_test_setup_pte(&at1, pool);
761     if (!ac_test_do_access(&at1))
762         goto err;
763 
764     at1.flags |= AC_ACCESS_WRITE_MASK;
765     ac_set_expected_status(&at1);
766     if (!ac_test_do_access(&at1))
767         goto err;
768 
769     at2.flags |= AC_ACCESS_WRITE_MASK;
770     ac_set_expected_status(&at2);
771     if (!ac_test_do_access(&at2))
772         goto err;
773 
774     return 1;
775 
776 err:
777     printf("corrupt_hugepage_triger test fail\n");
778     return 0;
779 }
780 
781 /*
782  * This test case is used to triger the bug which is fixed by
783  * commit 3ddf6c06e13e in the kvm tree
784  */
785 static int check_pfec_on_prefetch_pte(ac_pool_t *pool)
786 {
787 	ac_test_t at1, at2;
788 
789 	ac_test_init(&at1, (void *)(0x123406001000));
790 	ac_test_init(&at2, (void *)(0x123406003000));
791 
792 	at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK;
793 	ac_setup_specific_pages(&at1, pool, 30 * 1024 * 1024, 30 * 1024 * 1024);
794 
795         at2.flags = at1.flags | AC_PTE_NX_MASK;
796 	ac_setup_specific_pages(&at2, pool, 30 * 1024 * 1024, 30 * 1024 * 1024);
797 
798 	if (!ac_test_do_access(&at1)) {
799 		printf("%s: prepare fail\n", __FUNCTION__);
800 		goto err;
801 	}
802 
803 	if (!ac_test_do_access(&at2)) {
804 		printf("%s: check PFEC on prefetch pte path fail\n",
805 			__FUNCTION__);
806 		goto err;
807 	}
808 
809 	return 1;
810 
811 err:
812     return 0;
813 }
814 
815 /*
816  * If the write-fault access is from supervisor and CR0.WP is not set on the
817  * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte
818  * and clears U bit. This is the chance that kvm can change pte access from
819  * readonly to writable.
820  *
821  * Unfortunately, the pte access is the access of 'direct' shadow page table,
822  * means direct sp.role.access = pte_access, then we will create a writable
823  * spte entry on the readonly shadow page table. It will cause Dirty bit is
824  * not tracked when two guest ptes point to the same large page. Note, it
825  * does not have other impact except Dirty bit since cr0.wp is encoded into
826  * sp.role.
827  *
828  * Note: to trigger this bug, hugepage should be disabled on host.
829  */
830 static int check_large_pte_dirty_for_nowp(ac_pool_t *pool)
831 {
832 	ac_test_t at1, at2;
833 
834 	ac_test_init(&at1, (void *)(0x123403000000));
835 	ac_test_init(&at2, (void *)(0x666606000000));
836 
837         at2.flags = AC_PDE_PRESENT_MASK | AC_PDE_PSE_MASK;
838 	ac_test_setup_pte(&at2, pool);
839 	if (!ac_test_do_access(&at2)) {
840 		printf("%s: read on the first mapping fail.\n", __FUNCTION__);
841 		goto err;
842 	}
843 
844         at1.flags = at2.flags | AC_ACCESS_WRITE_MASK;
845 	ac_test_setup_pte(&at1, pool);
846 	if (!ac_test_do_access(&at1)) {
847 		printf("%s: write on the second mapping fail.\n", __FUNCTION__);
848 		goto err;
849 	}
850 
851 	at2.flags |= AC_ACCESS_WRITE_MASK;
852 	ac_set_expected_status(&at2);
853 	if (!ac_test_do_access(&at2)) {
854 		printf("%s: write on the first mapping fail.\n", __FUNCTION__);
855 		goto err;
856 	}
857 
858 	return 1;
859 
860 err:
861 	return 0;
862 }
863 
864 static int check_smep_andnot_wp(ac_pool_t *pool)
865 {
866 	ac_test_t at1;
867 	int err_prepare_andnot_wp, err_smep_andnot_wp;
868 
869 	if (!this_cpu_has(X86_FEATURE_SMEP)) {
870 	    return 1;
871 	}
872 
873 	ac_test_init(&at1, (void *)(0x123406001000));
874 
875 	at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK |
876             AC_PDE_USER_MASK | AC_PTE_USER_MASK |
877             AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK |
878             AC_CPU_CR4_SMEP_MASK |
879             AC_CPU_CR0_WP_MASK |
880             AC_ACCESS_WRITE_MASK;
881 	ac_test_setup_pte(&at1, pool);
882 
883 	/*
884 	 * Here we write the ro user page when
885 	 * cr0.wp=0, then we execute it and SMEP
886 	 * fault should happen.
887 	 */
888 	err_prepare_andnot_wp = ac_test_do_access(&at1);
889 	if (!err_prepare_andnot_wp) {
890 		printf("%s: SMEP prepare fail\n", __FUNCTION__);
891 		goto clean_up;
892 	}
893 
894         at1.flags &= ~AC_ACCESS_WRITE_MASK;
895         at1.flags |= AC_ACCESS_FETCH_MASK;
896         ac_set_expected_status(&at1);
897         err_smep_andnot_wp = ac_test_do_access(&at1);
898 
899 clean_up:
900 	set_cr4_smep(0);
901 
902 	if (!err_prepare_andnot_wp)
903 		goto err;
904 	if (!err_smep_andnot_wp) {
905 		printf("%s: check SMEP without wp fail\n", __FUNCTION__);
906 		goto err;
907 	}
908 	return 1;
909 
910 err:
911 	return 0;
912 }
913 
914 static int ac_test_exec(ac_test_t *at, ac_pool_t *pool)
915 {
916     int r;
917 
918     if (verbose) {
919         ac_test_show(at);
920     }
921     ac_test_setup_pte(at, pool);
922     r = ac_test_do_access(at);
923     return r;
924 }
925 
926 typedef int (*ac_test_fn)(ac_pool_t *pool);
927 const ac_test_fn ac_test_cases[] =
928 {
929 	corrupt_hugepage_triger,
930 	check_pfec_on_prefetch_pte,
931 	check_large_pte_dirty_for_nowp,
932 	check_smep_andnot_wp
933 };
934 
935 static int ac_test_run(void)
936 {
937     ac_test_t at;
938     ac_pool_t pool;
939     int i, tests, successes;
940 
941     printf("run\n");
942     tests = successes = 0;
943 
944     shadow_cr0 = read_cr0();
945     shadow_cr4 = read_cr4();
946     shadow_efer = rdmsr(MSR_EFER);
947 
948     if (this_cpu_has(X86_FEATURE_PKU)) {
949         set_cr4_pke(1);
950         set_cr4_pke(0);
951         /* Now PKRU = 0xFFFFFFFF.  */
952     } else {
953 	tests++;
954 	if (write_cr4_checking(shadow_cr4 | X86_CR4_PKE) == GP_VECTOR) {
955             successes++;
956             invalid_mask |= AC_PKU_AD_MASK;
957             invalid_mask |= AC_PKU_WD_MASK;
958             invalid_mask |= AC_PKU_PKEY_MASK;
959             invalid_mask |= AC_CPU_CR4_PKE_MASK;
960             printf("CR4.PKE not available, disabling PKE tests\n");
961 	} else {
962             printf("Set PKE in CR4 - expect #GP: FAIL!\n");
963             set_cr4_pke(0);
964 	}
965     }
966 
967     if (!this_cpu_has(X86_FEATURE_SMEP)) {
968 	tests++;
969 	if (set_cr4_smep(1) == GP_VECTOR) {
970             successes++;
971             invalid_mask |= AC_CPU_CR4_SMEP_MASK;
972             printf("CR4.SMEP not available, disabling SMEP tests\n");
973 	} else {
974             printf("Set SMEP in CR4 - expect #GP: FAIL!\n");
975             set_cr4_smep(0);
976 	}
977     }
978 
979     ac_env_int(&pool);
980     ac_test_init(&at, (void *)(0x123400000000 + 16 * smp_id()));
981     do {
982 	++tests;
983 	successes += ac_test_exec(&at, &pool);
984     } while (ac_test_bump(&at));
985 
986     for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) {
987 	++tests;
988 	successes += ac_test_cases[i](&pool);
989     }
990 
991     printf("\n%d tests, %d failures\n", tests, tests - successes);
992 
993     return successes == tests;
994 }
995 
996 int main(void)
997 {
998     int r;
999 
1000     setup_idt();
1001 
1002     printf("starting test\n\n");
1003     page_table_levels = 4;
1004     r = ac_test_run();
1005 
1006     if (this_cpu_has(X86_FEATURE_LA57)) {
1007         page_table_levels = 5;
1008         printf("starting 5-level paging test.\n\n");
1009         setup_5level_page_table();
1010         r = ac_test_run();
1011     }
1012 
1013     return r ? 0 : 1;
1014 }
1015