xref: /kvm-unit-tests/x86/access.c (revision 8d2cdb35a07aca4a025ccb9a2c8f4786779b8b0f)
1 
2 #include "libcflat.h"
3 #include "desc.h"
4 #include "processor.h"
5 #include "asm/page.h"
6 #include "x86/vm.h"
7 
/* CPU index used to offset the main test address; fixed at zero here. */
#define smp_id() 0

/* Boolean literals used throughout this file. */
#define false 0
#define true 1

/* Unsigned integer type holding one page-table entry or physical address. */
typedef unsigned long pt_element_t;

/* When set, each test case is printed together with its PASS result. */
static _Bool verbose = false;

/* CPUID leaf 7 register values; main() fills them in before any test runs. */
static int cpuid_7_ecx;
static int cpuid_7_ebx;

/* AC_*_MASK bits that ac_test_bump_one() always keeps cleared. */
static int invalid_mask;

/* Number of paging levels walked by the setup code; main() sets 4 or 5. */
static int page_table_levels;
20 
/* Page-aligned part (per PAGE_MASK) of the low 40 bits of an entry. */
#define PT_BASE_ADDR_MASK \
    ((pt_element_t)(PAGE_MASK & (((pt_element_t)1 << 40) - 1)))
/*
 * Mask applied to the physical address stored in a large-page PDE.
 * NOTE(review): this only removes bit 21 from PT_BASE_ADDR_MASK; confirm
 * that this is the intended frame mask for a large page.
 */
#define PT_PSE_BASE_ADDR_MASK (~(1ull << 21) & PT_BASE_ADDR_MASK)

/* Control-register bits toggled by the tests. */
#define CR0_WP_MASK   0x00010000UL	/* bit 16 */
#define CR4_SMEP_MASK 0x00100000UL	/* bit 20 */

/* Bits of the page-fault error code compared against expected_error. */
#define PFERR_PRESENT_MASK  0x01U	/* bit 0 */
#define PFERR_WRITE_MASK    0x02U	/* bit 1 */
#define PFERR_USER_MASK     0x04U	/* bit 2 */
#define PFERR_RESERVED_MASK 0x08U	/* bit 3 */
#define PFERR_FETCH_MASK    0x10U	/* bit 4 */
#define PFERR_PK_MASK       0x20U	/* bit 5 */

/* EFER MSR index and its NX bit (bit 11). */
#define MSR_EFER 0xc0000080
#define EFER_NX_MASK		0x800ull
36 
/*
 * Index of the entry that translates 'address' in the page table at 'level'
 * (1 is the lowest level).  Each level consumes 9 address bits above the
 * 12-bit page offset, so the result is in the range 0..511.
 *
 * The whole expansion is parenthesized so the macro can be used safely as an
 * operand of a larger expression (e.g. a comparison).
 */
#define PT_INDEX(address, level)       \
       (((address) >> (12 + ((level) - 1) * 9)) & 511)
39 
40 /*
41  * page table access check tests
42  */
43 
/*
 * Bit positions of the flags that together describe one test case: the
 * contents of the PTE and of the PDE, the protection-key setup, the kind of
 * access performed and the CPU control bits in force.  NR_AC_FLAGS is the
 * total number of flag bits.
 */
enum {
    /* bits placed in the level-1 entry (PTE) */
    AC_PTE_PRESENT_BIT = 0,
    AC_PTE_WRITABLE_BIT,
    AC_PTE_USER_BIT,
    AC_PTE_ACCESSED_BIT,
    AC_PTE_DIRTY_BIT,
    AC_PTE_NX_BIT,
    AC_PTE_BIT51_BIT,

    /* bits placed in the level-2 entry (PDE) */
    AC_PDE_PRESENT_BIT,
    AC_PDE_WRITABLE_BIT,
    AC_PDE_USER_BIT,
    AC_PDE_ACCESSED_BIT,
    AC_PDE_DIRTY_BIT,
    AC_PDE_PSE_BIT,
    AC_PDE_NX_BIT,
    AC_PDE_BIT51_BIT,
    AC_PDE_BIT13_BIT,

    /* protection-key configuration */
    AC_PKU_AD_BIT,
    AC_PKU_WD_BIT,
    AC_PKU_PKEY_BIT,

    /* how the test address is accessed */
    AC_ACCESS_USER_BIT,
    AC_ACCESS_WRITE_BIT,
    AC_ACCESS_FETCH_BIT,
    AC_ACCESS_TWICE_BIT,

    /* CPU control bits programmed before the access */
    AC_CPU_EFER_NX_BIT,
    AC_CPU_CR0_WP_BIT,
    AC_CPU_CR4_SMEP_BIT,
    AC_CPU_CR4_PKE_BIT,

    NR_AC_FLAGS
};
79 
/*
 * One mask per AC_*_BIT position.  The F() helper macro relies on every mask
 * being named exactly like its bit with the _BIT suffix replaced by _MASK.
 */
#define AC_BIT_TO_MASK(bit)   (1 << bit)

/* PTE contents */
#define AC_PTE_PRESENT_MASK   AC_BIT_TO_MASK(AC_PTE_PRESENT_BIT)
#define AC_PTE_WRITABLE_MASK  AC_BIT_TO_MASK(AC_PTE_WRITABLE_BIT)
#define AC_PTE_USER_MASK      AC_BIT_TO_MASK(AC_PTE_USER_BIT)
#define AC_PTE_ACCESSED_MASK  AC_BIT_TO_MASK(AC_PTE_ACCESSED_BIT)
#define AC_PTE_DIRTY_MASK     AC_BIT_TO_MASK(AC_PTE_DIRTY_BIT)
#define AC_PTE_NX_MASK        AC_BIT_TO_MASK(AC_PTE_NX_BIT)
#define AC_PTE_BIT51_MASK     AC_BIT_TO_MASK(AC_PTE_BIT51_BIT)

/* PDE contents */
#define AC_PDE_PRESENT_MASK   AC_BIT_TO_MASK(AC_PDE_PRESENT_BIT)
#define AC_PDE_WRITABLE_MASK  AC_BIT_TO_MASK(AC_PDE_WRITABLE_BIT)
#define AC_PDE_USER_MASK      AC_BIT_TO_MASK(AC_PDE_USER_BIT)
#define AC_PDE_ACCESSED_MASK  AC_BIT_TO_MASK(AC_PDE_ACCESSED_BIT)
#define AC_PDE_DIRTY_MASK     AC_BIT_TO_MASK(AC_PDE_DIRTY_BIT)
#define AC_PDE_PSE_MASK       AC_BIT_TO_MASK(AC_PDE_PSE_BIT)
#define AC_PDE_NX_MASK        AC_BIT_TO_MASK(AC_PDE_NX_BIT)
#define AC_PDE_BIT51_MASK     AC_BIT_TO_MASK(AC_PDE_BIT51_BIT)
#define AC_PDE_BIT13_MASK     AC_BIT_TO_MASK(AC_PDE_BIT13_BIT)

/* protection keys */
#define AC_PKU_AD_MASK        AC_BIT_TO_MASK(AC_PKU_AD_BIT)
#define AC_PKU_WD_MASK        AC_BIT_TO_MASK(AC_PKU_WD_BIT)
#define AC_PKU_PKEY_MASK      AC_BIT_TO_MASK(AC_PKU_PKEY_BIT)

/* kind of access */
#define AC_ACCESS_USER_MASK   AC_BIT_TO_MASK(AC_ACCESS_USER_BIT)
#define AC_ACCESS_WRITE_MASK  AC_BIT_TO_MASK(AC_ACCESS_WRITE_BIT)
#define AC_ACCESS_FETCH_MASK  AC_BIT_TO_MASK(AC_ACCESS_FETCH_BIT)
#define AC_ACCESS_TWICE_MASK  AC_BIT_TO_MASK(AC_ACCESS_TWICE_BIT)

/* CPU control bits */
#define AC_CPU_EFER_NX_MASK   AC_BIT_TO_MASK(AC_CPU_EFER_NX_BIT)
#define AC_CPU_CR0_WP_MASK    AC_BIT_TO_MASK(AC_CPU_CR0_WP_BIT)
#define AC_CPU_CR4_SMEP_MASK  AC_BIT_TO_MASK(AC_CPU_CR4_SMEP_BIT)
#define AC_CPU_CR4_PKE_MASK   AC_BIT_TO_MASK(AC_CPU_CR4_PKE_BIT)
111 
112 const char *ac_names[] = {
113     [AC_PTE_PRESENT_BIT] = "pte.p",
114     [AC_PTE_ACCESSED_BIT] = "pte.a",
115     [AC_PTE_WRITABLE_BIT] = "pte.rw",
116     [AC_PTE_USER_BIT] = "pte.user",
117     [AC_PTE_DIRTY_BIT] = "pte.d",
118     [AC_PTE_NX_BIT] = "pte.nx",
119     [AC_PTE_BIT51_BIT] = "pte.51",
120     [AC_PDE_PRESENT_BIT] = "pde.p",
121     [AC_PDE_ACCESSED_BIT] = "pde.a",
122     [AC_PDE_WRITABLE_BIT] = "pde.rw",
123     [AC_PDE_USER_BIT] = "pde.user",
124     [AC_PDE_DIRTY_BIT] = "pde.d",
125     [AC_PDE_PSE_BIT] = "pde.pse",
126     [AC_PDE_NX_BIT] = "pde.nx",
127     [AC_PDE_BIT51_BIT] = "pde.51",
128     [AC_PDE_BIT13_BIT] = "pde.13",
129     [AC_PKU_AD_BIT] = "pkru.ad",
130     [AC_PKU_WD_BIT] = "pkru.wd",
131     [AC_PKU_PKEY_BIT] = "pkey=1",
132     [AC_ACCESS_WRITE_BIT] = "write",
133     [AC_ACCESS_USER_BIT] = "user",
134     [AC_ACCESS_FETCH_BIT] = "fetch",
135     [AC_ACCESS_TWICE_BIT] = "twice",
136     [AC_CPU_EFER_NX_BIT] = "efer.nx",
137     [AC_CPU_CR0_WP_BIT] = "cr0.wp",
138     [AC_CPU_CR4_SMEP_BIT] = "cr4.smep",
139     [AC_CPU_CR4_PKE_BIT] = "cr4.pke",
140 };
141 
142 static inline void *va(pt_element_t phys)
143 {
144     return (void *)phys;
145 }
146 
147 typedef struct {
148     pt_element_t pt_pool;
149     unsigned pt_pool_size;
150     unsigned pt_pool_current;
151 } ac_pool_t;
152 
153 typedef struct {
154     unsigned flags;
155     void *virt;
156     pt_element_t phys;
157     pt_element_t *ptep;
158     pt_element_t expected_pte;
159     pt_element_t *pdep;
160     pt_element_t expected_pde;
161     pt_element_t ignore_pde;
162     int expected_fault;
163     unsigned expected_error;
164 } ac_test_t;
165 
/* A limit immediately followed by a linear address, stored without padding. */
typedef struct {
    unsigned short int limit;
    unsigned long int linear_addr;
} __attribute__((packed)) descriptor_table_t;
170 
171 
172 static void ac_test_show(ac_test_t *at);
173 
174 static int write_cr4_checking(unsigned long val)
175 {
176     asm volatile(ASM_TRY("1f")
177             "mov %0,%%cr4\n\t"
178             "1:": : "r" (val));
179     return exception_vector();
180 }
181 
182 static void set_cr0_wp(int wp)
183 {
184     unsigned long cr0 = read_cr0();
185     unsigned long old_cr0 = cr0;
186 
187     cr0 &= ~CR0_WP_MASK;
188     if (wp)
189 	cr0 |= CR0_WP_MASK;
190     if (old_cr0 != cr0)
191         write_cr0(cr0);
192 }
193 
194 static unsigned set_cr4_smep(int smep)
195 {
196     unsigned long cr4 = read_cr4();
197     unsigned long old_cr4 = cr4;
198     extern u64 ptl2[];
199     unsigned r;
200 
201     cr4 &= ~CR4_SMEP_MASK;
202     if (smep)
203 	cr4 |= CR4_SMEP_MASK;
204     if (old_cr4 == cr4)
205         return 0;
206 
207     if (smep)
208         ptl2[2] &= ~PT_USER_MASK;
209     r = write_cr4_checking(cr4);
210     if (r || !smep)
211         ptl2[2] |= PT_USER_MASK;
212     return r;
213 }
214 
215 static void set_cr4_pke(int pke)
216 {
217     unsigned long cr4 = read_cr4();
218     unsigned long old_cr4 = cr4;
219 
220     cr4 &= ~X86_CR4_PKE;
221     if (pke)
222 	cr4 |= X86_CR4_PKE;
223     if (old_cr4 == cr4)
224         return;
225 
226     /* Check that protection keys do not affect accesses when CR4.PKE=0.  */
227     if ((read_cr4() & X86_CR4_PKE) && !pke) {
228         write_pkru(0xfffffffc);
229     }
230     write_cr4(cr4);
231 }
232 
233 static void set_efer_nx(int nx)
234 {
235     unsigned long long efer = rdmsr(MSR_EFER);
236     unsigned long long old_efer = efer;
237 
238     efer &= ~EFER_NX_MASK;
239     if (nx)
240 	efer |= EFER_NX_MASK;
241     if (old_efer != efer)
242         wrmsr(MSR_EFER, efer);
243 }
244 
245 static void ac_env_int(ac_pool_t *pool)
246 {
247     extern char page_fault, kernel_entry;
248     set_idt_entry(14, &page_fault, 0);
249     set_idt_entry(0x20, &kernel_entry, 3);
250 
251     pool->pt_pool = 33 * 1024 * 1024;
252     pool->pt_pool_size = 120 * 1024 * 1024 - pool->pt_pool;
253     pool->pt_pool_current = 0;
254 }
255 
256 static void ac_test_init(ac_test_t *at, void *virt)
257 {
258     wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NX_MASK);
259     set_cr0_wp(1);
260     at->flags = 0;
261     at->virt = virt;
262     at->phys = 32 * 1024 * 1024;
263 }
264 
265 static int ac_test_bump_one(ac_test_t *at)
266 {
267     at->flags = ((at->flags | invalid_mask) + 1) & ~invalid_mask;
268     return at->flags < (1 << NR_AC_FLAGS);
269 }
270 
/*
 * F(NAME) is 1 if NAME##_MASK is set in the variable called 'flags' that is
 * in scope at the point of use, and 0 otherwise.
 */
#define F(x)  (!!(flags & x##_MASK))
272 
273 static _Bool ac_test_legal(ac_test_t *at)
274 {
275     int flags = at->flags;
276 
277     if (F(AC_ACCESS_FETCH) && F(AC_ACCESS_WRITE))
278 	return false;
279 
280     /*
281      * Since we convert current page to kernel page when cr4.smep=1,
282      * we can't switch to user mode.
283      */
284     if (F(AC_ACCESS_USER) && F(AC_CPU_CR4_SMEP))
285 	return false;
286 
287     /*
288      * Only test protection key faults if CR4.PKE=1.
289      */
290     if (!F(AC_CPU_CR4_PKE) &&
291         (F(AC_PKU_AD) || F(AC_PKU_WD))) {
292 	return false;
293     }
294 
295     /*
296      * pde.bit13 checks handling of reserved bits in largepage PDEs.  It is
297      * meaningless if there is a PTE.
298      */
299     if (!F(AC_PDE_PSE) && F(AC_PDE_BIT13))
300         return false;
301 
302     return true;
303 }
304 
305 static int ac_test_bump(ac_test_t *at)
306 {
307     int ret;
308 
309     ret = ac_test_bump_one(at);
310     while (ret && !ac_test_legal(at))
311 	ret = ac_test_bump_one(at);
312     return ret;
313 }
314 
315 static pt_element_t ac_test_alloc_pt(ac_pool_t *pool)
316 {
317     pt_element_t ret = pool->pt_pool + pool->pt_pool_current;
318     pool->pt_pool_current += PAGE_SIZE;
319     return ret;
320 }
321 
322 static _Bool ac_test_enough_room(ac_pool_t *pool)
323 {
324     return pool->pt_pool_current + 5 * PAGE_SIZE <= pool->pt_pool_size;
325 }
326 
327 static void ac_test_reset_pt_pool(ac_pool_t *pool)
328 {
329     pool->pt_pool_current = 0;
330 }
331 
332 static pt_element_t ac_test_permissions(ac_test_t *at, unsigned flags,
333                                         bool writable, bool user,
334                                         bool executable)
335 {
336     bool kwritable = !F(AC_CPU_CR0_WP) && !F(AC_ACCESS_USER);
337     pt_element_t expected = 0;
338 
339     if (F(AC_ACCESS_USER) && !user)
340 	at->expected_fault = 1;
341 
342     if (F(AC_ACCESS_WRITE) && !writable && !kwritable)
343 	at->expected_fault = 1;
344 
345     if (F(AC_ACCESS_FETCH) && !executable)
346 	at->expected_fault = 1;
347 
348     if (F(AC_ACCESS_FETCH) && user && F(AC_CPU_CR4_SMEP))
349         at->expected_fault = 1;
350 
351     if (user && !F(AC_ACCESS_FETCH) && F(AC_PKU_PKEY) && F(AC_CPU_CR4_PKE)) {
352         if (F(AC_PKU_AD)) {
353             at->expected_fault = 1;
354             at->expected_error |= PFERR_PK_MASK;
355         } else if (F(AC_ACCESS_WRITE) && F(AC_PKU_WD) && !kwritable) {
356             at->expected_fault = 1;
357             at->expected_error |= PFERR_PK_MASK;
358         }
359     }
360 
361     if (!at->expected_fault) {
362         expected |= PT_ACCESSED_MASK;
363         if (F(AC_ACCESS_WRITE))
364             expected |= PT_DIRTY_MASK;
365     }
366 
367     return expected;
368 }
369 
370 static void ac_emulate_access(ac_test_t *at, unsigned flags)
371 {
372     bool pde_valid, pte_valid;
373     bool user, writable, executable;
374 
375     if (F(AC_ACCESS_USER))
376 	at->expected_error |= PFERR_USER_MASK;
377 
378     if (F(AC_ACCESS_WRITE))
379 	at->expected_error |= PFERR_WRITE_MASK;
380 
381     if (F(AC_ACCESS_FETCH))
382 	at->expected_error |= PFERR_FETCH_MASK;
383 
384     if (!F(AC_PDE_ACCESSED))
385         at->ignore_pde = PT_ACCESSED_MASK;
386 
387     pde_valid = F(AC_PDE_PRESENT)
388         && !F(AC_PDE_BIT51) && !F(AC_PDE_BIT13)
389         && !(F(AC_PDE_NX) && !F(AC_CPU_EFER_NX));
390 
391     if (!pde_valid) {
392         at->expected_fault = 1;
393 	if (F(AC_PDE_PRESENT)) {
394             at->expected_error |= PFERR_RESERVED_MASK;
395         } else {
396             at->expected_error &= ~PFERR_PRESENT_MASK;
397         }
398 	goto fault;
399     }
400 
401     writable = F(AC_PDE_WRITABLE);
402     user = F(AC_PDE_USER);
403     executable = !F(AC_PDE_NX);
404 
405     if (F(AC_PDE_PSE)) {
406         at->expected_pde |= ac_test_permissions(at, flags, writable, user,
407                                                 executable);
408 	goto no_pte;
409     }
410 
411     at->expected_pde |= PT_ACCESSED_MASK;
412 
413     pte_valid = F(AC_PTE_PRESENT)
414         && !F(AC_PTE_BIT51)
415         && !(F(AC_PTE_NX) && !F(AC_CPU_EFER_NX));
416 
417     if (!pte_valid) {
418         at->expected_fault = 1;
419 	if (F(AC_PTE_PRESENT)) {
420             at->expected_error |= PFERR_RESERVED_MASK;
421         } else {
422             at->expected_error &= ~PFERR_PRESENT_MASK;
423         }
424 	goto fault;
425     }
426 
427     writable &= F(AC_PTE_WRITABLE);
428     user &= F(AC_PTE_USER);
429     executable &= !F(AC_PTE_NX);
430 
431     at->expected_pte |= ac_test_permissions(at, flags, writable, user,
432                                             executable);
433 
434 no_pte:
435 fault:
436     if (!at->expected_fault)
437         at->ignore_pde = 0;
438     if (!F(AC_CPU_EFER_NX) && !F(AC_CPU_CR4_SMEP))
439         at->expected_error &= ~PFERR_FETCH_MASK;
440 }
441 
442 static void ac_set_expected_status(ac_test_t *at)
443 {
444     invlpg(at->virt);
445 
446     if (at->ptep)
447 	at->expected_pte = *at->ptep;
448     at->expected_pde = *at->pdep;
449     at->ignore_pde = 0;
450     at->expected_fault = 0;
451     at->expected_error = PFERR_PRESENT_MASK;
452 
453     if (at->flags & AC_ACCESS_TWICE_MASK) {
454         ac_emulate_access(at, at->flags & ~AC_ACCESS_WRITE_MASK
455                           & ~AC_ACCESS_FETCH_MASK & ~AC_ACCESS_USER_MASK);
456         at->expected_fault = 0;
457 	at->expected_error = PFERR_PRESENT_MASK;
458         at->ignore_pde = 0;
459     }
460 
461     ac_emulate_access(at, at->flags);
462 }
463 
464 static void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool,
465 				      u64 pd_page, u64 pt_page)
466 
467 {
468     unsigned long root = read_cr3();
469     int flags = at->flags;
470     bool skip = true;
471 
472     if (!ac_test_enough_room(pool))
473 	ac_test_reset_pt_pool(pool);
474 
475     at->ptep = 0;
476     for (int i = page_table_levels; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) {
477 	pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK);
478 	unsigned index = PT_INDEX((unsigned long)at->virt, i);
479 	pt_element_t pte = 0;
480 
481 	/*
482 	 * Reuse existing page tables along the path to the test code and data
483 	 * (which is in the bottom 2MB).
484 	 */
485 	if (skip && i >= 2 && index == 0) {
486 	    goto next;
487 	}
488 	skip = false;
489 
490 	switch (i) {
491 	case 5:
492 	case 4:
493 	case 3:
494 	    pte = pd_page ? pd_page : ac_test_alloc_pt(pool);
495 	    pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
496 	    break;
497 	case 2:
498 	    if (!F(AC_PDE_PSE)) {
499 		pte = pt_page ? pt_page : ac_test_alloc_pt(pool);
500 		/* The protection key is ignored on non-leaf entries.  */
501                 if (F(AC_PKU_PKEY))
502                     pte |= 2ull << 59;
503 	    } else {
504 		pte = at->phys & PT_PSE_BASE_ADDR_MASK;
505 		pte |= PT_PAGE_SIZE_MASK;
506                 if (F(AC_PKU_PKEY))
507                     pte |= 1ull << 59;
508 	    }
509 	    if (F(AC_PDE_PRESENT))
510 		pte |= PT_PRESENT_MASK;
511 	    if (F(AC_PDE_WRITABLE))
512 		pte |= PT_WRITABLE_MASK;
513 	    if (F(AC_PDE_USER))
514 		pte |= PT_USER_MASK;
515 	    if (F(AC_PDE_ACCESSED))
516 		pte |= PT_ACCESSED_MASK;
517 	    if (F(AC_PDE_DIRTY))
518 		pte |= PT_DIRTY_MASK;
519 	    if (F(AC_PDE_NX))
520 		pte |= PT64_NX_MASK;
521 	    if (F(AC_PDE_BIT51))
522 		pte |= 1ull << 51;
523 	    if (F(AC_PDE_BIT13))
524 		pte |= 1ull << 13;
525 	    at->pdep = &vroot[index];
526 	    break;
527 	case 1:
528 	    pte = at->phys & PT_BASE_ADDR_MASK;
529 	    if (F(AC_PKU_PKEY))
530 		pte |= 1ull << 59;
531 	    if (F(AC_PTE_PRESENT))
532 		pte |= PT_PRESENT_MASK;
533 	    if (F(AC_PTE_WRITABLE))
534 		pte |= PT_WRITABLE_MASK;
535 	    if (F(AC_PTE_USER))
536 		pte |= PT_USER_MASK;
537 	    if (F(AC_PTE_ACCESSED))
538 		pte |= PT_ACCESSED_MASK;
539 	    if (F(AC_PTE_DIRTY))
540 		pte |= PT_DIRTY_MASK;
541 	    if (F(AC_PTE_NX))
542 		pte |= PT64_NX_MASK;
543 	    if (F(AC_PTE_BIT51))
544 		pte |= 1ull << 51;
545 	    at->ptep = &vroot[index];
546 	    break;
547 	}
548 	vroot[index] = pte;
549  next:
550 	root = vroot[index];
551     }
552     ac_set_expected_status(at);
553 }
554 
555 static void ac_test_setup_pte(ac_test_t *at, ac_pool_t *pool)
556 {
557 	__ac_setup_specific_pages(at, pool, 0, 0);
558 }
559 
560 static void ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool,
561 				    u64 pd_page, u64 pt_page)
562 {
563 	return __ac_setup_specific_pages(at, pool, pd_page, pt_page);
564 }
565 
566 static void dump_mapping(ac_test_t *at)
567 {
568 	unsigned long root = read_cr3();
569         int flags = at->flags;
570 	int i;
571 
572 	printf("Dump mapping: address: %p\n", at->virt);
573 	for (i = page_table_levels ; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) {
574 		pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK);
575 		unsigned index = PT_INDEX((unsigned long)at->virt, i);
576 		pt_element_t pte = vroot[index];
577 
578 		printf("------L%d: %lx\n", i, pte);
579 		root = vroot[index];
580 	}
581 }
582 
583 static void ac_test_check(ac_test_t *at, _Bool *success_ret, _Bool cond,
584                           const char *fmt, ...)
585 {
586     va_list ap;
587     char buf[500];
588 
589     if (!*success_ret) {
590         return;
591     }
592 
593     if (!cond) {
594         return;
595     }
596 
597     *success_ret = false;
598 
599     if (!verbose) {
600         puts("\n");
601         ac_test_show(at);
602     }
603 
604     va_start(ap, fmt);
605     vsnprintf(buf, sizeof(buf), fmt, ap);
606     va_end(ap);
607     printf("FAIL: %s\n", buf);
608     dump_mapping(at);
609 }
610 
611 static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore)
612 {
613     pte1 &= ~ignore;
614     pte2 &= ~ignore;
615     return pte1 == pte2;
616 }
617 
618 static int ac_test_do_access(ac_test_t *at)
619 {
620     static unsigned unique = 42;
621     int fault = 0;
622     unsigned e;
623     static unsigned char user_stack[4096];
624     unsigned long rsp;
625     _Bool success = true;
626     int flags = at->flags;
627 
628     ++unique;
629     if (!(unique & 65535)) {
630         puts(".");
631     }
632 
633     *((unsigned char *)at->phys) = 0xc3; /* ret */
634 
635     unsigned r = unique;
636     set_cr0_wp(F(AC_CPU_CR0_WP));
637     set_efer_nx(F(AC_CPU_EFER_NX));
638     set_cr4_pke(F(AC_CPU_CR4_PKE));
639     if (F(AC_CPU_CR4_PKE)) {
640         /* WD2=AD2=1, WD1=F(AC_PKU_WD), AD1=F(AC_PKU_AD) */
641         write_pkru(0x30 | (F(AC_PKU_WD) ? 8 : 0) |
642                    (F(AC_PKU_AD) ? 4 : 0));
643     }
644 
645     set_cr4_smep(F(AC_CPU_CR4_SMEP));
646 
647     if (F(AC_ACCESS_TWICE)) {
648 	asm volatile (
649 	    "mov $fixed2, %%rsi \n\t"
650 	    "mov (%[addr]), %[reg] \n\t"
651 	    "fixed2:"
652 	    : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e)
653 	    : [addr]"r"(at->virt)
654 	    : "rsi"
655 	    );
656 	fault = 0;
657     }
658 
659     asm volatile ("mov $fixed1, %%rsi \n\t"
660 		  "mov %%rsp, %%rdx \n\t"
661 		  "cmp $0, %[user] \n\t"
662 		  "jz do_access \n\t"
663 		  "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax  \n\t"
664 		  "pushq %[user_ds] \n\t"
665 		  "pushq %[user_stack_top] \n\t"
666 		  "pushfq \n\t"
667 		  "pushq %[user_cs] \n\t"
668 		  "pushq $do_access \n\t"
669 		  "iretq \n"
670 		  "do_access: \n\t"
671 		  "cmp $0, %[fetch] \n\t"
672 		  "jnz 2f \n\t"
673 		  "cmp $0, %[write] \n\t"
674 		  "jnz 1f \n\t"
675 		  "mov (%[addr]), %[reg] \n\t"
676 		  "jmp done \n\t"
677 		  "1: mov %[reg], (%[addr]) \n\t"
678 		  "jmp done \n\t"
679 		  "2: call *%[addr] \n\t"
680 		  "done: \n"
681 		  "fixed1: \n"
682 		  "int %[kernel_entry_vector] \n\t"
683 		  "back_to_kernel:"
684 		  : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp)
685 		  : [addr]"r"(at->virt),
686 		    [write]"r"(F(AC_ACCESS_WRITE)),
687 		    [user]"r"(F(AC_ACCESS_USER)),
688 		    [fetch]"r"(F(AC_ACCESS_FETCH)),
689 		    [user_ds]"i"(USER_DS),
690 		    [user_cs]"i"(USER_CS),
691 		    [user_stack_top]"r"(user_stack + sizeof user_stack),
692 		    [kernel_entry_vector]"i"(0x20)
693 		  : "rsi");
694 
695     asm volatile (".section .text.pf \n\t"
696 		  "page_fault: \n\t"
697 		  "pop %rbx \n\t"
698 		  "mov %rsi, (%rsp) \n\t"
699 		  "movl $1, %eax \n\t"
700 		  "iretq \n\t"
701 		  ".section .text");
702 
703     asm volatile (".section .text.entry \n\t"
704 		  "kernel_entry: \n\t"
705 		  "mov %rdx, %rsp \n\t"
706 		  "jmp back_to_kernel \n\t"
707 		  ".section .text");
708 
709     ac_test_check(at, &success, fault && !at->expected_fault,
710                   "unexpected fault");
711     ac_test_check(at, &success, !fault && at->expected_fault,
712                   "unexpected access");
713     ac_test_check(at, &success, fault && e != at->expected_error,
714                   "error code %x expected %x", e, at->expected_error);
715     ac_test_check(at, &success, at->ptep && *at->ptep != at->expected_pte,
716                   "pte %x expected %x", *at->ptep, at->expected_pte);
717     ac_test_check(at, &success,
718                   !pt_match(*at->pdep, at->expected_pde, at->ignore_pde),
719                   "pde %x expected %x", *at->pdep, at->expected_pde);
720 
721     if (success && verbose) {
722 	if (at->expected_fault) {
723             printf("PASS (%x)\n", at->expected_error);
724 	} else {
725             printf("PASS\n");
726 	}
727     }
728     return success;
729 }
730 
731 static void ac_test_show(ac_test_t *at)
732 {
733     char line[5000];
734 
735     *line = 0;
736     strcat(line, "test");
737     for (int i = 0; i < NR_AC_FLAGS; ++i)
738 	if (at->flags & (1 << i)) {
739 	    strcat(line, " ");
740 	    strcat(line, ac_names[i]);
741 	}
742     strcat(line, ": ");
743     printf("%s", line);
744 }
745 
746 /*
747  * This test case is used to triger the bug which is fixed by
748  * commit e09e90a5 in the kvm tree
749  */
750 static int corrupt_hugepage_triger(ac_pool_t *pool)
751 {
752     ac_test_t at1, at2;
753 
754     ac_test_init(&at1, (void *)(0x123400000000));
755     ac_test_init(&at2, (void *)(0x666600000000));
756 
757     at2.flags = AC_CPU_CR0_WP_MASK | AC_PDE_PSE_MASK | AC_PDE_PRESENT_MASK;
758     ac_test_setup_pte(&at2, pool);
759     if (!ac_test_do_access(&at2))
760         goto err;
761 
762     at1.flags = at2.flags | AC_PDE_WRITABLE_MASK;
763     ac_test_setup_pte(&at1, pool);
764     if (!ac_test_do_access(&at1))
765         goto err;
766 
767     at1.flags |= AC_ACCESS_WRITE_MASK;
768     ac_set_expected_status(&at1);
769     if (!ac_test_do_access(&at1))
770         goto err;
771 
772     at2.flags |= AC_ACCESS_WRITE_MASK;
773     ac_set_expected_status(&at2);
774     if (!ac_test_do_access(&at2))
775         goto err;
776 
777     return 1;
778 
779 err:
780     printf("corrupt_hugepage_triger test fail\n");
781     return 0;
782 }
783 
784 /*
785  * This test case is used to triger the bug which is fixed by
786  * commit 3ddf6c06e13e in the kvm tree
787  */
788 static int check_pfec_on_prefetch_pte(ac_pool_t *pool)
789 {
790 	ac_test_t at1, at2;
791 
792 	ac_test_init(&at1, (void *)(0x123406001000));
793 	ac_test_init(&at2, (void *)(0x123406003000));
794 
795 	at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK;
796 	ac_setup_specific_pages(&at1, pool, 30 * 1024 * 1024, 30 * 1024 * 1024);
797 
798         at2.flags = at1.flags | AC_PTE_NX_MASK;
799 	ac_setup_specific_pages(&at2, pool, 30 * 1024 * 1024, 30 * 1024 * 1024);
800 
801 	if (!ac_test_do_access(&at1)) {
802 		printf("%s: prepare fail\n", __FUNCTION__);
803 		goto err;
804 	}
805 
806 	if (!ac_test_do_access(&at2)) {
807 		printf("%s: check PFEC on prefetch pte path fail\n",
808 			__FUNCTION__);
809 		goto err;
810 	}
811 
812 	return 1;
813 
814 err:
815     return 0;
816 }
817 
818 /*
819  * If the write-fault access is from supervisor and CR0.WP is not set on the
820  * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte
821  * and clears U bit. This is the chance that kvm can change pte access from
822  * readonly to writable.
823  *
824  * Unfortunately, the pte access is the access of 'direct' shadow page table,
825  * means direct sp.role.access = pte_access, then we will create a writable
826  * spte entry on the readonly shadow page table. It will cause Dirty bit is
827  * not tracked when two guest ptes point to the same large page. Note, it
828  * does not have other impact except Dirty bit since cr0.wp is encoded into
829  * sp.role.
830  *
831  * Note: to trigger this bug, hugepage should be disabled on host.
832  */
833 static int check_large_pte_dirty_for_nowp(ac_pool_t *pool)
834 {
835 	ac_test_t at1, at2;
836 
837 	ac_test_init(&at1, (void *)(0x123403000000));
838 	ac_test_init(&at2, (void *)(0x666606000000));
839 
840         at2.flags = AC_PDE_PRESENT_MASK | AC_PDE_PSE_MASK;
841 	ac_test_setup_pte(&at2, pool);
842 	if (!ac_test_do_access(&at2)) {
843 		printf("%s: read on the first mapping fail.\n", __FUNCTION__);
844 		goto err;
845 	}
846 
847         at1.flags = at2.flags | AC_ACCESS_WRITE_MASK;
848 	ac_test_setup_pte(&at1, pool);
849 	if (!ac_test_do_access(&at1)) {
850 		printf("%s: write on the second mapping fail.\n", __FUNCTION__);
851 		goto err;
852 	}
853 
854 	at2.flags |= AC_ACCESS_WRITE_MASK;
855 	ac_set_expected_status(&at2);
856 	if (!ac_test_do_access(&at2)) {
857 		printf("%s: write on the first mapping fail.\n", __FUNCTION__);
858 		goto err;
859 	}
860 
861 	return 1;
862 
863 err:
864 	return 0;
865 }
866 
867 static int check_smep_andnot_wp(ac_pool_t *pool)
868 {
869 	ac_test_t at1;
870 	int err_prepare_andnot_wp, err_smep_andnot_wp;
871 
872 	if (!(cpuid_7_ebx & (1 << 7))) {
873 	    return 1;
874 	}
875 
876 	ac_test_init(&at1, (void *)(0x123406001000));
877 
878 	at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK |
879             AC_PDE_USER_MASK | AC_PTE_USER_MASK |
880             AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK |
881             AC_CPU_CR4_SMEP_MASK |
882             AC_CPU_CR0_WP_MASK |
883             AC_ACCESS_WRITE_MASK;
884 	ac_test_setup_pte(&at1, pool);
885 
886 	/*
887 	 * Here we write the ro user page when
888 	 * cr0.wp=0, then we execute it and SMEP
889 	 * fault should happen.
890 	 */
891 	err_prepare_andnot_wp = ac_test_do_access(&at1);
892 	if (!err_prepare_andnot_wp) {
893 		printf("%s: SMEP prepare fail\n", __FUNCTION__);
894 		goto clean_up;
895 	}
896 
897         at1.flags &= ~AC_ACCESS_WRITE_MASK;
898         at1.flags |= AC_ACCESS_FETCH_MASK;
899         ac_set_expected_status(&at1);
900         err_smep_andnot_wp = ac_test_do_access(&at1);
901 
902 clean_up:
903 	set_cr4_smep(0);
904 
905 	if (!err_prepare_andnot_wp)
906 		goto err;
907 	if (!err_smep_andnot_wp) {
908 		printf("%s: check SMEP without wp fail\n", __FUNCTION__);
909 		goto err;
910 	}
911 	return 1;
912 
913 err:
914 	return 0;
915 }
916 
917 static int ac_test_exec(ac_test_t *at, ac_pool_t *pool)
918 {
919     int r;
920 
921     if (verbose) {
922         ac_test_show(at);
923     }
924     ac_test_setup_pte(at, pool);
925     r = ac_test_do_access(at);
926     return r;
927 }
928 
929 typedef int (*ac_test_fn)(ac_pool_t *pool);
930 const ac_test_fn ac_test_cases[] =
931 {
932 	corrupt_hugepage_triger,
933 	check_pfec_on_prefetch_pte,
934 	check_large_pte_dirty_for_nowp,
935 	check_smep_andnot_wp
936 };
937 
938 static int ac_test_run(void)
939 {
940     ac_test_t at;
941     ac_pool_t pool;
942     int i, tests, successes;
943 
944     printf("run\n");
945     tests = successes = 0;
946 
947     if (cpuid_7_ecx & (1 << 3)) {
948         set_cr4_pke(1);
949         set_cr4_pke(0);
950         /* Now PKRU = 0xFFFFFFFF.  */
951     } else {
952 	unsigned long cr4 = read_cr4();
953 	tests++;
954 	if (write_cr4_checking(cr4 | X86_CR4_PKE) == GP_VECTOR) {
955             successes++;
956             invalid_mask |= AC_PKU_AD_MASK;
957             invalid_mask |= AC_PKU_WD_MASK;
958             invalid_mask |= AC_PKU_PKEY_MASK;
959             invalid_mask |= AC_CPU_CR4_PKE_MASK;
960             printf("CR4.PKE not available, disabling PKE tests\n");
961 	} else {
962             printf("Set PKE in CR4 - expect #GP: FAIL!\n");
963             set_cr4_pke(0);
964 	}
965     }
966 
967     if (!(cpuid_7_ebx & (1 << 7))) {
968 	tests++;
969 	if (set_cr4_smep(1) == GP_VECTOR) {
970             successes++;
971             invalid_mask |= AC_CPU_CR4_SMEP_MASK;
972             printf("CR4.SMEP not available, disabling SMEP tests\n");
973 	} else {
974             printf("Set SMEP in CR4 - expect #GP: FAIL!\n");
975             set_cr4_smep(0);
976 	}
977     }
978 
979     ac_env_int(&pool);
980     ac_test_init(&at, (void *)(0x123400000000 + 16 * smp_id()));
981     do {
982 	++tests;
983 	successes += ac_test_exec(&at, &pool);
984     } while (ac_test_bump(&at));
985 
986     for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) {
987 	++tests;
988 	successes += ac_test_cases[i](&pool);
989     }
990 
991     printf("\n%d tests, %d failures\n", tests, tests - successes);
992 
993     return successes == tests;
994 }
995 
996 int main(void)
997 {
998     int r;
999 
1000     setup_idt();
1001 
1002     cpuid_7_ebx = cpuid(7).b;
1003     cpuid_7_ecx = cpuid(7).c;
1004 
1005     printf("starting test\n\n");
1006     page_table_levels = 4;
1007     r = ac_test_run();
1008 
1009     if (cpuid_7_ecx & (1 << 16)) {
1010         page_table_levels = 5;
1011         setup_5level_page_table();
1012         printf("starting 5-level paging test.\n\n");
1013         r = ac_test_run();
1014     }
1015 
1016     return r ? 0 : 1;
1017 }
1018