xref: /kvm-unit-tests/x86/access.c (revision 52266791750d89b7f4ab6bad4d42de0056e4fb32)
1 
2 #include "libcflat.h"
3 #include "desc.h"
4 #include "processor.h"
5 
/* CPU id stub: always 0 (only used to offset the main test address). */
#define smp_id() 0

/* Local boolean constants. */
#define false 0
#define true 1

/* When set, every tested combination is printed, not only failing ones. */
static _Bool verbose = false;

/* One page-table entry, at any paging level. */
typedef unsigned long pt_element_t;

/* CPUID leaf 7 EBX/ECX values, filled in by main(). */
static int cpuid_7_ebx;
static int cpuid_7_ecx;
/* AC_*_MASK bits that ac_test_bump_one() keeps permanently clear. */
static int invalid_mask;

/* 4 KiB pages. */
#define PAGE_SIZE ((pt_element_t)1 << 12)
#define PAGE_MASK (~(PAGE_SIZE - 1))

/* Physical-address field of an entry: bits 12..39. */
#define PT_BASE_ADDR_MASK \
    ((pt_element_t)(PAGE_MASK & (((pt_element_t)1 << 40) - 1)))
/* Address mask used for large-page PDEs: as above with bit 21 cleared. */
#define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(1ull << 21))

/* Attribute bits of a page-table entry. */
#define PT_PRESENT_MASK    ((pt_element_t)0x01)
#define PT_WRITABLE_MASK   ((pt_element_t)0x02)
#define PT_USER_MASK       ((pt_element_t)0x04)
#define PT_ACCESSED_MASK   ((pt_element_t)0x20)
#define PT_DIRTY_MASK      ((pt_element_t)0x40)
#define PT_PSE_MASK        ((pt_element_t)0x80)
#define PT_NX_MASK         ((pt_element_t)0x8000000000000000ul)

/* Control-register bits toggled by the tests. */
#define CR0_WP_MASK   0x10000UL   /* bit 16 */
#define CR4_SMEP_MASK 0x100000UL  /* bit 20 */

/* Page-fault error-code bits. */
#define PFERR_PRESENT_MASK  0x01U
#define PFERR_WRITE_MASK    0x02U
#define PFERR_USER_MASK     0x04U
#define PFERR_RESERVED_MASK 0x08U
#define PFERR_FETCH_MASK    0x10U
#define PFERR_PK_MASK       0x20U

/* EFER MSR and its NX-enable bit (bit 11). */
#define MSR_EFER 0xc0000080
#define EFER_NX_MASK 0x800ull
44 
/*
 * Index of the entry that maps `address` in the page table at paging
 * `level` (1 is the lowest level; each level decodes 9 address bits above
 * the 12-bit page offset).
 *
 * The whole expansion is parenthesized so the macro can be used inside
 * larger expressions (e.g. `PT_INDEX(a, l) + 1` or `PT_INDEX(a, l) == x`)
 * without the trailing `& 511` binding to the wrong operand.
 */
#define PT_INDEX(address, level)       \
       (((address) >> (12 + ((level) - 1) * 9)) & 511)
47 
48 /*
49  * page table access check tests
50  */
51 
/*
 * Bit positions inside ac_test_t.flags.  Each bit selects one property of
 * the test: an attribute of the PTE/PDE being installed, a PKRU setting,
 * the kind of access performed, or a CPU control bit.
 */
enum {
    /* Attributes of the level-1 entry (PTE). */
    AC_PTE_PRESENT_BIT = 0,
    AC_PTE_WRITABLE_BIT,
    AC_PTE_USER_BIT,
    AC_PTE_ACCESSED_BIT,
    AC_PTE_DIRTY_BIT,
    AC_PTE_NX_BIT,
    AC_PTE_BIT51_BIT,

    /* Attributes of the level-2 entry (PDE). */
    AC_PDE_PRESENT_BIT,
    AC_PDE_WRITABLE_BIT,
    AC_PDE_USER_BIT,
    AC_PDE_ACCESSED_BIT,
    AC_PDE_DIRTY_BIT,
    AC_PDE_PSE_BIT,
    AC_PDE_NX_BIT,
    AC_PDE_BIT51_BIT,
    AC_PDE_BIT13_BIT,

    /* Protection-key state. */
    AC_PKU_AD_BIT,
    AC_PKU_WD_BIT,
    AC_PKU_PKEY_BIT,

    /* Kind of access to perform. */
    AC_ACCESS_USER_BIT,
    AC_ACCESS_WRITE_BIT,
    AC_ACCESS_FETCH_BIT,
    AC_ACCESS_TWICE_BIT,

    /* CPU control bits. */
    AC_CPU_EFER_NX_BIT,
    AC_CPU_CR0_WP_BIT,
    AC_CPU_CR4_SMEP_BIT,
    AC_CPU_CR4_PKE_BIT,

    /* Number of flag bits; must stay last. */
    NR_AC_FLAGS
};

/* Single-bit mask (type int) for one of the AC_*_BIT positions above. */
#define AC_FLAG_MASK(bit)     (1 << (bit))

#define AC_PTE_PRESENT_MASK   AC_FLAG_MASK(AC_PTE_PRESENT_BIT)
#define AC_PTE_WRITABLE_MASK  AC_FLAG_MASK(AC_PTE_WRITABLE_BIT)
#define AC_PTE_USER_MASK      AC_FLAG_MASK(AC_PTE_USER_BIT)
#define AC_PTE_ACCESSED_MASK  AC_FLAG_MASK(AC_PTE_ACCESSED_BIT)
#define AC_PTE_DIRTY_MASK     AC_FLAG_MASK(AC_PTE_DIRTY_BIT)
#define AC_PTE_NX_MASK        AC_FLAG_MASK(AC_PTE_NX_BIT)
#define AC_PTE_BIT51_MASK     AC_FLAG_MASK(AC_PTE_BIT51_BIT)

#define AC_PDE_PRESENT_MASK   AC_FLAG_MASK(AC_PDE_PRESENT_BIT)
#define AC_PDE_WRITABLE_MASK  AC_FLAG_MASK(AC_PDE_WRITABLE_BIT)
#define AC_PDE_USER_MASK      AC_FLAG_MASK(AC_PDE_USER_BIT)
#define AC_PDE_ACCESSED_MASK  AC_FLAG_MASK(AC_PDE_ACCESSED_BIT)
#define AC_PDE_DIRTY_MASK     AC_FLAG_MASK(AC_PDE_DIRTY_BIT)
#define AC_PDE_PSE_MASK       AC_FLAG_MASK(AC_PDE_PSE_BIT)
#define AC_PDE_NX_MASK        AC_FLAG_MASK(AC_PDE_NX_BIT)
#define AC_PDE_BIT51_MASK     AC_FLAG_MASK(AC_PDE_BIT51_BIT)
#define AC_PDE_BIT13_MASK     AC_FLAG_MASK(AC_PDE_BIT13_BIT)

#define AC_PKU_AD_MASK        AC_FLAG_MASK(AC_PKU_AD_BIT)
#define AC_PKU_WD_MASK        AC_FLAG_MASK(AC_PKU_WD_BIT)
#define AC_PKU_PKEY_MASK      AC_FLAG_MASK(AC_PKU_PKEY_BIT)

#define AC_ACCESS_USER_MASK   AC_FLAG_MASK(AC_ACCESS_USER_BIT)
#define AC_ACCESS_WRITE_MASK  AC_FLAG_MASK(AC_ACCESS_WRITE_BIT)
#define AC_ACCESS_FETCH_MASK  AC_FLAG_MASK(AC_ACCESS_FETCH_BIT)
#define AC_ACCESS_TWICE_MASK  AC_FLAG_MASK(AC_ACCESS_TWICE_BIT)

#define AC_CPU_EFER_NX_MASK   AC_FLAG_MASK(AC_CPU_EFER_NX_BIT)
#define AC_CPU_CR0_WP_MASK    AC_FLAG_MASK(AC_CPU_CR0_WP_BIT)
#define AC_CPU_CR4_SMEP_MASK  AC_FLAG_MASK(AC_CPU_CR4_SMEP_BIT)
#define AC_CPU_CR4_PKE_MASK   AC_FLAG_MASK(AC_CPU_CR4_PKE_BIT)
119 
120 const char *ac_names[] = {
121     [AC_PTE_PRESENT_BIT] = "pte.p",
122     [AC_PTE_ACCESSED_BIT] = "pte.a",
123     [AC_PTE_WRITABLE_BIT] = "pte.rw",
124     [AC_PTE_USER_BIT] = "pte.user",
125     [AC_PTE_DIRTY_BIT] = "pte.d",
126     [AC_PTE_NX_BIT] = "pte.nx",
127     [AC_PTE_BIT51_BIT] = "pte.51",
128     [AC_PDE_PRESENT_BIT] = "pde.p",
129     [AC_PDE_ACCESSED_BIT] = "pde.a",
130     [AC_PDE_WRITABLE_BIT] = "pde.rw",
131     [AC_PDE_USER_BIT] = "pde.user",
132     [AC_PDE_DIRTY_BIT] = "pde.d",
133     [AC_PDE_PSE_BIT] = "pde.pse",
134     [AC_PDE_NX_BIT] = "pde.nx",
135     [AC_PDE_BIT51_BIT] = "pde.51",
136     [AC_PDE_BIT13_BIT] = "pde.13",
137     [AC_PKU_AD_BIT] = "pkru.ad",
138     [AC_PKU_WD_BIT] = "pkru.wd",
139     [AC_PKU_PKEY_BIT] = "pkey=1",
140     [AC_ACCESS_WRITE_BIT] = "write",
141     [AC_ACCESS_USER_BIT] = "user",
142     [AC_ACCESS_FETCH_BIT] = "fetch",
143     [AC_ACCESS_TWICE_BIT] = "twice",
144     [AC_CPU_EFER_NX_BIT] = "efer.nx",
145     [AC_CPU_CR0_WP_BIT] = "cr0.wp",
146     [AC_CPU_CR4_SMEP_BIT] = "cr4.smep",
147     [AC_CPU_CR4_PKE_BIT] = "cr4.pke",
148 };
149 
150 static inline void *va(pt_element_t phys)
151 {
152     return (void *)phys;
153 }
154 
155 typedef struct {
156     pt_element_t pt_pool;
157     unsigned pt_pool_size;
158     unsigned pt_pool_current;
159 } ac_pool_t;
160 
161 typedef struct {
162     unsigned flags;
163     void *virt;
164     pt_element_t phys;
165     pt_element_t *ptep;
166     pt_element_t expected_pte;
167     pt_element_t *pdep;
168     pt_element_t expected_pde;
169     pt_element_t ignore_pde;
170     int expected_fault;
171     unsigned expected_error;
172 } ac_test_t;
173 
/*
 * Packed { limit, linear address } pair.
 * NOTE(review): the layout looks like a descriptor-table register image;
 * it is not referenced elsewhere in this file's visible code.
 */
typedef struct {
    unsigned short limit;
    unsigned long linear_addr;
} __attribute__((packed)) descriptor_table_t;
178 
179 
180 static void ac_test_show(ac_test_t *at);
181 
182 int write_cr4_checking(unsigned long val)
183 {
184     asm volatile(ASM_TRY("1f")
185             "mov %0,%%cr4\n\t"
186             "1:": : "r" (val));
187     return exception_vector();
188 }
189 
190 void set_cr0_wp(int wp)
191 {
192     unsigned long cr0 = read_cr0();
193     unsigned long old_cr0 = cr0;
194 
195     cr0 &= ~CR0_WP_MASK;
196     if (wp)
197 	cr0 |= CR0_WP_MASK;
198     if (old_cr0 != cr0)
199         write_cr0(cr0);
200 }
201 
202 void set_cr4_smep(int smep)
203 {
204     unsigned long cr4 = read_cr4();
205     unsigned long old_cr4 = cr4;
206     extern u64 ptl2[];
207 
208     cr4 &= ~CR4_SMEP_MASK;
209     if (smep)
210 	cr4 |= CR4_SMEP_MASK;
211     if (old_cr4 == cr4)
212         return;
213 
214     if (smep)
215         ptl2[2] &= ~PT_USER_MASK;
216     write_cr4(cr4);
217     if (!smep)
218         ptl2[2] |= PT_USER_MASK;
219 }
220 
221 void set_cr4_pke(int pke)
222 {
223     unsigned long cr4 = read_cr4();
224     unsigned long old_cr4 = cr4;
225 
226     cr4 &= ~X86_CR4_PKE;
227     if (pke)
228 	cr4 |= X86_CR4_PKE;
229     if (old_cr4 == cr4)
230         return;
231 
232     /* Check that protection keys do not affect accesses when CR4.PKE=0.  */
233     if ((read_cr4() & X86_CR4_PKE) && !pke) {
234         write_pkru(0xfffffffc);
235     }
236     write_cr4(cr4);
237 }
238 
239 void set_efer_nx(int nx)
240 {
241     unsigned long long efer = rdmsr(MSR_EFER);
242     unsigned long long old_efer = efer;
243 
244     efer &= ~EFER_NX_MASK;
245     if (nx)
246 	efer |= EFER_NX_MASK;
247     if (old_efer != efer)
248         wrmsr(MSR_EFER, efer);
249 }
250 
251 static void ac_env_int(ac_pool_t *pool)
252 {
253     extern char page_fault, kernel_entry;
254     set_idt_entry(14, &page_fault, 0);
255     set_idt_entry(0x20, &kernel_entry, 3);
256 
257     pool->pt_pool = 33 * 1024 * 1024;
258     pool->pt_pool_size = 120 * 1024 * 1024 - pool->pt_pool;
259     pool->pt_pool_current = 0;
260 }
261 
262 void ac_test_init(ac_test_t *at, void *virt)
263 {
264     wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NX_MASK);
265     set_cr0_wp(1);
266     at->flags = 0;
267     at->virt = virt;
268     at->phys = 32 * 1024 * 1024;
269 }
270 
271 int ac_test_bump_one(ac_test_t *at)
272 {
273     at->flags = ((at->flags | invalid_mask) + 1) & ~invalid_mask;
274     return at->flags < (1 << NR_AC_FLAGS);
275 }
276 
277 #define F(x)  ((flags & x##_MASK) != 0)
278 
279 _Bool ac_test_legal(ac_test_t *at)
280 {
281     int flags = at->flags;
282 
283     if (F(AC_ACCESS_FETCH) && F(AC_ACCESS_WRITE))
284 	return false;
285 
286     /*
287      * Since we convert current page to kernel page when cr4.smep=1,
288      * we can't switch to user mode.
289      */
290     if (F(AC_ACCESS_USER) && F(AC_CPU_CR4_SMEP))
291 	return false;
292 
293     /*
294      * Only test protection key faults if CR4.PKE=1.
295      */
296     if (!F(AC_CPU_CR4_PKE) &&
297         (F(AC_PKU_AD) || F(AC_PKU_WD))) {
298 	return false;
299     }
300 
301     /*
302      * pde.bit13 checks handling of reserved bits in largepage PDEs.  It is
303      * meaningless if there is a PTE.
304      */
305     if (!F(AC_PDE_PSE) && F(AC_PDE_BIT13))
306         return false;
307 
308     return true;
309 }
310 
311 int ac_test_bump(ac_test_t *at)
312 {
313     int ret;
314 
315     ret = ac_test_bump_one(at);
316     while (ret && !ac_test_legal(at))
317 	ret = ac_test_bump_one(at);
318     return ret;
319 }
320 
321 pt_element_t ac_test_alloc_pt(ac_pool_t *pool)
322 {
323     pt_element_t ret = pool->pt_pool + pool->pt_pool_current;
324     pool->pt_pool_current += PAGE_SIZE;
325     return ret;
326 }
327 
328 _Bool ac_test_enough_room(ac_pool_t *pool)
329 {
330     return pool->pt_pool_current + 4 * PAGE_SIZE <= pool->pt_pool_size;
331 }
332 
333 void ac_test_reset_pt_pool(ac_pool_t *pool)
334 {
335     pool->pt_pool_current = 0;
336 }
337 
338 pt_element_t ac_test_permissions(ac_test_t *at, unsigned flags, bool writable,
339                                  bool user, bool executable)
340 {
341     bool kwritable = !F(AC_CPU_CR0_WP) && !F(AC_ACCESS_USER);
342     pt_element_t expected = 0;
343 
344     if (F(AC_ACCESS_USER) && !user)
345 	at->expected_fault = 1;
346 
347     if (F(AC_ACCESS_WRITE) && !writable && !kwritable)
348 	at->expected_fault = 1;
349 
350     if (F(AC_ACCESS_FETCH) && !executable)
351 	at->expected_fault = 1;
352 
353     if (F(AC_ACCESS_FETCH) && user && F(AC_CPU_CR4_SMEP))
354         at->expected_fault = 1;
355 
356     if (user && !F(AC_ACCESS_FETCH) && F(AC_PKU_PKEY) && F(AC_CPU_CR4_PKE)) {
357         if (F(AC_PKU_AD)) {
358             at->expected_fault = 1;
359             at->expected_error |= PFERR_PK_MASK;
360         } else if (F(AC_ACCESS_WRITE) && F(AC_PKU_WD) && !kwritable) {
361             at->expected_fault = 1;
362             at->expected_error |= PFERR_PK_MASK;
363         }
364     }
365 
366     if (!at->expected_fault) {
367         expected |= PT_ACCESSED_MASK;
368         if (F(AC_ACCESS_WRITE))
369             expected |= PT_DIRTY_MASK;
370     }
371 
372     return expected;
373 }
374 
375 void ac_emulate_access(ac_test_t *at, unsigned flags)
376 {
377     bool pde_valid, pte_valid;
378     bool user, writable, executable;
379 
380     if (F(AC_ACCESS_USER))
381 	at->expected_error |= PFERR_USER_MASK;
382 
383     if (F(AC_ACCESS_WRITE))
384 	at->expected_error |= PFERR_WRITE_MASK;
385 
386     if (F(AC_ACCESS_FETCH))
387 	at->expected_error |= PFERR_FETCH_MASK;
388 
389     if (!F(AC_PDE_ACCESSED))
390         at->ignore_pde = PT_ACCESSED_MASK;
391 
392     pde_valid = F(AC_PDE_PRESENT)
393         && !F(AC_PDE_BIT51) && !F(AC_PDE_BIT13)
394         && !(F(AC_PDE_NX) && !F(AC_CPU_EFER_NX));
395 
396     if (!pde_valid) {
397         at->expected_fault = 1;
398 	if (F(AC_PDE_PRESENT)) {
399             at->expected_error |= PFERR_RESERVED_MASK;
400         } else {
401             at->expected_error &= ~PFERR_PRESENT_MASK;
402         }
403 	goto fault;
404     }
405 
406     writable = F(AC_PDE_WRITABLE);
407     user = F(AC_PDE_USER);
408     executable = !F(AC_PDE_NX);
409 
410     if (F(AC_PDE_PSE)) {
411         at->expected_pde |= ac_test_permissions(at, flags, writable, user,
412                                                 executable);
413 	goto no_pte;
414     }
415 
416     at->expected_pde |= PT_ACCESSED_MASK;
417 
418     pte_valid = F(AC_PTE_PRESENT)
419         && !F(AC_PTE_BIT51)
420         && !(F(AC_PTE_NX) && !F(AC_CPU_EFER_NX));
421 
422     if (!pte_valid) {
423         at->expected_fault = 1;
424 	if (F(AC_PTE_PRESENT)) {
425             at->expected_error |= PFERR_RESERVED_MASK;
426         } else {
427             at->expected_error &= ~PFERR_PRESENT_MASK;
428         }
429 	goto fault;
430     }
431 
432     writable &= F(AC_PTE_WRITABLE);
433     user &= F(AC_PTE_USER);
434     executable &= !F(AC_PTE_NX);
435 
436     at->expected_pte |= ac_test_permissions(at, flags, writable, user,
437                                             executable);
438 
439 no_pte:
440 fault:
441     if (!at->expected_fault)
442         at->ignore_pde = 0;
443     if (!F(AC_CPU_EFER_NX) && !F(AC_CPU_CR4_SMEP))
444         at->expected_error &= ~PFERR_FETCH_MASK;
445 }
446 
447 void ac_set_expected_status(ac_test_t *at)
448 {
449     invlpg(at->virt);
450 
451     if (at->ptep)
452 	at->expected_pte = *at->ptep;
453     at->expected_pde = *at->pdep;
454     at->ignore_pde = 0;
455     at->expected_fault = 0;
456     at->expected_error = PFERR_PRESENT_MASK;
457 
458     if (at->flags & AC_ACCESS_TWICE_MASK) {
459         ac_emulate_access(at, at->flags & ~AC_ACCESS_WRITE_MASK
460                           & ~AC_ACCESS_FETCH_MASK & ~AC_ACCESS_USER_MASK);
461         at->expected_fault = 0;
462 	at->expected_error = PFERR_PRESENT_MASK;
463         at->ignore_pde = 0;
464     }
465 
466     ac_emulate_access(at, at->flags);
467 }
468 
469 void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, u64 pd_page,
470 			       u64 pt_page)
471 
472 {
473     unsigned long root = read_cr3();
474     int flags = at->flags;
475 
476     if (!ac_test_enough_room(pool))
477 	ac_test_reset_pt_pool(pool);
478 
479     at->ptep = 0;
480     for (int i = 4; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) {
481 	pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK);
482 	unsigned index = PT_INDEX((unsigned long)at->virt, i);
483 	pt_element_t pte = 0;
484 	switch (i) {
485 	case 4:
486 	case 3:
487 	    pte = pd_page ? pd_page : ac_test_alloc_pt(pool);
488 	    pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
489 	    break;
490 	case 2:
491 	    if (!F(AC_PDE_PSE)) {
492 		pte = pt_page ? pt_page : ac_test_alloc_pt(pool);
493 		/* The protection key is ignored on non-leaf entries.  */
494                 if (F(AC_PKU_PKEY))
495                     pte |= 2ull << 59;
496 	    } else {
497 		pte = at->phys & PT_PSE_BASE_ADDR_MASK;
498 		pte |= PT_PSE_MASK;
499                 if (F(AC_PKU_PKEY))
500                     pte |= 1ull << 59;
501 	    }
502 	    if (F(AC_PDE_PRESENT))
503 		pte |= PT_PRESENT_MASK;
504 	    if (F(AC_PDE_WRITABLE))
505 		pte |= PT_WRITABLE_MASK;
506 	    if (F(AC_PDE_USER))
507 		pte |= PT_USER_MASK;
508 	    if (F(AC_PDE_ACCESSED))
509 		pte |= PT_ACCESSED_MASK;
510 	    if (F(AC_PDE_DIRTY))
511 		pte |= PT_DIRTY_MASK;
512 	    if (F(AC_PDE_NX))
513 		pte |= PT_NX_MASK;
514 	    if (F(AC_PDE_BIT51))
515 		pte |= 1ull << 51;
516 	    if (F(AC_PDE_BIT13))
517 		pte |= 1ull << 13;
518 	    at->pdep = &vroot[index];
519 	    break;
520 	case 1:
521 	    pte = at->phys & PT_BASE_ADDR_MASK;
522 	    if (F(AC_PKU_PKEY))
523 		pte |= 1ull << 59;
524 	    if (F(AC_PTE_PRESENT))
525 		pte |= PT_PRESENT_MASK;
526 	    if (F(AC_PTE_WRITABLE))
527 		pte |= PT_WRITABLE_MASK;
528 	    if (F(AC_PTE_USER))
529 		pte |= PT_USER_MASK;
530 	    if (F(AC_PTE_ACCESSED))
531 		pte |= PT_ACCESSED_MASK;
532 	    if (F(AC_PTE_DIRTY))
533 		pte |= PT_DIRTY_MASK;
534 	    if (F(AC_PTE_NX))
535 		pte |= PT_NX_MASK;
536 	    if (F(AC_PTE_BIT51))
537 		pte |= 1ull << 51;
538 	    at->ptep = &vroot[index];
539 	    break;
540 	}
541 	vroot[index] = pte;
542 	root = vroot[index];
543     }
544     ac_set_expected_status(at);
545 }
546 
547 static void ac_test_setup_pte(ac_test_t *at, ac_pool_t *pool)
548 {
549 	__ac_setup_specific_pages(at, pool, 0, 0);
550 }
551 
552 static void ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool,
553 				    u64 pd_page, u64 pt_page)
554 {
555 	return __ac_setup_specific_pages(at, pool, pd_page, pt_page);
556 }
557 
558 static void dump_mapping(ac_test_t *at)
559 {
560 	unsigned long root = read_cr3();
561         int flags = at->flags;
562 	int i;
563 
564 	printf("Dump mapping: address: %p\n", at->virt);
565 	for (i = 4; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) {
566 		pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK);
567 		unsigned index = PT_INDEX((unsigned long)at->virt, i);
568 		pt_element_t pte = vroot[index];
569 
570 		printf("------L%d: %lx\n", i, pte);
571 		root = vroot[index];
572 	}
573 }
574 
575 static void ac_test_check(ac_test_t *at, _Bool *success_ret, _Bool cond,
576                           const char *fmt, ...)
577 {
578     va_list ap;
579     char buf[500];
580 
581     if (!*success_ret) {
582         return;
583     }
584 
585     if (!cond) {
586         return;
587     }
588 
589     *success_ret = false;
590 
591     if (!verbose) {
592         puts("\n");
593         ac_test_show(at);
594     }
595 
596     va_start(ap, fmt);
597     vsnprintf(buf, sizeof(buf), fmt, ap);
598     va_end(ap);
599     printf("FAIL: %s\n", buf);
600     dump_mapping(at);
601 }
602 
603 static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore)
604 {
605     pte1 &= ~ignore;
606     pte2 &= ~ignore;
607     return pte1 == pte2;
608 }
609 
610 int ac_test_do_access(ac_test_t *at)
611 {
612     static unsigned unique = 42;
613     int fault = 0;
614     unsigned e;
615     static unsigned char user_stack[4096];
616     unsigned long rsp;
617     _Bool success = true;
618     int flags = at->flags;
619 
620     ++unique;
621     if (!(unique & 65535)) {
622         puts(".");
623     }
624 
625     *((unsigned char *)at->phys) = 0xc3; /* ret */
626 
627     unsigned r = unique;
628     set_cr0_wp(F(AC_CPU_CR0_WP));
629     set_efer_nx(F(AC_CPU_EFER_NX));
630     set_cr4_pke(F(AC_CPU_CR4_PKE));
631     if (F(AC_CPU_CR4_PKE)) {
632         /* WD2=AD2=1, WD1=F(AC_PKU_WD), AD1=F(AC_PKU_AD) */
633         write_pkru(0x30 | (F(AC_PKU_WD) ? 8 : 0) |
634                    (F(AC_PKU_AD) ? 4 : 0));
635     }
636 
637     set_cr4_smep(F(AC_CPU_CR4_SMEP));
638 
639     if (F(AC_ACCESS_TWICE)) {
640 	asm volatile (
641 	    "mov $fixed2, %%rsi \n\t"
642 	    "mov (%[addr]), %[reg] \n\t"
643 	    "fixed2:"
644 	    : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e)
645 	    : [addr]"r"(at->virt)
646 	    : "rsi"
647 	    );
648 	fault = 0;
649     }
650 
651     asm volatile ("mov $fixed1, %%rsi \n\t"
652 		  "mov %%rsp, %%rdx \n\t"
653 		  "cmp $0, %[user] \n\t"
654 		  "jz do_access \n\t"
655 		  "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax  \n\t"
656 		  "pushq %[user_ds] \n\t"
657 		  "pushq %[user_stack_top] \n\t"
658 		  "pushfq \n\t"
659 		  "pushq %[user_cs] \n\t"
660 		  "pushq $do_access \n\t"
661 		  "iretq \n"
662 		  "do_access: \n\t"
663 		  "cmp $0, %[fetch] \n\t"
664 		  "jnz 2f \n\t"
665 		  "cmp $0, %[write] \n\t"
666 		  "jnz 1f \n\t"
667 		  "mov (%[addr]), %[reg] \n\t"
668 		  "jmp done \n\t"
669 		  "1: mov %[reg], (%[addr]) \n\t"
670 		  "jmp done \n\t"
671 		  "2: call *%[addr] \n\t"
672 		  "done: \n"
673 		  "fixed1: \n"
674 		  "int %[kernel_entry_vector] \n\t"
675 		  "back_to_kernel:"
676 		  : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp)
677 		  : [addr]"r"(at->virt),
678 		    [write]"r"(F(AC_ACCESS_WRITE)),
679 		    [user]"r"(F(AC_ACCESS_USER)),
680 		    [fetch]"r"(F(AC_ACCESS_FETCH)),
681 		    [user_ds]"i"(USER_DS),
682 		    [user_cs]"i"(USER_CS),
683 		    [user_stack_top]"r"(user_stack + sizeof user_stack),
684 		    [kernel_entry_vector]"i"(0x20)
685 		  : "rsi");
686 
687     asm volatile (".section .text.pf \n\t"
688 		  "page_fault: \n\t"
689 		  "pop %rbx \n\t"
690 		  "mov %rsi, (%rsp) \n\t"
691 		  "movl $1, %eax \n\t"
692 		  "iretq \n\t"
693 		  ".section .text");
694 
695     asm volatile (".section .text.entry \n\t"
696 		  "kernel_entry: \n\t"
697 		  "mov %rdx, %rsp \n\t"
698 		  "jmp back_to_kernel \n\t"
699 		  ".section .text");
700 
701     ac_test_check(at, &success, fault && !at->expected_fault,
702                   "unexpected fault");
703     ac_test_check(at, &success, !fault && at->expected_fault,
704                   "unexpected access");
705     ac_test_check(at, &success, fault && e != at->expected_error,
706                   "error code %x expected %x", e, at->expected_error);
707     ac_test_check(at, &success, at->ptep && *at->ptep != at->expected_pte,
708                   "pte %x expected %x", *at->ptep, at->expected_pte);
709     ac_test_check(at, &success,
710                   !pt_match(*at->pdep, at->expected_pde, at->ignore_pde),
711                   "pde %x expected %x", *at->pdep, at->expected_pde);
712 
713     if (success && verbose) {
714 	if (at->expected_fault) {
715             printf("PASS (%x)\n", at->expected_error);
716 	} else {
717             printf("PASS\n");
718 	}
719     }
720     return success;
721 }
722 
723 static void ac_test_show(ac_test_t *at)
724 {
725     char line[5000];
726 
727     *line = 0;
728     strcat(line, "test");
729     for (int i = 0; i < NR_AC_FLAGS; ++i)
730 	if (at->flags & (1 << i)) {
731 	    strcat(line, " ");
732 	    strcat(line, ac_names[i]);
733 	}
734     strcat(line, ": ");
735     printf("%s", line);
736 }
737 
738 /*
739  * This test case is used to triger the bug which is fixed by
740  * commit e09e90a5 in the kvm tree
741  */
742 static int corrupt_hugepage_triger(ac_pool_t *pool)
743 {
744     ac_test_t at1, at2;
745 
746     ac_test_init(&at1, (void *)(0x123400000000));
747     ac_test_init(&at2, (void *)(0x666600000000));
748 
749     at2.flags = AC_CPU_CR0_WP_MASK | AC_PDE_PSE_MASK | AC_PDE_PRESENT_MASK;
750     ac_test_setup_pte(&at2, pool);
751     if (!ac_test_do_access(&at2))
752         goto err;
753 
754     at1.flags = at2.flags | AC_PDE_WRITABLE_MASK;
755     ac_test_setup_pte(&at1, pool);
756     if (!ac_test_do_access(&at1))
757         goto err;
758 
759     at1.flags |= AC_ACCESS_WRITE_MASK;
760     ac_set_expected_status(&at1);
761     if (!ac_test_do_access(&at1))
762         goto err;
763 
764     at2.flags |= AC_ACCESS_WRITE_MASK;
765     ac_set_expected_status(&at2);
766     if (!ac_test_do_access(&at2))
767         goto err;
768 
769     return 1;
770 
771 err:
772     printf("corrupt_hugepage_triger test fail\n");
773     return 0;
774 }
775 
776 /*
777  * This test case is used to triger the bug which is fixed by
778  * commit 3ddf6c06e13e in the kvm tree
779  */
780 static int check_pfec_on_prefetch_pte(ac_pool_t *pool)
781 {
782 	ac_test_t at1, at2;
783 
784 	ac_test_init(&at1, (void *)(0x123406001000));
785 	ac_test_init(&at2, (void *)(0x123406003000));
786 
787 	at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK;
788 	ac_setup_specific_pages(&at1, pool, 30 * 1024 * 1024, 30 * 1024 * 1024);
789 
790         at2.flags = at1.flags | AC_PTE_NX_MASK;
791 	ac_setup_specific_pages(&at2, pool, 30 * 1024 * 1024, 30 * 1024 * 1024);
792 
793 	if (!ac_test_do_access(&at1)) {
794 		printf("%s: prepare fail\n", __FUNCTION__);
795 		goto err;
796 	}
797 
798 	if (!ac_test_do_access(&at2)) {
799 		printf("%s: check PFEC on prefetch pte path fail\n",
800 			__FUNCTION__);
801 		goto err;
802 	}
803 
804 	return 1;
805 
806 err:
807     return 0;
808 }
809 
810 /*
811  * If the write-fault access is from supervisor and CR0.WP is not set on the
812  * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte
813  * and clears U bit. This is the chance that kvm can change pte access from
814  * readonly to writable.
815  *
816  * Unfortunately, the pte access is the access of 'direct' shadow page table,
817  * means direct sp.role.access = pte_access, then we will create a writable
818  * spte entry on the readonly shadow page table. It will cause Dirty bit is
819  * not tracked when two guest ptes point to the same large page. Note, it
820  * does not have other impact except Dirty bit since cr0.wp is encoded into
821  * sp.role.
822  *
823  * Note: to trigger this bug, hugepage should be disabled on host.
824  */
825 static int check_large_pte_dirty_for_nowp(ac_pool_t *pool)
826 {
827 	ac_test_t at1, at2;
828 
829 	ac_test_init(&at1, (void *)(0x123403000000));
830 	ac_test_init(&at2, (void *)(0x666606000000));
831 
832         at2.flags = AC_PDE_PRESENT_MASK | AC_PDE_PSE_MASK;
833 	ac_test_setup_pte(&at2, pool);
834 	if (!ac_test_do_access(&at2)) {
835 		printf("%s: read on the first mapping fail.\n", __FUNCTION__);
836 		goto err;
837 	}
838 
839         at1.flags = at2.flags | AC_ACCESS_WRITE_MASK;
840 	ac_test_setup_pte(&at1, pool);
841 	if (!ac_test_do_access(&at1)) {
842 		printf("%s: write on the second mapping fail.\n", __FUNCTION__);
843 		goto err;
844 	}
845 
846 	at2.flags |= AC_ACCESS_WRITE_MASK;
847 	ac_set_expected_status(&at2);
848 	if (!ac_test_do_access(&at2)) {
849 		printf("%s: write on the first mapping fail.\n", __FUNCTION__);
850 		goto err;
851 	}
852 
853 	return 1;
854 
855 err:
856 	return 0;
857 }
858 
859 static int check_smep_andnot_wp(ac_pool_t *pool)
860 {
861 	ac_test_t at1;
862 	int err_prepare_andnot_wp, err_smep_andnot_wp;
863 
864 	if (!(cpuid_7_ebx & (1 << 7))) {
865 	    return 1;
866 	}
867 
868 	ac_test_init(&at1, (void *)(0x123406001000));
869 
870 	at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK |
871             AC_PDE_USER_MASK | AC_PTE_USER_MASK |
872             AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK |
873             AC_CPU_CR4_SMEP_MASK |
874             AC_CPU_CR0_WP_MASK |
875             AC_ACCESS_WRITE_MASK;
876 	ac_test_setup_pte(&at1, pool);
877 
878 	/*
879 	 * Here we write the ro user page when
880 	 * cr0.wp=0, then we execute it and SMEP
881 	 * fault should happen.
882 	 */
883 	err_prepare_andnot_wp = ac_test_do_access(&at1);
884 	if (!err_prepare_andnot_wp) {
885 		printf("%s: SMEP prepare fail\n", __FUNCTION__);
886 		goto clean_up;
887 	}
888 
889         at1.flags &= ~AC_ACCESS_WRITE_MASK;
890         at1.flags |= AC_ACCESS_FETCH_MASK;
891         ac_set_expected_status(&at1);
892         err_smep_andnot_wp = ac_test_do_access(&at1);
893 
894 clean_up:
895 	set_cr4_smep(0);
896 
897 	if (!err_prepare_andnot_wp)
898 		goto err;
899 	if (!err_smep_andnot_wp) {
900 		printf("%s: check SMEP without wp fail\n", __FUNCTION__);
901 		goto err;
902 	}
903 	return 1;
904 
905 err:
906 	return 0;
907 }
908 
909 int ac_test_exec(ac_test_t *at, ac_pool_t *pool)
910 {
911     int r;
912 
913     if (verbose) {
914         ac_test_show(at);
915     }
916     ac_test_setup_pte(at, pool);
917     r = ac_test_do_access(at);
918     return r;
919 }
920 
921 typedef int (*ac_test_fn)(ac_pool_t *pool);
922 const ac_test_fn ac_test_cases[] =
923 {
924 	corrupt_hugepage_triger,
925 	check_pfec_on_prefetch_pte,
926 	check_large_pte_dirty_for_nowp,
927 	check_smep_andnot_wp
928 };
929 
930 int ac_test_run(void)
931 {
932     ac_test_t at;
933     ac_pool_t pool;
934     int i, tests, successes;
935 
936     printf("run\n");
937     tests = successes = 0;
938 
939     if (cpuid_7_ecx & (1 << 3)) {
940         set_cr4_pke(1);
941         set_cr4_pke(0);
942         /* Now PKRU = 0xFFFFFFFF.  */
943     } else {
944 	unsigned long cr4 = read_cr4();
945 	tests++;
946 	if (write_cr4_checking(cr4 | X86_CR4_PKE) == GP_VECTOR) {
947             successes++;
948             invalid_mask |= AC_PKU_AD_MASK;
949             invalid_mask |= AC_PKU_WD_MASK;
950             invalid_mask |= AC_PKU_PKEY_MASK;
951             invalid_mask |= AC_CPU_CR4_PKE_MASK;
952             printf("CR4.PKE not available, disabling PKE tests\n");
953 	} else {
954             printf("Set PKE in CR4 - expect #GP: FAIL!\n");
955             set_cr4_pke(0);
956 	}
957     }
958 
959     if (!(cpuid_7_ebx & (1 << 7))) {
960 	unsigned long cr4 = read_cr4();
961 	tests++;
962 	if (write_cr4_checking(cr4 | CR4_SMEP_MASK) == GP_VECTOR) {
963             successes++;
964             invalid_mask |= AC_CPU_CR4_SMEP_MASK;
965             printf("CR4.SMEP not available, disabling SMEP tests\n");
966 	} else {
967             printf("Set SMEP in CR4 - expect #GP: FAIL!\n");
968             set_cr4_smep(0);
969 	}
970     }
971 
972     ac_env_int(&pool);
973     ac_test_init(&at, (void *)(0x123400000000 + 16 * smp_id()));
974     do {
975 	++tests;
976 	successes += ac_test_exec(&at, &pool);
977     } while (ac_test_bump(&at));
978 
979     for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) {
980 	++tests;
981 	successes += ac_test_cases[i](&pool);
982     }
983 
984     printf("\n%d tests, %d failures\n", tests, tests - successes);
985 
986     return successes == tests;
987 }
988 
989 int main()
990 {
991     int r;
992 
993     setup_idt();
994 
995     cpuid_7_ebx = cpuid(7).b;
996     cpuid_7_ecx = cpuid(7).c;
997 
998     printf("starting test\n\n");
999     r = ac_test_run();
1000     return r ? 0 : 1;
1001 }
1002