xref: /kvm-unit-tests/x86/access.c (revision e94079c500a204da006c4c665054a11fa84357df)
1 
2 #include "libcflat.h"
3 #include "desc.h"
4 #include "processor.h"
5 
/* Single-vCPU test: stub out smp_id so shared addressing code works. */
#define smp_id() 0

/* Boolean constants for use with _Bool (file predates <stdbool.h> use). */
#define true 1
#define false 0

/* When set, each flag combination is printed before it runs. */
static _Bool verbose = false;

/* A page-table entry: 64 bits in long mode. */
typedef unsigned long pt_element_t;
/* Cached CPUID(7).EBX/ECX feature words (SMEP = EBX bit 7, PKU = ECX bit 3). */
static int cpuid_7_ebx;
static int cpuid_7_ecx;

#define PAGE_SIZE ((pt_element_t)4096)
#define PAGE_MASK (~(PAGE_SIZE-1))

/* Physical-address field of a paging entry (bits 12..39 here). */
#define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 40) - 1) & PAGE_MASK))
/* Same, for a 2MB (PSE) PDE; additionally clears bit 21. */
#define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(1ull << 21))

/* Paging-entry permission/status bits (Intel SDM Vol 3A, paging). */
#define PT_PRESENT_MASK    ((pt_element_t)1 << 0)
#define PT_WRITABLE_MASK   ((pt_element_t)1 << 1)
#define PT_USER_MASK       ((pt_element_t)1 << 2)
#define PT_ACCESSED_MASK   ((pt_element_t)1 << 5)
#define PT_DIRTY_MASK      ((pt_element_t)1 << 6)
#define PT_PSE_MASK        ((pt_element_t)1 << 7)
#define PT_NX_MASK         ((pt_element_t)1 << 63)

/* Control-register bits toggled by the tests. */
#define CR0_WP_MASK (1UL << 16)
#define CR4_SMEP_MASK (1UL << 20)

/* Page-fault error-code bits, as pushed by the CPU on #PF. */
#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)
#define PFERR_RESERVED_MASK (1U << 3)
#define PFERR_FETCH_MASK (1U << 4)
#define PFERR_PK_MASK (1U << 5)

#define MSR_EFER 0xc0000080
#define EFER_NX_MASK		(1ull << 11)
43 
/*
 * Index into the level-@level page table for @address (9 bits per level,
 * 4K pages).  The whole expansion is parenthesized so the macro composes
 * safely with operators of higher precedence than '&' (e.g. '==').
 */
#define PT_INDEX(address, level)       \
       (((address) >> (12 + ((level)-1) * 9)) & 511)
46 
47 /*
48  * page table access check tests
49  */
50 
/*
 * One flag per test dimension.  A test case is a combination of these
 * flags; ac_test_bump() enumerates all combinations like a binary counter.
 */
enum {
    /* PTE bits under test. */
    AC_PTE_PRESENT,
    AC_PTE_WRITABLE,
    AC_PTE_USER,
    AC_PTE_ACCESSED,
    AC_PTE_DIRTY,
    AC_PTE_NX,
    AC_PTE_BIT51,

    /* PDE bits under test (BIT13/BIT51 are reserved-bit checks). */
    AC_PDE_PRESENT,
    AC_PDE_WRITABLE,
    AC_PDE_USER,
    AC_PDE_ACCESSED,
    AC_PDE_DIRTY,
    AC_PDE_PSE,
    AC_PDE_NX,
    AC_PDE_BIT51,
    AC_PDE_BIT13,

    /* Protection-key configuration (PKRU AD/WD bits, pkey=1 in the entry). */
    AC_PKU_AD,
    AC_PKU_WD,
    AC_PKU_PKEY,

    /* The kind of access performed. */
    AC_ACCESS_USER,
    AC_ACCESS_WRITE,
    AC_ACCESS_FETCH,
    AC_ACCESS_TWICE,

    /* CPU control-register state. */
    AC_CPU_EFER_NX,
    AC_CPU_CR0_WP,
    AC_CPU_CR4_SMEP,
    AC_CPU_CR4_PKE,

    NR_AC_FLAGS
};
86 
/* Human-readable name for each AC_* flag, used by ac_test_show(). */
const char *ac_names[] = {
    [AC_PTE_PRESENT] = "pte.p",
    [AC_PTE_ACCESSED] = "pte.a",
    [AC_PTE_WRITABLE] = "pte.rw",
    [AC_PTE_USER] = "pte.user",
    [AC_PTE_DIRTY] = "pte.d",
    [AC_PTE_NX] = "pte.nx",
    [AC_PTE_BIT51] = "pte.51",
    [AC_PDE_PRESENT] = "pde.p",
    [AC_PDE_ACCESSED] = "pde.a",
    [AC_PDE_WRITABLE] = "pde.rw",
    [AC_PDE_USER] = "pde.user",
    [AC_PDE_DIRTY] = "pde.d",
    [AC_PDE_PSE] = "pde.pse",
    [AC_PDE_NX] = "pde.nx",
    [AC_PDE_BIT51] = "pde.51",
    [AC_PDE_BIT13] = "pde.13",
    [AC_PKU_AD] = "pkru.ad",
    [AC_PKU_WD] = "pkru.wd",
    [AC_PKU_PKEY] = "pkey=1",
    [AC_ACCESS_WRITE] = "write",
    [AC_ACCESS_USER] = "user",
    [AC_ACCESS_FETCH] = "fetch",
    [AC_ACCESS_TWICE] = "twice",
    [AC_CPU_EFER_NX] = "efer.nx",
    [AC_CPU_CR0_WP] = "cr0.wp",
    [AC_CPU_CR4_SMEP] = "cr4.smep",
    [AC_CPU_CR4_PKE] = "cr4.pke",
};
116 
/*
 * Convert a physical address to a usable pointer.  A plain cast suffices
 * because test memory is identity-mapped.
 */
static inline void *va(pt_element_t phys)
{
    return (void *)phys;
}
121 
/* Bump allocator handing out page-table pages from a fixed physical range. */
typedef struct {
    pt_element_t pt_pool;       /* base physical address of the pool */
    unsigned pt_pool_size;      /* pool size in bytes */
    unsigned pt_pool_current;   /* offset of the next free page */
} ac_pool_t;

/* One access-test combination plus its precomputed expected outcome. */
typedef struct {
    unsigned flags[NR_AC_FLAGS];  /* on/off setting for each AC_* dimension */
    void *virt;                   /* virtual address the test accesses */
    pt_element_t phys;            /* physical address it is mapped to */
    pt_element_t *ptep;           /* installed PTE (0 when AC_PDE_PSE) */
    pt_element_t expected_pte;    /* PTE value expected after the access */
    pt_element_t *pdep;           /* installed PDE */
    pt_element_t expected_pde;    /* PDE value expected after the access */
    pt_element_t ignore_pde;      /* PDE bits excluded from the comparison */
    int expected_fault;           /* nonzero if the access should #PF */
    unsigned expected_error;      /* expected #PF error code */
} ac_test_t;

/* (limit, base) pseudo-descriptor layout (SGDT/SIDT style). */
/* NOTE(review): not referenced anywhere in this file's visible code. */
typedef struct {
    unsigned short limit;
    unsigned long linear_addr;
} __attribute__((packed)) descriptor_table_t;
145 
146 
147 static void ac_test_show(ac_test_t *at);
148 
/*
 * Write @val to CR4, catching any exception the MOV raises.
 * Returns the exception vector as reported by exception_vector()
 * (e.g. GP_VECTOR when an unsupported bit is set).
 */
int write_cr4_checking(unsigned long val)
{
    asm volatile(ASM_TRY("1f")
            "mov %0,%%cr4\n\t"
            "1:": : "r" (val));
    return exception_vector();
}
156 
157 void set_cr0_wp(int wp)
158 {
159     unsigned long cr0 = read_cr0();
160 
161     cr0 &= ~CR0_WP_MASK;
162     if (wp)
163 	cr0 |= CR0_WP_MASK;
164     write_cr0(cr0);
165 }
166 
167 void set_cr4_smep(int smep)
168 {
169     unsigned long cr4 = read_cr4();
170 
171     cr4 &= ~CR4_SMEP_MASK;
172     if (smep)
173 	cr4 |= CR4_SMEP_MASK;
174     write_cr4(cr4);
175 }
176 
/*
 * Set or clear CR4.PKE per @pke.
 *
 * When turning PKE off, first program PKRU to all-ones: later accesses
 * then double-check that protection keys are ignored while CR4.PKE=0.
 */
void set_cr4_pke(int pke)
{
    unsigned long cr4 = read_cr4();

    /* Check that protection keys do not affect accesses when CR4.PKE=0.  */
    if ((read_cr4() & X86_CR4_PKE) && !pke) {
        write_pkru(0xffffffff);
    }

    cr4 &= ~X86_CR4_PKE;
    if (pke)
	cr4 |= X86_CR4_PKE;
    write_cr4(cr4);
}
191 
192 void set_efer_nx(int nx)
193 {
194     unsigned long long efer;
195 
196     efer = rdmsr(MSR_EFER);
197     efer &= ~EFER_NX_MASK;
198     if (nx)
199 	efer |= EFER_NX_MASK;
200     wrmsr(MSR_EFER, efer);
201 }
202 
/*
 * One-time environment setup: install this test's IDT handlers and
 * initialize the page-table page allocator.
 */
static void ac_env_int(ac_pool_t *pool)
{
    setup_idt();

    /* Handlers live in the asm blobs inside ac_test_do_access(). */
    extern char page_fault, kernel_entry;
    set_idt_entry(14, &page_fault, 0);       /* #PF */
    set_idt_entry(0x20, &kernel_entry, 3);   /* user->kernel re-entry, DPL 3 */

    /* Physical range [33MB, 120MB) feeds ac_test_alloc_pt(). */
    pool->pt_pool = 33 * 1024 * 1024;
    pool->pt_pool_size = 120 * 1024 * 1024 - pool->pt_pool;
    pool->pt_pool_current = 0;
}
215 
216 void ac_test_init(ac_test_t *at, void *virt)
217 {
218     wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NX_MASK);
219     set_cr0_wp(1);
220     for (int i = 0; i < NR_AC_FLAGS; ++i)
221 	at->flags[i] = 0;
222     at->virt = virt;
223     at->phys = 32 * 1024 * 1024;
224 }
225 
226 int ac_test_bump_one(ac_test_t *at)
227 {
228     for (int i = 0; i < NR_AC_FLAGS; ++i)
229 	if (!at->flags[i]) {
230 	    at->flags[i] = 1;
231 	    return 1;
232 	} else
233 	    at->flags[i] = 0;
234     return 0;
235 }
236 
237 _Bool ac_test_legal(ac_test_t *at)
238 {
239     if (at->flags[AC_ACCESS_FETCH] && at->flags[AC_ACCESS_WRITE])
240 	return false;
241 
242     /*
243      * Since we convert current page to kernel page when cr4.smep=1,
244      * we can't switch to user mode.
245      */
246     if (at->flags[AC_ACCESS_USER] && at->flags[AC_CPU_CR4_SMEP])
247 	return false;
248 
249     /*
250      * Only test protection key faults if CR4.PKE=1.
251      */
252     if (!at->flags[AC_CPU_CR4_PKE] &&
253         (at->flags[AC_PKU_AD] || at->flags[AC_PKU_WD])) {
254 	return false;
255     }
256 
257     /*
258      * pde.bit13 checks handling of reserved bits in largepage PDEs.  It is
259      * meaningless if there is a PTE.
260      */
261     if (!at->flags[AC_PDE_PSE] && at->flags[AC_PDE_BIT13])
262         return false;
263 
264     return true;
265 }
266 
267 int ac_test_bump(ac_test_t *at)
268 {
269     int ret;
270 
271     ret = ac_test_bump_one(at);
272     while (ret && !ac_test_legal(at))
273 	ret = ac_test_bump_one(at);
274     return ret;
275 }
276 
277 pt_element_t ac_test_alloc_pt(ac_pool_t *pool)
278 {
279     pt_element_t ret = pool->pt_pool + pool->pt_pool_current;
280     pool->pt_pool_current += PAGE_SIZE;
281     return ret;
282 }
283 
284 _Bool ac_test_enough_room(ac_pool_t *pool)
285 {
286     return pool->pt_pool_current + 4 * PAGE_SIZE <= pool->pt_pool_size;
287 }
288 
289 void ac_test_reset_pt_pool(ac_pool_t *pool)
290 {
291     pool->pt_pool_current = 0;
292 }
293 
/*
 * From at->flags, compute whether the access should fault, the expected
 * #PF error code, and the PDE/PTE values (accessed/dirty bits) expected
 * afterwards.  This mirrors the x86 paging permission rules that the
 * CPU / KVM shadow paging must implement.
 */
void ac_set_expected_status(ac_test_t *at)
{
    int pde_valid, pte_valid;

    invlpg(at->virt);

    /* Start from the just-installed entries and assume a clean access. */
    if (at->ptep)
	at->expected_pte = *at->ptep;
    at->expected_pde = *at->pdep;
    at->ignore_pde = 0;
    at->expected_fault = 0;
    at->expected_error = PFERR_PRESENT_MASK;

    /* "Valid" = present, no reserved bit set, NX only if EFER.NX allows it. */
    pde_valid = at->flags[AC_PDE_PRESENT]
        && !at->flags[AC_PDE_BIT51] && !at->flags[AC_PDE_BIT13]
        && !(at->flags[AC_PDE_NX] && !at->flags[AC_CPU_EFER_NX]);
    pte_valid = pde_valid
        && at->flags[AC_PTE_PRESENT]
        && !at->flags[AC_PTE_BIT51]
        && !(at->flags[AC_PTE_NX] && !at->flags[AC_CPU_EFER_NX]);

    /* Error-code bits describing the access itself. */
    if (at->flags[AC_ACCESS_USER])
	at->expected_error |= PFERR_USER_MASK;

    if (at->flags[AC_ACCESS_WRITE])
	at->expected_error |= PFERR_WRITE_MASK;

    if (at->flags[AC_ACCESS_FETCH])
	at->expected_error |= PFERR_FETCH_MASK;

    /* PDE-level: not-present beats reserved-bit violations. */
    if (!at->flags[AC_PDE_PRESENT]) {
	at->expected_fault = 1;
	at->expected_error &= ~PFERR_PRESENT_MASK;
    } else if (!pde_valid) {
        at->expected_fault = 1;
        at->expected_error |= PFERR_RESERVED_MASK;
    }

    /* PDE-level permission checks. */
    if (at->flags[AC_ACCESS_USER] && !at->flags[AC_PDE_USER])
	at->expected_fault = 1;

    if (at->flags[AC_ACCESS_WRITE]
	&& !at->flags[AC_PDE_WRITABLE]
	&& (at->flags[AC_CPU_CR0_WP] || at->flags[AC_ACCESS_USER]))
	at->expected_fault = 1;

    if (at->flags[AC_ACCESS_FETCH] && at->flags[AC_PDE_NX])
	at->expected_fault = 1;

    /* If pde.a starts clear, don't insist on its value after a fault. */
    if (!at->flags[AC_PDE_ACCESSED])
        at->ignore_pde = PT_ACCESSED_MASK;

    if (!pde_valid)
	goto fault;

    if (!at->expected_fault)
        at->expected_pde |= PT_ACCESSED_MASK;

    if (at->flags[AC_PDE_PSE]) {
        /* Even for "twice" accesses, PKEY might cause pde.a=0.  */
        if (at->flags[AC_PDE_USER] && at->flags[AC_ACCESS_TWICE] &&
            at->flags[AC_PKU_PKEY] && at->flags[AC_CPU_CR4_PKE] &&
            at->flags[AC_PKU_AD]) {
            pde_valid = false;
        }

	/* SMEP forbids supervisor fetches from user large pages. */
	if (at->flags[AC_ACCESS_FETCH] && at->flags[AC_PDE_USER]
	    && at->flags[AC_CPU_CR4_SMEP])
	    at->expected_fault = 1;

	/* Protection-key faults apply to data accesses on user pages only. */
        if (at->flags[AC_PDE_USER] && !at->flags[AC_ACCESS_FETCH] &&
	    at->flags[AC_PKU_PKEY] && at->flags[AC_CPU_CR4_PKE] &&
	    !at->expected_fault) {
            if (at->flags[AC_PKU_AD]) {
                at->expected_fault = 1;
                at->expected_error |= PFERR_PK_MASK;
            } else if (at->flags[AC_ACCESS_WRITE] && at->flags[AC_PKU_WD] &&
                       (at->flags[AC_CPU_CR0_WP] || at->flags[AC_ACCESS_USER])) {
                at->expected_fault = 1;
                at->expected_error |= PFERR_PK_MASK;
            }
        }
	if (at->flags[AC_ACCESS_WRITE] && !at->expected_fault)
	    at->expected_pde |= PT_DIRTY_MASK;

	goto no_pte;
    }

    /* PTE-level checks (4K mappings only). */
    if (!at->flags[AC_PTE_PRESENT]) {
	at->expected_fault = 1;
	at->expected_error &= ~PFERR_PRESENT_MASK;
    } else if (!pte_valid) {
        at->expected_fault = 1;
        at->expected_error |= PFERR_RESERVED_MASK;
    }

    if (at->flags[AC_ACCESS_USER] && !at->flags[AC_PTE_USER])
	at->expected_fault = 1;

    if (!pte_valid)
        goto fault;

    /* Even for "twice" accesses, PKEY might cause pte.a=0.  */
    if (at->flags[AC_PDE_USER] && at->flags[AC_PTE_USER] && at->flags[AC_ACCESS_TWICE] &&
        at->flags[AC_PKU_PKEY] && at->flags[AC_CPU_CR4_PKE] &&
	at->flags[AC_PKU_AD]) {
        pte_valid = false;
    }

    if (at->flags[AC_ACCESS_WRITE]
	&& !at->flags[AC_PTE_WRITABLE]
	&& (at->flags[AC_CPU_CR0_WP] || at->flags[AC_ACCESS_USER]))
	at->expected_fault = 1;

    /* Fetches fault on NX pages, or on user pages under SMEP. */
    if (at->flags[AC_ACCESS_FETCH]
	&& (at->flags[AC_PTE_NX]
	    || (at->flags[AC_CPU_CR4_SMEP]
		&& at->flags[AC_PDE_USER]
		&& at->flags[AC_PTE_USER])))
	at->expected_fault = 1;

    /* Protection-key faults on the 4K page (data accesses only). */
    if (at->flags[AC_PDE_USER] && at->flags[AC_PTE_USER] && !at->flags[AC_ACCESS_FETCH] &&
        at->flags[AC_PKU_PKEY] && at->flags[AC_CPU_CR4_PKE] &&
	!at->expected_fault) {
        if (at->flags[AC_PKU_AD]) {
            at->expected_fault = 1;
            at->expected_error |= PFERR_PK_MASK;
        } else if (at->flags[AC_ACCESS_WRITE] && at->flags[AC_PKU_WD] &&
                   (at->flags[AC_CPU_CR0_WP] || at->flags[AC_ACCESS_USER])) {
            at->expected_fault = 1;
            at->expected_error |= PFERR_PK_MASK;
        }
    }

    if (at->expected_fault)
	goto fault;

    /* Clean access: A is set, and D too for writes. */
    at->expected_pte |= PT_ACCESSED_MASK;
    if (at->flags[AC_ACCESS_WRITE])
	at->expected_pte |= PT_DIRTY_MASK;

no_pte:
fault:
    /* A prior access of a "twice" pair already set the accessed bits. */
    if (at->flags[AC_ACCESS_TWICE]) {
	if (pde_valid) {
	    at->expected_pde |= PT_ACCESSED_MASK;
	    if (pte_valid)
		at->expected_pte |= PT_ACCESSED_MASK;
	}
    }
    if (!at->expected_fault)
        at->ignore_pde = 0;
    /* Without NX and SMEP, the CPU reports fetches as plain reads. */
    if (!at->flags[AC_CPU_EFER_NX] && !at->flags[AC_CPU_CR4_SMEP])
        at->expected_error &= ~PFERR_FETCH_MASK;
}
449 
/*
 * Install a 4-level mapping for at->virt according to at->flags.
 * When @pd_page / @pt_page are nonzero they are used as the PD/PT pages
 * (letting two tests share tables); otherwise pages come from @pool.
 * Finishes by computing the expected outcome via ac_set_expected_status().
 */
void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, u64 pd_page,
			       u64 pt_page)

{
    unsigned long root = read_cr3();

    if (!ac_test_enough_room(pool))
	ac_test_reset_pt_pool(pool);

    at->ptep = 0;
    /* Walk from PML4 (level 4) down; stop at the PDE for large pages. */
    for (int i = 4; i >= 1 && (i >= 2 || !at->flags[AC_PDE_PSE]); --i) {
	pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK);
	unsigned index = PT_INDEX((unsigned long)at->virt, i);
	pt_element_t pte = 0;
	switch (i) {
	case 4:
	case 3:
	    /* Upper levels are always present/writable/user. */
	    pte = pd_page ? pd_page : ac_test_alloc_pt(pool);
	    pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
	    break;
	case 2:
	    if (!at->flags[AC_PDE_PSE]) {
		pte = pt_page ? pt_page : ac_test_alloc_pt(pool);
		/* The protection key is ignored on non-leaf entries.  */
                if (at->flags[AC_PKU_PKEY])
                    pte |= 2ull << 59;
	    } else {
		pte = at->phys & PT_PSE_BASE_ADDR_MASK;
		pte |= PT_PSE_MASK;
                if (at->flags[AC_PKU_PKEY])
                    pte |= 1ull << 59;   /* pkey = 1 */
	    }
	    if (at->flags[AC_PDE_PRESENT])
		pte |= PT_PRESENT_MASK;
	    if (at->flags[AC_PDE_WRITABLE])
		pte |= PT_WRITABLE_MASK;
	    if (at->flags[AC_PDE_USER])
		pte |= PT_USER_MASK;
	    if (at->flags[AC_PDE_ACCESSED])
		pte |= PT_ACCESSED_MASK;
	    if (at->flags[AC_PDE_DIRTY])
		pte |= PT_DIRTY_MASK;
	    if (at->flags[AC_PDE_NX])
		pte |= PT_NX_MASK;
	    if (at->flags[AC_PDE_BIT51])
		pte |= 1ull << 51;
	    if (at->flags[AC_PDE_BIT13])
		pte |= 1ull << 13;
	    at->pdep = &vroot[index];
	    break;
	case 1:
	    pte = at->phys & PT_BASE_ADDR_MASK;
	    if (at->flags[AC_PKU_PKEY])
		pte |= 1ull << 59;   /* pkey = 1 */
	    if (at->flags[AC_PTE_PRESENT])
		pte |= PT_PRESENT_MASK;
	    if (at->flags[AC_PTE_WRITABLE])
		pte |= PT_WRITABLE_MASK;
	    if (at->flags[AC_PTE_USER])
		pte |= PT_USER_MASK;
	    if (at->flags[AC_PTE_ACCESSED])
		pte |= PT_ACCESSED_MASK;
	    if (at->flags[AC_PTE_DIRTY])
		pte |= PT_DIRTY_MASK;
	    if (at->flags[AC_PTE_NX])
		pte |= PT_NX_MASK;
	    if (at->flags[AC_PTE_BIT51])
		pte |= 1ull << 51;
	    at->ptep = &vroot[index];
	    break;
	}
	vroot[index] = pte;
	root = vroot[index];
    }
    ac_set_expected_status(at);
}
526 
/* Build the mapping for @at using freshly allocated page-table pages. */
static void ac_test_setup_pte(ac_test_t *at, ac_pool_t *pool)
{
	__ac_setup_specific_pages(at, pool, 0, 0);
}
531 
532 static void ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool,
533 				    u64 pd_page, u64 pt_page)
534 {
535 	return __ac_setup_specific_pages(at, pool, pd_page, pt_page);
536 }
537 
538 static void dump_mapping(ac_test_t *at)
539 {
540 	unsigned long root = read_cr3();
541 	int i;
542 
543 	printf("Dump mapping: address: %p\n", at->virt);
544 	for (i = 4; i >= 1 && (i >= 2 || !at->flags[AC_PDE_PSE]); --i) {
545 		pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK);
546 		unsigned index = PT_INDEX((unsigned long)at->virt, i);
547 		pt_element_t pte = vroot[index];
548 
549 		printf("------L%d: %lx\n", i, pte);
550 		root = vroot[index];
551 	}
552 }
553 
554 static void ac_test_check(ac_test_t *at, _Bool *success_ret, _Bool cond,
555                           const char *fmt, ...)
556 {
557     va_list ap;
558     char buf[500];
559 
560     if (!*success_ret) {
561         return;
562     }
563 
564     if (!cond) {
565         return;
566     }
567 
568     *success_ret = false;
569 
570     if (!verbose) {
571         ac_test_show(at);
572     }
573 
574     va_start(ap, fmt);
575     vsnprintf(buf, sizeof(buf), fmt, ap);
576     va_end(ap);
577     printf("FAIL: %s\n", buf);
578     dump_mapping(at);
579 }
580 
581 static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore)
582 {
583     pte1 &= ~ignore;
584     pte2 &= ~ignore;
585     return pte1 == pte2;
586 }
587 
588 int ac_test_do_access(ac_test_t *at)
589 {
590     static unsigned unique = 42;
591     int fault = 0;
592     unsigned e;
593     static unsigned char user_stack[4096];
594     unsigned long rsp;
595     _Bool success = true;
596 
597     ++unique;
598 
599     *((unsigned char *)at->phys) = 0xc3; /* ret */
600 
601     unsigned r = unique;
602     set_cr0_wp(at->flags[AC_CPU_CR0_WP]);
603     set_efer_nx(at->flags[AC_CPU_EFER_NX]);
604     if (at->flags[AC_CPU_CR4_PKE] && !(cpuid_7_ecx & (1 << 3))) {
605 	unsigned long cr4 = read_cr4();
606 	if (write_cr4_checking(cr4 | X86_CR4_PKE) == GP_VECTOR)
607 		goto done;
608 	printf("Set PKE in CR4 - expect #GP: FAIL!\n");
609 	return 0;
610     }
611     if (at->flags[AC_CPU_CR4_SMEP] && !(cpuid_7_ebx & (1 << 7))) {
612 	unsigned long cr4 = read_cr4();
613 	if (write_cr4_checking(cr4 | CR4_SMEP_MASK) == GP_VECTOR)
614 		goto done;
615 	printf("Set SMEP in CR4 - expect #GP: FAIL!\n");
616 	return 0;
617     }
618 
619     set_cr4_pke(at->flags[AC_CPU_CR4_PKE]);
620     if (at->flags[AC_CPU_CR4_PKE]) {
621         /* WD2=AD2=1, WD1=at->flags[AC_PKU_WD], AD1=at->flags[AC_PKU_AD] */
622         write_pkru(0x30 | (at->flags[AC_PKU_WD] ? 8 : 0) |
623                    (at->flags[AC_PKU_AD] ? 4 : 0));
624     }
625 
626     set_cr4_smep(at->flags[AC_CPU_CR4_SMEP]);
627 
628     if (at->flags[AC_ACCESS_TWICE]) {
629 	asm volatile (
630 	    "mov $fixed2, %%rsi \n\t"
631 	    "mov (%[addr]), %[reg] \n\t"
632 	    "fixed2:"
633 	    : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e)
634 	    : [addr]"r"(at->virt)
635 	    : "rsi"
636 	    );
637 	fault = 0;
638     }
639 
640     asm volatile ("mov $fixed1, %%rsi \n\t"
641 		  "mov %%rsp, %%rdx \n\t"
642 		  "cmp $0, %[user] \n\t"
643 		  "jz do_access \n\t"
644 		  "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax  \n\t"
645 		  "pushq %[user_ds] \n\t"
646 		  "pushq %[user_stack_top] \n\t"
647 		  "pushfq \n\t"
648 		  "pushq %[user_cs] \n\t"
649 		  "pushq $do_access \n\t"
650 		  "iretq \n"
651 		  "do_access: \n\t"
652 		  "cmp $0, %[fetch] \n\t"
653 		  "jnz 2f \n\t"
654 		  "cmp $0, %[write] \n\t"
655 		  "jnz 1f \n\t"
656 		  "mov (%[addr]), %[reg] \n\t"
657 		  "jmp done \n\t"
658 		  "1: mov %[reg], (%[addr]) \n\t"
659 		  "jmp done \n\t"
660 		  "2: call *%[addr] \n\t"
661 		  "done: \n"
662 		  "fixed1: \n"
663 		  "int %[kernel_entry_vector] \n\t"
664 		  "back_to_kernel:"
665 		  : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp)
666 		  : [addr]"r"(at->virt),
667 		    [write]"r"(at->flags[AC_ACCESS_WRITE]),
668 		    [user]"r"(at->flags[AC_ACCESS_USER]),
669 		    [fetch]"r"(at->flags[AC_ACCESS_FETCH]),
670 		    [user_ds]"i"(USER_DS),
671 		    [user_cs]"i"(USER_CS),
672 		    [user_stack_top]"r"(user_stack + sizeof user_stack),
673 		    [kernel_entry_vector]"i"(0x20)
674 		  : "rsi");
675 
676     asm volatile (".section .text.pf \n\t"
677 		  "page_fault: \n\t"
678 		  "pop %rbx \n\t"
679 		  "mov %rsi, (%rsp) \n\t"
680 		  "movl $1, %eax \n\t"
681 		  "iretq \n\t"
682 		  ".section .text");
683 
684     asm volatile (".section .text.entry \n\t"
685 		  "kernel_entry: \n\t"
686 		  "mov %rdx, %rsp \n\t"
687 		  "jmp back_to_kernel \n\t"
688 		  ".section .text");
689 
690     ac_test_check(at, &success, fault && !at->expected_fault,
691                   "unexpected fault");
692     ac_test_check(at, &success, !fault && at->expected_fault,
693                   "unexpected access");
694     ac_test_check(at, &success, fault && e != at->expected_error,
695                   "error code %x expected %x", e, at->expected_error);
696     ac_test_check(at, &success, at->ptep && *at->ptep != at->expected_pte,
697                   "pte %x expected %x", *at->ptep, at->expected_pte);
698     ac_test_check(at, &success,
699                   !pt_match(*at->pdep, at->expected_pde, at->ignore_pde),
700                   "pde %x expected %x", *at->pdep, at->expected_pde);
701 
702 done:
703     if (success && verbose) {
704         printf("PASS\n");
705     }
706     return success;
707 }
708 
709 static void ac_test_show(ac_test_t *at)
710 {
711     char line[5000];
712 
713     *line = 0;
714     strcat(line, "test");
715     for (int i = 0; i < NR_AC_FLAGS; ++i)
716 	if (at->flags[i]) {
717 	    strcat(line, " ");
718 	    strcat(line, ac_names[i]);
719 	}
720     strcat(line, ": ");
721     printf("%s", line);
722 }
723 
724 /*
 * This test case is used to trigger the bug which is fixed by
726  * commit e09e90a5 in the kvm tree
727  */
/*
 * Build two large-page mappings, read through both, then write through
 * both after recomputing expectations, checking that the shadow huge
 * pages are not corrupted.  Returns 1 on success, 0 on failure.
 */
static int corrupt_hugepage_triger(ac_pool_t *pool)
{
    ac_test_t at1, at2;

    ac_test_init(&at1, (void *)(0x123400000000));
    ac_test_init(&at2, (void *)(0x666600000000));

    /* Read through a read-only large page... */
    at2.flags[AC_CPU_CR0_WP] = 1;
    at2.flags[AC_PDE_PSE] = 1;
    at2.flags[AC_PDE_PRESENT] = 1;
    ac_test_setup_pte(&at2, pool);
    if (!ac_test_do_access(&at2))
        goto err;

    /* ...and through a writable one. */
    at1.flags[AC_CPU_CR0_WP] = 1;
    at1.flags[AC_PDE_PSE] = 1;
    at1.flags[AC_PDE_WRITABLE] = 1;
    at1.flags[AC_PDE_PRESENT] = 1;
    ac_test_setup_pte(&at1, pool);
    if (!ac_test_do_access(&at1))
        goto err;

    /* Now write through both; only expectations are recomputed. */
    at1.flags[AC_ACCESS_WRITE] = 1;
    ac_set_expected_status(&at1);
    if (!ac_test_do_access(&at1))
        goto err;

    at2.flags[AC_ACCESS_WRITE] = 1;
    ac_set_expected_status(&at2);
    if (!ac_test_do_access(&at2))
        goto err;

    return 1;

err:
    printf("corrupt_hugepage_triger test fail\n");
    return 0;
}
766 
767 /*
 * This test case is used to trigger the bug which is fixed by
769  * commit 3ddf6c06e13e in the kvm tree
770  */
/*
 * Map two pages that share the same PD/PT pages (30MB), one plain and
 * one NX, then access both: the second access checks that KVM reports
 * the right #PF error code on its prefetch-PTE path.
 * Returns 1 on success, 0 on failure.
 */
static int check_pfec_on_prefetch_pte(ac_pool_t *pool)
{
	ac_test_t at1, at2;

	ac_test_init(&at1, (void *)(0x123406001000));
	ac_test_init(&at2, (void *)(0x123406003000));

	at1.flags[AC_PDE_PRESENT] = 1;
	at1.flags[AC_PTE_PRESENT] = 1;
	/* Both mappings deliberately share the same table pages. */
	ac_setup_specific_pages(&at1, pool, 30 * 1024 * 1024, 30 * 1024 * 1024);

	at2.flags[AC_PDE_PRESENT] = 1;
	at2.flags[AC_PTE_NX] = 1;
	at2.flags[AC_PTE_PRESENT] = 1;
	ac_setup_specific_pages(&at2, pool, 30 * 1024 * 1024, 30 * 1024 * 1024);

	if (!ac_test_do_access(&at1)) {
		printf("%s: prepare fail\n", __FUNCTION__);
		goto err;
	}

	if (!ac_test_do_access(&at2)) {
		printf("%s: check PFEC on prefetch pte path fail\n",
			__FUNCTION__);
		goto err;
	}

	return 1;

err:
    return 0;
}
803 
804 /*
805  * If the write-fault access is from supervisor and CR0.WP is not set on the
806  * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte
807  * and clears U bit. This is the chance that kvm can change pte access from
808  * readonly to writable.
809  *
810  * Unfortunately, the pte access is the access of 'direct' shadow page table,
811  * means direct sp.role.access = pte_access, then we will create a writable
812  * spte entry on the readonly shadow page table. It will cause Dirty bit is
813  * not tracked when two guest ptes point to the same large page. Note, it
814  * does not have other impact except Dirty bit since cr0.wp is encoded into
815  * sp.role.
816  *
817  * Note: to trigger this bug, hugepage should be disabled on host.
818  */
/*
 * Regression test for dirty-bit tracking on large pages when CR0.WP=0
 * (see the commentary above): read through one large-page mapping,
 * write through a second, then write through the first again and verify
 * its dirty bit is still tracked.  Returns 1 on success, 0 on failure.
 */
static int check_large_pte_dirty_for_nowp(ac_pool_t *pool)
{
	ac_test_t at1, at2;

	ac_test_init(&at1, (void *)(0x123403000000));
	ac_test_init(&at2, (void *)(0x666606000000));

	/* Read-only read through the first large page. */
	at2.flags[AC_PDE_PRESENT] = 1;
	at2.flags[AC_PDE_PSE] = 1;

	ac_test_setup_pte(&at2, pool);
	if (!ac_test_do_access(&at2)) {
		printf("%s: read on the first mapping fail.\n", __FUNCTION__);
		goto err;
	}

	/* Supervisor write with CR0.WP=0 (flag left clear) on the second. */
	at1.flags[AC_PDE_PRESENT] = 1;
	at1.flags[AC_PDE_PSE] = 1;
	at1.flags[AC_ACCESS_WRITE] = 1;

	ac_test_setup_pte(&at1, pool);
	if (!ac_test_do_access(&at1)) {
		printf("%s: write on the second mapping fail.\n", __FUNCTION__);
		goto err;
	}

	/* Write through the first mapping again; D bit must be tracked. */
	at2.flags[AC_ACCESS_WRITE] = 1;
	ac_set_expected_status(&at2);
	if (!ac_test_do_access(&at2)) {
		printf("%s: write on the first mapping fail.\n", __FUNCTION__);
		goto err;
	}

	return 1;

err:
	return 0;
}
857 
/*
 * Check SMEP enforcement when CR0.WP=0: write a read-only user page
 * (allowed by the supervisor when WP=0), then fetch from it and expect
 * a SMEP fault.  Returns 1 on success, 0 on failure.
 */
static int check_smep_andnot_wp(ac_pool_t *pool)
{
	ac_test_t at1;
	int err_prepare_andnot_wp, err_smep_andnot_wp;
	extern u64 ptl2[];   /* PDEs of the fixed boot mapping (startup code) */

	ac_test_init(&at1, (void *)(0x123406001000));

	at1.flags[AC_PDE_PRESENT] = 1;
	at1.flags[AC_PTE_PRESENT] = 1;
	at1.flags[AC_PDE_USER] = 1;
	at1.flags[AC_PTE_USER] = 1;
	at1.flags[AC_PDE_ACCESSED] = 1;
	at1.flags[AC_PTE_ACCESSED] = 1;
	at1.flags[AC_CPU_CR4_SMEP] = 1;
	at1.flags[AC_CPU_CR0_WP] = 0;
	at1.flags[AC_ACCESS_WRITE] = 1;
	ac_test_setup_pte(&at1, pool);
	/*
	 * Clear the U/S bit (0x4) in this PDE so the running code page is
	 * supervisor-only while SMEP is on; restored below.
	 * NOTE(review): assumes ptl2[2] covers the test-code region.
	 */
	ptl2[2] -= 0x4;

	/*
	 * Here we write the ro user page when
	 * cr0.wp=0, then we execute it and SMEP
	 * fault should happen.
	 */
	err_prepare_andnot_wp = ac_test_do_access(&at1);
	if (!err_prepare_andnot_wp) {
		printf("%s: SMEP prepare fail\n", __FUNCTION__);
		goto clean_up;
	}

	at1.flags[AC_ACCESS_WRITE] = 0;
	at1.flags[AC_ACCESS_FETCH] = 1;
	ac_set_expected_status(&at1);
	err_smep_andnot_wp = ac_test_do_access(&at1);

clean_up:
	set_cr4_smep(0);
	ptl2[2] += 0x4;

	if (!err_prepare_andnot_wp)
		goto err;
	if (!err_smep_andnot_wp) {
		printf("%s: check SMEP without wp fail\n", __FUNCTION__);
		goto err;
	}
	return 1;

err:
	return 0;
}
909 
910 int ac_test_exec(ac_test_t *at, ac_pool_t *pool)
911 {
912     int r;
913 
914     if (verbose) {
915         ac_test_show(at);
916     }
917     ac_test_setup_pte(at, pool);
918     r = ac_test_do_access(at);
919     return r;
920 }
921 
/* A targeted regression test; returns 1 on success, 0 on failure. */
typedef int (*ac_test_fn)(ac_pool_t *pool);
/* Regression cases run after the exhaustive flag sweep. */
const ac_test_fn ac_test_cases[] =
{
	corrupt_hugepage_triger,
	check_pfec_on_prefetch_pte,
	check_large_pte_dirty_for_nowp,
	check_smep_andnot_wp
};
930 
/*
 * Run every legal combination of AC_* flags, then the targeted
 * regression cases.  Returns nonzero iff every test passed.
 */
int ac_test_run(void)
{
    ac_test_t at;
    ac_pool_t pool;
    int i, tests, successes;
    extern u64 ptl2[];   /* PDEs of the fixed boot mapping (startup code) */

    printf("run\n");
    tests = successes = 0;
    ac_env_int(&pool);
    ac_test_init(&at, (void *)(0x123400000000 + 16 * smp_id()));
    do {
	/*
	 * While SMEP is being tested the page holding the test code must
	 * not be a user page: toggle the U/S bit (0x4) in its PDE.
	 * NOTE(review): assumes ptl2[2] covers the test-code region.
	 */
	if (at.flags[AC_CPU_CR4_SMEP] && (ptl2[2] & 0x4))
		ptl2[2] -= 0x4;
	if (!at.flags[AC_CPU_CR4_SMEP] && !(ptl2[2] & 0x4)) {
		set_cr4_smep(0);
		ptl2[2] += 0x4;
	}

	++tests;
	successes += ac_test_exec(&at, &pool);
    } while (ac_test_bump(&at));

    /*
     * Leave SMEP off and put the user bit back.
     * NOTE(review): the += assumes the U/S bit is clear at this point.
     */
    set_cr4_smep(0);
    ptl2[2] += 0x4;

    for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) {
	++tests;
	successes += ac_test_cases[i](&pool);
    }

    printf("\n%d tests, %d failures\n", tests, tests - successes);

    return successes == tests;
}
966 
967 int main()
968 {
969     int r;
970 
971     cpuid_7_ebx = cpuid(7).b;
972     cpuid_7_ecx = cpuid(7).c;
973 
974     if (cpuid_7_ecx & (1 << 3)) {
975         set_cr4_pke(1);
976         set_cr4_pke(0);
977         /* Now PKRU = 0xFFFFFFFF.  */
978     }
979 
980     printf("starting test\n\n");
981     r = ac_test_run();
982     return r ? 0 : 1;
983 }
984