xref: /kvm-unit-tests/x86/access.c (revision badc98cafea47463b8151eefbc6d4954f6aec6a9)
1 
2 #include "libcflat.h"
3 #include "desc.h"
4 #include "processor.h"
5 #include "asm/page.h"
6 #include "x86/vm.h"
7 
/* The CPU id is hard-wired to 0 in this file. */
#define smp_id() 0

/* Truth values used with the _Bool variables and return values below. */
#define true 1
#define false 0
12 
13 static _Bool verbose = false;
14 
15 typedef unsigned long pt_element_t;
16 static int invalid_mask;
17 static int page_table_levels;
18 
/*
 * Frame-address bits of a paging-structure entry: everything below bit 40
 * that PAGE_MASK keeps (presumably bits 12..39 -- confirm PAGE_MASK).
 */
#define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 40) - 1) & PAGE_MASK))
/*
 * Frame-address bits of a 2MB large-page PDE.  A 2MB frame is 2MB aligned,
 * so every bit below bit 21 is dropped.  The previous definition cleared
 * only bit 21 itself, which removed an address bit and kept bits 12..20.
 */
#define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(((pt_element_t)1 << 21) - 1))
21 
/* Control-register bits toggled by set_cr0_wp() and set_cr4_smep(). */
#define CR0_WP_MASK (1UL << 16)
#define CR4_SMEP_MASK (1UL << 20)

/*
 * Page-fault error-code bits.  The expected error code of every test is
 * assembled from these and compared with the code saved by the handler.
 */
#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)
#define PFERR_RESERVED_MASK (1U << 3)
#define PFERR_FETCH_MASK (1U << 4)
#define PFERR_PK_MASK (1U << 5)

/* EFER MSR index and its NX bit, used by set_efer_nx(). */
#define MSR_EFER 0xc0000080
#define EFER_NX_MASK		(1ull << 11)
34 
/*
 * Index into the paging structure of the given level (1 = lowest) for a
 * virtual address: 9 bits per level, starting at bit 12.
 *
 * The whole expansion is parenthesized so the macro can be used inside a
 * larger expression (e.g. PT_INDEX(a, l) == 0) without '&' binding to the
 * wrong operand.
 */
#define PT_INDEX(address, level)       \
       ((((address) >> (12 + ((level)-1) * 9))) & 511)
37 
38 /*
39  * page table access check tests
40  */
41 
/*
 * Bit positions inside ac_test_t.flags.  Each bit selects one property of
 * a test case; NR_AC_FLAGS is the number of bits in use.
 */
enum {
    /* Bits placed in the level-1 entry (PTE). */
    AC_PTE_PRESENT_BIT = 0,
    AC_PTE_WRITABLE_BIT,
    AC_PTE_USER_BIT,
    AC_PTE_ACCESSED_BIT,
    AC_PTE_DIRTY_BIT,
    AC_PTE_NX_BIT,
    AC_PTE_BIT51_BIT,

    /* Bits placed in the level-2 entry (PDE). */
    AC_PDE_PRESENT_BIT,
    AC_PDE_WRITABLE_BIT,
    AC_PDE_USER_BIT,
    AC_PDE_ACCESSED_BIT,
    AC_PDE_DIRTY_BIT,
    AC_PDE_PSE_BIT,
    AC_PDE_NX_BIT,
    AC_PDE_BIT51_BIT,
    AC_PDE_BIT13_BIT,

    /* Protection-key setup: PKRU access/write disable and the page key. */
    AC_PKU_AD_BIT,
    AC_PKU_WD_BIT,
    AC_PKU_PKEY_BIT,

    /* Kind of access performed on the test address. */
    AC_ACCESS_USER_BIT,
    AC_ACCESS_WRITE_BIT,
    AC_ACCESS_FETCH_BIT,
    AC_ACCESS_TWICE_BIT,

    /* CPU control bits set up before the access. */
    AC_CPU_EFER_NX_BIT,
    AC_CPU_CR0_WP_BIT,
    AC_CPU_CR4_SMEP_BIT,
    AC_CPU_CR4_PKE_BIT,

    NR_AC_FLAGS
};
77 
/* Turns an AC_*_BIT position into the matching (int) flag mask. */
#define AC_FLAG_MASK(bit)     (1 << (bit))

/* PTE properties. */
#define AC_PTE_PRESENT_MASK   AC_FLAG_MASK(AC_PTE_PRESENT_BIT)
#define AC_PTE_WRITABLE_MASK  AC_FLAG_MASK(AC_PTE_WRITABLE_BIT)
#define AC_PTE_USER_MASK      AC_FLAG_MASK(AC_PTE_USER_BIT)
#define AC_PTE_ACCESSED_MASK  AC_FLAG_MASK(AC_PTE_ACCESSED_BIT)
#define AC_PTE_DIRTY_MASK     AC_FLAG_MASK(AC_PTE_DIRTY_BIT)
#define AC_PTE_NX_MASK        AC_FLAG_MASK(AC_PTE_NX_BIT)
#define AC_PTE_BIT51_MASK     AC_FLAG_MASK(AC_PTE_BIT51_BIT)

/* PDE properties. */
#define AC_PDE_PRESENT_MASK   AC_FLAG_MASK(AC_PDE_PRESENT_BIT)
#define AC_PDE_WRITABLE_MASK  AC_FLAG_MASK(AC_PDE_WRITABLE_BIT)
#define AC_PDE_USER_MASK      AC_FLAG_MASK(AC_PDE_USER_BIT)
#define AC_PDE_ACCESSED_MASK  AC_FLAG_MASK(AC_PDE_ACCESSED_BIT)
#define AC_PDE_DIRTY_MASK     AC_FLAG_MASK(AC_PDE_DIRTY_BIT)
#define AC_PDE_PSE_MASK       AC_FLAG_MASK(AC_PDE_PSE_BIT)
#define AC_PDE_NX_MASK        AC_FLAG_MASK(AC_PDE_NX_BIT)
#define AC_PDE_BIT51_MASK     AC_FLAG_MASK(AC_PDE_BIT51_BIT)
#define AC_PDE_BIT13_MASK     AC_FLAG_MASK(AC_PDE_BIT13_BIT)

/* Protection-key properties. */
#define AC_PKU_AD_MASK        AC_FLAG_MASK(AC_PKU_AD_BIT)
#define AC_PKU_WD_MASK        AC_FLAG_MASK(AC_PKU_WD_BIT)
#define AC_PKU_PKEY_MASK      AC_FLAG_MASK(AC_PKU_PKEY_BIT)

/* Access kind. */
#define AC_ACCESS_USER_MASK   AC_FLAG_MASK(AC_ACCESS_USER_BIT)
#define AC_ACCESS_WRITE_MASK  AC_FLAG_MASK(AC_ACCESS_WRITE_BIT)
#define AC_ACCESS_FETCH_MASK  AC_FLAG_MASK(AC_ACCESS_FETCH_BIT)
#define AC_ACCESS_TWICE_MASK  AC_FLAG_MASK(AC_ACCESS_TWICE_BIT)

/* CPU control state. */
#define AC_CPU_EFER_NX_MASK   AC_FLAG_MASK(AC_CPU_EFER_NX_BIT)
#define AC_CPU_CR0_WP_MASK    AC_FLAG_MASK(AC_CPU_CR0_WP_BIT)
#define AC_CPU_CR4_SMEP_MASK  AC_FLAG_MASK(AC_CPU_CR4_SMEP_BIT)
#define AC_CPU_CR4_PKE_MASK   AC_FLAG_MASK(AC_CPU_CR4_PKE_BIT)
109 
110 const char *ac_names[] = {
111     [AC_PTE_PRESENT_BIT] = "pte.p",
112     [AC_PTE_ACCESSED_BIT] = "pte.a",
113     [AC_PTE_WRITABLE_BIT] = "pte.rw",
114     [AC_PTE_USER_BIT] = "pte.user",
115     [AC_PTE_DIRTY_BIT] = "pte.d",
116     [AC_PTE_NX_BIT] = "pte.nx",
117     [AC_PTE_BIT51_BIT] = "pte.51",
118     [AC_PDE_PRESENT_BIT] = "pde.p",
119     [AC_PDE_ACCESSED_BIT] = "pde.a",
120     [AC_PDE_WRITABLE_BIT] = "pde.rw",
121     [AC_PDE_USER_BIT] = "pde.user",
122     [AC_PDE_DIRTY_BIT] = "pde.d",
123     [AC_PDE_PSE_BIT] = "pde.pse",
124     [AC_PDE_NX_BIT] = "pde.nx",
125     [AC_PDE_BIT51_BIT] = "pde.51",
126     [AC_PDE_BIT13_BIT] = "pde.13",
127     [AC_PKU_AD_BIT] = "pkru.ad",
128     [AC_PKU_WD_BIT] = "pkru.wd",
129     [AC_PKU_PKEY_BIT] = "pkey=1",
130     [AC_ACCESS_WRITE_BIT] = "write",
131     [AC_ACCESS_USER_BIT] = "user",
132     [AC_ACCESS_FETCH_BIT] = "fetch",
133     [AC_ACCESS_TWICE_BIT] = "twice",
134     [AC_CPU_EFER_NX_BIT] = "efer.nx",
135     [AC_CPU_CR0_WP_BIT] = "cr0.wp",
136     [AC_CPU_CR4_SMEP_BIT] = "cr4.smep",
137     [AC_CPU_CR4_PKE_BIT] = "cr4.pke",
138 };
139 
140 static inline void *va(pt_element_t phys)
141 {
142     return (void *)phys;
143 }
144 
145 typedef struct {
146     pt_element_t pt_pool;
147     unsigned pt_pool_size;
148     unsigned pt_pool_current;
149 } ac_pool_t;
150 
151 typedef struct {
152     unsigned flags;
153     void *virt;
154     pt_element_t phys;
155     pt_element_t *ptep;
156     pt_element_t expected_pte;
157     pt_element_t *pdep;
158     pt_element_t expected_pde;
159     pt_element_t ignore_pde;
160     int expected_fault;
161     unsigned expected_error;
162 } ac_test_t;
163 
/*
 * Limit/linear-address pair laid out without padding (packed).
 * NOTE(review): not referenced anywhere else in the visible source.
 */
typedef struct {
    unsigned short limit;
    unsigned long linear_addr;
} __attribute__((packed)) descriptor_table_t;
168 
169 
170 static void ac_test_show(ac_test_t *at);
171 
172 static void set_cr0_wp(int wp)
173 {
174     unsigned long cr0 = read_cr0();
175     unsigned long old_cr0 = cr0;
176 
177     cr0 &= ~CR0_WP_MASK;
178     if (wp)
179 	cr0 |= CR0_WP_MASK;
180     if (old_cr0 != cr0)
181         write_cr0(cr0);
182 }
183 
184 static unsigned set_cr4_smep(int smep)
185 {
186     unsigned long cr4 = read_cr4();
187     unsigned long old_cr4 = cr4;
188     extern u64 ptl2[];
189     unsigned r;
190 
191     cr4 &= ~CR4_SMEP_MASK;
192     if (smep)
193 	cr4 |= CR4_SMEP_MASK;
194     if (old_cr4 == cr4)
195         return 0;
196 
197     if (smep)
198         ptl2[2] &= ~PT_USER_MASK;
199     r = write_cr4_checking(cr4);
200     if (r || !smep)
201         ptl2[2] |= PT_USER_MASK;
202     return r;
203 }
204 
205 static void set_cr4_pke(int pke)
206 {
207     unsigned long cr4 = read_cr4();
208     unsigned long old_cr4 = cr4;
209 
210     cr4 &= ~X86_CR4_PKE;
211     if (pke)
212 	cr4 |= X86_CR4_PKE;
213     if (old_cr4 == cr4)
214         return;
215 
216     /* Check that protection keys do not affect accesses when CR4.PKE=0.  */
217     if ((read_cr4() & X86_CR4_PKE) && !pke) {
218         write_pkru(0xfffffffc);
219     }
220     write_cr4(cr4);
221 }
222 
223 static void set_efer_nx(int nx)
224 {
225     unsigned long long efer = rdmsr(MSR_EFER);
226     unsigned long long old_efer = efer;
227 
228     efer &= ~EFER_NX_MASK;
229     if (nx)
230 	efer |= EFER_NX_MASK;
231     if (old_efer != efer)
232         wrmsr(MSR_EFER, efer);
233 }
234 
235 static void ac_env_int(ac_pool_t *pool)
236 {
237     extern char page_fault, kernel_entry;
238     set_idt_entry(14, &page_fault, 0);
239     set_idt_entry(0x20, &kernel_entry, 3);
240 
241     pool->pt_pool = 33 * 1024 * 1024;
242     pool->pt_pool_size = 120 * 1024 * 1024 - pool->pt_pool;
243     pool->pt_pool_current = 0;
244 }
245 
246 static void ac_test_init(ac_test_t *at, void *virt)
247 {
248     wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NX_MASK);
249     set_cr0_wp(1);
250     at->flags = 0;
251     at->virt = virt;
252     at->phys = 32 * 1024 * 1024;
253 }
254 
255 static int ac_test_bump_one(ac_test_t *at)
256 {
257     at->flags = ((at->flags | invalid_mask) + 1) & ~invalid_mask;
258     return at->flags < (1 << NR_AC_FLAGS);
259 }
260 
/*
 * F(AC_FOO) is 1 when AC_FOO_MASK is set in the local variable "flags" of
 * the function using the macro, and 0 otherwise.
 */
#define F(x)  (!!(flags & x##_MASK))
262 
263 static _Bool ac_test_legal(ac_test_t *at)
264 {
265     int flags = at->flags;
266 
267     if (F(AC_ACCESS_FETCH) && F(AC_ACCESS_WRITE))
268 	return false;
269 
270     /*
271      * Since we convert current page to kernel page when cr4.smep=1,
272      * we can't switch to user mode.
273      */
274     if (F(AC_ACCESS_USER) && F(AC_CPU_CR4_SMEP))
275 	return false;
276 
277     /*
278      * Only test protection key faults if CR4.PKE=1.
279      */
280     if (!F(AC_CPU_CR4_PKE) &&
281         (F(AC_PKU_AD) || F(AC_PKU_WD))) {
282 	return false;
283     }
284 
285     /*
286      * pde.bit13 checks handling of reserved bits in largepage PDEs.  It is
287      * meaningless if there is a PTE.
288      */
289     if (!F(AC_PDE_PSE) && F(AC_PDE_BIT13))
290         return false;
291 
292     return true;
293 }
294 
295 static int ac_test_bump(ac_test_t *at)
296 {
297     int ret;
298 
299     ret = ac_test_bump_one(at);
300     while (ret && !ac_test_legal(at))
301 	ret = ac_test_bump_one(at);
302     return ret;
303 }
304 
305 static pt_element_t ac_test_alloc_pt(ac_pool_t *pool)
306 {
307     pt_element_t ret = pool->pt_pool + pool->pt_pool_current;
308     pool->pt_pool_current += PAGE_SIZE;
309     return ret;
310 }
311 
312 static _Bool ac_test_enough_room(ac_pool_t *pool)
313 {
314     return pool->pt_pool_current + 5 * PAGE_SIZE <= pool->pt_pool_size;
315 }
316 
317 static void ac_test_reset_pt_pool(ac_pool_t *pool)
318 {
319     pool->pt_pool_current = 0;
320 }
321 
322 static pt_element_t ac_test_permissions(ac_test_t *at, unsigned flags,
323                                         bool writable, bool user,
324                                         bool executable)
325 {
326     bool kwritable = !F(AC_CPU_CR0_WP) && !F(AC_ACCESS_USER);
327     pt_element_t expected = 0;
328 
329     if (F(AC_ACCESS_USER) && !user)
330 	at->expected_fault = 1;
331 
332     if (F(AC_ACCESS_WRITE) && !writable && !kwritable)
333 	at->expected_fault = 1;
334 
335     if (F(AC_ACCESS_FETCH) && !executable)
336 	at->expected_fault = 1;
337 
338     if (F(AC_ACCESS_FETCH) && user && F(AC_CPU_CR4_SMEP))
339         at->expected_fault = 1;
340 
341     if (user && !F(AC_ACCESS_FETCH) && F(AC_PKU_PKEY) && F(AC_CPU_CR4_PKE)) {
342         if (F(AC_PKU_AD)) {
343             at->expected_fault = 1;
344             at->expected_error |= PFERR_PK_MASK;
345         } else if (F(AC_ACCESS_WRITE) && F(AC_PKU_WD) && !kwritable) {
346             at->expected_fault = 1;
347             at->expected_error |= PFERR_PK_MASK;
348         }
349     }
350 
351     if (!at->expected_fault) {
352         expected |= PT_ACCESSED_MASK;
353         if (F(AC_ACCESS_WRITE))
354             expected |= PT_DIRTY_MASK;
355     }
356 
357     return expected;
358 }
359 
360 static void ac_emulate_access(ac_test_t *at, unsigned flags)
361 {
362     bool pde_valid, pte_valid;
363     bool user, writable, executable;
364 
365     if (F(AC_ACCESS_USER))
366 	at->expected_error |= PFERR_USER_MASK;
367 
368     if (F(AC_ACCESS_WRITE))
369 	at->expected_error |= PFERR_WRITE_MASK;
370 
371     if (F(AC_ACCESS_FETCH))
372 	at->expected_error |= PFERR_FETCH_MASK;
373 
374     if (!F(AC_PDE_ACCESSED))
375         at->ignore_pde = PT_ACCESSED_MASK;
376 
377     pde_valid = F(AC_PDE_PRESENT)
378         && !F(AC_PDE_BIT51) && !F(AC_PDE_BIT13)
379         && !(F(AC_PDE_NX) && !F(AC_CPU_EFER_NX));
380 
381     if (!pde_valid) {
382         at->expected_fault = 1;
383 	if (F(AC_PDE_PRESENT)) {
384             at->expected_error |= PFERR_RESERVED_MASK;
385         } else {
386             at->expected_error &= ~PFERR_PRESENT_MASK;
387         }
388 	goto fault;
389     }
390 
391     writable = F(AC_PDE_WRITABLE);
392     user = F(AC_PDE_USER);
393     executable = !F(AC_PDE_NX);
394 
395     if (F(AC_PDE_PSE)) {
396         at->expected_pde |= ac_test_permissions(at, flags, writable, user,
397                                                 executable);
398 	goto no_pte;
399     }
400 
401     at->expected_pde |= PT_ACCESSED_MASK;
402 
403     pte_valid = F(AC_PTE_PRESENT)
404         && !F(AC_PTE_BIT51)
405         && !(F(AC_PTE_NX) && !F(AC_CPU_EFER_NX));
406 
407     if (!pte_valid) {
408         at->expected_fault = 1;
409 	if (F(AC_PTE_PRESENT)) {
410             at->expected_error |= PFERR_RESERVED_MASK;
411         } else {
412             at->expected_error &= ~PFERR_PRESENT_MASK;
413         }
414 	goto fault;
415     }
416 
417     writable &= F(AC_PTE_WRITABLE);
418     user &= F(AC_PTE_USER);
419     executable &= !F(AC_PTE_NX);
420 
421     at->expected_pte |= ac_test_permissions(at, flags, writable, user,
422                                             executable);
423 
424 no_pte:
425 fault:
426     if (!at->expected_fault)
427         at->ignore_pde = 0;
428     if (!F(AC_CPU_EFER_NX) && !F(AC_CPU_CR4_SMEP))
429         at->expected_error &= ~PFERR_FETCH_MASK;
430 }
431 
432 static void ac_set_expected_status(ac_test_t *at)
433 {
434     invlpg(at->virt);
435 
436     if (at->ptep)
437 	at->expected_pte = *at->ptep;
438     at->expected_pde = *at->pdep;
439     at->ignore_pde = 0;
440     at->expected_fault = 0;
441     at->expected_error = PFERR_PRESENT_MASK;
442 
443     if (at->flags & AC_ACCESS_TWICE_MASK) {
444         ac_emulate_access(at, at->flags & ~AC_ACCESS_WRITE_MASK
445                           & ~AC_ACCESS_FETCH_MASK & ~AC_ACCESS_USER_MASK);
446         at->expected_fault = 0;
447 	at->expected_error = PFERR_PRESENT_MASK;
448         at->ignore_pde = 0;
449     }
450 
451     ac_emulate_access(at, at->flags);
452 }
453 
454 static void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool,
455 				      u64 pd_page, u64 pt_page)
456 
457 {
458     unsigned long root = read_cr3();
459     int flags = at->flags;
460     bool skip = true;
461 
462     if (!ac_test_enough_room(pool))
463 	ac_test_reset_pt_pool(pool);
464 
465     at->ptep = 0;
466     for (int i = page_table_levels; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) {
467 	pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK);
468 	unsigned index = PT_INDEX((unsigned long)at->virt, i);
469 	pt_element_t pte = 0;
470 
471 	/*
472 	 * Reuse existing page tables along the path to the test code and data
473 	 * (which is in the bottom 2MB).
474 	 */
475 	if (skip && i >= 2 && index == 0) {
476 	    goto next;
477 	}
478 	skip = false;
479 
480 	switch (i) {
481 	case 5:
482 	case 4:
483 	case 3:
484 	    pte = pd_page ? pd_page : ac_test_alloc_pt(pool);
485 	    pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
486 	    break;
487 	case 2:
488 	    if (!F(AC_PDE_PSE)) {
489 		pte = pt_page ? pt_page : ac_test_alloc_pt(pool);
490 		/* The protection key is ignored on non-leaf entries.  */
491                 if (F(AC_PKU_PKEY))
492                     pte |= 2ull << 59;
493 	    } else {
494 		pte = at->phys & PT_PSE_BASE_ADDR_MASK;
495 		pte |= PT_PAGE_SIZE_MASK;
496                 if (F(AC_PKU_PKEY))
497                     pte |= 1ull << 59;
498 	    }
499 	    if (F(AC_PDE_PRESENT))
500 		pte |= PT_PRESENT_MASK;
501 	    if (F(AC_PDE_WRITABLE))
502 		pte |= PT_WRITABLE_MASK;
503 	    if (F(AC_PDE_USER))
504 		pte |= PT_USER_MASK;
505 	    if (F(AC_PDE_ACCESSED))
506 		pte |= PT_ACCESSED_MASK;
507 	    if (F(AC_PDE_DIRTY))
508 		pte |= PT_DIRTY_MASK;
509 	    if (F(AC_PDE_NX))
510 		pte |= PT64_NX_MASK;
511 	    if (F(AC_PDE_BIT51))
512 		pte |= 1ull << 51;
513 	    if (F(AC_PDE_BIT13))
514 		pte |= 1ull << 13;
515 	    at->pdep = &vroot[index];
516 	    break;
517 	case 1:
518 	    pte = at->phys & PT_BASE_ADDR_MASK;
519 	    if (F(AC_PKU_PKEY))
520 		pte |= 1ull << 59;
521 	    if (F(AC_PTE_PRESENT))
522 		pte |= PT_PRESENT_MASK;
523 	    if (F(AC_PTE_WRITABLE))
524 		pte |= PT_WRITABLE_MASK;
525 	    if (F(AC_PTE_USER))
526 		pte |= PT_USER_MASK;
527 	    if (F(AC_PTE_ACCESSED))
528 		pte |= PT_ACCESSED_MASK;
529 	    if (F(AC_PTE_DIRTY))
530 		pte |= PT_DIRTY_MASK;
531 	    if (F(AC_PTE_NX))
532 		pte |= PT64_NX_MASK;
533 	    if (F(AC_PTE_BIT51))
534 		pte |= 1ull << 51;
535 	    at->ptep = &vroot[index];
536 	    break;
537 	}
538 	vroot[index] = pte;
539  next:
540 	root = vroot[index];
541     }
542     ac_set_expected_status(at);
543 }
544 
545 static void ac_test_setup_pte(ac_test_t *at, ac_pool_t *pool)
546 {
547 	__ac_setup_specific_pages(at, pool, 0, 0);
548 }
549 
550 static void ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool,
551 				    u64 pd_page, u64 pt_page)
552 {
553 	return __ac_setup_specific_pages(at, pool, pd_page, pt_page);
554 }
555 
556 static void dump_mapping(ac_test_t *at)
557 {
558 	unsigned long root = read_cr3();
559         int flags = at->flags;
560 	int i;
561 
562 	printf("Dump mapping: address: %p\n", at->virt);
563 	for (i = page_table_levels ; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) {
564 		pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK);
565 		unsigned index = PT_INDEX((unsigned long)at->virt, i);
566 		pt_element_t pte = vroot[index];
567 
568 		printf("------L%d: %lx\n", i, pte);
569 		root = vroot[index];
570 	}
571 }
572 
573 static void ac_test_check(ac_test_t *at, _Bool *success_ret, _Bool cond,
574                           const char *fmt, ...)
575 {
576     va_list ap;
577     char buf[500];
578 
579     if (!*success_ret) {
580         return;
581     }
582 
583     if (!cond) {
584         return;
585     }
586 
587     *success_ret = false;
588 
589     if (!verbose) {
590         puts("\n");
591         ac_test_show(at);
592     }
593 
594     va_start(ap, fmt);
595     vsnprintf(buf, sizeof(buf), fmt, ap);
596     va_end(ap);
597     printf("FAIL: %s\n", buf);
598     dump_mapping(at);
599 }
600 
601 static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore)
602 {
603     pte1 &= ~ignore;
604     pte2 &= ~ignore;
605     return pte1 == pte2;
606 }
607 
608 static int ac_test_do_access(ac_test_t *at)
609 {
610     static unsigned unique = 42;
611     int fault = 0;
612     unsigned e;
613     static unsigned char user_stack[4096];
614     unsigned long rsp;
615     _Bool success = true;
616     int flags = at->flags;
617 
618     ++unique;
619     if (!(unique & 65535)) {
620         puts(".");
621     }
622 
623     *((unsigned char *)at->phys) = 0xc3; /* ret */
624 
625     unsigned r = unique;
626     set_cr0_wp(F(AC_CPU_CR0_WP));
627     set_efer_nx(F(AC_CPU_EFER_NX));
628     set_cr4_pke(F(AC_CPU_CR4_PKE));
629     if (F(AC_CPU_CR4_PKE)) {
630         /* WD2=AD2=1, WD1=F(AC_PKU_WD), AD1=F(AC_PKU_AD) */
631         write_pkru(0x30 | (F(AC_PKU_WD) ? 8 : 0) |
632                    (F(AC_PKU_AD) ? 4 : 0));
633     }
634 
635     set_cr4_smep(F(AC_CPU_CR4_SMEP));
636 
637     if (F(AC_ACCESS_TWICE)) {
638 	asm volatile (
639 	    "mov $fixed2, %%rsi \n\t"
640 	    "mov (%[addr]), %[reg] \n\t"
641 	    "fixed2:"
642 	    : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e)
643 	    : [addr]"r"(at->virt)
644 	    : "rsi"
645 	    );
646 	fault = 0;
647     }
648 
649     asm volatile ("mov $fixed1, %%rsi \n\t"
650 		  "mov %%rsp, %%rdx \n\t"
651 		  "cmp $0, %[user] \n\t"
652 		  "jz do_access \n\t"
653 		  "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax  \n\t"
654 		  "pushq %[user_ds] \n\t"
655 		  "pushq %[user_stack_top] \n\t"
656 		  "pushfq \n\t"
657 		  "pushq %[user_cs] \n\t"
658 		  "pushq $do_access \n\t"
659 		  "iretq \n"
660 		  "do_access: \n\t"
661 		  "cmp $0, %[fetch] \n\t"
662 		  "jnz 2f \n\t"
663 		  "cmp $0, %[write] \n\t"
664 		  "jnz 1f \n\t"
665 		  "mov (%[addr]), %[reg] \n\t"
666 		  "jmp done \n\t"
667 		  "1: mov %[reg], (%[addr]) \n\t"
668 		  "jmp done \n\t"
669 		  "2: call *%[addr] \n\t"
670 		  "done: \n"
671 		  "fixed1: \n"
672 		  "int %[kernel_entry_vector] \n\t"
673 		  "back_to_kernel:"
674 		  : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp)
675 		  : [addr]"r"(at->virt),
676 		    [write]"r"(F(AC_ACCESS_WRITE)),
677 		    [user]"r"(F(AC_ACCESS_USER)),
678 		    [fetch]"r"(F(AC_ACCESS_FETCH)),
679 		    [user_ds]"i"(USER_DS),
680 		    [user_cs]"i"(USER_CS),
681 		    [user_stack_top]"r"(user_stack + sizeof user_stack),
682 		    [kernel_entry_vector]"i"(0x20)
683 		  : "rsi");
684 
685     asm volatile (".section .text.pf \n\t"
686 		  "page_fault: \n\t"
687 		  "pop %rbx \n\t"
688 		  "mov %rsi, (%rsp) \n\t"
689 		  "movl $1, %eax \n\t"
690 		  "iretq \n\t"
691 		  ".section .text");
692 
693     asm volatile (".section .text.entry \n\t"
694 		  "kernel_entry: \n\t"
695 		  "mov %rdx, %rsp \n\t"
696 		  "jmp back_to_kernel \n\t"
697 		  ".section .text");
698 
699     ac_test_check(at, &success, fault && !at->expected_fault,
700                   "unexpected fault");
701     ac_test_check(at, &success, !fault && at->expected_fault,
702                   "unexpected access");
703     ac_test_check(at, &success, fault && e != at->expected_error,
704                   "error code %x expected %x", e, at->expected_error);
705     if (at->ptep)
706         ac_test_check(at, &success, *at->ptep != at->expected_pte,
707                       "pte %x expected %x", *at->ptep, at->expected_pte);
708     ac_test_check(at, &success,
709                   !pt_match(*at->pdep, at->expected_pde, at->ignore_pde),
710                   "pde %x expected %x", *at->pdep, at->expected_pde);
711 
712     if (success && verbose) {
713 	if (at->expected_fault) {
714             printf("PASS (%x)\n", at->expected_error);
715 	} else {
716             printf("PASS\n");
717 	}
718     }
719     return success;
720 }
721 
722 static void ac_test_show(ac_test_t *at)
723 {
724     char line[5000];
725 
726     *line = 0;
727     strcat(line, "test");
728     for (int i = 0; i < NR_AC_FLAGS; ++i)
729 	if (at->flags & (1 << i)) {
730 	    strcat(line, " ");
731 	    strcat(line, ac_names[i]);
732 	}
733     strcat(line, ": ");
734     printf("%s", line);
735 }
736 
737 /*
738  * This test case is used to triger the bug which is fixed by
739  * commit e09e90a5 in the kvm tree
740  */
741 static int corrupt_hugepage_triger(ac_pool_t *pool)
742 {
743     ac_test_t at1, at2;
744 
745     ac_test_init(&at1, (void *)(0x123400000000));
746     ac_test_init(&at2, (void *)(0x666600000000));
747 
748     at2.flags = AC_CPU_CR0_WP_MASK | AC_PDE_PSE_MASK | AC_PDE_PRESENT_MASK;
749     ac_test_setup_pte(&at2, pool);
750     if (!ac_test_do_access(&at2))
751         goto err;
752 
753     at1.flags = at2.flags | AC_PDE_WRITABLE_MASK;
754     ac_test_setup_pte(&at1, pool);
755     if (!ac_test_do_access(&at1))
756         goto err;
757 
758     at1.flags |= AC_ACCESS_WRITE_MASK;
759     ac_set_expected_status(&at1);
760     if (!ac_test_do_access(&at1))
761         goto err;
762 
763     at2.flags |= AC_ACCESS_WRITE_MASK;
764     ac_set_expected_status(&at2);
765     if (!ac_test_do_access(&at2))
766         goto err;
767 
768     return 1;
769 
770 err:
771     printf("corrupt_hugepage_triger test fail\n");
772     return 0;
773 }
774 
775 /*
776  * This test case is used to triger the bug which is fixed by
777  * commit 3ddf6c06e13e in the kvm tree
778  */
779 static int check_pfec_on_prefetch_pte(ac_pool_t *pool)
780 {
781 	ac_test_t at1, at2;
782 
783 	ac_test_init(&at1, (void *)(0x123406001000));
784 	ac_test_init(&at2, (void *)(0x123406003000));
785 
786 	at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK;
787 	ac_setup_specific_pages(&at1, pool, 30 * 1024 * 1024, 30 * 1024 * 1024);
788 
789         at2.flags = at1.flags | AC_PTE_NX_MASK;
790 	ac_setup_specific_pages(&at2, pool, 30 * 1024 * 1024, 30 * 1024 * 1024);
791 
792 	if (!ac_test_do_access(&at1)) {
793 		printf("%s: prepare fail\n", __FUNCTION__);
794 		goto err;
795 	}
796 
797 	if (!ac_test_do_access(&at2)) {
798 		printf("%s: check PFEC on prefetch pte path fail\n",
799 			__FUNCTION__);
800 		goto err;
801 	}
802 
803 	return 1;
804 
805 err:
806     return 0;
807 }
808 
809 /*
810  * If the write-fault access is from supervisor and CR0.WP is not set on the
811  * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte
812  * and clears U bit. This is the chance that kvm can change pte access from
813  * readonly to writable.
814  *
815  * Unfortunately, the pte access is the access of 'direct' shadow page table,
816  * means direct sp.role.access = pte_access, then we will create a writable
817  * spte entry on the readonly shadow page table. It will cause Dirty bit is
818  * not tracked when two guest ptes point to the same large page. Note, it
819  * does not have other impact except Dirty bit since cr0.wp is encoded into
820  * sp.role.
821  *
822  * Note: to trigger this bug, hugepage should be disabled on host.
823  */
824 static int check_large_pte_dirty_for_nowp(ac_pool_t *pool)
825 {
826 	ac_test_t at1, at2;
827 
828 	ac_test_init(&at1, (void *)(0x123403000000));
829 	ac_test_init(&at2, (void *)(0x666606000000));
830 
831         at2.flags = AC_PDE_PRESENT_MASK | AC_PDE_PSE_MASK;
832 	ac_test_setup_pte(&at2, pool);
833 	if (!ac_test_do_access(&at2)) {
834 		printf("%s: read on the first mapping fail.\n", __FUNCTION__);
835 		goto err;
836 	}
837 
838         at1.flags = at2.flags | AC_ACCESS_WRITE_MASK;
839 	ac_test_setup_pte(&at1, pool);
840 	if (!ac_test_do_access(&at1)) {
841 		printf("%s: write on the second mapping fail.\n", __FUNCTION__);
842 		goto err;
843 	}
844 
845 	at2.flags |= AC_ACCESS_WRITE_MASK;
846 	ac_set_expected_status(&at2);
847 	if (!ac_test_do_access(&at2)) {
848 		printf("%s: write on the first mapping fail.\n", __FUNCTION__);
849 		goto err;
850 	}
851 
852 	return 1;
853 
854 err:
855 	return 0;
856 }
857 
858 static int check_smep_andnot_wp(ac_pool_t *pool)
859 {
860 	ac_test_t at1;
861 	int err_prepare_andnot_wp, err_smep_andnot_wp;
862 
863 	if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
864 	    return 1;
865 	}
866 
867 	ac_test_init(&at1, (void *)(0x123406001000));
868 
869 	at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK |
870             AC_PDE_USER_MASK | AC_PTE_USER_MASK |
871             AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK |
872             AC_CPU_CR4_SMEP_MASK |
873             AC_CPU_CR0_WP_MASK |
874             AC_ACCESS_WRITE_MASK;
875 	ac_test_setup_pte(&at1, pool);
876 
877 	/*
878 	 * Here we write the ro user page when
879 	 * cr0.wp=0, then we execute it and SMEP
880 	 * fault should happen.
881 	 */
882 	err_prepare_andnot_wp = ac_test_do_access(&at1);
883 	if (!err_prepare_andnot_wp) {
884 		printf("%s: SMEP prepare fail\n", __FUNCTION__);
885 		goto clean_up;
886 	}
887 
888         at1.flags &= ~AC_ACCESS_WRITE_MASK;
889         at1.flags |= AC_ACCESS_FETCH_MASK;
890         ac_set_expected_status(&at1);
891         err_smep_andnot_wp = ac_test_do_access(&at1);
892 
893 clean_up:
894 	set_cr4_smep(0);
895 
896 	if (!err_prepare_andnot_wp)
897 		goto err;
898 	if (!err_smep_andnot_wp) {
899 		printf("%s: check SMEP without wp fail\n", __FUNCTION__);
900 		goto err;
901 	}
902 	return 1;
903 
904 err:
905 	return 0;
906 }
907 
908 static int ac_test_exec(ac_test_t *at, ac_pool_t *pool)
909 {
910     int r;
911 
912     if (verbose) {
913         ac_test_show(at);
914     }
915     ac_test_setup_pte(at, pool);
916     r = ac_test_do_access(at);
917     return r;
918 }
919 
920 typedef int (*ac_test_fn)(ac_pool_t *pool);
921 const ac_test_fn ac_test_cases[] =
922 {
923 	corrupt_hugepage_triger,
924 	check_pfec_on_prefetch_pte,
925 	check_large_pte_dirty_for_nowp,
926 	check_smep_andnot_wp
927 };
928 
929 static int ac_test_run(void)
930 {
931     ac_test_t at;
932     ac_pool_t pool;
933     int i, tests, successes;
934 
935     printf("run\n");
936     tests = successes = 0;
937 
938     if (this_cpu_has(X86_FEATURE_PKU)) {
939         set_cr4_pke(1);
940         set_cr4_pke(0);
941         /* Now PKRU = 0xFFFFFFFF.  */
942     } else {
943 	unsigned long cr4 = read_cr4();
944 	tests++;
945 	if (write_cr4_checking(cr4 | X86_CR4_PKE) == GP_VECTOR) {
946             successes++;
947             invalid_mask |= AC_PKU_AD_MASK;
948             invalid_mask |= AC_PKU_WD_MASK;
949             invalid_mask |= AC_PKU_PKEY_MASK;
950             invalid_mask |= AC_CPU_CR4_PKE_MASK;
951             printf("CR4.PKE not available, disabling PKE tests\n");
952 	} else {
953             printf("Set PKE in CR4 - expect #GP: FAIL!\n");
954             set_cr4_pke(0);
955 	}
956     }
957 
958     if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
959 	tests++;
960 	if (set_cr4_smep(1) == GP_VECTOR) {
961             successes++;
962             invalid_mask |= AC_CPU_CR4_SMEP_MASK;
963             printf("CR4.SMEP not available, disabling SMEP tests\n");
964 	} else {
965             printf("Set SMEP in CR4 - expect #GP: FAIL!\n");
966             set_cr4_smep(0);
967 	}
968     }
969 
970     ac_env_int(&pool);
971     ac_test_init(&at, (void *)(0x123400000000 + 16 * smp_id()));
972     do {
973 	++tests;
974 	successes += ac_test_exec(&at, &pool);
975     } while (ac_test_bump(&at));
976 
977     for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) {
978 	++tests;
979 	successes += ac_test_cases[i](&pool);
980     }
981 
982     printf("\n%d tests, %d failures\n", tests, tests - successes);
983 
984     return successes == tests;
985 }
986 
987 int main(void)
988 {
989     int r;
990 
991     setup_idt();
992 
993     printf("starting test\n\n");
994     page_table_levels = 4;
995     r = ac_test_run();
996 
997     if (this_cpu_has(X86_FEATURE_LA57)) {
998         page_table_levels = 5;
999         setup_5level_page_table();
1000         printf("starting 5-level paging test.\n\n");
1001         r = ac_test_run();
1002     }
1003 
1004     return r ? 0 : 1;
1005 }
1006