xref: /kvm-unit-tests/x86/access.c (revision 1d0f08f40d53daa39566842ec46a112db5f7e524)
1 #include "libcflat.h"
2 #include "desc.h"
3 #include "processor.h"
4 #include "asm/page.h"
5 #include "x86/vm.h"
6 #include "access.h"
7 
/* Boolean literals for use with _Bool (file predates reliance on <stdbool.h>). */
#define true 1
#define false 0

/* When set, print a PASS line for every individual sub-test. */
static _Bool verbose = false;

/* A paging-structure entry; the test runs in 64-bit mode, so 64 bits wide. */
typedef unsigned long pt_element_t;
/* Flag bits that must stay clear while enumerating test permutations. */
static int invalid_mask;
15 
/*
 * Test code/data is at 32MiB, paging structures at 33MiB.
 *
 * Parenthesize the expansions so the macros stay correct inside larger
 * expressions (e.g. with adjacent higher-precedence operators).
 */
#define AT_CODE_DATA_PHYS	  (32 * 1024 * 1024)
#define AT_PAGING_STRUCTURES_PHYS (33 * 1024 * 1024)
19 
/* Physical-address field of an entry: bits 12..35 (test confines itself to 36 bits). */
#define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 36) - 1) & PAGE_MASK))
/* Same for a 2MiB PSE mapping: bit 21 is part of the page offset, not the address. */
#define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(1ull << 21))

/* #PF error-code bits as pushed by the CPU (Intel SDM vol. 3, "Page-Fault Exceptions"). */
#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)
#define PFERR_RESERVED_MASK (1U << 3)
#define PFERR_FETCH_MASK (1U << 4)
#define PFERR_PK_MASK (1U << 5)

#define MSR_EFER 0xc0000080
#define EFER_NX_MASK            (1ull << 11)

/* Index into the paging-structure page at 'level' (1 == PTE) for 'address'. */
#define PT_INDEX(address, level)       \
	  (((address) >> (12 + ((level)-1) * 9)) & 511)
35 
36 /*
37  * Page table access check tests.  Each number/bit represent an individual
38  * test case.  The main test will bump a counter by 1 to run all permutations
39  * of the below test cases (sans illegal combinations).
40  *
41  * Keep the PRESENT and reserved bits in the higher numbers so that they aren't
42  * toggled on every test, e.g. to keep entries in the TLB.
43  */
enum {
	/* Knobs for the leaf 4KiB PTE installed by the test. */
	AC_PTE_WRITABLE_BIT,
	AC_PTE_USER_BIT,
	AC_PTE_ACCESSED_BIT,
	AC_PTE_DIRTY_BIT,
	AC_PTE_NX_BIT,
	AC_PTE_PRESENT_BIT,
	AC_PTE_BIT51_BIT,
	AC_PTE_BIT36_BIT,

	/* Knobs for the level-2 PDE, including 2MiB PSE mappings. */
	AC_PDE_WRITABLE_BIT,
	AC_PDE_USER_BIT,
	AC_PDE_ACCESSED_BIT,
	AC_PDE_DIRTY_BIT,
	AC_PDE_PSE_BIT,
	AC_PDE_NX_BIT,
	AC_PDE_PRESENT_BIT,
	AC_PDE_BIT51_BIT,
	AC_PDE_BIT36_BIT,
	AC_PDE_BIT13_BIT,

	/*
	 *  special test case to DISABLE writable bit on page directory
	 *  pointer table entry.
	 */
	AC_PDPTE_NO_WRITABLE_BIT,

	/* Protection-key knobs: PKRU AD/WD bits, and tagging the page with a key. */
	AC_PKU_AD_BIT,
	AC_PKU_WD_BIT,
	AC_PKU_PKEY_BIT,

	/* How the access itself is performed. */
	AC_ACCESS_USER_BIT,
	AC_ACCESS_WRITE_BIT,
	AC_ACCESS_FETCH_BIT,
	AC_ACCESS_TWICE_BIT,

	/* CPU control-register / MSR state in effect for the access. */
	AC_CPU_EFER_NX_BIT,
	AC_CPU_CR0_WP_BIT,
	AC_CPU_CR4_SMEP_BIT,
	AC_CPU_CR4_PKE_BIT,

	/* Force emulation of the access via the KVM_FEP prefix. */
	AC_FEP_BIT,

	NR_AC_FLAGS,
};
89 
/* Single-bit masks corresponding to the AC_*_BIT test-case numbers above. */
#define AC_PTE_PRESENT_MASK   (1 << AC_PTE_PRESENT_BIT)
#define AC_PTE_WRITABLE_MASK  (1 << AC_PTE_WRITABLE_BIT)
#define AC_PTE_USER_MASK      (1 << AC_PTE_USER_BIT)
#define AC_PTE_ACCESSED_MASK  (1 << AC_PTE_ACCESSED_BIT)
#define AC_PTE_DIRTY_MASK     (1 << AC_PTE_DIRTY_BIT)
#define AC_PTE_NX_MASK        (1 << AC_PTE_NX_BIT)
#define AC_PTE_BIT51_MASK     (1 << AC_PTE_BIT51_BIT)
#define AC_PTE_BIT36_MASK     (1 << AC_PTE_BIT36_BIT)

#define AC_PDE_PRESENT_MASK   (1 << AC_PDE_PRESENT_BIT)
#define AC_PDE_WRITABLE_MASK  (1 << AC_PDE_WRITABLE_BIT)
#define AC_PDE_USER_MASK      (1 << AC_PDE_USER_BIT)
#define AC_PDE_ACCESSED_MASK  (1 << AC_PDE_ACCESSED_BIT)
#define AC_PDE_DIRTY_MASK     (1 << AC_PDE_DIRTY_BIT)
#define AC_PDE_PSE_MASK       (1 << AC_PDE_PSE_BIT)
#define AC_PDE_NX_MASK        (1 << AC_PDE_NX_BIT)
#define AC_PDE_BIT51_MASK     (1 << AC_PDE_BIT51_BIT)
#define AC_PDE_BIT36_MASK     (1 << AC_PDE_BIT36_BIT)
#define AC_PDE_BIT13_MASK     (1 << AC_PDE_BIT13_BIT)

#define AC_PDPTE_NO_WRITABLE_MASK  (1 << AC_PDPTE_NO_WRITABLE_BIT)

#define AC_PKU_AD_MASK        (1 << AC_PKU_AD_BIT)
#define AC_PKU_WD_MASK        (1 << AC_PKU_WD_BIT)
#define AC_PKU_PKEY_MASK      (1 << AC_PKU_PKEY_BIT)

#define AC_ACCESS_USER_MASK   (1 << AC_ACCESS_USER_BIT)
#define AC_ACCESS_WRITE_MASK  (1 << AC_ACCESS_WRITE_BIT)
#define AC_ACCESS_FETCH_MASK  (1 << AC_ACCESS_FETCH_BIT)
#define AC_ACCESS_TWICE_MASK  (1 << AC_ACCESS_TWICE_BIT)

#define AC_CPU_EFER_NX_MASK   (1 << AC_CPU_EFER_NX_BIT)
#define AC_CPU_CR0_WP_MASK    (1 << AC_CPU_CR0_WP_BIT)
#define AC_CPU_CR4_SMEP_MASK  (1 << AC_CPU_CR4_SMEP_BIT)
#define AC_CPU_CR4_PKE_MASK   (1 << AC_CPU_CR4_PKE_BIT)

#define AC_FEP_MASK           (1 << AC_FEP_BIT)
127 
/* Human-readable name for each AC_*_BIT, used when printing a test line. */
const char *ac_names[] = {
	[AC_PTE_PRESENT_BIT] = "pte.p",
	[AC_PTE_ACCESSED_BIT] = "pte.a",
	[AC_PTE_WRITABLE_BIT] = "pte.rw",
	[AC_PTE_USER_BIT] = "pte.user",
	[AC_PTE_DIRTY_BIT] = "pte.d",
	[AC_PTE_NX_BIT] = "pte.nx",
	[AC_PTE_BIT51_BIT] = "pte.51",
	[AC_PTE_BIT36_BIT] = "pte.36",
	[AC_PDE_PRESENT_BIT] = "pde.p",
	[AC_PDE_ACCESSED_BIT] = "pde.a",
	[AC_PDE_WRITABLE_BIT] = "pde.rw",
	[AC_PDE_USER_BIT] = "pde.user",
	[AC_PDE_DIRTY_BIT] = "pde.d",
	[AC_PDE_PSE_BIT] = "pde.pse",
	[AC_PDE_NX_BIT] = "pde.nx",
	[AC_PDE_BIT51_BIT] = "pde.51",
	[AC_PDE_BIT36_BIT] = "pde.36",
	[AC_PDE_BIT13_BIT] = "pde.13",
	[AC_PDPTE_NO_WRITABLE_BIT] = "pdpte.ro",
	[AC_PKU_AD_BIT] = "pkru.ad",
	[AC_PKU_WD_BIT] = "pkru.wd",
	[AC_PKU_PKEY_BIT] = "pkey=1",
	[AC_ACCESS_WRITE_BIT] = "write",
	[AC_ACCESS_USER_BIT] = "user",
	[AC_ACCESS_FETCH_BIT] = "fetch",
	[AC_ACCESS_TWICE_BIT] = "twice",
	[AC_CPU_EFER_NX_BIT] = "efer.nx",
	[AC_CPU_CR0_WP_BIT] = "cr0.wp",
	[AC_CPU_CR4_SMEP_BIT] = "cr4.smep",
	[AC_CPU_CR4_PKE_BIT] = "cr4.pke",
	[AC_FEP_BIT] = "fep",
};
161 
/*
 * Convert a physical address to a usable pointer.  The cast-only conversion
 * assumes the addresses involved are identity-mapped (virt == phys) — the
 * test writes page-table pool pages through this (see ac_test_alloc_pt).
 */
static inline void *va(pt_element_t phys)
{
	return (void *)phys;
}
166 
/* Bump allocator state for the fixed pool of paging-structure pages. */
typedef struct {
	pt_element_t pt_pool_pa;	/* physical base of the pool */
	unsigned int pt_pool_current;	/* pages already handed out */
	int pt_levels;			/* number of paging levels in use */
} ac_pt_env_t;

/* Full state of one access-test scenario. */
typedef struct {
	unsigned flags;			/* combination of AC_*_MASK knobs */
	void *virt;			/* virtual address being accessed */
	pt_element_t phys;		/* backing physical address */
	pt_element_t *ptep;		/* leaf PTE; 0 when using a PSE mapping */
	pt_element_t expected_pte;	/* PTE value expected after the access */
	pt_element_t *pdep;		/* level-2 PDE used by the walk */
	pt_element_t expected_pde;	/* PDE value expected after the access */
	pt_element_t ignore_pde;	/* PDE bits ignored when comparing */
	int expected_fault;		/* non-zero if a #PF is expected */
	unsigned expected_error;	/* expected #PF error code */
	int pt_levels;			/* number of paging levels */

	/* 5-level paging, 1-based to avoid math. */
	pt_element_t page_tables[6];
} ac_test_t;

/* Pseudo-descriptor layout (limit + linear base); not referenced in this chunk. */
typedef struct {
	unsigned short limit;
	unsigned long linear_addr;
} __attribute__((packed)) descriptor_table_t;
194 
195 
static void ac_test_show(ac_test_t *at);

/*
 * Software shadows of CR0/CR3/CR4/EFER.  The set_* helpers below compare
 * against these to skip redundant hardware writes; shadow_cr3 is used as
 * the page-table root for software walks (presumably initialized by code
 * outside this chunk — TODO confirm).
 */
static unsigned long shadow_cr0;
static unsigned long shadow_cr3;
static unsigned long shadow_cr4;
static unsigned long long shadow_efer;

/* Callback invoked on paging-structure entries visited by walk_va(). */
typedef void (*walk_fn)(pt_element_t *ptep, int level, unsigned long virt);
204 
/* Returns the size of the range covered by the last processed entry. */
static unsigned long walk_va(ac_test_t *at, int min_level, unsigned long virt,
			     walk_fn callback, bool leaf_only)
{
	unsigned long parent_pte = shadow_cr3;
	int i;

	/*
	 * Walk the paging structures for 'virt' from the root down to
	 * min_level, invoking 'callback' on each visited entry, or only on
	 * the terminal entry when 'leaf_only' is set.
	 */
	for (i = at->pt_levels; i >= min_level; --i) {
		pt_element_t *parent_pt = va(parent_pte & PT_BASE_ADDR_MASK);
		unsigned int index = PT_INDEX(virt, i);
		pt_element_t *ptep = &parent_pt[index];

		/* A leaf-only walk requires every level to be present. */
		assert(!leaf_only || (*ptep & PT_PRESENT_MASK));

		if (!leaf_only || i == 1 || (*ptep & PT_PAGE_SIZE_MASK))
			callback(ptep, i, virt);

		/* Stop at a 4KiB PTE or at a large-page (PS=1) entry. */
		if (i == 1 || *ptep & PT_PAGE_SIZE_MASK)
			break;

		parent_pte = *ptep;
	}

	/*
	 * NOTE(review): if the loop exits via its condition (min_level > 1
	 * and no leaf reached), i equals min_level - 1 here; callers appear
	 * to rely only on the break paths for a meaningful size.
	 */
	return 1ul << PGDIR_BITS(i);
}
230 
231 static void walk_ptes(ac_test_t *at, unsigned long virt, unsigned long end,
232 		      walk_fn callback)
233 {
234 	unsigned long page_size;
235 
236 	for ( ; virt < end; virt = ALIGN_DOWN(virt + page_size, page_size))
237 		page_size = walk_va(at, 1, virt, callback, true);
238 }
239 
240 static void set_cr0_wp(int wp)
241 {
242 	unsigned long cr0 = shadow_cr0;
243 
244 	cr0 &= ~X86_CR0_WP;
245 	if (wp)
246 		cr0 |= X86_CR0_WP;
247 	if (cr0 != shadow_cr0) {
248 		write_cr0(cr0);
249 		shadow_cr0 = cr0;
250 	}
251 }
252 
/* walk_fn callback: strip the USER bit from an entry and flush its translation. */
static void clear_user_mask(pt_element_t *ptep, int level, unsigned long virt)
{
	*ptep &= ~PT_USER_MASK;

	/* Flush to avoid spurious #PF */
	invlpg((void*)virt);
}
260 
/* walk_fn callback: set the USER bit on an entry and flush its translation. */
static void set_user_mask(pt_element_t *ptep, int level, unsigned long virt)
{
	*ptep |= PT_USER_MASK;

	/* Flush to avoid spurious #PF */
	invlpg((void*)virt);
}
268 
/*
 * Set or clear CR4.SMEP.  The test's own code pages are normally mapped
 * USER, so enabling SMEP first makes the test text supervisor-only; the
 * USER bit is restored when SMEP is cleared or the CR4 write fails.
 * Returns the result of write_cr4_safe() (non-zero on fault), or 0 if
 * CR4 already held the requested value.
 */
static unsigned set_cr4_smep(ac_test_t *at, int smep)
{
	extern char stext, etext;
	unsigned long code_start = (unsigned long)&stext;
	unsigned long code_end = (unsigned long)&etext;
	unsigned long cr4 = shadow_cr4;
	unsigned r;

	cr4 &= ~X86_CR4_SMEP;
	if (smep)
		cr4 |= X86_CR4_SMEP;
	if (cr4 == shadow_cr4)
		return 0;

	/* Clear USER on the text pages *before* SMEP can take effect. */
	if (smep)
		walk_ptes(at, code_start, code_end, clear_user_mask);
	r = write_cr4_safe(cr4);
	/* Restore USER when disabling SMEP or when the CR4 write faulted. */
	if (r || !smep)
		walk_ptes(at, code_start, code_end, set_user_mask);
	if (!r)
		shadow_cr4 = cr4;
	return r;
}
292 
/* Set or clear CR4.PKE, touching hardware only when the value changes. */
static void set_cr4_pke(int pke)
{
	unsigned long cr4 = shadow_cr4;

	cr4 &= ~X86_CR4_PKE;
	if (pke)
		cr4 |= X86_CR4_PKE;
	if (cr4 == shadow_cr4)
		return;

	/* Check that protection keys do not affect accesses when CR4.PKE=0.  */
	if ((shadow_cr4 & X86_CR4_PKE) && !pke)
		write_pkru(0xfffffffc);
	write_cr4(cr4);
	shadow_cr4 = cr4;
}
309 
310 static void set_efer_nx(int nx)
311 {
312 	unsigned long long efer = shadow_efer;
313 
314 	efer &= ~EFER_NX_MASK;
315 	if (nx)
316 		efer |= EFER_NX_MASK;
317 	if (efer != shadow_efer) {
318 		wrmsr(MSR_EFER, efer);
319 		shadow_efer = efer;
320 	}
321 }
322 
/*
 * Environment setup: install the #PF handler (vector 14) and the DPL-3
 * soft-interrupt gate (0x20) used to return from user mode, then reset
 * the paging-structure pool allocator.
 */
static void ac_env_int(ac_pt_env_t *pt_env, int page_table_levels)
{
	extern char page_fault, kernel_entry;
	set_idt_entry(14, &page_fault, 0);
	set_idt_entry(0x20, &kernel_entry, 3);

	pt_env->pt_pool_pa = AT_PAGING_STRUCTURES_PHYS;
	pt_env->pt_pool_current = 0;
	pt_env->pt_levels = page_table_levels;
}
333 
334 static pt_element_t ac_test_alloc_pt(ac_pt_env_t *pt_env)
335 {
336 	pt_element_t pt;
337 
338 	/*
339 	 * Each test needs at most pt_levels-1 structures per virtual address,
340 	 * and no existing scenario uses more than four addresses.
341 	 */
342 	assert(pt_env->pt_pool_current < (4 * (pt_env->pt_levels - 1)));
343 
344 	pt = pt_env->pt_pool_pa + (pt_env->pt_pool_current * PAGE_SIZE);
345 	pt_env->pt_pool_current++;
346 	memset(va(pt), 0, PAGE_SIZE);
347 	return pt;
348 }
349 
/*
 * Initialize an access test for 'virt'.  If 'buddy' is non-NULL, the new
 * test shares the buddy's paging structures at every level where the two
 * virtual addresses have the same table index, so their walks merge.
 * Forces EFER.NX=1 and CR0.WP=1 as the baseline CPU state.
 */
static void __ac_test_init(ac_test_t *at, unsigned long virt,
			   ac_pt_env_t *pt_env, ac_test_t *buddy)
{
	unsigned long buddy_virt = buddy ? (unsigned long)buddy->virt : 0;
	pt_element_t *root_pt = va(shadow_cr3 & PT_BASE_ADDR_MASK);
	int i;

	/*
	 * The test infrastructure, e.g. this function, must use a different
	 * top-level SPTE than the test, otherwise modifying SPTEs can affect
	 * normal behavior, e.g. crash the test due to marking code SPTEs
	 * USER when CR4.SMEP=1.
	 */
	assert(PT_INDEX(virt, pt_env->pt_levels) !=
	       PT_INDEX((unsigned long)__ac_test_init, pt_env->pt_levels));

	set_efer_nx(1);
	set_cr0_wp(1);
	at->flags = 0;
	at->virt = (void *)virt;
	at->phys = AT_CODE_DATA_PHYS;
	at->pt_levels = pt_env->pt_levels;

	/* Entries [0] and [1] are never used; poison them. */
	at->page_tables[0] = -1ull;
	at->page_tables[1] = -1ull;

	/*
	 * Zap the existing top-level PTE as it may be reused from a previous
	 * sub-test.  This allows runtime PTE modification to assert that two
	 * overlapping walks don't try to install different paging structures.
	 */
	root_pt[PT_INDEX(virt, pt_env->pt_levels)] = 0;

	for (i = at->pt_levels; i > 1; i--) {
		/*
		 * Buddies can reuse any part of the walk that share the same
		 * index.  This is weird, but intentional, as several tests
		 * want different walks to merge at lower levels.
		 */
		if (buddy && PT_INDEX(virt, i) == PT_INDEX(buddy_virt, i))
			at->page_tables[i] = buddy->page_tables[i];
		else
			at->page_tables[i] = ac_test_alloc_pt(pt_env);
	}
}
395 
/* Initialize a standalone access test (no buddy to share tables with). */
static void ac_test_init(ac_test_t *at, unsigned long virt, ac_pt_env_t *pt_env)
{
	__ac_test_init(at, virt, pt_env, NULL);
}
400 
401 static int ac_test_bump_one(ac_test_t *at)
402 {
403 	at->flags = ((at->flags | invalid_mask) + 1) & ~invalid_mask;
404 	return at->flags < (1 << NR_AC_FLAGS);
405 }
406 
/* Test whether flag x (an AC_* name without the _MASK suffix) is set. */
#define F(x)  ((flags & x##_MASK) != 0)

/*
 * Filter out illegal or redundant flag combinations from the exhaustive
 * enumeration.  Returns true if this combination should be run.
 */
static _Bool ac_test_legal(ac_test_t *at)
{
	int flags = at->flags;
	unsigned reserved;

	/*
	 * NOTE(review): this unconditionally skips SMEP combinations in the
	 * main enumeration (SMEP is exercised by check_smep_andnot_wp), and
	 * makes the AC_ACCESS_USER check below unreachable — presumably
	 * intentional, but worth confirming.
	 */
	if (F(AC_CPU_CR4_SMEP))
		return false;

	/* An access cannot be both a fetch and a write. */
	if (F(AC_ACCESS_FETCH) && F(AC_ACCESS_WRITE))
		return false;

	/*
	 * Since we convert current page to kernel page when cr4.smep=1,
	 * we can't switch to user mode.
	 */
	if (F(AC_ACCESS_USER) && F(AC_CPU_CR4_SMEP))
		return false;

	/*
	 * Only test protection key faults if CR4.PKE=1.
	 */
	if (!F(AC_CPU_CR4_PKE) &&
		(F(AC_PKU_AD) || F(AC_PKU_WD))) {
		return false;
	}

	/*
	 * pde.bit13 checks handling of reserved bits in largepage PDEs.  It is
	 * meaningless if there is a PTE.
	 */
	if (!F(AC_PDE_PSE) && F(AC_PDE_BIT13))
		return false;

	/*
	 * Shorten the test by avoiding testing too many reserved bit combinations.
	 * Skip testing multiple reserved bits to shorten the test. Reserved bit
	 * page faults are terminal and multiple reserved bits do not affect the
	 * error code; the odds of a KVM bug are super low, and the odds of actually
	 * being able to detect a bug are even lower.
	 */
	reserved = (AC_PDE_BIT51_MASK | AC_PDE_BIT36_MASK | AC_PDE_BIT13_MASK |
		   AC_PTE_BIT51_MASK | AC_PTE_BIT36_MASK);
	if (!F(AC_CPU_EFER_NX))
		reserved |= AC_PDE_NX_MASK | AC_PTE_NX_MASK;

	/* Only test one reserved bit at a time.  */
	reserved &= flags;
	if (reserved & (reserved - 1))
		return false;

	return true;
}
461 
462 static int ac_test_bump(ac_test_t *at)
463 {
464 	int ret;
465 
466 	do {
467 		ret = ac_test_bump_one(at);
468 	} while (ret && !ac_test_legal(at));
469 
470 	return ret;
471 }
472 
/*
 * Given the effective permissions (writable/user/executable) accumulated
 * along the walk, record in 'at' whether the access described by 'flags'
 * is expected to fault, and return the A/D bits the hardware should set
 * in the leaf entry when the access succeeds.
 */
static pt_element_t ac_test_permissions(ac_test_t *at, unsigned flags,
					bool writable, bool user,
					bool executable)
{
	/* With CR0.WP=0, supervisor writes ignore the W permission. */
	bool kwritable = !F(AC_CPU_CR0_WP) && !F(AC_ACCESS_USER);
	pt_element_t expected = 0;

	if (F(AC_ACCESS_USER) && !user)
		at->expected_fault = 1;

	if (F(AC_ACCESS_WRITE) && !writable && !kwritable)
		at->expected_fault = 1;

	if (F(AC_ACCESS_FETCH) && !executable)
		at->expected_fault = 1;

	/* SMEP: supervisor fetches from user-accessible pages fault. */
	if (F(AC_ACCESS_FETCH) && user && F(AC_CPU_CR4_SMEP))
		at->expected_fault = 1;

	/* Protection keys apply only to data accesses to user-accessible pages. */
	if (user && !F(AC_ACCESS_FETCH) && F(AC_PKU_PKEY) && F(AC_CPU_CR4_PKE)) {
		if (F(AC_PKU_AD)) {
			at->expected_fault = 1;
			at->expected_error |= PFERR_PK_MASK;
		} else if (F(AC_ACCESS_WRITE) && F(AC_PKU_WD) && !kwritable) {
			at->expected_fault = 1;
			at->expected_error |= PFERR_PK_MASK;
		}
	}

	/* A successful access sets Accessed, plus Dirty for writes. */
	if (!at->expected_fault) {
		expected |= PT_ACCESSED_MASK;
		if (F(AC_ACCESS_WRITE))
			expected |= PT_DIRTY_MASK;
	}

	return expected;
}
510 
/*
 * Model one access with the given flags: accumulate the expected #PF error
 * code and the expected PDE/PTE values (A/D bits) into 'at'.  Mirrors the
 * hardware walk: validate the PDE first, then either treat it as a 2MiB
 * leaf (PSE) or descend to the PTE.
 */
static void ac_emulate_access(ac_test_t *at, unsigned flags)
{
	bool pde_valid, pte_valid;
	bool user, writable, executable;

	if (F(AC_ACCESS_USER))
		at->expected_error |= PFERR_USER_MASK;

	if (F(AC_ACCESS_WRITE))
		at->expected_error |= PFERR_WRITE_MASK;

	if (F(AC_ACCESS_FETCH))
		at->expected_error |= PFERR_FETCH_MASK;

	if (!F(AC_PDE_ACCESSED))
		at->ignore_pde = PT_ACCESSED_MASK;

	/* The PDE is valid if present with no reserved bits set. */
	pde_valid = F(AC_PDE_PRESENT)
		&& !F(AC_PDE_BIT51) && !F(AC_PDE_BIT36) && !F(AC_PDE_BIT13)
		&& !(F(AC_PDE_NX) && !F(AC_CPU_EFER_NX));

	if (!pde_valid) {
		at->expected_fault = 1;
		if (F(AC_PDE_PRESENT)) {
			/* Present but malformed => reserved-bit fault. */
			at->expected_error |= PFERR_RESERVED_MASK;
		} else {
			at->expected_error &= ~PFERR_PRESENT_MASK;
		}
		goto fault;
	}

	/* Effective permissions after the PDE level. */
	writable = !F(AC_PDPTE_NO_WRITABLE) && F(AC_PDE_WRITABLE);
	user = F(AC_PDE_USER);
	executable = !F(AC_PDE_NX);

	if (F(AC_PDE_PSE)) {
		/* 2MiB leaf: permissions are decided entirely at the PDE. */
		at->expected_pde |= ac_test_permissions(at, flags, writable,
							user, executable);
		goto no_pte;
	}

	/* Descending through the PDE always sets its Accessed bit. */
	at->expected_pde |= PT_ACCESSED_MASK;

	pte_valid = F(AC_PTE_PRESENT)
		    && !F(AC_PTE_BIT51) && !F(AC_PTE_BIT36)
		    && !(F(AC_PTE_NX) && !F(AC_CPU_EFER_NX));

	if (!pte_valid) {
		at->expected_fault = 1;
		if (F(AC_PTE_PRESENT)) {
			at->expected_error |= PFERR_RESERVED_MASK;
		} else {
			at->expected_error &= ~PFERR_PRESENT_MASK;
		}
		goto fault;
	}

	/* Permissions AND together across levels. */
	writable &= F(AC_PTE_WRITABLE);
	user &= F(AC_PTE_USER);
	executable &= !F(AC_PTE_NX);

	at->expected_pte |= ac_test_permissions(at, flags, writable, user,
						executable);

no_pte:
fault:
	if (!at->expected_fault)
		at->ignore_pde = 0;
	/* Without NX and SMEP, the error code never reports a fetch. */
	if (!F(AC_CPU_EFER_NX) && !F(AC_CPU_CR4_SMEP))
		at->expected_error &= ~PFERR_FETCH_MASK;
}
582 
/*
 * (Re)compute the expected outcome for at->flags, starting from the
 * current in-memory PTE/PDE values.  For "twice" tests, first model a
 * plain read (the priming access) so its A/D side effects are folded in,
 * then discard its fault expectations and model the real access.
 */
static void __ac_set_expected_status(ac_test_t *at, bool flush)
{
	if (flush)
		invlpg(at->virt);

	if (at->ptep)
		at->expected_pte = *at->ptep;
	at->expected_pde = *at->pdep;
	at->ignore_pde = 0;
	at->expected_fault = 0;
	at->expected_error = PFERR_PRESENT_MASK;

	if (at->flags & AC_ACCESS_TWICE_MASK) {
		/* The priming access is a supervisor read. */
		ac_emulate_access(at, at->flags &
				  ~AC_ACCESS_WRITE_MASK &
				  ~AC_ACCESS_FETCH_MASK &
				  ~AC_ACCESS_USER_MASK);
		at->expected_fault = 0;
		at->expected_error = PFERR_PRESENT_MASK;
		at->ignore_pde = 0;
	}

	ac_emulate_access(at, at->flags);
}
607 
/* Compute expectations, flushing the TLB entry for at->virt first. */
static void ac_set_expected_status(ac_test_t *at)
{
	__ac_set_expected_status(at, true);
}
612 
/*
 * Return the non-leaf entry value for level 'i': the pre-allocated table's
 * address plus any A/D bits already set in the current entry.  Aborts if
 * the slot already points at a *different* table (two merged walks trying
 * to install conflicting structures).
 */
static pt_element_t ac_get_pt(ac_test_t *at, int i, pt_element_t *ptep)
{
	pt_element_t pte;

	pte = *ptep;
	if (pte && !(pte & PT_PAGE_SIZE_MASK) &&
	    (pte & PT_BASE_ADDR_MASK) != at->page_tables[i]) {
		printf("\nPT collision.  VA = 0x%lx, level = %d, index = %ld, found PT = 0x%lx, want PT = 0x%lx\n",
			(unsigned long)at->virt, i,
			PT_INDEX((unsigned long)at->virt, i),
			pte, at->page_tables[i]);
		abort();
	}

	/*
	 * Preserve A/D bits to avoid writing upper level PTEs,
	 * which cannot be unsyc'd when KVM uses shadow paging.
	 */
	pte = at->page_tables[i] | (pte & (PT_DIRTY_MASK | PT_ACCESSED_MASK));
	return pte;
}
634 
/*
 * Build the page-table walk for at->virt according to at->flags, from the
 * root down to either the PTE or (for PSE tests) the 2MiB PDE, then compute
 * the expected outcome.  Upper levels get full RWX/user permissions; the
 * PDE and PTE get exactly the bits the flags request.
 */
static void ac_test_setup_ptes(ac_test_t *at)
{
	unsigned long parent_pte = shadow_cr3;
	int flags = at->flags;
	int i;

	at->ptep = 0;
	/* Stop at level 2 when the PDE itself is the (PSE) leaf. */
	for (i = at->pt_levels; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) {
		pt_element_t *parent_pt = va(parent_pte & PT_BASE_ADDR_MASK);
		unsigned index = PT_INDEX((unsigned long)at->virt, i);
		pt_element_t *ptep = &parent_pt[index];
		pt_element_t pte;

		switch (i) {
		case 5:
		case 4:
			/* Upper levels: always present, writable, user. */
			pte = ac_get_pt(at, i, ptep);
			pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
			break;
		case 3:
			/* PDPTE: writable unless the test disables it. */
			pte = ac_get_pt(at, i, ptep);
			pte |= PT_PRESENT_MASK | PT_USER_MASK;
			if (!F(AC_PDPTE_NO_WRITABLE))
				pte |= PT_WRITABLE_MASK;
			break;
		case 2:
			if (!F(AC_PDE_PSE)) {
				pte = ac_get_pt(at, i, ptep);

				/* The protection key is ignored on non-leaf entries.  */
				if (F(AC_PKU_PKEY))
					pte |= 2ull << 59;
			} else {
				/* 2MiB leaf mapping of the test page. */
				pte = at->phys & PT_PSE_BASE_ADDR_MASK;
				pte |= PT_PAGE_SIZE_MASK;
				if (F(AC_PKU_PKEY))
					pte |= 1ull << 59;
			}
			if (F(AC_PDE_PRESENT))
				pte |= PT_PRESENT_MASK;
			if (F(AC_PDE_WRITABLE))
				pte |= PT_WRITABLE_MASK;
			if (F(AC_PDE_USER))
				pte |= PT_USER_MASK;
			if (F(AC_PDE_ACCESSED))
				pte |= PT_ACCESSED_MASK;
			if (F(AC_PDE_DIRTY))
				pte |= PT_DIRTY_MASK;
			if (F(AC_PDE_NX))
				pte |= PT64_NX_MASK;
			if (F(AC_PDE_BIT51))
				pte |= 1ull << 51;
			if (F(AC_PDE_BIT36))
				pte |= 1ull << 36;
			if (F(AC_PDE_BIT13))
				pte |= 1ull << 13;
			at->pdep = ptep;
			break;
		case 1:
			/* Leaf 4KiB PTE, built entirely from the flags. */
			pte = at->phys & PT_BASE_ADDR_MASK;
			if (F(AC_PKU_PKEY))
				pte |= 1ull << 59;
			if (F(AC_PTE_PRESENT))
				pte |= PT_PRESENT_MASK;
			if (F(AC_PTE_WRITABLE))
				pte |= PT_WRITABLE_MASK;
			if (F(AC_PTE_USER))
				pte |= PT_USER_MASK;
			if (F(AC_PTE_ACCESSED))
				pte |= PT_ACCESSED_MASK;
			if (F(AC_PTE_DIRTY))
				pte |= PT_DIRTY_MASK;
			if (F(AC_PTE_NX))
				pte |= PT64_NX_MASK;
			if (F(AC_PTE_BIT51))
				pte |= 1ull << 51;
			if (F(AC_PTE_BIT36))
				pte |= 1ull << 36;
			at->ptep = ptep;
			break;
		default:
			assert(0);
		}

		/* Avoid rewriting unchanged entries (see ac_get_pt comment). */
		if (pte != *ptep)
			*ptep = pte;

		parent_pte = pte;
	}
	ac_set_expected_status(at);
}
726 
/* walk_fn callback: print one entry as "level / index / raw value". */
static void __dump_pte(pt_element_t *ptep, int level, unsigned long virt)
{
	printf("------L%d I%lu: %lx\n", level, PT_INDEX(virt, level), *ptep);
}
731 
732 static void dump_mapping(ac_test_t *at)
733 {
734 	unsigned long virt = (unsigned long)at->virt;
735 	int flags = at->flags;
736 
737 	printf("Dump mapping: address: %p\n", at->virt);
738 	walk_va(at, F(AC_PDE_PSE) ? 2 : 1, virt, __dump_pte, false);
739 }
740 
741 static void ac_test_check(ac_test_t *at, _Bool *success_ret, _Bool cond,
742 			  const char *fmt, ...)
743 {
744 	va_list ap;
745 	char buf[500];
746 
747 	if (!*success_ret) {
748 		return;
749 	}
750 
751 	if (!cond) {
752 		return;
753 	}
754 
755 	*success_ret = false;
756 
757 	if (!verbose) {
758 		puts("\n");
759 		ac_test_show(at);
760 	}
761 
762 	va_start(ap, fmt);
763 	vsnprintf(buf, sizeof(buf), fmt, ap);
764 	va_end(ap);
765 	printf("FAIL: %s\n", buf);
766 	dump_mapping(at);
767 }
768 
769 static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore)
770 {
771 	pte1 &= ~ignore;
772 	pte2 &= ~ignore;
773 	return pte1 == pte2;
774 }
775 
776 static int ac_test_do_access(ac_test_t *at)
777 {
778 	static unsigned unique = 42;
779 	int fault = 0;
780 	unsigned e;
781 	static unsigned char user_stack[4096];
782 	unsigned long rsp;
783 	_Bool success = true;
784 	int flags = at->flags;
785 
786 	++unique;
787 	if (!(unique & 65535)) {
788 		puts(".");
789 	}
790 
791 	*((unsigned char *)at->phys) = 0xc3; /* ret */
792 
793 	unsigned r = unique;
794 	set_cr0_wp(F(AC_CPU_CR0_WP));
795 	set_efer_nx(F(AC_CPU_EFER_NX));
796 	set_cr4_pke(F(AC_CPU_CR4_PKE));
797 	if (F(AC_CPU_CR4_PKE)) {
798 		/* WD2=AD2=1, WD1=F(AC_PKU_WD), AD1=F(AC_PKU_AD) */
799 		write_pkru(0x30 | (F(AC_PKU_WD) ? 8 : 0) |
800 			   (F(AC_PKU_AD) ? 4 : 0));
801 	}
802 
803 	set_cr4_smep(at, F(AC_CPU_CR4_SMEP));
804 
805 	if (F(AC_ACCESS_TWICE)) {
806 		asm volatile ("mov $fixed2, %%rsi \n\t"
807 			      "cmp $0, %[fep] \n\t"
808 			      "jz 1f \n\t"
809 			      KVM_FEP
810 			      "1: mov (%[addr]), %[reg] \n\t"
811 			      "fixed2:"
812 			      : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e)
813 			      : [addr]"r"(at->virt), [fep]"r"(F(AC_FEP))
814 			      : "rsi");
815 		fault = 0;
816 	}
817 
818 	asm volatile ("mov $fixed1, %%rsi \n\t"
819 		      "mov %%rsp, %[rsp0] \n\t"
820 		      "cmp $0, %[user] \n\t"
821 		      "jz do_access \n\t"
822 		      "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax \n\t"
823 		      "pushq %[user_ds] \n\t"
824 		      "pushq %[user_stack_top] \n\t"
825 		      "pushfq \n\t"
826 		      "pushq %[user_cs] \n\t"
827 		      "pushq $do_access \n\t"
828 		      "iretq \n"
829 		      "do_access: \n\t"
830 		      "cmp $0, %[fetch] \n\t"
831 		      "jnz 2f \n\t"
832 		      "cmp $0, %[write] \n\t"
833 		      "jnz 1f \n\t"
834 		      "cmp $0, %[fep] \n\t"
835 		      "jz 0f \n\t"
836 		      KVM_FEP
837 		      "0: mov (%[addr]), %[reg] \n\t"
838 		      "jmp done \n\t"
839 		      "1: cmp $0, %[fep] \n\t"
840 		      "jz 0f \n\t"
841 		      KVM_FEP
842 		      "0: mov %[reg], (%[addr]) \n\t"
843 		      "jmp done \n\t"
844 		      "2: call *%[addr] \n\t"
845 		      "done: \n"
846 		      "fixed1: \n"
847 		      "int %[kernel_entry_vector] \n\t"
848 		      ".section .text.entry \n\t"
849 		      "kernel_entry: \n\t"
850 		      "mov %[rsp0], %%rsp \n\t"
851 		      "jmp back_to_kernel \n\t"
852 		      ".section .text \n\t"
853 		      "back_to_kernel:"
854 		      : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp),
855 			[rsp0]"=m"(tss[0].rsp0)
856 		      : [addr]"r"(at->virt),
857 			[write]"r"(F(AC_ACCESS_WRITE)),
858 			[user]"r"(F(AC_ACCESS_USER)),
859 			[fetch]"r"(F(AC_ACCESS_FETCH)),
860 			[fep]"r"(F(AC_FEP)),
861 			[user_ds]"i"(USER_DS),
862 			[user_cs]"i"(USER_CS),
863 			[user_stack_top]"r"(user_stack + sizeof user_stack),
864 			[kernel_entry_vector]"i"(0x20)
865 		      : "rsi");
866 
867 	asm volatile (".section .text.pf \n\t"
868 		      "page_fault: \n\t"
869 		      "pop %rbx \n\t"
870 		      "mov %rsi, (%rsp) \n\t"
871 		      "movl $1, %eax \n\t"
872 		      "iretq \n\t"
873 		      ".section .text");
874 
875 	ac_test_check(at, &success, fault && !at->expected_fault,
876 		      "unexpected fault");
877 	ac_test_check(at, &success, !fault && at->expected_fault,
878 		      "unexpected access");
879 	ac_test_check(at, &success, fault && e != at->expected_error,
880 		      "error code %x expected %x", e, at->expected_error);
881 	if (at->ptep)
882 		ac_test_check(at, &success, *at->ptep != at->expected_pte,
883 			      "pte %x expected %x", *at->ptep, at->expected_pte);
884 	ac_test_check(at, &success,
885 		      !pt_match(*at->pdep, at->expected_pde, at->ignore_pde),
886 		      "pde %x expected %x", *at->pdep, at->expected_pde);
887 
888 	if (success && verbose) {
889 		if (at->expected_fault) {
890 			printf("PASS (%x)\n", at->expected_error);
891 		} else {
892 			printf("PASS\n");
893 		}
894 	}
895 	return success;
896 }
897 
/*
 * Print the test description: "test" followed by the name of every flag
 * set in at->flags (see ac_names), ending with ": " so a PASS/FAIL verdict
 * can be appended on the same line.
 */
static void ac_test_show(ac_test_t *at)
{
	char line[5000];

	*line = 0;
	strcat(line, "test");
	for (int i = 0; i < NR_AC_FLAGS; ++i)
		if (at->flags & (1 << i)) {
			strcat(line, " ");
			strcat(line, ac_names[i]);
		}

	strcat(line, ": ");
	printf("%s", line);
}
913 
/*
 * This test case is used to trigger the bug which is fixed by
 * commit e09e90a5 in the kvm tree
 */
static int corrupt_hugepage_trigger(ac_pt_env_t *pt_env)
{
	ac_test_t at1, at2;

	/* Two 2MiB mappings whose walks merge at lower levels (buddies). */
	ac_test_init(&at1, 0xffff923400000000ul, pt_env);
	__ac_test_init(&at2, 0xffffe66600000000ul, pt_env, &at1);

	/* Read through a read-only huge PDE with CR0.WP=1. */
	at2.flags = AC_CPU_CR0_WP_MASK | AC_PDE_PSE_MASK | AC_PDE_PRESENT_MASK;
	ac_test_setup_ptes(&at2);
	if (!ac_test_do_access(&at2))
		goto err;

	/* Same, but through a writable huge PDE. */
	at1.flags = at2.flags | AC_PDE_WRITABLE_MASK;
	ac_test_setup_ptes(&at1);
	if (!ac_test_do_access(&at1))
		goto err;

	/* Write through the writable mapping... */
	at1.flags |= AC_ACCESS_WRITE_MASK;
	ac_set_expected_status(&at1);
	if (!ac_test_do_access(&at1))
		goto err;

	/* ...then write through the read-only one, which must fault. */
	at2.flags |= AC_ACCESS_WRITE_MASK;
	ac_set_expected_status(&at2);
	if (!ac_test_do_access(&at2))
		goto err;

	return 1;

err:
	printf("corrupt_hugepage_trigger test fail\n");
	return 0;
}
951 
952 /*
953  * This test case is used to trigger the bug which is fixed by
954  * commit 3ddf6c06e13e in the kvm tree
955  */
956 static int check_pfec_on_prefetch_pte(ac_pt_env_t *pt_env)
957 {
958 	ac_test_t at1, at2;
959 
960 	ac_test_init(&at1, 0xffff923406001000ul, pt_env);
961 	__ac_test_init(&at2, 0xffff923406003000ul, pt_env, &at1);
962 
963 	at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK;
964 	ac_test_setup_ptes(&at1);
965 
966 	at2.flags = at1.flags | AC_PTE_NX_MASK;
967 	ac_test_setup_ptes(&at2);
968 
969 	if (!ac_test_do_access(&at1)) {
970 		printf("%s: prepare fail\n", __FUNCTION__);
971 			goto err;
972 	}
973 
974 	if (!ac_test_do_access(&at2)) {
975 		printf("%s: check PFEC on prefetch pte path fail\n",
976 		       __FUNCTION__);
977 		goto err;
978 	}
979 
980 	return 1;
981 
982 err:
983 	return 0;
984 }
985 
/*
 * If the write-fault access is from supervisor and CR0.WP is not set on the
 * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte
 * and clears U bit. This is the chance that kvm can change pte access from
 * readonly to writable.
 *
 * Unfortunately, the pte access is the access of 'direct' shadow page table,
 * means direct sp.role.access = pte_access, then we will create a writable
 * spte entry on the readonly shadow page table. It will cause Dirty bit is
 * not tracked when two guest ptes point to the same large page. Note, it
 * does not have other impact except Dirty bit since cr0.wp is encoded into
 * sp.role.
 *
 * Note: to trigger this bug, hugepage should be disabled on host.
 */
static int check_large_pte_dirty_for_nowp(ac_pt_env_t *pt_env)
{
	ac_test_t at1, at2;

	/* Two buddy 2MiB mappings of the same physical page. */
	ac_test_init(&at1, 0xffff923403000000ul, pt_env);
	__ac_test_init(&at2, 0xffffe66606000000ul, pt_env, &at1);

	/* Read through the first (read-only, CR0.WP=0) mapping. */
	at2.flags = AC_PDE_PRESENT_MASK | AC_PDE_PSE_MASK;
	ac_test_setup_ptes(&at2);
	if (!ac_test_do_access(&at2)) {
		printf("%s: read on the first mapping fail.\n", __FUNCTION__);
		goto err;
	}

	/* Supervisor write through the second mapping (allowed: CR0.WP=0). */
	at1.flags = at2.flags | AC_ACCESS_WRITE_MASK;
	ac_test_setup_ptes(&at1);
	if (!ac_test_do_access(&at1)) {
		printf("%s: write on the second mapping fail.\n", __FUNCTION__);
		goto err;
	}

	/* Write through the first mapping; Dirty must be tracked. */
	at2.flags |= AC_ACCESS_WRITE_MASK;
	ac_set_expected_status(&at2);
	if (!ac_test_do_access(&at2)) {
		printf("%s: write on the first mapping fail.\n", __FUNCTION__);
		goto err;
	}

	return 1;

err:
	return 0;
}
1034 
/*
 * SMEP with CR0.WP=0: a supervisor write to a read-only user page is
 * allowed (and may tempt KVM into making the spte writable/kernel), but a
 * subsequent fetch from the same user page must still take a SMEP fault.
 * Skipped (reported as pass) when the CPU lacks SMEP.
 */
static int check_smep_andnot_wp(ac_pt_env_t *pt_env)
{
	ac_test_t at1;
	int err_prepare_andnot_wp, err_smep_andnot_wp;

	if (!this_cpu_has(X86_FEATURE_SMEP)) {
		return 1;
	}

	ac_test_init(&at1, 0xffff923406001000ul, pt_env);

	at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK |
		    AC_PDE_USER_MASK | AC_PTE_USER_MASK |
		    AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK |
		    AC_CPU_CR4_SMEP_MASK |
		    AC_CPU_CR0_WP_MASK |
		    AC_ACCESS_WRITE_MASK;
	ac_test_setup_ptes(&at1);

	/*
	 * Here we write the ro user page when
	 * cr0.wp=0, then we execute it and SMEP
	 * fault should happen.
	 */
	err_prepare_andnot_wp = ac_test_do_access(&at1);
	if (!err_prepare_andnot_wp) {
		printf("%s: SMEP prepare fail\n", __FUNCTION__);
		goto clean_up;
	}

	/* Switch the access from write to fetch; expect a SMEP #PF. */
	at1.flags &= ~AC_ACCESS_WRITE_MASK;
	at1.flags |= AC_ACCESS_FETCH_MASK;
	ac_set_expected_status(&at1);
	err_smep_andnot_wp = ac_test_do_access(&at1);

clean_up:
	/* Always restore CR4.SMEP=0 (and the USER bits on the test text). */
	set_cr4_smep(&at1, 0);

	if (!err_prepare_andnot_wp)
		goto err;
	if (!err_smep_andnot_wp) {
		printf("%s: check SMEP without wp fail\n", __FUNCTION__);
		goto err;
	}
	return 1;

err:
	return 0;
}
1084 
/* Fully-accessed, writable-walk supervisor write used by the CR0.WP tests. */
#define TOGGLE_CR0_WP_TEST_BASE_FLAGS \
	(AC_PDE_PRESENT_MASK | AC_PDE_ACCESSED_MASK | \
	 AC_PTE_PRESENT_MASK | AC_PTE_ACCESSED_MASK | \
	 AC_ACCESS_WRITE_MASK)

/*
 * Run one supervisor write with CR0.WP taken from 'flags' (optionally
 * forcing emulation via AC_FEP_MASK).  Returns 0 on success, 1 on failure
 * (callers accumulate an error count).
 */
static int do_cr0_wp_access(ac_test_t *at, int flags)
{
	const bool cr0_wp = !!(flags & AC_CPU_CR0_WP_MASK);

	at->flags = TOGGLE_CR0_WP_TEST_BASE_FLAGS | flags;
	__ac_set_expected_status(at, false);

	/*
	 * Under VMX the guest might own the CR0.WP bit, requiring KVM to
	 * manually keep track of it where needed, e.g. in the guest page
	 * table walker.
	 *
	 * Load CR0.WP with the inverse value of what will be used during
	 * the access test and toggle EFER.NX to coerce KVM into rebuilding
	 * the current MMU context based on the soon-to-be-stale CR0.WP.
	 */
	set_cr0_wp(!cr0_wp);
	set_efer_nx(1);
	set_efer_nx(0);

	if (!ac_test_do_access(at)) {
		printf("%s: %ssupervisor write with CR0.WP=%d did not %s\n",
		       __FUNCTION__, (flags & AC_FEP_MASK) ? "emulated " : "",
		       cr0_wp, cr0_wp ? "FAULT" : "SUCCEED");
		return 1;
	}

	return 0;
}
1119 
1120 static int check_toggle_cr0_wp(ac_pt_env_t *pt_env)
1121 {
1122 	ac_test_t at;
1123 	int err = 0;
1124 
1125 	ac_test_init(&at, 0xffff923042007000ul, pt_env);
1126 	at.flags = TOGGLE_CR0_WP_TEST_BASE_FLAGS;
1127 	ac_test_setup_ptes(&at);
1128 
1129 	err += do_cr0_wp_access(&at, 0);
1130 	err += do_cr0_wp_access(&at, AC_CPU_CR0_WP_MASK);
1131 	if (!(invalid_mask & AC_FEP_MASK)) {
1132 		err += do_cr0_wp_access(&at, AC_FEP_MASK);
1133 		err += do_cr0_wp_access(&at, AC_FEP_MASK | AC_CPU_CR0_WP_MASK);
1134 	}
1135 
1136 	return err == 0;
1137 }
1138 
/*
 * Verify that access rights are evaluated per-walk when several virtual
 * addresses reach the same PMD page via upper-level entries with different
 * write permissions (see the diagram below), i.e. that the permissions of
 * the non-shared upper levels take effect for each alias.
 *
 * Returns 1 if all four accesses behave as expected, 0 otherwise.
 */
static int check_effective_sp_permissions(ac_pt_env_t *pt_env)
{
	unsigned long ptr1 = 0xffff923480000000;
	unsigned long ptr2 = ptr1 + SZ_2M;
	unsigned long ptr3 = ptr1 + SZ_1G;
	unsigned long ptr4 = ptr3 + SZ_2M;
	ac_test_t at1, at2, at3, at4;
	int err_read_at1, err_write_at2;
	int err_read_at3, err_write_at4;

	/*
	 * pgd[]   pud[]        pmd[]            virtual address pointers
	 *                   /->pmd(u--)->pte1(uw-)->page1 <- ptr1 (u--)
	 *      /->pud1(uw-)--->pmd(uw-)->pte2(uw-)->page2 <- ptr2 (uw-)
	 * pgd-|
	 *      \->pud2(u--)--->pmd(u--)->pte1(uw-)->page1 <- ptr3 (u--)
	 *                   \->pmd(uw-)->pte2(uw-)->page2 <- ptr4 (u--)
	 * pud1 and pud2 point to the same pmd page.
	 */

	/* ptr1: user read through a fully user-accessible, writable-PTE path. */
	ac_test_init(&at1, ptr1, pt_env);
	at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK |
		    AC_PDE_USER_MASK | AC_PTE_USER_MASK |
		    AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK |
		    AC_PTE_WRITABLE_MASK | AC_ACCESS_USER_MASK;
	ac_test_setup_ptes(&at1);

	/* ptr2: user write, same upper levels as at1 (buddy init), writable PDE too. */
	__ac_test_init(&at2, ptr2, pt_env, &at1);
	at2.flags = at1.flags | AC_PDE_WRITABLE_MASK | AC_PTE_DIRTY_MASK | AC_ACCESS_WRITE_MASK;
	ac_test_setup_ptes(&at2);

	/* ptr3 aliases ptr1, but through a non-writable upper-level entry. */
	__ac_test_init(&at3, ptr3, pt_env, &at1);
	/* Override the PMD (1-based index) to point at ptr1's PMD. */
	at3.page_tables[3] = at1.page_tables[3];
	at3.flags = AC_PDPTE_NO_WRITABLE_MASK | at1.flags;
	ac_test_setup_ptes(&at3);

	/* Alias ptr2, only the PMD will differ; manually override the PMD. */
	__ac_test_init(&at4, ptr4, pt_env, &at2);
	at4.page_tables[3] = at1.page_tables[3];
	at4.flags = AC_PDPTE_NO_WRITABLE_MASK | at2.flags;
	ac_test_setup_ptes(&at4);

	/* ac_test_do_access() returns nonzero when the access matched expectations. */
	err_read_at1 = ac_test_do_access(&at1);
	if (!err_read_at1) {
		printf("%s: read access at1 fail\n", __FUNCTION__);
		return 0;
	}

	err_write_at2 = ac_test_do_access(&at2);
	if (!err_write_at2) {
		printf("%s: write access at2 fail\n", __FUNCTION__);
		return 0;
	}

	err_read_at3 = ac_test_do_access(&at3);
	if (!err_read_at3) {
		printf("%s: read access at3 fail\n", __FUNCTION__);
		return 0;
	}

	/* at4's expected outcome is a fault (read-only upper level, see message). */
	err_write_at4 = ac_test_do_access(&at4);
	if (!err_write_at4) {
		printf("%s: write access at4 should fail\n", __FUNCTION__);
		return 0;
	}

	return 1;
}
1208 
1209 static int ac_test_exec(ac_test_t *at, ac_pt_env_t *pt_env)
1210 {
1211 	int r;
1212 
1213 	if (verbose) {
1214 		ac_test_show(at);
1215 	}
1216 	ac_test_setup_ptes(at);
1217 	r = ac_test_do_access(at);
1218 	return r;
1219 }
1220 
/* Signature shared by the standalone test cases below. */
typedef int (*ac_test_fn)(ac_pt_env_t *pt_env);

/*
 * Standalone cases run after the exhaustive flag permutations; each gets a
 * fresh paging environment and returns nonzero on success.
 */
const ac_test_fn ac_test_cases[] =
{
	corrupt_hugepage_trigger,
	check_pfec_on_prefetch_pte,
	check_large_pte_dirty_for_nowp,
	check_smep_andnot_wp,
	check_toggle_cr0_wp,
	check_effective_sp_permissions,
};
1231 
1232 void ac_test_run(int pt_levels, bool force_emulation)
1233 {
1234 	ac_test_t at;
1235 	ac_pt_env_t pt_env;
1236 	int i, tests, successes;
1237 
1238 	if (force_emulation && !is_fep_available()) {
1239 		report_skip("Forced emulation prefix (FEP) not available\n");
1240 		return;
1241 	}
1242 
1243 	printf("run\n");
1244 	tests = successes = 0;
1245 
1246 	shadow_cr0 = read_cr0();
1247 	shadow_cr4 = read_cr4();
1248 	shadow_cr3 = read_cr3();
1249 	shadow_efer = rdmsr(MSR_EFER);
1250 
1251 	if (cpuid_maxphyaddr() >= 52) {
1252 		invalid_mask |= AC_PDE_BIT51_MASK;
1253 		invalid_mask |= AC_PTE_BIT51_MASK;
1254 	}
1255 	if (cpuid_maxphyaddr() >= 37) {
1256 		invalid_mask |= AC_PDE_BIT36_MASK;
1257 		invalid_mask |= AC_PTE_BIT36_MASK;
1258 	}
1259 
1260 	if (!force_emulation)
1261 		invalid_mask |= AC_FEP_MASK;
1262 
1263 	ac_env_int(&pt_env, pt_levels);
1264 	ac_test_init(&at, 0xffff923400000000ul, &pt_env);
1265 
1266 	if (this_cpu_has(X86_FEATURE_PKU)) {
1267 		set_cr4_pke(1);
1268 		set_cr4_pke(0);
1269 		/* Now PKRU = 0xFFFFFFFF.  */
1270 	} else {
1271 		tests++;
1272 		if (write_cr4_safe(shadow_cr4 | X86_CR4_PKE) == GP_VECTOR) {
1273 			successes++;
1274 			invalid_mask |= AC_PKU_AD_MASK;
1275 			invalid_mask |= AC_PKU_WD_MASK;
1276 			invalid_mask |= AC_PKU_PKEY_MASK;
1277 			invalid_mask |= AC_CPU_CR4_PKE_MASK;
1278 			printf("CR4.PKE not available, disabling PKE tests\n");
1279 		} else {
1280 			printf("Set PKE in CR4 - expect #GP: FAIL!\n");
1281 			set_cr4_pke(0);
1282 		}
1283 	}
1284 
1285 	if (!this_cpu_has(X86_FEATURE_SMEP)) {
1286 		tests++;
1287 		if (set_cr4_smep(&at, 1) == GP_VECTOR) {
1288 			successes++;
1289 			invalid_mask |= AC_CPU_CR4_SMEP_MASK;
1290 			printf("CR4.SMEP not available, disabling SMEP tests\n");
1291 		} else {
1292 			printf("Set SMEP in CR4 - expect #GP: FAIL!\n");
1293 			set_cr4_smep(&at, 0);
1294 		}
1295 	}
1296 
1297 	/* Toggling LA57 in 64-bit mode (guaranteed for this test) is illegal. */
1298 	if (this_cpu_has(X86_FEATURE_LA57)) {
1299 		tests++;
1300 		if (write_cr4_safe(shadow_cr4 ^ X86_CR4_LA57) == GP_VECTOR)
1301 			successes++;
1302 
1303 		/* Force a VM-Exit on KVM, which doesn't intercept LA57 itself. */
1304 		tests++;
1305 		if (write_cr4_safe(shadow_cr4 ^ (X86_CR4_LA57 | X86_CR4_PSE)) == GP_VECTOR)
1306 			successes++;
1307 	}
1308 
1309 	do {
1310 		++tests;
1311 		successes += ac_test_exec(&at, &pt_env);
1312 	} while (ac_test_bump(&at));
1313 
1314 	for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) {
1315 		ac_env_int(&pt_env, pt_levels);
1316 
1317 		++tests;
1318 		successes += ac_test_cases[i](&pt_env);
1319 	}
1320 
1321 	printf("\n%d tests, %d failures\n", tests, tests - successes);
1322 
1323 	report(successes == tests, "%d-level paging tests%s", pt_levels,
1324 	       force_emulation ? " (with forced emulation)" : "");
1325 }
1326