xref: /kvm-unit-tests/x86/vmx.c (revision d74708246bd9a593e03ecca476a5f1ed36e47288)
1 /*
2  * x86/vmx.c : Framework for testing nested virtualization
3  *	This is a framework to test nested VMX for KVM, which
4  * 	started as a project of GSoC 2013. All test cases should
5  *	be located in x86/vmx_tests.c and framework related
6  *	functions should be in this file.
7  *
8  * How to write test cases?
9  *	Add callbacks of test suite in variable "vmx_tests". You can
10  *	write:
11  *		1. init function used for initializing test suite
12  *		2. main function for codes running in L2 guest,
13  *		3. exit_handler to handle vmexit of L2 to L1
14  *		4. syscall handler to handle L2 syscall vmexit
15  *		5. vmenter fail handler to handle direct failure of vmenter
16  *		6. guest_regs is loaded when vmenter and saved when
17  *			vmexit, you can read and set it in exit_handler
18  *	If no special function is needed for a test suite, use
19  *	corresponding basic_* functions as callback. More handlers
20  *	can be added to "vmx_tests", see details of "struct vmx_test"
21  *	and function test_run().
22  *
23  * Currently, vmx test framework only set up one VCPU and one
24  * concurrent guest test environment with same paging for L2 and
25  * L1. For usage of EPT, only 1:1 mapped paging is used from VFN
26  * to PFN.
27  *
28  * Author : Arthur Chunqi Li <yzt356@gmail.com>
29  */
30 
31 #include "libcflat.h"
32 #include "processor.h"
33 #include "alloc_page.h"
34 #include "vm.h"
35 #include "vmalloc.h"
36 #include "desc.h"
37 #include "vmx.h"
38 #include "msr.h"
39 #include "smp.h"
40 #include "apic.h"
41 
/* Region that test_vmclear() passes to VMCLEAR as "the VMXON region". */
u64 *bsp_vmxon_region;
/* VMCS that test_vmclear() uses as its known-valid VMCS region. */
struct vmcs *vmcs_root;
u32 vpid_cnt;
void *guest_stack, *guest_syscall_stack;
u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
/* Register values that print_vmexit_info() dumps as RAX..R15. */
struct regs regs;

/*
 * Test whose callbacks are currently in use; guest_main() and
 * syscall_handler() dispatch through it.
 */
struct vmx_test *current;
50 
/* Capacity of the teardown_steps[] array below. */
#define MAX_TEST_TEARDOWN_STEPS 10

/*
 * One teardown entry: a callback and an opaque data pointer.
 * NOTE(review): presumably @data is handed to @func when the step runs;
 * the code that registers and runs the steps is not in this part of the
 * file - confirm there.
 */
struct test_teardown_step {
	test_teardown_func func;
	void *data;
};

/* Teardown bookkeeping: a counter and the fixed-size array of steps. */
static int teardown_count;
static struct test_teardown_step teardown_steps[MAX_TEST_TEARDOWN_STEPS];
60 
/* Guest body that guest_main() runs when the current test has ->v2 set. */
static test_guest_func v2_guest_main;

u64 hypercall_field;
bool launched;
static int matched;
static int guest_finished;
static int in_guest;

/*
 * VMX capability data.  basic.revision and basic.size are used below when
 * VMCS regions are initialised and copied.
 * NOTE(review): presumably these are filled from the VMX capability MSRs
 * during setup; that code is not in this part of the file - confirm.
 */
union vmx_basic basic;
union vmx_ctrl_msr ctrl_pin_rev;
union vmx_ctrl_msr ctrl_cpu_rev[2];
union vmx_ctrl_msr ctrl_exit_rev;
union vmx_ctrl_msr ctrl_enter_rev;
union vmx_ept_vpid  ept_vpid;

/*
 * Symbols defined outside this C code.  entry_sysenter and guest_entry are
 * the labels of the top-level asm blocks further down in this file.
 */
extern struct descriptor_table_ptr gdt64_desc;
extern struct descriptor_table_ptr idt_descr;
extern struct descriptor_table_ptr tss_descr;
extern void *vmx_return;
extern void *entry_sysenter;
extern void *guest_entry;

/* Test stage counter, accessed through vmx_{set,get,inc}_test_stage(). */
static volatile u32 stage;

static jmp_buf abort_target;
86 
/*
 * One VMCS field exercised by the VMWRITE/VMREAD tests.
 *	@mask     : bits of the field that are written and compared on read-back
 *	@encoding : VMCS field encoding passed to vmcs_write()/vmcs_read_checking()
 */
struct vmcs_field {
	u64 mask;
	u64 encoding;
};

/*
 * MASK(n): mask for an n-bit wide field.
 * NOTE(review): assumes GENMASK_ULL(high, low) sets bits high..low
 * inclusive, as in the usual convention - confirm against its definition.
 */
#define MASK(_bits) GENMASK_ULL((_bits) - 1, 0)
/* Mask as wide as an unsigned long. */
#define MASK_NATURAL MASK(sizeof(unsigned long) * 8)
94 
/*
 * Table of VMCS fields used by set_all_vmcs_fields() and
 * check_all_vmcs_fields().  Each entry is { mask, encoding }: the mask
 * limits both the value that is written (see vmcs_field_value()) and the
 * bits compared after reading the field back (see check_vmcs_field()).
 *
 * Entries are grouped by mask width: 16-bit, 64-bit, 32-bit and
 * natural-width fields.  The GUEST_AR_* entries use explicit, narrower
 * masks (0x1d0ff / 0x1f0ff) instead of MASK(32).
 * NOTE(review): presumably because the remaining access-rights bits are
 * reserved - confirm against the SDM.
 */
static struct vmcs_field vmcs_fields[] = {
	{ MASK(16), VPID },
	{ MASK(16), PINV },
	{ MASK(16), EPTP_IDX },

	{ MASK(16), GUEST_SEL_ES },
	{ MASK(16), GUEST_SEL_CS },
	{ MASK(16), GUEST_SEL_SS },
	{ MASK(16), GUEST_SEL_DS },
	{ MASK(16), GUEST_SEL_FS },
	{ MASK(16), GUEST_SEL_GS },
	{ MASK(16), GUEST_SEL_LDTR },
	{ MASK(16), GUEST_SEL_TR },
	{ MASK(16), GUEST_INT_STATUS },

	{ MASK(16), HOST_SEL_ES },
	{ MASK(16), HOST_SEL_CS },
	{ MASK(16), HOST_SEL_SS },
	{ MASK(16), HOST_SEL_DS },
	{ MASK(16), HOST_SEL_FS },
	{ MASK(16), HOST_SEL_GS },
	{ MASK(16), HOST_SEL_TR },

	{ MASK(64), IO_BITMAP_A },
	{ MASK(64), IO_BITMAP_B },
	{ MASK(64), MSR_BITMAP },
	{ MASK(64), EXIT_MSR_ST_ADDR },
	{ MASK(64), EXIT_MSR_LD_ADDR },
	{ MASK(64), ENTER_MSR_LD_ADDR },
	{ MASK(64), VMCS_EXEC_PTR },
	{ MASK(64), TSC_OFFSET },
	{ MASK(64), APIC_VIRT_ADDR },
	{ MASK(64), APIC_ACCS_ADDR },
	{ MASK(64), EPTP },

	{ MASK(64), INFO_PHYS_ADDR },

	{ MASK(64), VMCS_LINK_PTR },
	{ MASK(64), GUEST_DEBUGCTL },
	{ MASK(64), GUEST_EFER },
	{ MASK(64), GUEST_PAT },
	{ MASK(64), GUEST_PERF_GLOBAL_CTRL },
	{ MASK(64), GUEST_PDPTE },

	{ MASK(64), HOST_PAT },
	{ MASK(64), HOST_EFER },
	{ MASK(64), HOST_PERF_GLOBAL_CTRL },

	{ MASK(32), PIN_CONTROLS },
	{ MASK(32), CPU_EXEC_CTRL0 },
	{ MASK(32), EXC_BITMAP },
	{ MASK(32), PF_ERROR_MASK },
	{ MASK(32), PF_ERROR_MATCH },
	{ MASK(32), CR3_TARGET_COUNT },
	{ MASK(32), EXI_CONTROLS },
	{ MASK(32), EXI_MSR_ST_CNT },
	{ MASK(32), EXI_MSR_LD_CNT },
	{ MASK(32), ENT_CONTROLS },
	{ MASK(32), ENT_MSR_LD_CNT },
	{ MASK(32), ENT_INTR_INFO },
	{ MASK(32), ENT_INTR_ERROR },
	{ MASK(32), ENT_INST_LEN },
	{ MASK(32), TPR_THRESHOLD },
	{ MASK(32), CPU_EXEC_CTRL1 },

	{ MASK(32), VMX_INST_ERROR },
	{ MASK(32), EXI_REASON },
	{ MASK(32), EXI_INTR_INFO },
	{ MASK(32), EXI_INTR_ERROR },
	{ MASK(32), IDT_VECT_INFO },
	{ MASK(32), IDT_VECT_ERROR },
	{ MASK(32), EXI_INST_LEN },
	{ MASK(32), EXI_INST_INFO },

	{ MASK(32), GUEST_LIMIT_ES },
	{ MASK(32), GUEST_LIMIT_CS },
	{ MASK(32), GUEST_LIMIT_SS },
	{ MASK(32), GUEST_LIMIT_DS },
	{ MASK(32), GUEST_LIMIT_FS },
	{ MASK(32), GUEST_LIMIT_GS },
	{ MASK(32), GUEST_LIMIT_LDTR },
	{ MASK(32), GUEST_LIMIT_TR },
	{ MASK(32), GUEST_LIMIT_GDTR },
	{ MASK(32), GUEST_LIMIT_IDTR },
	{ 0x1d0ff, GUEST_AR_ES },
	{ 0x1f0ff, GUEST_AR_CS },
	{ 0x1d0ff, GUEST_AR_SS },
	{ 0x1d0ff, GUEST_AR_DS },
	{ 0x1d0ff, GUEST_AR_FS },
	{ 0x1d0ff, GUEST_AR_GS },
	{ 0x1d0ff, GUEST_AR_LDTR },
	{ 0x1d0ff, GUEST_AR_TR },
	{ MASK(32), GUEST_INTR_STATE },
	{ MASK(32), GUEST_ACTV_STATE },
	{ MASK(32), GUEST_SMBASE },
	{ MASK(32), GUEST_SYSENTER_CS },
	{ MASK(32), PREEMPT_TIMER_VALUE },

	{ MASK(32), HOST_SYSENTER_CS },

	{ MASK_NATURAL, CR0_MASK },
	{ MASK_NATURAL, CR4_MASK },
	{ MASK_NATURAL, CR0_READ_SHADOW },
	{ MASK_NATURAL, CR4_READ_SHADOW },
	{ MASK_NATURAL, CR3_TARGET_0 },
	{ MASK_NATURAL, CR3_TARGET_1 },
	{ MASK_NATURAL, CR3_TARGET_2 },
	{ MASK_NATURAL, CR3_TARGET_3 },

	{ MASK_NATURAL, EXI_QUALIFICATION },
	{ MASK_NATURAL, IO_RCX },
	{ MASK_NATURAL, IO_RSI },
	{ MASK_NATURAL, IO_RDI },
	{ MASK_NATURAL, IO_RIP },
	{ MASK_NATURAL, GUEST_LINEAR_ADDRESS },

	{ MASK_NATURAL, GUEST_CR0 },
	{ MASK_NATURAL, GUEST_CR3 },
	{ MASK_NATURAL, GUEST_CR4 },
	{ MASK_NATURAL, GUEST_BASE_ES },
	{ MASK_NATURAL, GUEST_BASE_CS },
	{ MASK_NATURAL, GUEST_BASE_SS },
	{ MASK_NATURAL, GUEST_BASE_DS },
	{ MASK_NATURAL, GUEST_BASE_FS },
	{ MASK_NATURAL, GUEST_BASE_GS },
	{ MASK_NATURAL, GUEST_BASE_LDTR },
	{ MASK_NATURAL, GUEST_BASE_TR },
	{ MASK_NATURAL, GUEST_BASE_GDTR },
	{ MASK_NATURAL, GUEST_BASE_IDTR },
	{ MASK_NATURAL, GUEST_DR7 },
	{ MASK_NATURAL, GUEST_RSP },
	{ MASK_NATURAL, GUEST_RIP },
	{ MASK_NATURAL, GUEST_RFLAGS },
	{ MASK_NATURAL, GUEST_PENDING_DEBUG },
	{ MASK_NATURAL, GUEST_SYSENTER_ESP },
	{ MASK_NATURAL, GUEST_SYSENTER_EIP },

	{ MASK_NATURAL, HOST_CR0 },
	{ MASK_NATURAL, HOST_CR3 },
	{ MASK_NATURAL, HOST_CR4 },
	{ MASK_NATURAL, HOST_BASE_FS },
	{ MASK_NATURAL, HOST_BASE_GS },
	{ MASK_NATURAL, HOST_BASE_TR },
	{ MASK_NATURAL, HOST_BASE_GDTR },
	{ MASK_NATURAL, HOST_BASE_IDTR },
	{ MASK_NATURAL, HOST_SYSENTER_ESP },
	{ MASK_NATURAL, HOST_SYSENTER_EIP },
	{ MASK_NATURAL, HOST_RSP },
	{ MASK_NATURAL, HOST_RIP },
};
245 
/*
 * Values of the two-bit "type" portion of a VMCS field encoding, as
 * extracted by vmcs_field_type().  VMCS_FIELD_TYPES is the number of
 * distinct types.
 */
enum vmcs_field_type {
	VMCS_FIELD_TYPE_CONTROL = 0,
	VMCS_FIELD_TYPE_READ_ONLY_DATA = 1,
	VMCS_FIELD_TYPE_GUEST = 2,
	VMCS_FIELD_TYPE_HOST = 3,
	VMCS_FIELD_TYPES,
};
253 
254 static inline int vmcs_field_type(struct vmcs_field *f)
255 {
256 	return (f->encoding >> VMCS_FIELD_TYPE_SHIFT) & 0x3;
257 }
258 
259 static int vmcs_field_readonly(struct vmcs_field *f)
260 {
261 	u64 ia32_vmx_misc;
262 
263 	ia32_vmx_misc = rdmsr(MSR_IA32_VMX_MISC);
264 	return !(ia32_vmx_misc & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS) &&
265 		(vmcs_field_type(f) == VMCS_FIELD_TYPE_READ_ONLY_DATA);
266 }
267 
268 static inline u64 vmcs_field_value(struct vmcs_field *f, u8 cookie)
269 {
270 	u64 value;
271 
272 	/* Incorporate the cookie and the field encoding into the value. */
273 	value = cookie;
274 	value |= (f->encoding << 8);
275 	value |= 0xdeadbeefull << 32;
276 
277 	return value & f->mask;
278 }
279 
280 static void set_vmcs_field(struct vmcs_field *f, u8 cookie)
281 {
282 	vmcs_write(f->encoding, vmcs_field_value(f, cookie));
283 }
284 
285 static bool check_vmcs_field(struct vmcs_field *f, u8 cookie)
286 {
287 	u64 expected;
288 	u64 actual;
289 	int ret;
290 
291 	if (f->encoding == VMX_INST_ERROR) {
292 		printf("Skipping volatile field %lx\n", f->encoding);
293 		return true;
294 	}
295 
296 	ret = vmcs_read_checking(f->encoding, &actual);
297 	assert(!(ret & X86_EFLAGS_CF));
298 	/* Skip VMCS fields that aren't recognized by the CPU */
299 	if (ret & X86_EFLAGS_ZF)
300 		return true;
301 
302 	if (vmcs_field_readonly(f)) {
303 		printf("Skipping read-only field %lx\n", f->encoding);
304 		return true;
305 	}
306 
307 	expected = vmcs_field_value(f, cookie);
308 	actual &= f->mask;
309 
310 	if (expected == actual)
311 		return true;
312 
313 	printf("FAIL: VMWRITE/VMREAD %lx (expected: %lx, actual: %lx)\n",
314 	       f->encoding, (unsigned long) expected, (unsigned long) actual);
315 
316 	return false;
317 }
318 
319 static void set_all_vmcs_fields(u8 cookie)
320 {
321 	int i;
322 
323 	for (i = 0; i < ARRAY_SIZE(vmcs_fields); i++)
324 		set_vmcs_field(&vmcs_fields[i], cookie);
325 }
326 
327 static bool check_all_vmcs_fields(u8 cookie)
328 {
329 	bool pass = true;
330 	int i;
331 
332 	for (i = 0; i < ARRAY_SIZE(vmcs_fields); i++) {
333 		if (!check_vmcs_field(&vmcs_fields[i], cookie))
334 			pass = false;
335 	}
336 
337 	return pass;
338 }
339 
340 static u32 find_vmcs_max_index(void)
341 {
342 	u32 idx, width, type, enc;
343 	u64 actual;
344 	int ret;
345 
346 	/* scan backwards and stop when found */
347 	for (idx = (1 << 9) - 1; idx >= 0; idx--) {
348 
349 		/* try all combinations of width and type */
350 		for (type = 0; type < (1 << 2); type++) {
351 			for (width = 0; width < (1 << 2) ; width++) {
352 				enc = (idx << VMCS_FIELD_INDEX_SHIFT) |
353 				      (type << VMCS_FIELD_TYPE_SHIFT) |
354 				      (width << VMCS_FIELD_WIDTH_SHIFT);
355 
356 				ret = vmcs_read_checking(enc, &actual);
357 				assert(!(ret & X86_EFLAGS_CF));
358 				if (!(ret & X86_EFLAGS_ZF))
359 					return idx;
360 			}
361 		}
362 	}
363 	/* some VMCS fields should exist */
364 	assert(0);
365 	return 0;
366 }
367 
368 static void test_vmwrite_vmread(void)
369 {
370 	struct vmcs *vmcs = alloc_page();
371 	u32 vmcs_enum_max, max_index = 0;
372 
373 	vmcs->hdr.revision_id = basic.revision;
374 	assert(!vmcs_clear(vmcs));
375 	assert(!make_vmcs_current(vmcs));
376 
377 	set_all_vmcs_fields(0x42);
378 	report(check_all_vmcs_fields(0x42), "VMWRITE/VMREAD");
379 
380 	vmcs_enum_max = (rdmsr(MSR_IA32_VMX_VMCS_ENUM) & VMCS_FIELD_INDEX_MASK)
381 			>> VMCS_FIELD_INDEX_SHIFT;
382 	max_index = find_vmcs_max_index();
383 	report(vmcs_enum_max == max_index,
384 	       "VMX_VMCS_ENUM.MAX_INDEX expected: %x, actual: %x",
385 	       max_index, vmcs_enum_max);
386 
387 	assert(!vmcs_clear(vmcs));
388 	free_page(vmcs);
389 }
390 
/*
 * State shared between the VMREAD/VMWRITE flag tests and pf_handler():
 *	finish_fault   : address the #PF handler resumes at; the faulting RIP
 *			 must be below it
 *	sentinel       : byte loaded into AH and applied with SAHF before the
 *			 faulting instruction
 *	handler_called : set by pf_handler() so the test can tell it ran
 */
ulong finish_fault;
u8 sentinel;
bool handler_called;
394 
395 static void pf_handler(struct ex_regs *regs)
396 {
397 	/*
398 	 * check that RIP was not improperly advanced and that the
399 	 * flags value was preserved.
400 	 */
401 	report(regs->rip < finish_fault, "RIP has not been advanced!");
402 	report(((u8)regs->rflags == ((sentinel | 2) & 0xd7)),
403 	       "The low byte of RFLAGS was preserved!");
404 	regs->rip = finish_fault;
405 	handler_called = true;
406 
407 }
408 
409 static void prep_flags_test_env(void **vpage, struct vmcs **vmcs, handler *old)
410 {
411 	/*
412 	 * get an unbacked address that will cause a #PF
413 	 */
414 	*vpage = alloc_vpage();
415 
416 	/*
417 	 * set up VMCS so we have something to read from
418 	 */
419 	*vmcs = alloc_page();
420 
421 	memset(*vmcs, 0, PAGE_SIZE);
422 	(*vmcs)->hdr.revision_id = basic.revision;
423 	assert(!vmcs_clear(*vmcs));
424 	assert(!make_vmcs_current(*vmcs));
425 
426 	*old = handle_exception(PF_VECTOR, &pf_handler);
427 }
428 
/*
 * test_read_sentinel : Load the current sentinel into the flags with SAHF
 * and execute a VMREAD whose memory destination is the page obtained from
 * prep_flags_test_env(), which is expected to raise a #PF.  pf_handler()
 * performs the RIP/RFLAGS checks and resumes at finish_read_fault; this
 * function then reports whether the handler ran and restores the previous
 * #PF handler.
 *
 * NOTE(review): the VMCS page allocated by prep_flags_test_env() is not
 * cleared or freed here.
 */
static void test_read_sentinel(void)
{
	void *vpage;
	struct vmcs *vmcs;
	handler old;

	prep_flags_test_env(&vpage, &vmcs, &old);

	/*
	 * set the proper label: pf_handler() resumes at the label that
	 * directly follows the vmread below
	 */
	extern char finish_read_fault;

	finish_fault = (ulong)&finish_read_fault;

	/*
	 * execute the vmread instruction that will cause a #PF
	 * (sentinel -> AH -> flags via sahf, then vmread into *vpage)
	 */
	handler_called = false;
	asm volatile ("movb %[byte], %%ah\n\t"
		      "sahf\n\t"
		      "vmread %[enc], %[val]; finish_read_fault:"
		      : [val] "=m" (*(u64 *)vpage)
		      : [byte] "Krm" (sentinel),
		      [enc] "r" ((u64)GUEST_SEL_SS)
		      : "cc", "ah");
	report(handler_called, "The #PF handler was invoked");

	/*
	 * restore the old #PF handler
	 */
	handle_exception(PF_VECTOR, old);
}
462 
463 static void test_vmread_flags_touch(void)
464 {
465 	/*
466 	 * set up the sentinel value in the flags register. we
467 	 * choose these two values because they candy-stripe
468 	 * the 5 flags that sahf sets.
469 	 */
470 	sentinel = 0x91;
471 	test_read_sentinel();
472 
473 	sentinel = 0x45;
474 	test_read_sentinel();
475 }
476 
/*
 * test_write_sentinel : Load the current sentinel into the flags with SAHF
 * and execute a VMWRITE whose memory operand is the page obtained from
 * prep_flags_test_env(), which is expected to raise a #PF.  pf_handler()
 * performs the RIP/RFLAGS checks and resumes at finish_write_fault; this
 * function then reports whether the handler ran and restores the previous
 * #PF handler.
 *
 * NOTE(review): the VMCS page allocated by prep_flags_test_env() is not
 * cleared or freed here.
 * NOTE(review): [val] is declared as an output ("=m") although it is the
 * operand vmwrite takes its value from; presumably harmless because the
 * access is expected to fault - confirm.
 */
static void test_write_sentinel(void)
{
	void *vpage;
	struct vmcs *vmcs;
	handler old;

	prep_flags_test_env(&vpage, &vmcs, &old);

	/*
	 * set the proper label: pf_handler() resumes at the label that
	 * directly follows the vmwrite below
	 */
	extern char finish_write_fault;

	finish_fault = (ulong)&finish_write_fault;

	/*
	 * execute the vmwrite instruction that will cause a #PF
	 * (sentinel -> AH -> flags via sahf, then vmwrite using *vpage)
	 */
	handler_called = false;
	asm volatile ("movb %[byte], %%ah\n\t"
		      "sahf\n\t"
		      "vmwrite %[val], %[enc]; finish_write_fault:"
		      : [val] "=m" (*(u64 *)vpage)
		      : [byte] "Krm" (sentinel),
		      [enc] "r" ((u64)GUEST_SEL_SS)
		      : "cc", "ah");
	report(handler_called, "The #PF handler was invoked");

	/*
	 * restore the old #PF handler
	 */
	handle_exception(PF_VECTOR, old);
}
510 
511 static void test_vmwrite_flags_touch(void)
512 {
513 	/*
514 	 * set up the sentinel value in the flags register. we
515 	 * choose these two values because they candy-stripe
516 	 * the 5 flags that sahf sets.
517 	 */
518 	sentinel = 0x91;
519 	test_write_sentinel();
520 
521 	sentinel = 0x45;
522 	test_write_sentinel();
523 }
524 
525 
526 static void test_vmcs_high(void)
527 {
528 	struct vmcs *vmcs = alloc_page();
529 
530 	vmcs->hdr.revision_id = basic.revision;
531 	assert(!vmcs_clear(vmcs));
532 	assert(!make_vmcs_current(vmcs));
533 
534 	vmcs_write(TSC_OFFSET, 0x0123456789ABCDEFull);
535 	report(vmcs_read(TSC_OFFSET) == 0x0123456789ABCDEFull,
536 	       "VMREAD TSC_OFFSET after VMWRITE TSC_OFFSET");
537 	report(vmcs_read(TSC_OFFSET_HI) == 0x01234567ull,
538 	       "VMREAD TSC_OFFSET_HI after VMWRITE TSC_OFFSET");
539 	vmcs_write(TSC_OFFSET_HI, 0x76543210ul);
540 	report(vmcs_read(TSC_OFFSET_HI) == 0x76543210ul,
541 	       "VMREAD TSC_OFFSET_HI after VMWRITE TSC_OFFSET_HI");
542 	report(vmcs_read(TSC_OFFSET) == 0x7654321089ABCDEFull,
543 	       "VMREAD TSC_OFFSET after VMWRITE TSC_OFFSET_HI");
544 
545 	assert(!vmcs_clear(vmcs));
546 	free_page(vmcs);
547 }
548 
/*
 * test_vmcs_lifecycle : Walk two VMCS regions through a sequence of
 * VMCLEAR / VMPTRLD operations and verify after each step that the
 * current VMCS still holds the pattern last written to it.  The report
 * names describe the expected state as "current:<vmcs> active:[<list>]".
 *
 * NOTE(review): several report strings spell the second region "VCMS1";
 * they are left untouched because they are test names.
 */
static void test_vmcs_lifecycle(void)
{
	struct vmcs *vmcs[2] = {};
	int i;

	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
		vmcs[i] = alloc_page();
		vmcs[i]->hdr.revision_id = basic.revision;
	}

/* Make vmcs[_i] the current VMCS (must succeed) and log the step. */
#define VMPTRLD(_i) do { \
	assert(_i < ARRAY_SIZE(vmcs)); \
	assert(!make_vmcs_current(vmcs[_i])); \
	printf("VMPTRLD VMCS%d\n", (_i)); \
} while (0)

/* VMCLEAR vmcs[_i] (must succeed) and log the step. */
#define VMCLEAR(_i) do { \
	assert(_i < ARRAY_SIZE(vmcs)); \
	assert(!vmcs_clear(vmcs[_i])); \
	printf("VMCLEAR VMCS%d\n", (_i)); \
} while (0)

	/* Fill VMCS0 with cookie 0 and read it back. */
	VMCLEAR(0);
	VMPTRLD(0);
	set_all_vmcs_fields(0);
	report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]");

	/* The contents must survive a VMCLEAR + VMPTRLD of the same VMCS. */
	VMCLEAR(0);
	VMPTRLD(0);
	report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]");

	/* Clearing the other VMCS must not disturb the current one. */
	VMCLEAR(1);
	report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]");

	/* Fill VMCS1 with cookie 1. */
	VMPTRLD(1);
	set_all_vmcs_fields(1);
	report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VCMS1]");

	/* Switch back and forth; each VMCS must keep its own contents. */
	VMPTRLD(0);
	report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0,VCMS1]");
	VMPTRLD(1);
	report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VCMS1]");
	VMPTRLD(1);
	report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VCMS1]");

	/* Clearing the non-current VMCS0 must leave VMCS1 intact. */
	VMCLEAR(0);
	report(check_all_vmcs_fields(1), "current:VMCS1 active:[VCMS1]");

	/* VMPTRLD should not erase VMWRITEs to the current VMCS */
	set_all_vmcs_fields(2);
	VMPTRLD(1);
	report(check_all_vmcs_fields(2), "current:VMCS1 active:[VCMS1]");

	/* Clear and release both regions. */
	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
		VMCLEAR(i);
		free_page(vmcs[i]);
	}

#undef VMPTRLD
#undef VMCLEAR
}
610 
611 void vmx_set_test_stage(u32 s)
612 {
613 	barrier();
614 	stage = s;
615 	barrier();
616 }
617 
618 u32 vmx_get_test_stage(void)
619 {
620 	u32 s;
621 
622 	barrier();
623 	s = stage;
624 	barrier();
625 	return s;
626 }
627 
628 void vmx_inc_test_stage(void)
629 {
630 	barrier();
631 	stage++;
632 	barrier();
633 }
634 
/*
 * entry_sysenter : Assembly stub that forwards to syscall_handler().
 * It runs SAVE_GPR, keeps the low four bits of RAX as the syscall number,
 * passes that number to syscall_handler() in RDI, runs LOAD_GPR and then
 * executes VMRESUME.
 * NOTE(review): SAVE_GPR/LOAD_GPR are macros defined elsewhere; presumably
 * they store/reload the guest general-purpose registers - confirm.
 */
asm(
	".align	4, 0x90\n\t"
	".globl	entry_sysenter\n\t"
	"entry_sysenter:\n\t"
	SAVE_GPR
	/* syscall number = RAX & 0xf, handed over as the first argument */
	"	and	$0xf, %rax\n\t"
	"	mov	%rax, %rdi\n\t"
	"	call	syscall_handler\n\t"
	LOAD_GPR
	"	vmresume\n\t"
);
647 
648 static void __attribute__((__used__)) syscall_handler(u64 syscall_no)
649 {
650 	if (current->syscall_handler)
651 		current->syscall_handler(syscall_no);
652 }
653 
/*
 * Printable names of the VMX exit reasons, indexed by exit reason number.
 * Indices without an initializer stay NULL; exit_reason_description()
 * reports those as "(unused)".
 */
static const char * const exit_reason_descriptions[] = {
	[VMX_EXC_NMI]		= "VMX_EXC_NMI",
	[VMX_EXTINT]		= "VMX_EXTINT",
	[VMX_TRIPLE_FAULT]	= "VMX_TRIPLE_FAULT",
	[VMX_INIT]		= "VMX_INIT",
	[VMX_SIPI]		= "VMX_SIPI",
	[VMX_SMI_IO]		= "VMX_SMI_IO",
	[VMX_SMI_OTHER]		= "VMX_SMI_OTHER",
	[VMX_INTR_WINDOW]	= "VMX_INTR_WINDOW",
	[VMX_NMI_WINDOW]	= "VMX_NMI_WINDOW",
	[VMX_TASK_SWITCH]	= "VMX_TASK_SWITCH",
	[VMX_CPUID]		= "VMX_CPUID",
	[VMX_GETSEC]		= "VMX_GETSEC",
	[VMX_HLT]		= "VMX_HLT",
	[VMX_INVD]		= "VMX_INVD",
	[VMX_INVLPG]		= "VMX_INVLPG",
	[VMX_RDPMC]		= "VMX_RDPMC",
	[VMX_RDTSC]		= "VMX_RDTSC",
	[VMX_RSM]		= "VMX_RSM",
	[VMX_VMCALL]		= "VMX_VMCALL",
	[VMX_VMCLEAR]		= "VMX_VMCLEAR",
	[VMX_VMLAUNCH]		= "VMX_VMLAUNCH",
	[VMX_VMPTRLD]		= "VMX_VMPTRLD",
	[VMX_VMPTRST]		= "VMX_VMPTRST",
	[VMX_VMREAD]		= "VMX_VMREAD",
	[VMX_VMRESUME]		= "VMX_VMRESUME",
	[VMX_VMWRITE]		= "VMX_VMWRITE",
	[VMX_VMXOFF]		= "VMX_VMXOFF",
	[VMX_VMXON]		= "VMX_VMXON",
	[VMX_CR]		= "VMX_CR",
	[VMX_DR]		= "VMX_DR",
	[VMX_IO]		= "VMX_IO",
	[VMX_RDMSR]		= "VMX_RDMSR",
	[VMX_WRMSR]		= "VMX_WRMSR",
	[VMX_FAIL_STATE]	= "VMX_FAIL_STATE",
	[VMX_FAIL_MSR]		= "VMX_FAIL_MSR",
	[VMX_MWAIT]		= "VMX_MWAIT",
	[VMX_MTF]		= "VMX_MTF",
	[VMX_MONITOR]		= "VMX_MONITOR",
	[VMX_PAUSE]		= "VMX_PAUSE",
	[VMX_FAIL_MCHECK]	= "VMX_FAIL_MCHECK",
	[VMX_TPR_THRESHOLD]	= "VMX_TPR_THRESHOLD",
	[VMX_APIC_ACCESS]	= "VMX_APIC_ACCESS",
	[VMX_EOI_INDUCED]	= "VMX_EOI_INDUCED",
	[VMX_GDTR_IDTR]		= "VMX_GDTR_IDTR",
	[VMX_LDTR_TR]		= "VMX_LDTR_TR",
	[VMX_EPT_VIOLATION]	= "VMX_EPT_VIOLATION",
	[VMX_EPT_MISCONFIG]	= "VMX_EPT_MISCONFIG",
	[VMX_INVEPT]		= "VMX_INVEPT",
	[VMX_PREEMPT]		= "VMX_PREEMPT",
	[VMX_INVVPID]		= "VMX_INVVPID",
	[VMX_WBINVD]		= "VMX_WBINVD",
	[VMX_XSETBV]		= "VMX_XSETBV",
	[VMX_APIC_WRITE]	= "VMX_APIC_WRITE",
	[VMX_RDRAND]		= "VMX_RDRAND",
	[VMX_INVPCID]		= "VMX_INVPCID",
	[VMX_VMFUNC]		= "VMX_VMFUNC",
	[VMX_RDSEED]		= "VMX_RDSEED",
	[VMX_PML_FULL]		= "VMX_PML_FULL",
	[VMX_XSAVES]		= "VMX_XSAVES",
	[VMX_XRSTORS]		= "VMX_XRSTORS",
};
716 
717 const char *exit_reason_description(u64 reason)
718 {
719 	if (reason >= ARRAY_SIZE(exit_reason_descriptions))
720 		return "(unknown)";
721 	return exit_reason_descriptions[reason] ? : "(unused)";
722 }
723 
724 void print_vmexit_info(union exit_reason exit_reason)
725 {
726 	u64 guest_rip, guest_rsp;
727 	ulong exit_qual = vmcs_read(EXI_QUALIFICATION);
728 	guest_rip = vmcs_read(GUEST_RIP);
729 	guest_rsp = vmcs_read(GUEST_RSP);
730 	printf("VMEXIT info:\n");
731 	printf("\tvmexit reason = %u\n", exit_reason.basic);
732 	printf("\tfailed vmentry = %u\n", !!exit_reason.failed_vmentry);
733 	printf("\texit qualification = %#lx\n", exit_qual);
734 	printf("\tguest_rip = %#lx\n", guest_rip);
735 	printf("\tRAX=%#lx    RBX=%#lx    RCX=%#lx    RDX=%#lx\n",
736 		regs.rax, regs.rbx, regs.rcx, regs.rdx);
737 	printf("\tRSP=%#lx    RBP=%#lx    RSI=%#lx    RDI=%#lx\n",
738 		guest_rsp, regs.rbp, regs.rsi, regs.rdi);
739 	printf("\tR8 =%#lx    R9 =%#lx    R10=%#lx    R11=%#lx\n",
740 		regs.r8, regs.r9, regs.r10, regs.r11);
741 	printf("\tR12=%#lx    R13=%#lx    R14=%#lx    R15=%#lx\n",
742 		regs.r12, regs.r13, regs.r14, regs.r15);
743 }
744 
745 void print_vmentry_failure_info(struct vmentry_result *result)
746 {
747 	if (result->entered)
748 		return;
749 
750 	if (result->vm_fail) {
751 		printf("VM-Fail on %s: ", result->instr);
752 		switch (result->flags & VMX_ENTRY_FLAGS) {
753 		case X86_EFLAGS_CF:
754 			printf("current-VMCS pointer is not valid.\n");
755 			break;
756 		case X86_EFLAGS_ZF:
757 			printf("error number is %ld. See Intel 30.4.\n",
758 			       vmcs_read(VMX_INST_ERROR));
759 			break;
760 		default:
761 			printf("unexpected flags %lx!\n", result->flags);
762 		}
763 	} else {
764 		u64 qual = vmcs_read(EXI_QUALIFICATION);
765 
766 		printf("VM-Exit failure on %s (reason=%#x, qual=%#lx): ",
767 			result->instr, result->exit_reason.full, qual);
768 
769 		switch (result->exit_reason.basic) {
770 		case VMX_FAIL_STATE:
771 			printf("invalid guest state\n");
772 			break;
773 		case VMX_FAIL_MSR:
774 			printf("MSR loading\n");
775 			break;
776 		case VMX_FAIL_MCHECK:
777 			printf("machine-check event\n");
778 			break;
779 		default:
780 			printf("unexpected basic exit reason %u\n",
781 			  result->exit_reason.basic);
782 		}
783 
784 		if (!result->exit_reason.failed_vmentry)
785 			printf("\tVMX_ENTRY_FAILURE BIT NOT SET!\n");
786 
787 		if (result->exit_reason.full & 0x7fff0000)
788 			printf("\tRESERVED BITS SET!\n");
789 	}
790 }
791 
792 /*
793  * VMCLEAR should ensures all VMCS state is flushed to the VMCS
794  * region in memory.
795  */
796 static void test_vmclear_flushing(void)
797 {
798 	struct vmcs *vmcs[3] = {};
799 	int i;
800 
801 	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
802 		vmcs[i] = alloc_page();
803 	}
804 
805 	vmcs[0]->hdr.revision_id = basic.revision;
806 	assert(!vmcs_clear(vmcs[0]));
807 	assert(!make_vmcs_current(vmcs[0]));
808 	set_all_vmcs_fields(0x86);
809 
810 	assert(!vmcs_clear(vmcs[0]));
811 	memcpy(vmcs[1], vmcs[0], basic.size);
812 	assert(!make_vmcs_current(vmcs[1]));
813 	report(check_all_vmcs_fields(0x86),
814 	       "test vmclear flush (current VMCS)");
815 
816 	set_all_vmcs_fields(0x87);
817 	assert(!make_vmcs_current(vmcs[0]));
818 	assert(!vmcs_clear(vmcs[1]));
819 	memcpy(vmcs[2], vmcs[1], basic.size);
820 	assert(!make_vmcs_current(vmcs[2]));
821 	report(check_all_vmcs_fields(0x87),
822 	       "test vmclear flush (!current VMCS)");
823 
824 	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
825 		assert(!vmcs_clear(vmcs[i]));
826 		free_page(vmcs[i]);
827 	}
828 }
829 
830 static void test_vmclear(void)
831 {
832 	struct vmcs *tmp_root;
833 	int width = cpuid_maxphyaddr();
834 
835 	/*
836 	 * Note- The tests below do not necessarily have a
837 	 * valid VMCS, but that's ok since the invalid vmcs
838 	 * is only used for a specific test and is discarded
839 	 * without touching its contents
840 	 */
841 
842 	/* Unaligned page access */
843 	tmp_root = (struct vmcs *)((intptr_t)vmcs_root + 1);
844 	report(vmcs_clear(tmp_root) == 1, "test vmclear with unaligned vmcs");
845 
846 	/* gpa bits beyond physical address width are set*/
847 	tmp_root = (struct vmcs *)((intptr_t)vmcs_root |
848 				   ((u64)1 << (width+1)));
849 	report(vmcs_clear(tmp_root) == 1,
850 	       "test vmclear with vmcs address bits set beyond physical address width");
851 
852 	/* Pass VMXON region */
853 	tmp_root = (struct vmcs *)bsp_vmxon_region;
854 	report(vmcs_clear(tmp_root) == 1, "test vmclear with vmxon region");
855 
856 	/* Valid VMCS */
857 	report(vmcs_clear(vmcs_root) == 0,
858 	       "test vmclear with valid vmcs region");
859 
860 	test_vmclear_flushing();
861 }
862 
863 static void __attribute__((__used__)) guest_main(void)
864 {
865 	if (current->v2)
866 		v2_guest_main();
867 	else
868 		current->guest_main();
869 }
870 
871 /* guest_entry */
872 asm(
873 	".align	4, 0x90\n\t"
874 	".globl	entry_guest\n\t"
875 	"guest_entry:\n\t"
876 	"	call guest_main\n\t"
877 	"	mov $1, %edi\n\t"
878 	"	call hypercall\n\t"
879 );
880 
881 /* EPT paging structure related functions */
882 /* split_large_ept_entry: Split a 2M/1G large page into 512 smaller PTEs.
883 		@ptep : large page table entry to split
884 		@level : level of ptep (2 or 3)
885  */
886 static void split_large_ept_entry(unsigned long *ptep, int level)
887 {
888 	unsigned long *new_pt;
889 	unsigned long gpa;
890 	unsigned long pte;
891 	unsigned long prototype;
892 	int i;
893 
894 	pte = *ptep;
895 	assert(pte & EPT_PRESENT);
896 	assert(pte & EPT_LARGE_PAGE);
897 	assert(level == 2 || level == 3);
898 
899 	new_pt = alloc_page();
900 	assert(new_pt);
901 
902 	prototype = pte & ~EPT_ADDR_MASK;
903 	if (level == 2)
904 		prototype &= ~EPT_LARGE_PAGE;
905 
906 	gpa = pte & EPT_ADDR_MASK;
907 	for (i = 0; i < EPT_PGDIR_ENTRIES; i++) {
908 		new_pt[i] = prototype | gpa;
909 		gpa += 1ul << EPT_LEVEL_SHIFT(level - 1);
910 	}
911 
912 	pte &= ~EPT_LARGE_PAGE;
913 	pte &= ~EPT_ADDR_MASK;
914 	pte |= virt_to_phys(new_pt);
915 
916 	*ptep = pte;
917 }
918 
919 /* install_ept_entry : Install a page to a given level in EPT
920 		@pml4 : addr of pml4 table
921 		@pte_level : level of PTE to set
922 		@guest_addr : physical address of guest
923 		@pte : pte value to set
924 		@pt_page : address of page table, NULL for a new page
925  */
926 void install_ept_entry(unsigned long *pml4,
927 		int pte_level,
928 		unsigned long guest_addr,
929 		unsigned long pte,
930 		unsigned long *pt_page)
931 {
932 	int level;
933 	unsigned long *pt = pml4;
934 	unsigned offset;
935 
936 	/* EPT only uses 48 bits of GPA. */
937 	assert(guest_addr < (1ul << 48));
938 
939 	for (level = EPT_PAGE_LEVEL; level > pte_level; --level) {
940 		offset = (guest_addr >> EPT_LEVEL_SHIFT(level))
941 				& EPT_PGDIR_MASK;
942 		if (!(pt[offset] & (EPT_PRESENT))) {
943 			unsigned long *new_pt = pt_page;
944 			if (!new_pt)
945 				new_pt = alloc_page();
946 			else
947 				pt_page = 0;
948 			memset(new_pt, 0, PAGE_SIZE);
949 			pt[offset] = virt_to_phys(new_pt)
950 					| EPT_RA | EPT_WA | EPT_EA;
951 		} else if (pt[offset] & EPT_LARGE_PAGE)
952 			split_large_ept_entry(&pt[offset], level);
953 		pt = phys_to_virt(pt[offset] & EPT_ADDR_MASK);
954 	}
955 	offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) & EPT_PGDIR_MASK;
956 	pt[offset] = pte;
957 }
958 
959 /* Map a page, @perm is the permission of the page */
960 void install_ept(unsigned long *pml4,
961 		unsigned long phys,
962 		unsigned long guest_addr,
963 		u64 perm)
964 {
965 	install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0);
966 }
967 
968 /* Map a 1G-size page */
969 void install_1g_ept(unsigned long *pml4,
970 		unsigned long phys,
971 		unsigned long guest_addr,
972 		u64 perm)
973 {
974 	install_ept_entry(pml4, 3, guest_addr,
975 			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
976 }
977 
978 /* Map a 2M-size page */
979 void install_2m_ept(unsigned long *pml4,
980 		unsigned long phys,
981 		unsigned long guest_addr,
982 		u64 perm)
983 {
984 	install_ept_entry(pml4, 2, guest_addr,
985 			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
986 }
987 
988 /* setup_ept_range : Setup a range of 1:1 mapped page to EPT paging structure.
989 		@start : start address of guest page
990 		@len : length of address to be mapped
991 		@map_1g : whether 1G page map is used
992 		@map_2m : whether 2M page map is used
993 		@perm : permission for every page
994  */
995 void setup_ept_range(unsigned long *pml4, unsigned long start,
996 		     unsigned long len, int map_1g, int map_2m, u64 perm)
997 {
998 	u64 phys = start;
999 	u64 max = (u64)len + (u64)start;
1000 
1001 	if (map_1g) {
1002 		while (phys + PAGE_SIZE_1G <= max) {
1003 			install_1g_ept(pml4, phys, phys, perm);
1004 			phys += PAGE_SIZE_1G;
1005 		}
1006 	}
1007 	if (map_2m) {
1008 		while (phys + PAGE_SIZE_2M <= max) {
1009 			install_2m_ept(pml4, phys, phys, perm);
1010 			phys += PAGE_SIZE_2M;
1011 		}
1012 	}
1013 	while (phys + PAGE_SIZE <= max) {
1014 		install_ept(pml4, phys, phys, perm);
1015 		phys += PAGE_SIZE;
1016 	}
1017 }
1018 
1019 /* get_ept_pte : Get the PTE of a given level in EPT,
1020     @level == 1 means get the latest level*/
1021 bool get_ept_pte(unsigned long *pml4, unsigned long guest_addr, int level,
1022 		unsigned long *pte)
1023 {
1024 	int l;
1025 	unsigned long *pt = pml4, iter_pte;
1026 	unsigned offset;
1027 
1028 	assert(level >= 1 && level <= 4);
1029 
1030 	for (l = EPT_PAGE_LEVEL; ; --l) {
1031 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
1032 		iter_pte = pt[offset];
1033 		if (l == level)
1034 			break;
1035 		if (l < 4 && (iter_pte & EPT_LARGE_PAGE))
1036 			return false;
1037 		if (!(iter_pte & (EPT_PRESENT)))
1038 			return false;
1039 		pt = (unsigned long *)(iter_pte & EPT_ADDR_MASK);
1040 	}
1041 	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
1042 	if (pte)
1043 		*pte = pt[offset];
1044 	return true;
1045 }
1046 
1047 static void clear_ept_ad_pte(unsigned long *pml4, unsigned long guest_addr)
1048 {
1049 	int l;
1050 	unsigned long *pt = pml4;
1051 	u64 pte;
1052 	unsigned offset;
1053 
1054 	for (l = EPT_PAGE_LEVEL; ; --l) {
1055 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
1056 		pt[offset] &= ~(EPT_ACCESS_FLAG|EPT_DIRTY_FLAG);
1057 		pte = pt[offset];
1058 		if (l == 1 || (l < 4 && (pte & EPT_LARGE_PAGE)))
1059 			break;
1060 		pt = (unsigned long *)(pte & EPT_ADDR_MASK);
1061 	}
1062 }
1063 
1064 /* clear_ept_ad : Clear EPT A/D bits for the page table walk and the
1065    final GPA of a guest address.  */
1066 void clear_ept_ad(unsigned long *pml4, u64 guest_cr3,
1067 		  unsigned long guest_addr)
1068 {
1069 	int l;
1070 	unsigned long *pt = (unsigned long *)guest_cr3, gpa;
1071 	u64 pte, offset_in_page;
1072 	unsigned offset;
1073 
1074 	for (l = EPT_PAGE_LEVEL; ; --l) {
1075 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
1076 
1077 		clear_ept_ad_pte(pml4, (u64) &pt[offset]);
1078 		pte = pt[offset];
1079 		if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK)))
1080 			break;
1081 		if (!(pte & PT_PRESENT_MASK))
1082 			return;
1083 		pt = (unsigned long *)(pte & PT_ADDR_MASK);
1084 	}
1085 
1086 	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
1087 	offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1);
1088 	gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page);
1089 	clear_ept_ad_pte(pml4, gpa);
1090 }
1091 
1092 /* check_ept_ad : Check the content of EPT A/D bits for the page table
1093    walk and the final GPA of a guest address.  */
1094 void check_ept_ad(unsigned long *pml4, u64 guest_cr3,
1095 		  unsigned long guest_addr, int expected_gpa_ad,
1096 		  int expected_pt_ad)
1097 {
1098 	int l;
1099 	unsigned long *pt = (unsigned long *)guest_cr3, gpa;
1100 	u64 ept_pte, pte, offset_in_page;
1101 	unsigned offset;
1102 	bool bad_pt_ad = false;
1103 
1104 	for (l = EPT_PAGE_LEVEL; ; --l) {
1105 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
1106 
1107 		if (!get_ept_pte(pml4, (u64) &pt[offset], 1, &ept_pte)) {
1108 			printf("EPT - guest level %d page table is not mapped.\n", l);
1109 			return;
1110 		}
1111 
1112 		if (!bad_pt_ad) {
1113 			bad_pt_ad |= (ept_pte & (EPT_ACCESS_FLAG|EPT_DIRTY_FLAG)) != expected_pt_ad;
1114 			if (bad_pt_ad)
1115 				report(false,
1116 				       "EPT - guest level %d page table A=%d/D=%d",
1117 				       l,
1118 				       !!(expected_pt_ad & EPT_ACCESS_FLAG),
1119 				       !!(expected_pt_ad & EPT_DIRTY_FLAG));
1120 		}
1121 
1122 		pte = pt[offset];
1123 		if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK)))
1124 			break;
1125 		if (!(pte & PT_PRESENT_MASK))
1126 			return;
1127 		pt = (unsigned long *)(pte & PT_ADDR_MASK);
1128 	}
1129 
1130 	if (!bad_pt_ad)
1131 		report(true, "EPT - guest page table structures A=%d/D=%d",
1132 		       !!(expected_pt_ad & EPT_ACCESS_FLAG),
1133 		       !!(expected_pt_ad & EPT_DIRTY_FLAG));
1134 
1135 	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
1136 	offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1);
1137 	gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page);
1138 
1139 	if (!get_ept_pte(pml4, gpa, 1, &ept_pte)) {
1140 		report(false, "EPT - guest physical address is not mapped");
1141 		return;
1142 	}
1143 	report((ept_pte & (EPT_ACCESS_FLAG | EPT_DIRTY_FLAG)) == expected_gpa_ad,
1144 	       "EPT - guest physical address A=%d/D=%d",
1145 	       !!(expected_gpa_ad & EPT_ACCESS_FLAG),
1146 	       !!(expected_gpa_ad & EPT_DIRTY_FLAG));
1147 }
1148 
1149 
1150 void ept_sync(int type, u64 eptp)
1151 {
1152 	switch (type) {
1153 	case INVEPT_SINGLE:
1154 		if (ept_vpid.val & EPT_CAP_INVEPT_SINGLE) {
1155 			invept(INVEPT_SINGLE, eptp);
1156 			break;
1157 		}
1158 		/* else fall through */
1159 	case INVEPT_GLOBAL:
1160 		if (ept_vpid.val & EPT_CAP_INVEPT_ALL) {
1161 			invept(INVEPT_GLOBAL, eptp);
1162 			break;
1163 		}
1164 		/* else fall through */
1165 	default:
1166 		printf("WARNING: invept is not supported!\n");
1167 	}
1168 }
1169 
1170 void set_ept_pte(unsigned long *pml4, unsigned long guest_addr,
1171 		 int level, u64 pte_val)
1172 {
1173 	int l;
1174 	unsigned long *pt = pml4;
1175 	unsigned offset;
1176 
1177 	assert(level >= 1 && level <= 4);
1178 
1179 	for (l = EPT_PAGE_LEVEL; ; --l) {
1180 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
1181 		if (l == level)
1182 			break;
1183 		assert(pt[offset] & EPT_PRESENT);
1184 		pt = (unsigned long *)(pt[offset] & EPT_ADDR_MASK);
1185 	}
1186 	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
1187 	pt[offset] = pte_val;
1188 }
1189 
1190 bool ept_2m_supported(void)
1191 {
1192 	return ept_vpid.val & EPT_CAP_2M_PAGE;
1193 }
1194 
1195 bool ept_1g_supported(void)
1196 {
1197 	return ept_vpid.val & EPT_CAP_1G_PAGE;
1198 }
1199 
/*
 * Returns whether large pages are supported at EPT @level: level 2
 * maps to the 2M capability, level 3 to the 1G capability, and any
 * other level yields false.
 */
bool ept_huge_pages_supported(int level)
{
	switch (level) {
	case 2:
		return ept_2m_supported();
	case 3:
		return ept_1g_supported();
	default:
		return false;
	}
}
1209 
1210 bool ept_execute_only_supported(void)
1211 {
1212 	return ept_vpid.val & EPT_CAP_WT;
1213 }
1214 
1215 bool ept_ad_bits_supported(void)
1216 {
1217 	return ept_vpid.val & EPT_CAP_AD_FLAG;
1218 }
1219 
1220 void vpid_sync(int type, u16 vpid)
1221 {
1222 	switch(type) {
1223 	case INVVPID_CONTEXT_GLOBAL:
1224 		if (ept_vpid.val & VPID_CAP_INVVPID_CXTGLB) {
1225 			invvpid(INVVPID_CONTEXT_GLOBAL, vpid, 0);
1226 			break;
1227 		}
1228 	case INVVPID_ALL:
1229 		if (ept_vpid.val & VPID_CAP_INVVPID_ALL) {
1230 			invvpid(INVVPID_ALL, vpid, 0);
1231 			break;
1232 		}
1233 	default:
1234 		printf("WARNING: invvpid is not supported\n");
1235 	}
1236 }
1237 
1238 static void init_vmcs_ctrl(void)
1239 {
1240 	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
1241 	/* 26.2.1.1 */
1242 	vmcs_write(PIN_CONTROLS, ctrl_pin);
1243 	/* Disable VMEXIT of IO instruction */
1244 	vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
1245 	if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
1246 		ctrl_cpu[1] = (ctrl_cpu[1] | ctrl_cpu_rev[1].set) &
1247 			ctrl_cpu_rev[1].clr;
1248 		vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
1249 	}
1250 	vmcs_write(CR3_TARGET_COUNT, 0);
1251 	vmcs_write(VPID, ++vpid_cnt);
1252 }
1253 
1254 static void init_vmcs_host(void)
1255 {
1256 	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
1257 	/* 26.2.1.2 */
1258 	vmcs_write(HOST_EFER, rdmsr(MSR_EFER));
1259 
1260 	/* 26.2.1.3 */
1261 	vmcs_write(ENT_CONTROLS, ctrl_enter);
1262 	vmcs_write(EXI_CONTROLS, ctrl_exit);
1263 
1264 	/* 26.2.2 */
1265 	vmcs_write(HOST_CR0, read_cr0());
1266 	vmcs_write(HOST_CR3, read_cr3());
1267 	vmcs_write(HOST_CR4, read_cr4());
1268 	vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
1269 	vmcs_write(HOST_SYSENTER_CS,  KERNEL_CS);
1270 
1271 	/* 26.2.3 */
1272 	vmcs_write(HOST_SEL_CS, KERNEL_CS);
1273 	vmcs_write(HOST_SEL_SS, KERNEL_DS);
1274 	vmcs_write(HOST_SEL_DS, KERNEL_DS);
1275 	vmcs_write(HOST_SEL_ES, KERNEL_DS);
1276 	vmcs_write(HOST_SEL_FS, KERNEL_DS);
1277 	vmcs_write(HOST_SEL_GS, KERNEL_DS);
1278 	vmcs_write(HOST_SEL_TR, TSS_MAIN);
1279 	vmcs_write(HOST_BASE_TR, tss_descr.base);
1280 	vmcs_write(HOST_BASE_GDTR, gdt64_desc.base);
1281 	vmcs_write(HOST_BASE_IDTR, idt_descr.base);
1282 	vmcs_write(HOST_BASE_FS, 0);
1283 	vmcs_write(HOST_BASE_GS, 0);
1284 
1285 	/* Set other vmcs area */
1286 	vmcs_write(PF_ERROR_MASK, 0);
1287 	vmcs_write(PF_ERROR_MATCH, 0);
1288 	vmcs_write(VMCS_LINK_PTR, ~0ul);
1289 	vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
1290 	vmcs_write(HOST_RIP, (u64)(&vmx_return));
1291 }
1292 
1293 static void init_vmcs_guest(void)
1294 {
1295 	/* 26.3 CHECKING AND LOADING GUEST STATE */
1296 	ulong guest_cr0, guest_cr4, guest_cr3;
1297 	/* 26.3.1.1 */
1298 	guest_cr0 = read_cr0();
1299 	guest_cr4 = read_cr4();
1300 	guest_cr3 = read_cr3();
1301 	if (ctrl_enter & ENT_GUEST_64) {
1302 		guest_cr0 |= X86_CR0_PG;
1303 		guest_cr4 |= X86_CR4_PAE;
1304 	}
1305 	if ((ctrl_enter & ENT_GUEST_64) == 0)
1306 		guest_cr4 &= (~X86_CR4_PCIDE);
1307 	if (guest_cr0 & X86_CR0_PG)
1308 		guest_cr0 |= X86_CR0_PE;
1309 	vmcs_write(GUEST_CR0, guest_cr0);
1310 	vmcs_write(GUEST_CR3, guest_cr3);
1311 	vmcs_write(GUEST_CR4, guest_cr4);
1312 	vmcs_write(GUEST_SYSENTER_CS,  KERNEL_CS);
1313 	vmcs_write(GUEST_SYSENTER_ESP,
1314 		(u64)(guest_syscall_stack + PAGE_SIZE - 1));
1315 	vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
1316 	vmcs_write(GUEST_DR7, 0);
1317 	vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));
1318 
1319 	/* 26.3.1.2 */
1320 	vmcs_write(GUEST_SEL_CS, KERNEL_CS);
1321 	vmcs_write(GUEST_SEL_SS, KERNEL_DS);
1322 	vmcs_write(GUEST_SEL_DS, KERNEL_DS);
1323 	vmcs_write(GUEST_SEL_ES, KERNEL_DS);
1324 	vmcs_write(GUEST_SEL_FS, KERNEL_DS);
1325 	vmcs_write(GUEST_SEL_GS, KERNEL_DS);
1326 	vmcs_write(GUEST_SEL_TR, TSS_MAIN);
1327 	vmcs_write(GUEST_SEL_LDTR, 0);
1328 
1329 	vmcs_write(GUEST_BASE_CS, 0);
1330 	vmcs_write(GUEST_BASE_ES, 0);
1331 	vmcs_write(GUEST_BASE_SS, 0);
1332 	vmcs_write(GUEST_BASE_DS, 0);
1333 	vmcs_write(GUEST_BASE_FS, 0);
1334 	vmcs_write(GUEST_BASE_GS, 0);
1335 	vmcs_write(GUEST_BASE_TR, tss_descr.base);
1336 	vmcs_write(GUEST_BASE_LDTR, 0);
1337 
1338 	vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
1339 	vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
1340 	vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
1341 	vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
1342 	vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
1343 	vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
1344 	vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
1345 	vmcs_write(GUEST_LIMIT_TR, tss_descr.limit);
1346 
1347 	vmcs_write(GUEST_AR_CS, 0xa09b);
1348 	vmcs_write(GUEST_AR_DS, 0xc093);
1349 	vmcs_write(GUEST_AR_ES, 0xc093);
1350 	vmcs_write(GUEST_AR_FS, 0xc093);
1351 	vmcs_write(GUEST_AR_GS, 0xc093);
1352 	vmcs_write(GUEST_AR_SS, 0xc093);
1353 	vmcs_write(GUEST_AR_LDTR, 0x82);
1354 	vmcs_write(GUEST_AR_TR, 0x8b);
1355 
1356 	/* 26.3.1.3 */
1357 	vmcs_write(GUEST_BASE_GDTR, gdt64_desc.base);
1358 	vmcs_write(GUEST_BASE_IDTR, idt_descr.base);
1359 	vmcs_write(GUEST_LIMIT_GDTR, gdt64_desc.limit);
1360 	vmcs_write(GUEST_LIMIT_IDTR, idt_descr.limit);
1361 
1362 	/* 26.3.1.4 */
1363 	vmcs_write(GUEST_RIP, (u64)(&guest_entry));
1364 	vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1));
1365 	vmcs_write(GUEST_RFLAGS, X86_EFLAGS_FIXED);
1366 
1367 	/* 26.3.1.5 */
1368 	vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE);
1369 	vmcs_write(GUEST_INTR_STATE, 0);
1370 }
1371 
1372 int init_vmcs(struct vmcs **vmcs)
1373 {
1374 	*vmcs = alloc_page();
1375 	(*vmcs)->hdr.revision_id = basic.revision;
1376 	/* vmclear first to init vmcs */
1377 	if (vmcs_clear(*vmcs)) {
1378 		printf("%s : vmcs_clear error\n", __func__);
1379 		return 1;
1380 	}
1381 
1382 	if (make_vmcs_current(*vmcs)) {
1383 		printf("%s : make_vmcs_current error\n", __func__);
1384 		return 1;
1385 	}
1386 
1387 	/* All settings to pin/exit/enter/cpu
1388 	   control fields should be placed here */
1389 	ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
1390 	ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
1391 	ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
1392 	/* DIsable IO instruction VMEXIT now */
1393 	ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
1394 	ctrl_cpu[1] = 0;
1395 
1396 	ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
1397 	ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
1398 	ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
1399 	ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;
1400 
1401 	init_vmcs_ctrl();
1402 	init_vmcs_host();
1403 	init_vmcs_guest();
1404 	return 0;
1405 }
1406 
1407 void enable_vmx(void)
1408 {
1409 	bool vmx_enabled =
1410 		rdmsr(MSR_IA32_FEATURE_CONTROL) &
1411 		FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
1412 
1413 	if (!vmx_enabled) {
1414 		wrmsr(MSR_IA32_FEATURE_CONTROL,
1415 				FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX |
1416 				FEATURE_CONTROL_LOCKED);
1417 	}
1418 }
1419 
1420 static void init_vmx_caps(void)
1421 {
1422 	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
1423 	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
1424 			: MSR_IA32_VMX_PINBASED_CTLS);
1425 	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
1426 			: MSR_IA32_VMX_EXIT_CTLS);
1427 	ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
1428 			: MSR_IA32_VMX_ENTRY_CTLS);
1429 	ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
1430 			: MSR_IA32_VMX_PROCBASED_CTLS);
1431 	if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0)
1432 		ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
1433 	else
1434 		ctrl_cpu_rev[1].val = 0;
1435 	if ((ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID)) != 0)
1436 		ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
1437 	else
1438 		ept_vpid.val = 0;
1439 }
1440 
1441 void init_vmx(u64 *vmxon_region)
1442 {
1443 	ulong fix_cr0_set, fix_cr0_clr;
1444 	ulong fix_cr4_set, fix_cr4_clr;
1445 
1446 	fix_cr0_set =  rdmsr(MSR_IA32_VMX_CR0_FIXED0);
1447 	fix_cr0_clr =  rdmsr(MSR_IA32_VMX_CR0_FIXED1);
1448 	fix_cr4_set =  rdmsr(MSR_IA32_VMX_CR4_FIXED0);
1449 	fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
1450 
1451 	write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
1452 	write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);
1453 
1454 	*vmxon_region = basic.revision;
1455 }
1456 
1457 static void alloc_bsp_vmx_pages(void)
1458 {
1459 	bsp_vmxon_region = alloc_page();
1460 	guest_stack = alloc_page();
1461 	guest_syscall_stack = alloc_page();
1462 	vmcs_root = alloc_page();
1463 }
1464 
1465 static void init_bsp_vmx(void)
1466 {
1467 	init_vmx_caps();
1468 	alloc_bsp_vmx_pages();
1469 	init_vmx(bsp_vmxon_region);
1470 }
1471 
/*
 * Exception-test callback: execute vmx_on() immediately followed by
 * vmx_off().  @data is unused.
 */
static void do_vmxon_off(void *data)
{
	vmx_on();

	vmx_off();
}
1477 
1478 static void do_write_feature_control(void *data)
1479 {
1480 	wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
1481 }
1482 
1483 static int test_vmx_feature_control(void)
1484 {
1485 	u64 ia32_feature_control;
1486 	bool vmx_enabled;
1487 	bool feature_control_locked;
1488 
1489 	ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
1490 	vmx_enabled =
1491 		ia32_feature_control & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
1492 	feature_control_locked =
1493 		ia32_feature_control & FEATURE_CONTROL_LOCKED;
1494 
1495 	if (vmx_enabled && feature_control_locked) {
1496 		printf("VMX enabled and locked by BIOS\n");
1497 		return 0;
1498 	} else if (feature_control_locked) {
1499 		printf("ERROR: VMX locked out by BIOS!?\n");
1500 		return 1;
1501 	}
1502 
1503 	wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
1504 	report(test_for_exception(GP_VECTOR, &do_vmxon_off, NULL),
1505 	       "test vmxon with FEATURE_CONTROL cleared");
1506 
1507 	wrmsr(MSR_IA32_FEATURE_CONTROL, FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX);
1508 	report(test_for_exception(GP_VECTOR, &do_vmxon_off, NULL),
1509 	       "test vmxon without FEATURE_CONTROL lock");
1510 
1511 	wrmsr(MSR_IA32_FEATURE_CONTROL,
1512 		  FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX |
1513 		  FEATURE_CONTROL_LOCKED);
1514 
1515 	ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
1516 	vmx_enabled =
1517 		ia32_feature_control & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
1518 	report(vmx_enabled, "test enable VMX in FEATURE_CONTROL");
1519 
1520 	report(test_for_exception(GP_VECTOR, &do_write_feature_control, NULL),
1521 	       "test FEATURE_CONTROL lock bit");
1522 
1523 	return !vmx_enabled;
1524 }
1525 
1526 static int test_vmxon(void)
1527 {
1528 	int ret, ret1;
1529 	u64 *vmxon_region;
1530 	int width = cpuid_maxphyaddr();
1531 
1532 	/* Unaligned page access */
1533 	vmxon_region = (u64 *)((intptr_t)bsp_vmxon_region + 1);
1534 	ret1 = _vmx_on(vmxon_region);
1535 	report(ret1, "test vmxon with unaligned vmxon region");
1536 	if (!ret1) {
1537 		ret = 1;
1538 		goto out;
1539 	}
1540 
1541 	/* gpa bits beyond physical address width are set*/
1542 	vmxon_region = (u64 *)((intptr_t)bsp_vmxon_region | ((u64)1 << (width+1)));
1543 	ret1 = _vmx_on(vmxon_region);
1544 	report(ret1, "test vmxon with bits set beyond physical address width");
1545 	if (!ret1) {
1546 		ret = 1;
1547 		goto out;
1548 	}
1549 
1550 	/* invalid revision indentifier */
1551 	*bsp_vmxon_region = 0xba9da9;
1552 	ret1 = vmx_on();
1553 	report(ret1, "test vmxon with invalid revision identifier");
1554 	if (!ret1) {
1555 		ret = 1;
1556 		goto out;
1557 	}
1558 
1559 	/* and finally a valid region */
1560 	*bsp_vmxon_region = basic.revision;
1561 	ret = vmx_on();
1562 	report(!ret, "test vmxon with valid vmxon region");
1563 
1564 out:
1565 	return ret;
1566 }
1567 
1568 static void test_vmptrld(void)
1569 {
1570 	struct vmcs *vmcs, *tmp_root;
1571 	int width = cpuid_maxphyaddr();
1572 
1573 	vmcs = alloc_page();
1574 	vmcs->hdr.revision_id = basic.revision;
1575 
1576 	/* Unaligned page access */
1577 	tmp_root = (struct vmcs *)((intptr_t)vmcs + 1);
1578 	report(make_vmcs_current(tmp_root) == 1,
1579 	       "test vmptrld with unaligned vmcs");
1580 
1581 	/* gpa bits beyond physical address width are set*/
1582 	tmp_root = (struct vmcs *)((intptr_t)vmcs |
1583 				   ((u64)1 << (width+1)));
1584 	report(make_vmcs_current(tmp_root) == 1,
1585 	       "test vmptrld with vmcs address bits set beyond physical address width");
1586 
1587 	/* Pass VMXON region */
1588 	assert(!vmcs_clear(vmcs));
1589 	assert(!make_vmcs_current(vmcs));
1590 	tmp_root = (struct vmcs *)bsp_vmxon_region;
1591 	report(make_vmcs_current(tmp_root) == 1,
1592 	       "test vmptrld with vmxon region");
1593 	report(vmcs_read(VMX_INST_ERROR) == VMXERR_VMPTRLD_VMXON_POINTER,
1594 	       "test vmptrld with vmxon region vm-instruction error");
1595 
1596 	report(make_vmcs_current(vmcs) == 0,
1597 	       "test vmptrld with valid vmcs region");
1598 }
1599 
/*
 * Check that VMPTRST (vmcs_save) returns the pointer of the VMCS that
 * init_vmcs() just made current.
 */
static void test_vmptrst(void)
{
	struct vmcs *vmcs1 = NULL, *vmcs2 = NULL;
	int ret;

	/*
	 * init_vmcs() allocates the VMCS page itself and stores it in
	 * vmcs1; allocating one here first would only leak that page.
	 */
	if (init_vmcs(&vmcs1)) {
		report(false, "test vmptrst");
		return;
	}

	ret = vmcs_save(&vmcs2);
	report((!ret) && (vmcs1 == vmcs2), "test vmptrst");
}
1610 
1611 struct vmx_ctl_msr {
1612 	const char *name;
1613 	u32 index, true_index;
1614 	u32 default1;
1615 } vmx_ctl_msr[] = {
1616 	{ "MSR_IA32_VMX_PINBASED_CTLS", MSR_IA32_VMX_PINBASED_CTLS,
1617 	  MSR_IA32_VMX_TRUE_PIN, 0x16 },
1618 	{ "MSR_IA32_VMX_PROCBASED_CTLS", MSR_IA32_VMX_PROCBASED_CTLS,
1619 	  MSR_IA32_VMX_TRUE_PROC, 0x401e172 },
1620 	{ "MSR_IA32_VMX_PROCBASED_CTLS2", MSR_IA32_VMX_PROCBASED_CTLS2,
1621 	  MSR_IA32_VMX_PROCBASED_CTLS2, 0 },
1622 	{ "MSR_IA32_VMX_EXIT_CTLS", MSR_IA32_VMX_EXIT_CTLS,
1623 	  MSR_IA32_VMX_TRUE_EXIT, 0x36dff },
1624 	{ "MSR_IA32_VMX_ENTRY_CTLS", MSR_IA32_VMX_ENTRY_CTLS,
1625 	  MSR_IA32_VMX_TRUE_ENTRY, 0x11ff },
1626 };
1627 
1628 static void test_vmx_caps(void)
1629 {
1630 	u64 val, default1, fixed0, fixed1;
1631 	union vmx_ctrl_msr ctrl, true_ctrl;
1632 	unsigned int n;
1633 	bool ok;
1634 
1635 	printf("\nTest suite: VMX capability reporting\n");
1636 
1637 	report((basic.revision & (1ul << 31)) == 0 &&
1638 	       basic.size > 0 && basic.size <= 4096 &&
1639 	       (basic.type == 0 || basic.type == 6) &&
1640 	       basic.reserved1 == 0 && basic.reserved2 == 0,
1641 	       "MSR_IA32_VMX_BASIC");
1642 
1643 	val = rdmsr(MSR_IA32_VMX_MISC);
1644 	report((!(ctrl_cpu_rev[1].clr & CPU_URG) || val & (1ul << 5)) &&
1645 	       ((val >> 16) & 0x1ff) <= 256 &&
1646 	       (val & 0x80007e00) == 0,
1647 	       "MSR_IA32_VMX_MISC");
1648 
1649 	for (n = 0; n < ARRAY_SIZE(vmx_ctl_msr); n++) {
1650 		ctrl.val = rdmsr(vmx_ctl_msr[n].index);
1651 		default1 = vmx_ctl_msr[n].default1;
1652 		ok = (ctrl.set & default1) == default1;
1653 		ok = ok && (ctrl.set & ~ctrl.clr) == 0;
1654 		if (ok && basic.ctrl) {
1655 			true_ctrl.val = rdmsr(vmx_ctl_msr[n].true_index);
1656 			ok = ctrl.clr == true_ctrl.clr;
1657 			ok = ok && ctrl.set == (true_ctrl.set | default1);
1658 		}
1659 		report(ok, "%s", vmx_ctl_msr[n].name);
1660 	}
1661 
1662 	fixed0 = rdmsr(MSR_IA32_VMX_CR0_FIXED0);
1663 	fixed1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1);
1664 	report(((fixed0 ^ fixed1) & ~fixed1) == 0,
1665 	       "MSR_IA32_VMX_IA32_VMX_CR0_FIXED0/1");
1666 
1667 	fixed0 = rdmsr(MSR_IA32_VMX_CR4_FIXED0);
1668 	fixed1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
1669 	report(((fixed0 ^ fixed1) & ~fixed1) == 0,
1670 	       "MSR_IA32_VMX_IA32_VMX_CR4_FIXED0/1");
1671 
1672 	val = rdmsr(MSR_IA32_VMX_VMCS_ENUM);
1673 	report((val & VMCS_FIELD_INDEX_MASK) >= 0x2a &&
1674 	       (val & 0xfffffffffffffc01Ull) == 0,
1675 	       "MSR_IA32_VMX_VMCS_ENUM");
1676 
1677 	fixed0 = -1ull;
1678 	fixed0 &= ~(EPT_CAP_WT |
1679 		    EPT_CAP_PWL4 |
1680 		    EPT_CAP_PWL5 |
1681 		    EPT_CAP_UC |
1682 		    EPT_CAP_WB |
1683 		    EPT_CAP_2M_PAGE |
1684 		    EPT_CAP_1G_PAGE |
1685 		    EPT_CAP_INVEPT |
1686 		    EPT_CAP_AD_FLAG |
1687 		    EPT_CAP_ADV_EPT_INFO |
1688 		    EPT_CAP_INVEPT_SINGLE |
1689 		    EPT_CAP_INVEPT_ALL |
1690 		    VPID_CAP_INVVPID |
1691 		    VPID_CAP_INVVPID_ADDR |
1692 		    VPID_CAP_INVVPID_CXTGLB |
1693 		    VPID_CAP_INVVPID_ALL |
1694 		    VPID_CAP_INVVPID_CXTLOC);
1695 
1696 	val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
1697 	report((val & fixed0) == 0,
1698 	       "MSR_IA32_VMX_EPT_VPID_CAP");
1699 }
1700 
1701 /* This function can only be called in guest */
1702 void __attribute__((__used__)) hypercall(u32 hypercall_no)
1703 {
1704 	u64 val = 0;
1705 	val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT;
1706 	hypercall_field = val;
1707 	asm volatile("vmcall\n\t");
1708 }
1709 
1710 static bool is_hypercall(union exit_reason exit_reason)
1711 {
1712 	return exit_reason.basic == VMX_VMCALL &&
1713 	       (hypercall_field & HYPERCALL_BIT);
1714 }
1715 
1716 static int handle_hypercall(void)
1717 {
1718 	ulong hypercall_no;
1719 
1720 	hypercall_no = hypercall_field & HYPERCALL_MASK;
1721 	hypercall_field = 0;
1722 	switch (hypercall_no) {
1723 	case HYPERCALL_VMEXIT:
1724 		return VMX_TEST_VMEXIT;
1725 	case HYPERCALL_VMABORT:
1726 		return VMX_TEST_VMABORT;
1727 	case HYPERCALL_VMSKIP:
1728 		return VMX_TEST_VMSKIP;
1729 	default:
1730 		printf("ERROR : Invalid hypercall number : %ld\n", hypercall_no);
1731 	}
1732 	return VMX_TEST_EXIT;
1733 }
1734 
1735 static void continue_abort(void)
1736 {
1737 	assert(!in_guest);
1738 	printf("Host was here when guest aborted:\n");
1739 	dump_stack();
1740 	longjmp(abort_target, 1);
1741 	abort();
1742 }
1743 
1744 void __abort_test(void)
1745 {
1746 	if (in_guest)
1747 		hypercall(HYPERCALL_VMABORT);
1748 	else
1749 		longjmp(abort_target, 1);
1750 	abort();
1751 }
1752 
1753 static void continue_skip(void)
1754 {
1755 	assert(!in_guest);
1756 	longjmp(abort_target, 1);
1757 	abort();
1758 }
1759 
1760 void test_skip(const char *msg)
1761 {
1762 	printf("%s skipping test: %s\n", in_guest ? "Guest" : "Host", msg);
1763 	if (in_guest)
1764 		hypercall(HYPERCALL_VMABORT);
1765 	else
1766 		longjmp(abort_target, 1);
1767 	abort();
1768 }
1769 
1770 static int exit_handler(union exit_reason exit_reason)
1771 {
1772 	int ret;
1773 
1774 	current->exits++;
1775 	regs.rflags = vmcs_read(GUEST_RFLAGS);
1776 	if (is_hypercall(exit_reason))
1777 		ret = handle_hypercall();
1778 	else
1779 		ret = current->exit_handler(exit_reason);
1780 	vmcs_write(GUEST_RFLAGS, regs.rflags);
1781 
1782 	return ret;
1783 }
1784 
1785 /*
1786  * Tries to enter the guest, populates @result with VM-Fail, VM-Exit, entered,
1787  * etc...
1788  */
1789 static void vmx_enter_guest(struct vmentry_result *result)
1790 {
1791 	memset(result, 0, sizeof(*result));
1792 
1793 	in_guest = 1;
1794 	asm volatile (
1795 		"mov %[HOST_RSP], %%rdi\n\t"
1796 		"vmwrite %%rsp, %%rdi\n\t"
1797 		LOAD_GPR_C
1798 		"cmpb $0, %[launched]\n\t"
1799 		"jne 1f\n\t"
1800 		"vmlaunch\n\t"
1801 		"jmp 2f\n\t"
1802 		"1: "
1803 		"vmresume\n\t"
1804 		"2: "
1805 		SAVE_GPR_C
1806 		"pushf\n\t"
1807 		"pop %%rdi\n\t"
1808 		"mov %%rdi, %[vm_fail_flags]\n\t"
1809 		"movl $1, %[vm_fail]\n\t"
1810 		"jmp 3f\n\t"
1811 		"vmx_return:\n\t"
1812 		SAVE_GPR_C
1813 		"3: \n\t"
1814 		: [vm_fail]"+m"(result->vm_fail),
1815 		  [vm_fail_flags]"=m"(result->flags)
1816 		: [launched]"m"(launched), [HOST_RSP]"i"(HOST_RSP)
1817 		: "rdi", "memory", "cc"
1818 	);
1819 	in_guest = 0;
1820 
1821 	result->vmlaunch = !launched;
1822 	result->instr = launched ? "vmresume" : "vmlaunch";
1823 	result->exit_reason.full = result->vm_fail ? 0xdead :
1824 						     vmcs_read(EXI_REASON);
1825 	result->entered = !result->vm_fail &&
1826 			  !result->exit_reason.failed_vmentry;
1827 }
1828 
1829 static int vmx_run(void)
1830 {
1831 	struct vmentry_result result;
1832 	u32 ret;
1833 
1834 	while (1) {
1835 		vmx_enter_guest(&result);
1836 		if (result.entered) {
1837 			/*
1838 			 * VMCS isn't in "launched" state if there's been any
1839 			 * entry failure (early or otherwise).
1840 			 */
1841 			launched = 1;
1842 			ret = exit_handler(result.exit_reason);
1843 		} else if (current->entry_failure_handler) {
1844 			ret = current->entry_failure_handler(&result);
1845 		} else {
1846 			ret = VMX_TEST_EXIT;
1847 		}
1848 
1849 		switch (ret) {
1850 		case VMX_TEST_RESUME:
1851 			continue;
1852 		case VMX_TEST_VMEXIT:
1853 			guest_finished = 1;
1854 			return 0;
1855 		case VMX_TEST_EXIT:
1856 			break;
1857 		default:
1858 			printf("ERROR : Invalid %s_handler return val %d.\n",
1859 			       result.entered ? "exit" : "entry_failure",
1860 			       ret);
1861 			break;
1862 		}
1863 
1864 		if (result.entered)
1865 			print_vmexit_info(result.exit_reason);
1866 		else
1867 			print_vmentry_failure_info(&result);
1868 		abort();
1869 	}
1870 }
1871 
1872 static void run_teardown_step(struct test_teardown_step *step)
1873 {
1874 	step->func(step->data);
1875 }
1876 
1877 static int test_run(struct vmx_test *test)
1878 {
1879 	int r;
1880 
1881 	/* Validate V2 interface. */
1882 	if (test->v2) {
1883 		int ret = 0;
1884 		if (test->init || test->guest_main || test->exit_handler ||
1885 		    test->syscall_handler) {
1886 			report(0, "V2 test cannot specify V1 callbacks.");
1887 			ret = 1;
1888 		}
1889 		if (ret)
1890 			return ret;
1891 	}
1892 
1893 	if (test->name == NULL)
1894 		test->name = "(no name)";
1895 	if (vmx_on()) {
1896 		printf("%s : vmxon failed.\n", __func__);
1897 		return 1;
1898 	}
1899 
1900 	init_vmcs(&(test->vmcs));
1901 	/* Directly call test->init is ok here, init_vmcs has done
1902 	   vmcs init, vmclear and vmptrld*/
1903 	if (test->init && test->init(test->vmcs) != VMX_TEST_START)
1904 		goto out;
1905 	teardown_count = 0;
1906 	v2_guest_main = NULL;
1907 	test->exits = 0;
1908 	current = test;
1909 	regs = test->guest_regs;
1910 	vmcs_write(GUEST_RFLAGS, regs.rflags | X86_EFLAGS_FIXED);
1911 	launched = 0;
1912 	guest_finished = 0;
1913 	printf("\nTest suite: %s\n", test->name);
1914 
1915 	r = setjmp(abort_target);
1916 	if (r) {
1917 		assert(!in_guest);
1918 		goto out;
1919 	}
1920 
1921 
1922 	if (test->v2)
1923 		test->v2();
1924 	else
1925 		vmx_run();
1926 
1927 	while (teardown_count > 0)
1928 		run_teardown_step(&teardown_steps[--teardown_count]);
1929 
1930 	if (launched && !guest_finished)
1931 		report(0, "Guest didn't run to completion.");
1932 
1933 out:
1934 	if (vmx_off()) {
1935 		printf("%s : vmxoff failed.\n", __func__);
1936 		return 1;
1937 	}
1938 	return 0;
1939 }
1940 
1941 /*
1942  * Add a teardown step. Executed after the test's main function returns.
1943  * Teardown steps executed in reverse order.
1944  */
1945 void test_add_teardown(test_teardown_func func, void *data)
1946 {
1947 	struct test_teardown_step *step;
1948 
1949 	TEST_ASSERT_MSG(teardown_count < MAX_TEST_TEARDOWN_STEPS,
1950 			"There are already %d teardown steps.",
1951 			teardown_count);
1952 	step = &teardown_steps[teardown_count++];
1953 	step->func = func;
1954 	step->data = data;
1955 }
1956 
1957 /*
1958  * Set the target of the first enter_guest call. Can only be called once per
1959  * test. Must be called before first enter_guest call.
1960  */
1961 void test_set_guest(test_guest_func func)
1962 {
1963 	assert(current->v2);
1964 	TEST_ASSERT_MSG(!v2_guest_main, "Already set guest func.");
1965 	v2_guest_main = func;
1966 }
1967 
1968 static void check_for_guest_termination(union exit_reason exit_reason)
1969 {
1970 	if (is_hypercall(exit_reason)) {
1971 		int ret;
1972 
1973 		ret = handle_hypercall();
1974 		switch (ret) {
1975 		case VMX_TEST_VMEXIT:
1976 			guest_finished = 1;
1977 			break;
1978 		case VMX_TEST_VMABORT:
1979 			continue_abort();
1980 			break;
1981 		case VMX_TEST_VMSKIP:
1982 			continue_skip();
1983 			break;
1984 		default:
1985 			printf("ERROR : Invalid handle_hypercall return %d.\n",
1986 			       ret);
1987 			abort();
1988 		}
1989 	}
1990 }
1991 
1992 /*
1993  * Enters the guest (or launches it for the first time). Error to call once the
1994  * guest has returned (i.e., run past the end of its guest() function).
1995  */
1996 void __enter_guest(u8 abort_flag, struct vmentry_result *result)
1997 {
1998 	TEST_ASSERT_MSG(v2_guest_main,
1999 			"Never called test_set_guest_func!");
2000 
2001 	TEST_ASSERT_MSG(!guest_finished,
2002 			"Called enter_guest() after guest returned.");
2003 
2004 	vmx_enter_guest(result);
2005 
2006 	if (result->vm_fail) {
2007 		if (abort_flag & ABORT_ON_EARLY_VMENTRY_FAIL)
2008 			goto do_abort;
2009 		return;
2010 	}
2011 	if (result->exit_reason.failed_vmentry) {
2012 		if ((abort_flag & ABORT_ON_INVALID_GUEST_STATE) ||
2013 		    result->exit_reason.basic != VMX_FAIL_STATE)
2014 			goto do_abort;
2015 		return;
2016 	}
2017 
2018 	launched = 1;
2019 	check_for_guest_termination(result->exit_reason);
2020 	return;
2021 
2022 do_abort:
2023 	print_vmentry_failure_info(result);
2024 	abort();
2025 }
2026 
2027 void enter_guest_with_bad_controls(void)
2028 {
2029 	struct vmentry_result result;
2030 
2031 	TEST_ASSERT_MSG(v2_guest_main,
2032 			"Never called test_set_guest_func!");
2033 
2034 	TEST_ASSERT_MSG(!guest_finished,
2035 			"Called enter_guest() after guest returned.");
2036 
2037 	__enter_guest(ABORT_ON_INVALID_GUEST_STATE, &result);
2038 	report(result.vm_fail, "VM-Fail occurred as expected");
2039 	report((result.flags & VMX_ENTRY_FLAGS) == X86_EFLAGS_ZF,
2040                "FLAGS set correctly on VM-Fail");
2041 	report(vmcs_read(VMX_INST_ERROR) == VMXERR_ENTRY_INVALID_CONTROL_FIELD,
2042 	       "VM-Inst Error # is %d (VM entry with invalid control field(s))",
2043 	       VMXERR_ENTRY_INVALID_CONTROL_FIELD);
2044 }
2045 
2046 void enter_guest(void)
2047 {
2048 	struct vmentry_result result;
2049 
2050 	__enter_guest(ABORT_ON_EARLY_VMENTRY_FAIL |
2051 		      ABORT_ON_INVALID_GUEST_STATE, &result);
2052 }
2053 
2054 extern struct vmx_test vmx_tests[];
2055 
2056 static bool
2057 test_wanted(const char *name, const char *filters[], int filter_count)
2058 {
2059 	int i;
2060 	bool positive = false;
2061 	bool match = false;
2062 	char clean_name[strlen(name) + 1];
2063 	char *c;
2064 	const char *n;
2065 
2066 	printf("filter = %s, test = %s\n", filters[0], name);
2067 
2068 	/* Replace spaces with underscores. */
2069 	n = name;
2070 	c = &clean_name[0];
2071 	do *c++ = (*n == ' ') ? '_' : *n;
2072 	while (*n++);
2073 
2074 	for (i = 0; i < filter_count; i++) {
2075 		const char *filter = filters[i];
2076 
2077 		if (filter[0] == '-') {
2078 			if (simple_glob(clean_name, filter + 1))
2079 				return false;
2080 		} else {
2081 			positive = true;
2082 			match |= simple_glob(clean_name, filter);
2083 		}
2084 	}
2085 
2086 	if (!positive || match) {
2087 		matched++;
2088 		return true;
2089 	} else {
2090 		return false;
2091 	}
2092 }
2093 
2094 int main(int argc, const char *argv[])
2095 {
2096 	int i = 0;
2097 
2098 	setup_vm();
2099 	hypercall_field = 0;
2100 
2101 	/* We want xAPIC mode to test MMIO passthrough from L1 (us) to L2.  */
2102 	smp_reset_apic();
2103 
2104 	argv++;
2105 	argc--;
2106 
2107 	if (!this_cpu_has(X86_FEATURE_VMX)) {
2108 		printf("WARNING: vmx not supported, add '-cpu host'\n");
2109 		goto exit;
2110 	}
2111 	init_bsp_vmx();
2112 	if (test_wanted("test_vmx_feature_control", argv, argc)) {
2113 		/* Sets MSR_IA32_FEATURE_CONTROL to 0x5 */
2114 		if (test_vmx_feature_control() != 0)
2115 			goto exit;
2116 	} else {
2117 		enable_vmx();
2118 	}
2119 
2120 	if (test_wanted("test_vmxon", argv, argc)) {
2121 		/* Enables VMX */
2122 		if (test_vmxon() != 0)
2123 			goto exit;
2124 	} else {
2125 		if (vmx_on()) {
2126 			report(0, "vmxon");
2127 			goto exit;
2128 		}
2129 	}
2130 
2131 	if (test_wanted("test_vmptrld", argv, argc))
2132 		test_vmptrld();
2133 	if (test_wanted("test_vmclear", argv, argc))
2134 		test_vmclear();
2135 	if (test_wanted("test_vmptrst", argv, argc))
2136 		test_vmptrst();
2137 	if (test_wanted("test_vmwrite_vmread", argv, argc))
2138 		test_vmwrite_vmread();
2139 	if (test_wanted("test_vmcs_high", argv, argc))
2140 		test_vmcs_high();
2141 	if (test_wanted("test_vmcs_lifecycle", argv, argc))
2142 		test_vmcs_lifecycle();
2143 	if (test_wanted("test_vmx_caps", argv, argc))
2144 		test_vmx_caps();
2145 	if (test_wanted("test_vmread_flags_touch", argv, argc))
2146 		test_vmread_flags_touch();
2147 	if (test_wanted("test_vmwrite_flags_touch", argv, argc))
2148 		test_vmwrite_flags_touch();
2149 
2150 	/* Balance vmxon from test_vmxon. */
2151 	vmx_off();
2152 
2153 	for (; vmx_tests[i].name != NULL; i++) {
2154 		if (!test_wanted(vmx_tests[i].name, argv, argc))
2155 			continue;
2156 		if (test_run(&vmx_tests[i]))
2157 			goto exit;
2158 	}
2159 
2160 	if (!matched)
2161 		report(matched, "command line didn't match any tests!");
2162 
2163 exit:
2164 	return report_summary();
2165 }
2166