xref: /kvm-unit-tests/x86/vmx.c (revision 1df80b5711b9339a5e3423d9b1f683dd02a0b75b)
1 /*
2  * x86/vmx.c : Framework for testing nested virtualization
3  *	This is a framework to test nested VMX for KVM, which
4  * 	started as a project of GSoC 2013. All test cases should
5  *	be located in x86/vmx_tests.c and framework related
6  *	functions should be in this file.
7  *
8  * How to write test cases?
9  *	Add callbacks of test suite in variant "vmx_tests". You can
10  *	write:
11  *		1. init function used for initializing test suite
12  *		2. main function for codes running in L2 guest,
13  *		3. exit_handler to handle vmexit of L2 to L1
14  *		4. syscall handler to handle L2 syscall vmexit
15  *		5. vmenter fail handler to handle direct failure of vmenter
16  *		6. guest_regs is loaded when vmenter and saved when
17  *			vmexit, you can read and set it in exit_handler
18  *	If no special function is needed for a test suite, use
 *	corresponding basic_* functions as callback. More handlers
20  *	can be added to "vmx_tests", see details of "struct vmx_test"
21  *	and function test_run().
22  *
23  * Currently, vmx test framework only set up one VCPU and one
24  * concurrent guest test environment with same paging for L2 and
25  * L1. For usage of EPT, only 1:1 mapped paging is used from VFN
26  * to PFN.
27  *
28  * Author : Arthur Chunqi Li <yzt356@gmail.com>
29  */
30 
31 #include "libcflat.h"
32 #include "processor.h"
33 #include "vm.h"
34 #include "desc.h"
35 #include "vmx.h"
36 #include "msr.h"
37 #include "smp.h"
38 
/* Pointer whose value vmx_on() hands to VMXON as its memory operand. */
u64 *vmxon_region;
/* VMCS that test_vmclear() uses as its known-good VMCLEAR target. */
struct vmcs *vmcs_root;
/* Pre-incremented and written to the VPID field by init_vmcs_ctrl(). */
u32 vpid_cnt;
/* guest_syscall_stack backs GUEST_SYSENTER_ESP (see init_vmcs_guest()). */
void *guest_stack, *guest_syscall_stack;
/* Control values written into the VMCS by init_vmcs_ctrl()/init_vmcs_host(). */
u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
/* General-purpose register snapshot printed by print_vmexit_info(). */
struct regs regs;

/* Test whose callbacks (guest_main, syscall_handler) are dispatched. */
struct vmx_test *current;

#define MAX_TEST_TEARDOWN_STEPS 10

/* One teardown callback together with the opaque argument stored for it. */
struct test_teardown_step {
	test_teardown_func func;
	void *data;
};

/* Fixed-capacity storage for up to MAX_TEST_TEARDOWN_STEPS teardown steps. */
static int teardown_count;
static struct test_teardown_step teardown_steps[MAX_TEST_TEARDOWN_STEPS];

/* Guest entry function called by guest_main() when current->v2 is set. */
static test_guest_func v2_guest_main;

u64 hypercall_field;
bool launched;
static int matched;
static int guest_finished;
static int in_guest;

/* VMX capability values; basic.revision and basic.size are used for VMCS setup. */
union vmx_basic basic;
union vmx_ctrl_msr ctrl_pin_rev;
union vmx_ctrl_msr ctrl_cpu_rev[2];
union vmx_ctrl_msr ctrl_exit_rev;
union vmx_ctrl_msr ctrl_enter_rev;
/* Capability bits tested by the ept_*_supported(), ept_sync() and vpid_sync() helpers. */
union vmx_ept_vpid  ept_vpid;

extern struct descriptor_table_ptr gdt64_desc;
extern struct descriptor_table_ptr idt_descr;
extern struct descriptor_table_ptr tss_descr;
/* Address written to HOST_RIP by init_vmcs_host(). */
extern void *vmx_return;
/* Labels defined by the file-scope asm() blocks in this file. */
extern void *entry_sysenter;
extern void *guest_entry;

/* Only accessed through vmx_set_test_stage()/vmx_get_test_stage()/vmx_inc_test_stage(). */
static volatile u32 stage;

static jmp_buf abort_target;
83 
/*
 * Describes one VMCS field for the VMWRITE/VMREAD tests.
 *	@mask : bits of the field that are written and compared on
 *		read-back (0 for the entries annotated as read-only)
 *	@encoding : field encoding handed to vmcs_write() and
 *		vmcs_read_checking()
 */
struct vmcs_field {
	u64 mask;
	u64 encoding;
};

/* Mask covering the low _bits bits (via GENMASK_ULL, defined elsewhere). */
#define MASK(_bits) GENMASK_ULL((_bits) - 1, 0)
/* Mask as wide as an unsigned long, for the natural-width fields. */
#define MASK_NATURAL MASK(sizeof(unsigned long) * 8)
91 
/*
 * Table of VMCS fields exercised by set_all_vmcs_fields() and
 * check_all_vmcs_fields(). Each entry pairs a field encoding with the
 * mask applied both to the value written and to the value read back.
 * Entries with a mask of 0 (annotated read-only) are written with 0 and
 * compare as 0 == 0, so they are effectively not checked.
 */
static struct vmcs_field vmcs_fields[] = {
	{ MASK(16), VPID },
	{ MASK(16), PINV },
	{ MASK(16), EPTP_IDX },

	{ MASK(16), GUEST_SEL_ES },
	{ MASK(16), GUEST_SEL_CS },
	{ MASK(16), GUEST_SEL_SS },
	{ MASK(16), GUEST_SEL_DS },
	{ MASK(16), GUEST_SEL_FS },
	{ MASK(16), GUEST_SEL_GS },
	{ MASK(16), GUEST_SEL_LDTR },
	{ MASK(16), GUEST_SEL_TR },
	{ MASK(16), GUEST_INT_STATUS },

	{ MASK(16), HOST_SEL_ES },
	{ MASK(16), HOST_SEL_CS },
	{ MASK(16), HOST_SEL_SS },
	{ MASK(16), HOST_SEL_DS },
	{ MASK(16), HOST_SEL_FS },
	{ MASK(16), HOST_SEL_GS },
	{ MASK(16), HOST_SEL_TR },

	{ MASK(64), IO_BITMAP_A },
	{ MASK(64), IO_BITMAP_B },
	{ MASK(64), MSR_BITMAP },
	{ MASK(64), EXIT_MSR_ST_ADDR },
	{ MASK(64), EXIT_MSR_LD_ADDR },
	{ MASK(64), ENTER_MSR_LD_ADDR },
	{ MASK(64), VMCS_EXEC_PTR },
	{ MASK(64), TSC_OFFSET },
	{ MASK(64), APIC_VIRT_ADDR },
	{ MASK(64), APIC_ACCS_ADDR },
	{ MASK(64), EPTP },

	{ 0 /* read-only */, INFO_PHYS_ADDR },

	{ MASK(64), VMCS_LINK_PTR },
	{ MASK(64), GUEST_DEBUGCTL },
	{ MASK(64), GUEST_EFER },
	{ MASK(64), GUEST_PAT },
	{ MASK(64), GUEST_PERF_GLOBAL_CTRL },
	{ MASK(64), GUEST_PDPTE },

	{ MASK(64), HOST_PAT },
	{ MASK(64), HOST_EFER },
	{ MASK(64), HOST_PERF_GLOBAL_CTRL },

	{ MASK(32), PIN_CONTROLS },
	{ MASK(32), CPU_EXEC_CTRL0 },
	{ MASK(32), EXC_BITMAP },
	{ MASK(32), PF_ERROR_MASK },
	{ MASK(32), PF_ERROR_MATCH },
	{ MASK(32), CR3_TARGET_COUNT },
	{ MASK(32), EXI_CONTROLS },
	{ MASK(32), EXI_MSR_ST_CNT },
	{ MASK(32), EXI_MSR_LD_CNT },
	{ MASK(32), ENT_CONTROLS },
	{ MASK(32), ENT_MSR_LD_CNT },
	{ MASK(32), ENT_INTR_INFO },
	{ MASK(32), ENT_INTR_ERROR },
	{ MASK(32), ENT_INST_LEN },
	{ MASK(32), TPR_THRESHOLD },
	{ MASK(32), CPU_EXEC_CTRL1 },

	{ 0 /* read-only */, VMX_INST_ERROR },
	{ 0 /* read-only */, EXI_REASON },
	{ 0 /* read-only */, EXI_INTR_INFO },
	{ 0 /* read-only */, EXI_INTR_ERROR },
	{ 0 /* read-only */, IDT_VECT_INFO },
	{ 0 /* read-only */, IDT_VECT_ERROR },
	{ 0 /* read-only */, EXI_INST_LEN },
	{ 0 /* read-only */, EXI_INST_INFO },

	{ MASK(32), GUEST_LIMIT_ES },
	{ MASK(32), GUEST_LIMIT_CS },
	{ MASK(32), GUEST_LIMIT_SS },
	{ MASK(32), GUEST_LIMIT_DS },
	{ MASK(32), GUEST_LIMIT_FS },
	{ MASK(32), GUEST_LIMIT_GS },
	{ MASK(32), GUEST_LIMIT_LDTR },
	{ MASK(32), GUEST_LIMIT_TR },
	{ MASK(32), GUEST_LIMIT_GDTR },
	{ MASK(32), GUEST_LIMIT_IDTR },
	{ 0x1d0ff, GUEST_AR_ES },
	{ 0x1f0ff, GUEST_AR_CS },
	{ 0x1d0ff, GUEST_AR_SS },
	{ 0x1d0ff, GUEST_AR_DS },
	{ 0x1d0ff, GUEST_AR_FS },
	{ 0x1d0ff, GUEST_AR_GS },
	{ 0x1d0ff, GUEST_AR_LDTR },
	{ 0x1d0ff, GUEST_AR_TR },
	{ MASK(32), GUEST_INTR_STATE },
	{ MASK(32), GUEST_ACTV_STATE },
	{ MASK(32), GUEST_SMBASE },
	{ MASK(32), GUEST_SYSENTER_CS },
	{ MASK(32), PREEMPT_TIMER_VALUE },

	{ MASK(32), HOST_SYSENTER_CS },

	{ MASK_NATURAL, CR0_MASK },
	{ MASK_NATURAL, CR4_MASK },
	{ MASK_NATURAL, CR0_READ_SHADOW },
	{ MASK_NATURAL, CR4_READ_SHADOW },
	{ MASK_NATURAL, CR3_TARGET_0 },
	{ MASK_NATURAL, CR3_TARGET_1 },
	{ MASK_NATURAL, CR3_TARGET_2 },
	{ MASK_NATURAL, CR3_TARGET_3 },

	{ 0 /* read-only */, EXI_QUALIFICATION },
	{ 0 /* read-only */, IO_RCX },
	{ 0 /* read-only */, IO_RSI },
	{ 0 /* read-only */, IO_RDI },
	{ 0 /* read-only */, IO_RIP },
	{ 0 /* read-only */, GUEST_LINEAR_ADDRESS },

	{ MASK_NATURAL, GUEST_CR0 },
	{ MASK_NATURAL, GUEST_CR3 },
	{ MASK_NATURAL, GUEST_CR4 },
	{ MASK_NATURAL, GUEST_BASE_ES },
	{ MASK_NATURAL, GUEST_BASE_CS },
	{ MASK_NATURAL, GUEST_BASE_SS },
	{ MASK_NATURAL, GUEST_BASE_DS },
	{ MASK_NATURAL, GUEST_BASE_FS },
	{ MASK_NATURAL, GUEST_BASE_GS },
	{ MASK_NATURAL, GUEST_BASE_LDTR },
	{ MASK_NATURAL, GUEST_BASE_TR },
	{ MASK_NATURAL, GUEST_BASE_GDTR },
	{ MASK_NATURAL, GUEST_BASE_IDTR },
	{ MASK_NATURAL, GUEST_DR7 },
	{ MASK_NATURAL, GUEST_RSP },
	{ MASK_NATURAL, GUEST_RIP },
	{ MASK_NATURAL, GUEST_RFLAGS },
	{ MASK_NATURAL, GUEST_PENDING_DEBUG },
	{ MASK_NATURAL, GUEST_SYSENTER_ESP },
	{ MASK_NATURAL, GUEST_SYSENTER_EIP },

	{ MASK_NATURAL, HOST_CR0 },
	{ MASK_NATURAL, HOST_CR3 },
	{ MASK_NATURAL, HOST_CR4 },
	{ MASK_NATURAL, HOST_BASE_FS },
	{ MASK_NATURAL, HOST_BASE_GS },
	{ MASK_NATURAL, HOST_BASE_TR },
	{ MASK_NATURAL, HOST_BASE_GDTR },
	{ MASK_NATURAL, HOST_BASE_IDTR },
	{ MASK_NATURAL, HOST_SYSENTER_ESP },
	{ MASK_NATURAL, HOST_SYSENTER_EIP },
	{ MASK_NATURAL, HOST_RSP },
	{ MASK_NATURAL, HOST_RIP },
};
242 
243 static inline u64 vmcs_field_value(struct vmcs_field *f, u8 cookie)
244 {
245 	u64 value;
246 
247 	/* Incorporate the cookie and the field encoding into the value. */
248 	value = cookie;
249 	value |= (f->encoding << 8);
250 	value |= 0xdeadbeefull << 32;
251 
252 	return value & f->mask;
253 }
254 
255 static void set_vmcs_field(struct vmcs_field *f, u8 cookie)
256 {
257 	vmcs_write(f->encoding, vmcs_field_value(f, cookie));
258 }
259 
260 static bool check_vmcs_field(struct vmcs_field *f, u8 cookie)
261 {
262 	u64 expected;
263 	u64 actual;
264 	int ret;
265 
266 	ret = vmcs_read_checking(f->encoding, &actual);
267 	assert(!(ret & X86_EFLAGS_CF));
268 	/* Skip VMCS fields that aren't recognized by the CPU */
269 	if (ret & X86_EFLAGS_ZF)
270 		return true;
271 
272 	expected = vmcs_field_value(f, cookie);
273 	actual &= f->mask;
274 
275 	if (expected == actual)
276 		return true;
277 
278 	printf("FAIL: VMWRITE/VMREAD %lx (expected: %lx, actual: %lx)\n",
279 	       f->encoding, (unsigned long) expected, (unsigned long) actual);
280 
281 	return false;
282 }
283 
284 static void set_all_vmcs_fields(u8 cookie)
285 {
286 	int i;
287 
288 	for (i = 0; i < ARRAY_SIZE(vmcs_fields); i++)
289 		set_vmcs_field(&vmcs_fields[i], cookie);
290 }
291 
292 static bool check_all_vmcs_fields(u8 cookie)
293 {
294 	bool pass = true;
295 	int i;
296 
297 	for (i = 0; i < ARRAY_SIZE(vmcs_fields); i++) {
298 		if (!check_vmcs_field(&vmcs_fields[i], cookie))
299 			pass = false;
300 	}
301 
302 	return pass;
303 }
304 
305 void test_vmwrite_vmread(void)
306 {
307 	struct vmcs *vmcs = alloc_page();
308 
309 	memset(vmcs, 0, PAGE_SIZE);
310 	vmcs->revision_id = basic.revision;
311 	assert(!vmcs_clear(vmcs));
312 	assert(!make_vmcs_current(vmcs));
313 
314 	set_all_vmcs_fields(0x42);
315 	report("VMWRITE/VMREAD", check_all_vmcs_fields(0x42));
316 
317 	assert(!vmcs_clear(vmcs));
318 	free_page(vmcs);
319 }
320 
321 void test_vmcs_lifecycle(void)
322 {
323 	struct vmcs *vmcs[2] = {};
324 	int i;
325 
326 	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
327 		vmcs[i] = alloc_page();
328 		memset(vmcs[i], 0, PAGE_SIZE);
329 		vmcs[i]->revision_id = basic.revision;
330 	}
331 
332 #define VMPTRLD(_i) do { \
333 	assert(_i < ARRAY_SIZE(vmcs)); \
334 	assert(!make_vmcs_current(vmcs[_i])); \
335 	printf("VMPTRLD VMCS%d\n", (_i)); \
336 } while (0)
337 
338 #define VMCLEAR(_i) do { \
339 	assert(_i < ARRAY_SIZE(vmcs)); \
340 	assert(!vmcs_clear(vmcs[_i])); \
341 	printf("VMCLEAR VMCS%d\n", (_i)); \
342 } while (0)
343 
344 	VMCLEAR(0);
345 	VMPTRLD(0);
346 	set_all_vmcs_fields(0);
347 	report("current:VMCS0 active:[VMCS0]", check_all_vmcs_fields(0));
348 
349 	VMCLEAR(0);
350 	VMPTRLD(0);
351 	report("current:VMCS0 active:[VMCS0]", check_all_vmcs_fields(0));
352 
353 	VMCLEAR(1);
354 	report("current:VMCS0 active:[VMCS0]", check_all_vmcs_fields(0));
355 
356 	VMPTRLD(1);
357 	set_all_vmcs_fields(1);
358 	report("current:VMCS1 active:[VMCS0,VCMS1]", check_all_vmcs_fields(1));
359 
360 	VMPTRLD(0);
361 	report("current:VMCS0 active:[VMCS0,VCMS1]", check_all_vmcs_fields(0));
362 	VMPTRLD(1);
363 	report("current:VMCS1 active:[VMCS0,VCMS1]", check_all_vmcs_fields(1));
364 	VMPTRLD(1);
365 	report("current:VMCS1 active:[VMCS0,VCMS1]", check_all_vmcs_fields(1));
366 
367 	VMCLEAR(0);
368 	report("current:VMCS1 active:[VCMS1]", check_all_vmcs_fields(1));
369 
370 	/* VMPTRLD should not erase VMWRITEs to the current VMCS */
371 	set_all_vmcs_fields(2);
372 	VMPTRLD(1);
373 	report("current:VMCS1 active:[VCMS1]", check_all_vmcs_fields(2));
374 
375 	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
376 		VMCLEAR(i);
377 		free_page(vmcs[i]);
378 	}
379 
380 #undef VMPTRLD
381 #undef VMCLEAR
382 }
383 
/*
 * Set the shared test stage counter to @s. The store to the volatile
 * 'stage' is bracketed by barrier() calls on both sides.
 */
void vmx_set_test_stage(u32 s)
{
	barrier();
	stage = s;
	barrier();
}
390 
/*
 * Return the current value of the shared test stage counter. The load
 * from the volatile 'stage' is bracketed by barrier() calls.
 */
u32 vmx_get_test_stage(void)
{
	u32 s;

	barrier();
	s = stage;
	barrier();
	return s;
}
400 
/*
 * Advance the shared test stage counter by one. The read-modify-write
 * of the volatile 'stage' is bracketed by barrier() calls.
 */
void vmx_inc_test_stage(void)
{
	barrier();
	stage++;
	barrier();
}
407 
/*
 * entry_sysenter : SYSENTER entry stub.
 *	Its address is what init_vmcs_host() and init_vmcs_guest() write
 *	into HOST_SYSENTER_EIP and GUEST_SYSENTER_EIP.
 *	It runs SAVE_GPR, keeps only the low 4 bits of RAX as the syscall
 *	number, passes that in RDI to syscall_handler(), runs LOAD_GPR and
 *	then executes VMRESUME.
 *	NOTE(review): SAVE_GPR/LOAD_GPR are defined elsewhere; presumably
 *	they save/restore the guest general-purpose registers -- confirm.
 */
asm(
	".align	4, 0x90\n\t"
	".globl	entry_sysenter\n\t"
	"entry_sysenter:\n\t"
	SAVE_GPR
	"	and	$0xf, %rax\n\t"
	"	mov	%rax, %rdi\n\t"
	"	call	syscall_handler\n\t"
	LOAD_GPR
	"	vmresume\n\t"
);
420 
421 static void __attribute__((__used__)) syscall_handler(u64 syscall_no)
422 {
423 	if (current->syscall_handler)
424 		current->syscall_handler(syscall_no);
425 }
426 
427 static inline int vmx_on()
428 {
429 	bool ret;
430 	u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
431 	asm volatile ("push %1; popf; vmxon %2; setbe %0\n\t"
432 		      : "=q" (ret) : "q" (rflags), "m" (vmxon_region) : "cc");
433 	return ret;
434 }
435 
436 static inline int vmx_off()
437 {
438 	bool ret;
439 	u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
440 
441 	asm volatile("push %1; popf; vmxoff; setbe %0\n\t"
442 		     : "=q"(ret) : "q" (rflags) : "cc");
443 	return ret;
444 }
445 
/*
 * Printable names for the basic VM-exit reasons, indexed by the VMX_*
 * exit reason constant. Indices without an initializer are NULL;
 * exit_reason_description() reports those as "(unused)".
 */
static const char * const exit_reason_descriptions[] = {
	[VMX_EXC_NMI]		= "VMX_EXC_NMI",
	[VMX_EXTINT]		= "VMX_EXTINT",
	[VMX_TRIPLE_FAULT]	= "VMX_TRIPLE_FAULT",
	[VMX_INIT]		= "VMX_INIT",
	[VMX_SIPI]		= "VMX_SIPI",
	[VMX_SMI_IO]		= "VMX_SMI_IO",
	[VMX_SMI_OTHER]		= "VMX_SMI_OTHER",
	[VMX_INTR_WINDOW]	= "VMX_INTR_WINDOW",
	[VMX_NMI_WINDOW]	= "VMX_NMI_WINDOW",
	[VMX_TASK_SWITCH]	= "VMX_TASK_SWITCH",
	[VMX_CPUID]		= "VMX_CPUID",
	[VMX_GETSEC]		= "VMX_GETSEC",
	[VMX_HLT]		= "VMX_HLT",
	[VMX_INVD]		= "VMX_INVD",
	[VMX_INVLPG]		= "VMX_INVLPG",
	[VMX_RDPMC]		= "VMX_RDPMC",
	[VMX_RDTSC]		= "VMX_RDTSC",
	[VMX_RSM]		= "VMX_RSM",
	[VMX_VMCALL]		= "VMX_VMCALL",
	[VMX_VMCLEAR]		= "VMX_VMCLEAR",
	[VMX_VMLAUNCH]		= "VMX_VMLAUNCH",
	[VMX_VMPTRLD]		= "VMX_VMPTRLD",
	[VMX_VMPTRST]		= "VMX_VMPTRST",
	[VMX_VMREAD]		= "VMX_VMREAD",
	[VMX_VMRESUME]		= "VMX_VMRESUME",
	[VMX_VMWRITE]		= "VMX_VMWRITE",
	[VMX_VMXOFF]		= "VMX_VMXOFF",
	[VMX_VMXON]		= "VMX_VMXON",
	[VMX_CR]		= "VMX_CR",
	[VMX_DR]		= "VMX_DR",
	[VMX_IO]		= "VMX_IO",
	[VMX_RDMSR]		= "VMX_RDMSR",
	[VMX_WRMSR]		= "VMX_WRMSR",
	[VMX_FAIL_STATE]	= "VMX_FAIL_STATE",
	[VMX_FAIL_MSR]		= "VMX_FAIL_MSR",
	[VMX_MWAIT]		= "VMX_MWAIT",
	[VMX_MTF]		= "VMX_MTF",
	[VMX_MONITOR]		= "VMX_MONITOR",
	[VMX_PAUSE]		= "VMX_PAUSE",
	[VMX_FAIL_MCHECK]	= "VMX_FAIL_MCHECK",
	[VMX_TPR_THRESHOLD]	= "VMX_TPR_THRESHOLD",
	[VMX_APIC_ACCESS]	= "VMX_APIC_ACCESS",
	[VMX_GDTR_IDTR]		= "VMX_GDTR_IDTR",
	[VMX_LDTR_TR]		= "VMX_LDTR_TR",
	[VMX_EPT_VIOLATION]	= "VMX_EPT_VIOLATION",
	[VMX_EPT_MISCONFIG]	= "VMX_EPT_MISCONFIG",
	[VMX_INVEPT]		= "VMX_INVEPT",
	[VMX_PREEMPT]		= "VMX_PREEMPT",
	[VMX_INVVPID]		= "VMX_INVVPID",
	[VMX_WBINVD]		= "VMX_WBINVD",
	[VMX_XSETBV]		= "VMX_XSETBV",
	[VMX_APIC_WRITE]	= "VMX_APIC_WRITE",
	[VMX_RDRAND]		= "VMX_RDRAND",
	[VMX_INVPCID]		= "VMX_INVPCID",
	[VMX_VMFUNC]		= "VMX_VMFUNC",
	[VMX_RDSEED]		= "VMX_RDSEED",
	[VMX_PML_FULL]		= "VMX_PML_FULL",
	[VMX_XSAVES]		= "VMX_XSAVES",
	[VMX_XRSTORS]		= "VMX_XRSTORS",
};
507 
508 const char *exit_reason_description(u64 reason)
509 {
510 	if (reason >= ARRAY_SIZE(exit_reason_descriptions))
511 		return "(unknown)";
512 	return exit_reason_descriptions[reason] ? : "(unused)";
513 }
514 
515 void print_vmexit_info()
516 {
517 	u64 guest_rip, guest_rsp;
518 	ulong reason = vmcs_read(EXI_REASON) & 0xff;
519 	ulong exit_qual = vmcs_read(EXI_QUALIFICATION);
520 	guest_rip = vmcs_read(GUEST_RIP);
521 	guest_rsp = vmcs_read(GUEST_RSP);
522 	printf("VMEXIT info:\n");
523 	printf("\tvmexit reason = %ld\n", reason);
524 	printf("\texit qualification = 0x%lx\n", exit_qual);
525 	printf("\tBit 31 of reason = %lx\n", (vmcs_read(EXI_REASON) >> 31) & 1);
526 	printf("\tguest_rip = 0x%lx\n", guest_rip);
527 	printf("\tRAX=0x%lx    RBX=0x%lx    RCX=0x%lx    RDX=0x%lx\n",
528 		regs.rax, regs.rbx, regs.rcx, regs.rdx);
529 	printf("\tRSP=0x%lx    RBP=0x%lx    RSI=0x%lx    RDI=0x%lx\n",
530 		guest_rsp, regs.rbp, regs.rsi, regs.rdi);
531 	printf("\tR8 =0x%lx    R9 =0x%lx    R10=0x%lx    R11=0x%lx\n",
532 		regs.r8, regs.r9, regs.r10, regs.r11);
533 	printf("\tR12=0x%lx    R13=0x%lx    R14=0x%lx    R15=0x%lx\n",
534 		regs.r12, regs.r13, regs.r14, regs.r15);
535 }
536 
537 void
538 print_vmentry_failure_info(struct vmentry_failure *failure) {
539 	if (failure->early) {
540 		printf("Early %s failure: ", failure->instr);
541 		switch (failure->flags & VMX_ENTRY_FLAGS) {
542 		case X86_EFLAGS_CF:
543 			printf("current-VMCS pointer is not valid.\n");
544 			break;
545 		case X86_EFLAGS_ZF:
546 			printf("error number is %ld. See Intel 30.4.\n",
547 			       vmcs_read(VMX_INST_ERROR));
548 			break;
549 		default:
550 			printf("unexpected flags %lx!\n", failure->flags);
551 		}
552 	} else {
553 		u64 reason = vmcs_read(EXI_REASON);
554 		u64 qual = vmcs_read(EXI_QUALIFICATION);
555 
556 		printf("Non-early %s failure (reason=0x%lx, qual=0x%lx): ",
557 			failure->instr, reason, qual);
558 
559 		switch (reason & 0xff) {
560 		case VMX_FAIL_STATE:
561 			printf("invalid guest state\n");
562 			break;
563 		case VMX_FAIL_MSR:
564 			printf("MSR loading\n");
565 			break;
566 		case VMX_FAIL_MCHECK:
567 			printf("machine-check event\n");
568 			break;
569 		default:
570 			printf("unexpected basic exit reason %ld\n",
571 			       reason & 0xff);
572 		}
573 
574 		if (!(reason & VMX_ENTRY_FAILURE))
575 			printf("\tVMX_ENTRY_FAILURE BIT NOT SET!\n");
576 
577 		if (reason & 0x7fff0000)
578 			printf("\tRESERVED BITS SET!\n");
579 	}
580 }
581 
582 /*
583  * VMCLEAR should ensures all VMCS state is flushed to the VMCS
584  * region in memory.
585  */
586 static void test_vmclear_flushing(void)
587 {
588 	struct vmcs *vmcs[3] = {};
589 	int i;
590 
591 	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
592 		vmcs[i] = alloc_page();
593 		memset(vmcs[i], 0, PAGE_SIZE);
594 	}
595 
596 	vmcs[0]->revision_id = basic.revision;
597 	assert(!vmcs_clear(vmcs[0]));
598 	assert(!make_vmcs_current(vmcs[0]));
599 	set_all_vmcs_fields(0x86);
600 
601 	assert(!vmcs_clear(vmcs[0]));
602 	memcpy(vmcs[1], vmcs[0], basic.size);
603 	assert(!make_vmcs_current(vmcs[1]));
604 	report("test vmclear flush (current VMCS)", check_all_vmcs_fields(0x86));
605 
606 	set_all_vmcs_fields(0x87);
607 	assert(!make_vmcs_current(vmcs[0]));
608 	assert(!vmcs_clear(vmcs[1]));
609 	memcpy(vmcs[2], vmcs[1], basic.size);
610 	assert(!make_vmcs_current(vmcs[2]));
611 	report("test vmclear flush (!current VMCS)", check_all_vmcs_fields(0x87));
612 
613 	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
614 		assert(!vmcs_clear(vmcs[i]));
615 		free_page(vmcs[i]);
616 	}
617 }
618 
619 static void test_vmclear(void)
620 {
621 	struct vmcs *tmp_root;
622 	int width = cpuid_maxphyaddr();
623 
624 	/*
625 	 * Note- The tests below do not necessarily have a
626 	 * valid VMCS, but that's ok since the invalid vmcs
627 	 * is only used for a specific test and is discarded
628 	 * without touching its contents
629 	 */
630 
631 	/* Unaligned page access */
632 	tmp_root = (struct vmcs *)((intptr_t)vmcs_root + 1);
633 	report("test vmclear with unaligned vmcs",
634 	       vmcs_clear(tmp_root) == 1);
635 
636 	/* gpa bits beyond physical address width are set*/
637 	tmp_root = (struct vmcs *)((intptr_t)vmcs_root |
638 				   ((u64)1 << (width+1)));
639 	report("test vmclear with vmcs address bits set beyond physical address width",
640 	       vmcs_clear(tmp_root) == 1);
641 
642 	/* Pass VMXON region */
643 	tmp_root = (struct vmcs *)vmxon_region;
644 	report("test vmclear with vmxon region",
645 	       vmcs_clear(tmp_root) == 1);
646 
647 	/* Valid VMCS */
648 	report("test vmclear with valid vmcs region", vmcs_clear(vmcs_root) == 0);
649 
650 	test_vmclear_flushing();
651 }
652 
653 static void __attribute__((__used__)) guest_main(void)
654 {
655 	if (current->v2)
656 		v2_guest_main();
657 	else
658 		current->guest_main();
659 }
660 
661 /* guest_entry */
662 asm(
663 	".align	4, 0x90\n\t"
664 	".globl	entry_guest\n\t"
665 	"guest_entry:\n\t"
666 	"	call guest_main\n\t"
667 	"	mov $1, %edi\n\t"
668 	"	call hypercall\n\t"
669 );
670 
671 /* EPT paging structure related functions */
672 /* split_large_ept_entry: Split a 2M/1G large page into 512 smaller PTEs.
673 		@ptep : large page table entry to split
674 		@level : level of ptep (2 or 3)
675  */
676 static void split_large_ept_entry(unsigned long *ptep, int level)
677 {
678 	unsigned long *new_pt;
679 	unsigned long gpa;
680 	unsigned long pte;
681 	unsigned long prototype;
682 	int i;
683 
684 	pte = *ptep;
685 	assert(pte & EPT_PRESENT);
686 	assert(pte & EPT_LARGE_PAGE);
687 	assert(level == 2 || level == 3);
688 
689 	new_pt = alloc_page();
690 	assert(new_pt);
691 	memset(new_pt, 0, PAGE_SIZE);
692 
693 	prototype = pte & ~EPT_ADDR_MASK;
694 	if (level == 2)
695 		prototype &= ~EPT_LARGE_PAGE;
696 
697 	gpa = pte & EPT_ADDR_MASK;
698 	for (i = 0; i < EPT_PGDIR_ENTRIES; i++) {
699 		new_pt[i] = prototype | gpa;
700 		gpa += 1ul << EPT_LEVEL_SHIFT(level - 1);
701 	}
702 
703 	pte &= ~EPT_LARGE_PAGE;
704 	pte &= ~EPT_ADDR_MASK;
705 	pte |= virt_to_phys(new_pt);
706 
707 	*ptep = pte;
708 }
709 
710 /* install_ept_entry : Install a page to a given level in EPT
711 		@pml4 : addr of pml4 table
712 		@pte_level : level of PTE to set
713 		@guest_addr : physical address of guest
714 		@pte : pte value to set
715 		@pt_page : address of page table, NULL for a new page
716  */
717 void install_ept_entry(unsigned long *pml4,
718 		int pte_level,
719 		unsigned long guest_addr,
720 		unsigned long pte,
721 		unsigned long *pt_page)
722 {
723 	int level;
724 	unsigned long *pt = pml4;
725 	unsigned offset;
726 
727 	/* EPT only uses 48 bits of GPA. */
728 	assert(guest_addr < (1ul << 48));
729 
730 	for (level = EPT_PAGE_LEVEL; level > pte_level; --level) {
731 		offset = (guest_addr >> EPT_LEVEL_SHIFT(level))
732 				& EPT_PGDIR_MASK;
733 		if (!(pt[offset] & (EPT_PRESENT))) {
734 			unsigned long *new_pt = pt_page;
735 			if (!new_pt)
736 				new_pt = alloc_page();
737 			else
738 				pt_page = 0;
739 			memset(new_pt, 0, PAGE_SIZE);
740 			pt[offset] = virt_to_phys(new_pt)
741 					| EPT_RA | EPT_WA | EPT_EA;
742 		} else if (pt[offset] & EPT_LARGE_PAGE)
743 			split_large_ept_entry(&pt[offset], level);
744 		pt = phys_to_virt(pt[offset] & EPT_ADDR_MASK);
745 	}
746 	offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) & EPT_PGDIR_MASK;
747 	pt[offset] = pte;
748 }
749 
750 /* Map a page, @perm is the permission of the page */
751 void install_ept(unsigned long *pml4,
752 		unsigned long phys,
753 		unsigned long guest_addr,
754 		u64 perm)
755 {
756 	install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0);
757 }
758 
759 /* Map a 1G-size page */
760 void install_1g_ept(unsigned long *pml4,
761 		unsigned long phys,
762 		unsigned long guest_addr,
763 		u64 perm)
764 {
765 	install_ept_entry(pml4, 3, guest_addr,
766 			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
767 }
768 
769 /* Map a 2M-size page */
770 void install_2m_ept(unsigned long *pml4,
771 		unsigned long phys,
772 		unsigned long guest_addr,
773 		u64 perm)
774 {
775 	install_ept_entry(pml4, 2, guest_addr,
776 			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
777 }
778 
779 /* setup_ept_range : Setup a range of 1:1 mapped page to EPT paging structure.
780 		@start : start address of guest page
781 		@len : length of address to be mapped
782 		@map_1g : whether 1G page map is used
783 		@map_2m : whether 2M page map is used
784 		@perm : permission for every page
785  */
786 void setup_ept_range(unsigned long *pml4, unsigned long start,
787 		     unsigned long len, int map_1g, int map_2m, u64 perm)
788 {
789 	u64 phys = start;
790 	u64 max = (u64)len + (u64)start;
791 
792 	if (map_1g) {
793 		while (phys + PAGE_SIZE_1G <= max) {
794 			install_1g_ept(pml4, phys, phys, perm);
795 			phys += PAGE_SIZE_1G;
796 		}
797 	}
798 	if (map_2m) {
799 		while (phys + PAGE_SIZE_2M <= max) {
800 			install_2m_ept(pml4, phys, phys, perm);
801 			phys += PAGE_SIZE_2M;
802 		}
803 	}
804 	while (phys + PAGE_SIZE <= max) {
805 		install_ept(pml4, phys, phys, perm);
806 		phys += PAGE_SIZE;
807 	}
808 }
809 
810 /* get_ept_pte : Get the PTE of a given level in EPT,
811     @level == 1 means get the latest level*/
812 unsigned long get_ept_pte(unsigned long *pml4,
813 		unsigned long guest_addr, int level)
814 {
815 	int l;
816 	unsigned long *pt = pml4, pte;
817 	unsigned offset;
818 
819 	assert(level >= 1 && level <= 4);
820 
821 	for (l = EPT_PAGE_LEVEL; ; --l) {
822 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
823 		pte = pt[offset];
824 		if (!(pte & (EPT_PRESENT)))
825 			return -1;
826 		if (l == level)
827 			break;
828 		if (l < 4 && (pte & EPT_LARGE_PAGE))
829 			return -1;
830 		pt = (unsigned long *)(pte & EPT_ADDR_MASK);
831 	}
832 	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
833 	pte = pt[offset];
834 	return pte;
835 }
836 
837 static void clear_ept_ad_pte(unsigned long *pml4, unsigned long guest_addr)
838 {
839 	int l;
840 	unsigned long *pt = pml4;
841 	u64 pte;
842 	unsigned offset;
843 
844 	for (l = EPT_PAGE_LEVEL; ; --l) {
845 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
846 		pt[offset] &= ~(EPT_ACCESS_FLAG|EPT_DIRTY_FLAG);
847 		pte = pt[offset];
848 		if (l == 1 || (l < 4 && (pte & EPT_LARGE_PAGE)))
849 			break;
850 		pt = (unsigned long *)(pte & EPT_ADDR_MASK);
851 	}
852 }
853 
854 /* clear_ept_ad : Clear EPT A/D bits for the page table walk and the
855    final GPA of a guest address.  */
856 void clear_ept_ad(unsigned long *pml4, u64 guest_cr3,
857 		  unsigned long guest_addr)
858 {
859 	int l;
860 	unsigned long *pt = (unsigned long *)guest_cr3, gpa;
861 	u64 pte, offset_in_page;
862 	unsigned offset;
863 
864 	for (l = EPT_PAGE_LEVEL; ; --l) {
865 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
866 
867 		clear_ept_ad_pte(pml4, (u64) &pt[offset]);
868 		pte = pt[offset];
869 		if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK)))
870 			break;
871 		if (!(pte & PT_PRESENT_MASK))
872 			return;
873 		pt = (unsigned long *)(pte & PT_ADDR_MASK);
874 	}
875 
876 	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
877 	offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1);
878 	gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page);
879 	clear_ept_ad_pte(pml4, gpa);
880 }
881 
882 /* check_ept_ad : Check the content of EPT A/D bits for the page table
883    walk and the final GPA of a guest address.  */
884 void check_ept_ad(unsigned long *pml4, u64 guest_cr3,
885 		  unsigned long guest_addr, int expected_gpa_ad,
886 		  int expected_pt_ad)
887 {
888 	int l;
889 	unsigned long *pt = (unsigned long *)guest_cr3, gpa;
890 	u64 ept_pte, pte, offset_in_page;
891 	unsigned offset;
892 	bool bad_pt_ad = false;
893 
894 	for (l = EPT_PAGE_LEVEL; ; --l) {
895 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
896 
897 		ept_pte = get_ept_pte(pml4, (u64) &pt[offset], 1);
898 		if (ept_pte == 0)
899 			return;
900 
901 		if (!bad_pt_ad) {
902 			bad_pt_ad |= (ept_pte & (EPT_ACCESS_FLAG|EPT_DIRTY_FLAG)) != expected_pt_ad;
903 			if (bad_pt_ad)
904 				report("EPT - guest level %d page table A=%d/D=%d",
905 				       false, l,
906 				       !!(expected_pt_ad & EPT_ACCESS_FLAG),
907 				       !!(expected_pt_ad & EPT_DIRTY_FLAG));
908 		}
909 
910 		pte = pt[offset];
911 		if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK)))
912 			break;
913 		if (!(pte & PT_PRESENT_MASK))
914 			return;
915 		pt = (unsigned long *)(pte & PT_ADDR_MASK);
916 	}
917 
918 	if (!bad_pt_ad)
919 		report("EPT - guest page table structures A=%d/D=%d",
920 		       true,
921 		       !!(expected_pt_ad & EPT_ACCESS_FLAG),
922 		       !!(expected_pt_ad & EPT_DIRTY_FLAG));
923 
924 	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
925 	offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1);
926 	gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page);
927 
928 	ept_pte = get_ept_pte(pml4, gpa, 1);
929 	report("EPT - guest physical address A=%d/D=%d",
930 	       (ept_pte & (EPT_ACCESS_FLAG|EPT_DIRTY_FLAG)) == expected_gpa_ad,
931 	       !!(expected_gpa_ad & EPT_ACCESS_FLAG),
932 	       !!(expected_gpa_ad & EPT_DIRTY_FLAG));
933 }
934 
935 
936 void ept_sync(int type, u64 eptp)
937 {
938 	switch (type) {
939 	case INVEPT_SINGLE:
940 		if (ept_vpid.val & EPT_CAP_INVEPT_SINGLE) {
941 			invept(INVEPT_SINGLE, eptp);
942 			break;
943 		}
944 		/* else fall through */
945 	case INVEPT_GLOBAL:
946 		if (ept_vpid.val & EPT_CAP_INVEPT_ALL) {
947 			invept(INVEPT_GLOBAL, eptp);
948 			break;
949 		}
950 		/* else fall through */
951 	default:
952 		printf("WARNING: invept is not supported!\n");
953 	}
954 }
955 
956 void set_ept_pte(unsigned long *pml4, unsigned long guest_addr,
957 		 int level, u64 pte_val)
958 {
959 	int l;
960 	unsigned long *pt = pml4;
961 	unsigned offset;
962 
963 	assert(level >= 1 && level <= 4);
964 
965 	for (l = EPT_PAGE_LEVEL; ; --l) {
966 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
967 		if (l == level)
968 			break;
969 		assert(pt[offset] & EPT_PRESENT);
970 		pt = (unsigned long *)(pt[offset] & EPT_ADDR_MASK);
971 	}
972 	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
973 	pt[offset] = pte_val;
974 }
975 
976 bool ept_2m_supported(void)
977 {
978 	return ept_vpid.val & EPT_CAP_2M_PAGE;
979 }
980 
981 bool ept_1g_supported(void)
982 {
983 	return ept_vpid.val & EPT_CAP_1G_PAGE;
984 }
985 
986 bool ept_huge_pages_supported(int level)
987 {
988 	if (level == 2)
989 		return ept_2m_supported();
990 	else if (level == 3)
991 		return ept_1g_supported();
992 	else
993 		return false;
994 }
995 
996 bool ept_execute_only_supported(void)
997 {
998 	return ept_vpid.val & EPT_CAP_WT;
999 }
1000 
1001 bool ept_ad_bits_supported(void)
1002 {
1003 	return ept_vpid.val & EPT_CAP_AD_FLAG;
1004 }
1005 
1006 void vpid_sync(int type, u16 vpid)
1007 {
1008 	switch(type) {
1009 	case INVVPID_SINGLE:
1010 		if (ept_vpid.val & VPID_CAP_INVVPID_SINGLE) {
1011 			invvpid(INVVPID_SINGLE, vpid, 0);
1012 			break;
1013 		}
1014 	case INVVPID_ALL:
1015 		if (ept_vpid.val & VPID_CAP_INVVPID_ALL) {
1016 			invvpid(INVVPID_ALL, vpid, 0);
1017 			break;
1018 		}
1019 	default:
1020 		printf("WARNING: invvpid is not supported\n");
1021 	}
1022 }
1023 
1024 static void init_vmcs_ctrl(void)
1025 {
1026 	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
1027 	/* 26.2.1.1 */
1028 	vmcs_write(PIN_CONTROLS, ctrl_pin);
1029 	/* Disable VMEXIT of IO instruction */
1030 	vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
1031 	if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
1032 		ctrl_cpu[1] = (ctrl_cpu[1] | ctrl_cpu_rev[1].set) &
1033 			ctrl_cpu_rev[1].clr;
1034 		vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
1035 	}
1036 	vmcs_write(CR3_TARGET_COUNT, 0);
1037 	vmcs_write(VPID, ++vpid_cnt);
1038 }
1039 
/*
 * init_vmcs_host : Fill the host-state area of the current VMCS from the
 *	state the CPU is running with right now (EFER, CR0/CR3/CR4), the
 *	kernel selectors and the descriptor table bases, and also write
 *	the entry/exit controls from ctrl_enter/ctrl_exit.
 *	HOST_RIP is set to vmx_return and HOST_SYSENTER_EIP to
 *	entry_sysenter. HOST_RSP is not written here.
 */
static void init_vmcs_host(void)
{
	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
	/* 26.2.1.2 */
	vmcs_write(HOST_EFER, rdmsr(MSR_EFER));

	/* 26.2.1.3 */
	vmcs_write(ENT_CONTROLS, ctrl_enter);
	vmcs_write(EXI_CONTROLS, ctrl_exit);

	/* 26.2.2 */
	vmcs_write(HOST_CR0, read_cr0());
	vmcs_write(HOST_CR3, read_cr3());
	vmcs_write(HOST_CR4, read_cr4());
	vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
	vmcs_write(HOST_SYSENTER_CS,  KERNEL_CS);

	/* 26.2.3 */
	vmcs_write(HOST_SEL_CS, KERNEL_CS);
	vmcs_write(HOST_SEL_SS, KERNEL_DS);
	vmcs_write(HOST_SEL_DS, KERNEL_DS);
	vmcs_write(HOST_SEL_ES, KERNEL_DS);
	vmcs_write(HOST_SEL_FS, KERNEL_DS);
	vmcs_write(HOST_SEL_GS, KERNEL_DS);
	vmcs_write(HOST_SEL_TR, TSS_MAIN);
	vmcs_write(HOST_BASE_TR, tss_descr.base);
	vmcs_write(HOST_BASE_GDTR, gdt64_desc.base);
	vmcs_write(HOST_BASE_IDTR, idt_descr.base);
	vmcs_write(HOST_BASE_FS, 0);
	vmcs_write(HOST_BASE_GS, 0);

	/* Set other vmcs area */
	vmcs_write(PF_ERROR_MASK, 0);
	vmcs_write(PF_ERROR_MATCH, 0);
	/* The link pointer is set to all ones, in both the full and HI fields. */
	vmcs_write(VMCS_LINK_PTR, ~0ul);
	vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
	/* VM exits resume host execution at vmx_return. */
	vmcs_write(HOST_RIP, (u64)(&vmx_return));
}
1078 
/*
 * Fill in the guest-state fields of the current VMCS. The guest starts from
 * a copy of the host's control registers, EFER, descriptor tables and flat
 * segments, with RIP at guest_entry and RSP near the top of guest_stack.
 */
static void init_vmcs_guest(void)
{
	/* 26.3 CHECKING AND LOADING GUEST STATE */
	ulong guest_cr0, guest_cr4, guest_cr3;
	/* 26.3.1.1 */
	guest_cr0 = read_cr0();
	guest_cr4 = read_cr4();
	guest_cr3 = read_cr3();
	/* A 64-bit guest gets paging and PAE forced on ... */
	if (ctrl_enter & ENT_GUEST_64) {
		guest_cr0 |= X86_CR0_PG;
		guest_cr4 |= X86_CR4_PAE;
	}
	/* ... while a non-64-bit guest gets CR4.PCIDE cleared. */
	if ((ctrl_enter & ENT_GUEST_64) == 0)
		guest_cr4 &= (~X86_CR4_PCIDE);
	/* Whenever paging is on, protected mode is turned on as well. */
	if (guest_cr0 & X86_CR0_PG)
		guest_cr0 |= X86_CR0_PE;
	vmcs_write(GUEST_CR0, guest_cr0);
	vmcs_write(GUEST_CR3, guest_cr3);
	vmcs_write(GUEST_CR4, guest_cr4);
	vmcs_write(GUEST_SYSENTER_CS,  KERNEL_CS);
	/* Points at the last byte of the one-page syscall stack. */
	vmcs_write(GUEST_SYSENTER_ESP,
		(u64)(guest_syscall_stack + PAGE_SIZE - 1));
	vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
	vmcs_write(GUEST_DR7, 0);
	vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));

	/* 26.3.1.2 */
	/* Selectors: same code/data/TSS selectors as the host, no LDT. */
	vmcs_write(GUEST_SEL_CS, KERNEL_CS);
	vmcs_write(GUEST_SEL_SS, KERNEL_DS);
	vmcs_write(GUEST_SEL_DS, KERNEL_DS);
	vmcs_write(GUEST_SEL_ES, KERNEL_DS);
	vmcs_write(GUEST_SEL_FS, KERNEL_DS);
	vmcs_write(GUEST_SEL_GS, KERNEL_DS);
	vmcs_write(GUEST_SEL_TR, TSS_MAIN);
	vmcs_write(GUEST_SEL_LDTR, 0);

	/* Segment bases: zero everywhere except TR, which uses the host TSS. */
	vmcs_write(GUEST_BASE_CS, 0);
	vmcs_write(GUEST_BASE_ES, 0);
	vmcs_write(GUEST_BASE_SS, 0);
	vmcs_write(GUEST_BASE_DS, 0);
	vmcs_write(GUEST_BASE_FS, 0);
	vmcs_write(GUEST_BASE_GS, 0);
	vmcs_write(GUEST_BASE_TR, tss_descr.base);
	vmcs_write(GUEST_BASE_LDTR, 0);

	/* Segment limits. */
	vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
	vmcs_write(GUEST_LIMIT_TR, tss_descr.limit);

	/* Segment access-rights values. */
	vmcs_write(GUEST_AR_CS, 0xa09b);
	vmcs_write(GUEST_AR_DS, 0xc093);
	vmcs_write(GUEST_AR_ES, 0xc093);
	vmcs_write(GUEST_AR_FS, 0xc093);
	vmcs_write(GUEST_AR_GS, 0xc093);
	vmcs_write(GUEST_AR_SS, 0xc093);
	vmcs_write(GUEST_AR_LDTR, 0x82);
	vmcs_write(GUEST_AR_TR, 0x8b);

	/* 26.3.1.3 */
	vmcs_write(GUEST_BASE_GDTR, gdt64_desc.base);
	vmcs_write(GUEST_BASE_IDTR, idt_descr.base);
	vmcs_write(GUEST_LIMIT_GDTR, gdt64_desc.limit);
	vmcs_write(GUEST_LIMIT_IDTR, idt_descr.limit);

	/* 26.3.1.4 */
	vmcs_write(GUEST_RIP, (u64)(&guest_entry));
	/* Points at the last byte of the one-page guest stack. */
	vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1));
	vmcs_write(GUEST_RFLAGS, 0x2);

	/* 26.3.1.5 */
	vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE);
	vmcs_write(GUEST_INTR_STATE, 0);
}
1157 
1158 static int init_vmcs(struct vmcs **vmcs)
1159 {
1160 	*vmcs = alloc_page();
1161 	memset(*vmcs, 0, PAGE_SIZE);
1162 	(*vmcs)->revision_id = basic.revision;
1163 	/* vmclear first to init vmcs */
1164 	if (vmcs_clear(*vmcs)) {
1165 		printf("%s : vmcs_clear error\n", __func__);
1166 		return 1;
1167 	}
1168 
1169 	if (make_vmcs_current(*vmcs)) {
1170 		printf("%s : make_vmcs_current error\n", __func__);
1171 		return 1;
1172 	}
1173 
1174 	/* All settings to pin/exit/enter/cpu
1175 	   control fields should be placed here */
1176 	ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
1177 	ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
1178 	ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
1179 	/* DIsable IO instruction VMEXIT now */
1180 	ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
1181 	ctrl_cpu[1] = 0;
1182 
1183 	ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
1184 	ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
1185 	ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
1186 	ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;
1187 
1188 	init_vmcs_ctrl();
1189 	init_vmcs_host();
1190 	init_vmcs_guest();
1191 	return 0;
1192 }
1193 
1194 static void init_vmx(void)
1195 {
1196 	ulong fix_cr0_set, fix_cr0_clr;
1197 	ulong fix_cr4_set, fix_cr4_clr;
1198 
1199 	vmxon_region = alloc_page();
1200 	memset(vmxon_region, 0, PAGE_SIZE);
1201 
1202 	fix_cr0_set =  rdmsr(MSR_IA32_VMX_CR0_FIXED0);
1203 	fix_cr0_clr =  rdmsr(MSR_IA32_VMX_CR0_FIXED1);
1204 	fix_cr4_set =  rdmsr(MSR_IA32_VMX_CR4_FIXED0);
1205 	fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
1206 	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
1207 	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
1208 			: MSR_IA32_VMX_PINBASED_CTLS);
1209 	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
1210 			: MSR_IA32_VMX_EXIT_CTLS);
1211 	ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
1212 			: MSR_IA32_VMX_ENTRY_CTLS);
1213 	ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
1214 			: MSR_IA32_VMX_PROCBASED_CTLS);
1215 	if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0)
1216 		ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
1217 	else
1218 		ctrl_cpu_rev[1].val = 0;
1219 	if ((ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID)) != 0)
1220 		ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
1221 	else
1222 		ept_vpid.val = 0;
1223 
1224 	write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
1225 	write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);
1226 
1227 	*vmxon_region = basic.revision;
1228 
1229 	guest_stack = alloc_page();
1230 	memset(guest_stack, 0, PAGE_SIZE);
1231 	guest_syscall_stack = alloc_page();
1232 	memset(guest_syscall_stack, 0, PAGE_SIZE);
1233 }
1234 
/*
 * Callback for test_for_exception(): runs vmx_on() immediately followed by
 * vmx_off(). @data is unused; return values are ignored because the caller
 * only cares whether an exception is raised.
 */
static void do_vmxon_off(void *data)
{
	vmx_on();
	vmx_off();
}
1240 
/*
 * Callback for test_for_exception(): attempts to write 0 to
 * MSR_IA32_FEATURE_CONTROL. @data is unused.
 */
static void do_write_feature_control(void *data)
{
	wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
}
1245 
1246 static int test_vmx_feature_control(void)
1247 {
1248 	u64 ia32_feature_control;
1249 	bool vmx_enabled;
1250 
1251 	ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
1252 	vmx_enabled = ((ia32_feature_control & 0x5) == 0x5);
1253 	if ((ia32_feature_control & 0x5) == 0x5) {
1254 		printf("VMX enabled and locked by BIOS\n");
1255 		return 0;
1256 	} else if (ia32_feature_control & 0x1) {
1257 		printf("ERROR: VMX locked out by BIOS!?\n");
1258 		return 1;
1259 	}
1260 
1261 	wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
1262 	report("test vmxon with FEATURE_CONTROL cleared",
1263 	       test_for_exception(GP_VECTOR, &do_vmxon_off, NULL));
1264 
1265 	wrmsr(MSR_IA32_FEATURE_CONTROL, 0x4);
1266 	report("test vmxon without FEATURE_CONTROL lock",
1267 	       test_for_exception(GP_VECTOR, &do_vmxon_off, NULL));
1268 
1269 	wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5);
1270 	vmx_enabled = ((rdmsr(MSR_IA32_FEATURE_CONTROL) & 0x5) == 0x5);
1271 	report("test enable VMX in FEATURE_CONTROL", vmx_enabled);
1272 
1273 	report("test FEATURE_CONTROL lock bit",
1274 	       test_for_exception(GP_VECTOR, &do_write_feature_control, NULL));
1275 
1276 	return !vmx_enabled;
1277 }
1278 
1279 static int test_vmxon(void)
1280 {
1281 	int ret, ret1;
1282 	u64 *tmp_region = vmxon_region;
1283 	int width = cpuid_maxphyaddr();
1284 
1285 	/* Unaligned page access */
1286 	vmxon_region = (u64 *)((intptr_t)vmxon_region + 1);
1287 	ret1 = vmx_on();
1288 	report("test vmxon with unaligned vmxon region", ret1);
1289 	if (!ret1) {
1290 		ret = 1;
1291 		goto out;
1292 	}
1293 
1294 	/* gpa bits beyond physical address width are set*/
1295 	vmxon_region = (u64 *)((intptr_t)tmp_region | ((u64)1 << (width+1)));
1296 	ret1 = vmx_on();
1297 	report("test vmxon with bits set beyond physical address width", ret1);
1298 	if (!ret1) {
1299 		ret = 1;
1300 		goto out;
1301 	}
1302 
1303 	/* invalid revision indentifier */
1304 	vmxon_region = tmp_region;
1305 	*vmxon_region = 0xba9da9;
1306 	ret1 = vmx_on();
1307 	report("test vmxon with invalid revision identifier", ret1);
1308 	if (!ret1) {
1309 		ret = 1;
1310 		goto out;
1311 	}
1312 
1313 	/* and finally a valid region */
1314 	*vmxon_region = basic.revision;
1315 	ret = vmx_on();
1316 	report("test vmxon with valid vmxon region", !ret);
1317 
1318 out:
1319 	return ret;
1320 }
1321 
1322 static void test_vmptrld(void)
1323 {
1324 	struct vmcs *vmcs, *tmp_root;
1325 	int width = cpuid_maxphyaddr();
1326 
1327 	vmcs = alloc_page();
1328 	vmcs->revision_id = basic.revision;
1329 
1330 	/* Unaligned page access */
1331 	tmp_root = (struct vmcs *)((intptr_t)vmcs + 1);
1332 	report("test vmptrld with unaligned vmcs",
1333 	       make_vmcs_current(tmp_root) == 1);
1334 
1335 	/* gpa bits beyond physical address width are set*/
1336 	tmp_root = (struct vmcs *)((intptr_t)vmcs |
1337 				   ((u64)1 << (width+1)));
1338 	report("test vmptrld with vmcs address bits set beyond physical address width",
1339 	       make_vmcs_current(tmp_root) == 1);
1340 
1341 	/* Pass VMXON region */
1342 	make_vmcs_current(vmcs);
1343 	tmp_root = (struct vmcs *)vmxon_region;
1344 	report("test vmptrld with vmxon region",
1345 	       make_vmcs_current(tmp_root) == 1);
1346 	report("test vmptrld with vmxon region vm-instruction error",
1347 	       vmcs_read(VMX_INST_ERROR) == VMXERR_VMPTRLD_VMXON_POINTER);
1348 
1349 	report("test vmptrld with valid vmcs region", make_vmcs_current(vmcs) == 0);
1350 }
1351 
/*
 * Check VMPTRST: after init_vmcs() has made a freshly allocated VMCS
 * current, vmcs_save() must succeed and hand back that same pointer.
 *
 * init_vmcs() allocates and zeroes the VMCS page itself and stores it through
 * its argument, so no page is allocated here; doing so would only leak it.
 * A failing init_vmcs() is reported as a test failure rather than ignored.
 */
static void test_vmptrst(void)
{
	int ret;
	struct vmcs *vmcs1 = NULL, *vmcs2 = NULL;

	ret = init_vmcs(&vmcs1);
	if (!ret)
		ret = vmcs_save(&vmcs2);
	report("test vmptrst", (!ret) && (vmcs1 == vmcs2));
}
1363 
/*
 * Table of VMX control capability MSRs checked by test_vmx_caps().
 *
 *	name       - label passed to report()
 *	index      - MSR read unconditionally
 *	true_index - MSR additionally read and compared when basic.ctrl is set
 *	default1   - bits that the low ("set") half of the MSR at @index must
 *		     report as set
 *
 * MSR_IA32_VMX_PROCBASED_CTLS2 lists itself as its own true_index and has
 * no default1 bits.
 */
struct vmx_ctl_msr {
	const char *name;
	u32 index, true_index;
	u32 default1;
} vmx_ctl_msr[] = {
	{ "MSR_IA32_VMX_PINBASED_CTLS", MSR_IA32_VMX_PINBASED_CTLS,
	  MSR_IA32_VMX_TRUE_PIN, 0x16 },
	{ "MSR_IA32_VMX_PROCBASED_CTLS", MSR_IA32_VMX_PROCBASED_CTLS,
	  MSR_IA32_VMX_TRUE_PROC, 0x401e172 },
	{ "MSR_IA32_VMX_PROCBASED_CTLS2", MSR_IA32_VMX_PROCBASED_CTLS2,
	  MSR_IA32_VMX_PROCBASED_CTLS2, 0 },
	{ "MSR_IA32_VMX_EXIT_CTLS", MSR_IA32_VMX_EXIT_CTLS,
	  MSR_IA32_VMX_TRUE_EXIT, 0x36dff },
	{ "MSR_IA32_VMX_ENTRY_CTLS", MSR_IA32_VMX_ENTRY_CTLS,
	  MSR_IA32_VMX_TRUE_ENTRY, 0x11ff },
};
1380 
1381 static void test_vmx_caps(void)
1382 {
1383 	u64 val, default1, fixed0, fixed1;
1384 	union vmx_ctrl_msr ctrl, true_ctrl;
1385 	unsigned int n;
1386 	bool ok;
1387 
1388 	printf("\nTest suite: VMX capability reporting\n");
1389 
1390 	report("MSR_IA32_VMX_BASIC",
1391 	       (basic.revision & (1ul << 31)) == 0 &&
1392 	       basic.size > 0 && basic.size <= 4096 &&
1393 	       (basic.type == 0 || basic.type == 6) &&
1394 	       basic.reserved1 == 0 && basic.reserved2 == 0);
1395 
1396 	val = rdmsr(MSR_IA32_VMX_MISC);
1397 	report("MSR_IA32_VMX_MISC",
1398 	       (!(ctrl_cpu_rev[1].clr & CPU_URG) || val & (1ul << 5)) &&
1399 	       ((val >> 16) & 0x1ff) <= 256 &&
1400 	       (val & 0xc0007e00) == 0);
1401 
1402 	for (n = 0; n < ARRAY_SIZE(vmx_ctl_msr); n++) {
1403 		ctrl.val = rdmsr(vmx_ctl_msr[n].index);
1404 		default1 = vmx_ctl_msr[n].default1;
1405 		ok = (ctrl.set & default1) == default1;
1406 		ok = ok && (ctrl.set & ~ctrl.clr) == 0;
1407 		if (ok && basic.ctrl) {
1408 			true_ctrl.val = rdmsr(vmx_ctl_msr[n].true_index);
1409 			ok = ctrl.clr == true_ctrl.clr;
1410 			ok = ok && ctrl.set == (true_ctrl.set | default1);
1411 		}
1412 		report(vmx_ctl_msr[n].name, ok);
1413 	}
1414 
1415 	fixed0 = rdmsr(MSR_IA32_VMX_CR0_FIXED0);
1416 	fixed1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1);
1417 	report("MSR_IA32_VMX_IA32_VMX_CR0_FIXED0/1",
1418 	       ((fixed0 ^ fixed1) & ~fixed1) == 0);
1419 
1420 	fixed0 = rdmsr(MSR_IA32_VMX_CR4_FIXED0);
1421 	fixed1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
1422 	report("MSR_IA32_VMX_IA32_VMX_CR4_FIXED0/1",
1423 	       ((fixed0 ^ fixed1) & ~fixed1) == 0);
1424 
1425 	val = rdmsr(MSR_IA32_VMX_VMCS_ENUM);
1426 	report("MSR_IA32_VMX_VMCS_ENUM",
1427 	       (val & 0x3e) >= 0x2a &&
1428 	       (val & 0xfffffffffffffc01Ull) == 0);
1429 
1430 	val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
1431 	report("MSR_IA32_VMX_EPT_VPID_CAP",
1432 	       (val & 0xfffff07ef98cbebeUll) == 0);
1433 }
1434 
1435 /* This function can only be called in guest */
1436 static void __attribute__((__used__)) hypercall(u32 hypercall_no)
1437 {
1438 	u64 val = 0;
1439 	val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT;
1440 	hypercall_field = val;
1441 	asm volatile("vmcall\n\t");
1442 }
1443 
1444 static bool is_hypercall()
1445 {
1446 	ulong reason, hyper_bit;
1447 
1448 	reason = vmcs_read(EXI_REASON) & 0xff;
1449 	hyper_bit = hypercall_field & HYPERCALL_BIT;
1450 	if (reason == VMX_VMCALL && hyper_bit)
1451 		return true;
1452 	return false;
1453 }
1454 
1455 static int handle_hypercall()
1456 {
1457 	ulong hypercall_no;
1458 
1459 	hypercall_no = hypercall_field & HYPERCALL_MASK;
1460 	hypercall_field = 0;
1461 	switch (hypercall_no) {
1462 	case HYPERCALL_VMEXIT:
1463 		return VMX_TEST_VMEXIT;
1464 	case HYPERCALL_VMABORT:
1465 		return VMX_TEST_VMABORT;
1466 	case HYPERCALL_VMSKIP:
1467 		return VMX_TEST_VMSKIP;
1468 	default:
1469 		printf("ERROR : Invalid hypercall number : %ld\n", hypercall_no);
1470 	}
1471 	return VMX_TEST_EXIT;
1472 }
1473 
/*
 * Host-side continuation of a guest-requested abort: print the host stack
 * and jump back to the setjmp(abort_target) point. Must not be called while
 * in_guest is set.
 */
static void continue_abort(void)
{
	assert(!in_guest);
	printf("Host was here when guest aborted:\n");
	dump_stack();
	longjmp(abort_target, 1);
	/* Only reached if longjmp() unexpectedly returns. */
	abort();
}
1482 
1483 void __abort_test(void)
1484 {
1485 	if (in_guest)
1486 		hypercall(HYPERCALL_VMABORT);
1487 	else
1488 		longjmp(abort_target, 1);
1489 	abort();
1490 }
1491 
/*
 * Host-side continuation of a guest-requested skip: jump back to the
 * setjmp(abort_target) point without any further output. Must not be called
 * while in_guest is set.
 */
static void continue_skip(void)
{
	assert(!in_guest);
	longjmp(abort_target, 1);
	/* Only reached if longjmp() unexpectedly returns. */
	abort();
}
1498 
1499 void test_skip(const char *msg)
1500 {
1501 	printf("%s skipping test: %s\n", in_guest ? "Guest" : "Host", msg);
1502 	if (in_guest)
1503 		hypercall(HYPERCALL_VMABORT);
1504 	else
1505 		longjmp(abort_target, 1);
1506 	abort();
1507 }
1508 
1509 static int exit_handler()
1510 {
1511 	int ret;
1512 
1513 	current->exits++;
1514 	regs.rflags = vmcs_read(GUEST_RFLAGS);
1515 	if (is_hypercall())
1516 		ret = handle_hypercall();
1517 	else
1518 		ret = current->exit_handler();
1519 	vmcs_write(GUEST_RFLAGS, regs.rflags);
1520 
1521 	return ret;
1522 }
1523 
1524 /*
1525  * Called if vmlaunch or vmresume fails.
1526  *	@early    - failure due to "VMX controls and host-state area" (26.2)
1527  *	@vmlaunch - was this a vmlaunch or vmresume
1528  *	@rflags   - host rflags
1529  */
1530 static int
1531 entry_failure_handler(struct vmentry_failure *failure)
1532 {
1533 	if (current->entry_failure_handler)
1534 		return current->entry_failure_handler(failure);
1535 	else
1536 		return VMX_TEST_EXIT;
1537 }
1538 
1539 /*
1540  * Tries to enter the guest. Returns true iff entry succeeded. Otherwise,
1541  * populates @failure.
1542  */
1543 static bool vmx_enter_guest(struct vmentry_failure *failure)
1544 {
1545 	failure->early = 0;
1546 
1547 	in_guest = 1;
1548 	asm volatile (
1549 		"mov %[HOST_RSP], %%rdi\n\t"
1550 		"vmwrite %%rsp, %%rdi\n\t"
1551 		LOAD_GPR_C
1552 		"cmpb $0, %[launched]\n\t"
1553 		"jne 1f\n\t"
1554 		"vmlaunch\n\t"
1555 		"jmp 2f\n\t"
1556 		"1: "
1557 		"vmresume\n\t"
1558 		"2: "
1559 		SAVE_GPR_C
1560 		"pushf\n\t"
1561 		"pop %%rdi\n\t"
1562 		"mov %%rdi, %[failure_flags]\n\t"
1563 		"movl $1, %[failure_flags]\n\t"
1564 		"jmp 3f\n\t"
1565 		"vmx_return:\n\t"
1566 		SAVE_GPR_C
1567 		"3: \n\t"
1568 		: [failure_early]"+m"(failure->early),
1569 		  [failure_flags]"=m"(failure->flags)
1570 		: [launched]"m"(launched), [HOST_RSP]"i"(HOST_RSP)
1571 		: "rdi", "memory", "cc"
1572 	);
1573 	in_guest = 0;
1574 
1575 	failure->vmlaunch = !launched;
1576 	failure->instr = launched ? "vmresume" : "vmlaunch";
1577 
1578 	return !failure->early && !(vmcs_read(EXI_REASON) & VMX_ENTRY_FAILURE);
1579 }
1580 
1581 static int vmx_run()
1582 {
1583 	while (1) {
1584 		u32 ret;
1585 		bool entered;
1586 		struct vmentry_failure failure;
1587 
1588 		entered = vmx_enter_guest(&failure);
1589 
1590 		if (entered) {
1591 			/*
1592 			 * VMCS isn't in "launched" state if there's been any
1593 			 * entry failure (early or otherwise).
1594 			 */
1595 			launched = 1;
1596 			ret = exit_handler();
1597 		} else {
1598 			ret = entry_failure_handler(&failure);
1599 		}
1600 
1601 		switch (ret) {
1602 		case VMX_TEST_RESUME:
1603 			continue;
1604 		case VMX_TEST_VMEXIT:
1605 			guest_finished = 1;
1606 			return 0;
1607 		case VMX_TEST_EXIT:
1608 			break;
1609 		default:
1610 			printf("ERROR : Invalid %s_handler return val %d.\n",
1611 			       entered ? "exit" : "entry_failure",
1612 			       ret);
1613 			break;
1614 		}
1615 
1616 		if (entered)
1617 			print_vmexit_info();
1618 		else
1619 			print_vmentry_failure_info(&failure);
1620 		abort();
1621 	}
1622 }
1623 
/* Invoke one registered teardown callback with the data stored alongside it. */
static void run_teardown_step(struct test_teardown_step *step)
{
	step->func(step->data);
}
1628 
1629 static int test_run(struct vmx_test *test)
1630 {
1631 	int r;
1632 
1633 	/* Validate V2 interface. */
1634 	if (test->v2) {
1635 		int ret = 0;
1636 		if (test->init || test->guest_main || test->exit_handler ||
1637 		    test->syscall_handler) {
1638 			report("V2 test cannot specify V1 callbacks.", 0);
1639 			ret = 1;
1640 		}
1641 		if (ret)
1642 			return ret;
1643 	}
1644 
1645 	if (test->name == NULL)
1646 		test->name = "(no name)";
1647 	if (vmx_on()) {
1648 		printf("%s : vmxon failed.\n", __func__);
1649 		return 1;
1650 	}
1651 
1652 	init_vmcs(&(test->vmcs));
1653 	/* Directly call test->init is ok here, init_vmcs has done
1654 	   vmcs init, vmclear and vmptrld*/
1655 	if (test->init && test->init(test->vmcs) != VMX_TEST_START)
1656 		goto out;
1657 	teardown_count = 0;
1658 	v2_guest_main = NULL;
1659 	test->exits = 0;
1660 	current = test;
1661 	regs = test->guest_regs;
1662 	vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2);
1663 	launched = 0;
1664 	guest_finished = 0;
1665 	printf("\nTest suite: %s\n", test->name);
1666 
1667 	r = setjmp(abort_target);
1668 	if (r) {
1669 		assert(!in_guest);
1670 		goto out;
1671 	}
1672 
1673 
1674 	if (test->v2)
1675 		test->v2();
1676 	else
1677 		vmx_run();
1678 
1679 	while (teardown_count > 0)
1680 		run_teardown_step(&teardown_steps[--teardown_count]);
1681 
1682 	if (launched && !guest_finished)
1683 		report("Guest didn't run to completion.", 0);
1684 
1685 out:
1686 	if (vmx_off()) {
1687 		printf("%s : vmxoff failed.\n", __func__);
1688 		return 1;
1689 	}
1690 	return 0;
1691 }
1692 
1693 /*
1694  * Add a teardown step. Executed after the test's main function returns.
1695  * Teardown steps executed in reverse order.
1696  */
1697 void test_add_teardown(test_teardown_func func, void *data)
1698 {
1699 	struct test_teardown_step *step;
1700 
1701 	TEST_ASSERT_MSG(teardown_count < MAX_TEST_TEARDOWN_STEPS,
1702 			"There are already %d teardown steps.",
1703 			teardown_count);
1704 	step = &teardown_steps[teardown_count++];
1705 	step->func = func;
1706 	step->data = data;
1707 }
1708 
/*
 * Set the target of the first enter_guest call. Can only be called once per
 * test. Must be called before first enter_guest call.
 *
 * Only valid for V2 tests (asserts current->v2); asserts as well if a guest
 * function has already been set for the running test.
 */
void test_set_guest(test_guest_func func)
{
	assert(current->v2);
	TEST_ASSERT_MSG(!v2_guest_main, "Already set guest func.");
	v2_guest_main = func;
}
1719 
1720 /*
1721  * Enters the guest (or launches it for the first time). Error to call once the
1722  * guest has returned (i.e., run past the end of its guest() function). Also
1723  * aborts if guest entry fails.
1724  */
1725 void enter_guest(void)
1726 {
1727 	struct vmentry_failure failure;
1728 
1729 	TEST_ASSERT_MSG(v2_guest_main,
1730 			"Never called test_set_guest_func!");
1731 
1732 	TEST_ASSERT_MSG(!guest_finished,
1733 			"Called enter_guest() after guest returned.");
1734 
1735 	if (!vmx_enter_guest(&failure)) {
1736 		print_vmentry_failure_info(&failure);
1737 		abort();
1738 	}
1739 
1740 	launched = 1;
1741 
1742 	if (is_hypercall()) {
1743 		int ret;
1744 
1745 		ret = handle_hypercall();
1746 		switch (ret) {
1747 		case VMX_TEST_VMEXIT:
1748 			guest_finished = 1;
1749 			break;
1750 		case VMX_TEST_VMABORT:
1751 			continue_abort();
1752 			break;
1753 		case VMX_TEST_VMSKIP:
1754 			continue_skip();
1755 			break;
1756 		default:
1757 			printf("ERROR : Invalid handle_hypercall return %d.\n",
1758 			       ret);
1759 			abort();
1760 		}
1761 	}
1762 }
1763 
1764 extern struct vmx_test vmx_tests[];
1765 
1766 static bool
1767 test_wanted(const char *name, const char *filters[], int filter_count)
1768 {
1769 	int i;
1770 	bool positive = false;
1771 	bool match = false;
1772 	char clean_name[strlen(name) + 1];
1773 	char *c;
1774 	const char *n;
1775 
1776 	/* Replace spaces with underscores. */
1777 	n = name;
1778 	c = &clean_name[0];
1779 	do *c++ = (*n == ' ') ? '_' : *n;
1780 	while (*n++);
1781 
1782 	for (i = 0; i < filter_count; i++) {
1783 		const char *filter = filters[i];
1784 
1785 		if (filter[0] == '-') {
1786 			if (simple_glob(clean_name, filter + 1))
1787 				return false;
1788 		} else {
1789 			positive = true;
1790 			match |= simple_glob(clean_name, filter);
1791 		}
1792 	}
1793 
1794 	if (!positive || match) {
1795 		matched++;
1796 		return true;
1797 	} else {
1798 		return false;
1799 	}
1800 }
1801 
1802 int main(int argc, const char *argv[])
1803 {
1804 	int i = 0;
1805 
1806 	setup_vm();
1807 	setup_idt();
1808 	hypercall_field = 0;
1809 
1810 	argv++;
1811 	argc--;
1812 
1813 	if (!(cpuid(1).c & (1 << 5))) {
1814 		printf("WARNING: vmx not supported, add '-cpu host'\n");
1815 		goto exit;
1816 	}
1817 	init_vmx();
1818 	if (test_wanted("test_vmx_feature_control", argv, argc)) {
1819 		/* Sets MSR_IA32_FEATURE_CONTROL to 0x5 */
1820 		if (test_vmx_feature_control() != 0)
1821 			goto exit;
1822 	} else {
1823 		if ((rdmsr(MSR_IA32_FEATURE_CONTROL) & 0x5) != 0x5)
1824 			wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5);
1825 	}
1826 
1827 	if (test_wanted("test_vmxon", argv, argc)) {
1828 		/* Enables VMX */
1829 		if (test_vmxon() != 0)
1830 			goto exit;
1831 	} else {
1832 		if (vmx_on()) {
1833 			report("vmxon", 0);
1834 			goto exit;
1835 		}
1836 	}
1837 
1838 	if (test_wanted("test_vmptrld", argv, argc))
1839 		test_vmptrld();
1840 	if (test_wanted("test_vmclear", argv, argc))
1841 		test_vmclear();
1842 	if (test_wanted("test_vmptrst", argv, argc))
1843 		test_vmptrst();
1844 	if (test_wanted("test_vmwrite_vmread", argv, argc))
1845 		test_vmwrite_vmread();
1846 	if (test_wanted("test_vmcs_lifecycle", argv, argc))
1847 		test_vmcs_lifecycle();
1848 	if (test_wanted("test_vmx_caps", argv, argc))
1849 		test_vmx_caps();
1850 
1851 	/* Balance vmxon from test_vmxon. */
1852 	vmx_off();
1853 
1854 	for (; vmx_tests[i].name != NULL; i++) {
1855 		if (!test_wanted(vmx_tests[i].name, argv, argc))
1856 			continue;
1857 		if (test_run(&vmx_tests[i]))
1858 			goto exit;
1859 	}
1860 
1861 	if (!matched)
1862 		report("command line didn't match any tests!", matched);
1863 
1864 exit:
1865 	return report_summary();
1866 }
1867