xref: /kvm-unit-tests/x86/vmx.c (revision ebc1b903f12dab0921c055712131a54b7e37eea1)
1 /*
2  * x86/vmx.c : Framework for testing nested virtualization
3  *	This is a framework to test nested VMX for KVM, which
4  * 	started as a project of GSoC 2013. All test cases should
5  *	be located in x86/vmx_tests.c and framework related
6  *	functions should be in this file.
7  *
8  * How to write test cases?
 *	Add callbacks of test suite in variable "vmx_tests". You can
10  *	write:
11  *		1. init function used for initializing test suite
12  *		2. main function for codes running in L2 guest,
13  *		3. exit_handler to handle vmexit of L2 to L1
14  *		4. syscall handler to handle L2 syscall vmexit
15  *		5. vmenter fail handler to handle direct failure of vmenter
16  *		6. guest_regs is loaded when vmenter and saved when
17  *			vmexit, you can read and set it in exit_handler
18  *	If no special function is needed for a test suite, use
 *	corresponding basic_* functions as callback. More handlers
20  *	can be added to "vmx_tests", see details of "struct vmx_test"
21  *	and function test_run().
22  *
23  * Currently, vmx test framework only set up one VCPU and one
24  * concurrent guest test environment with same paging for L2 and
25  * L1. For usage of EPT, only 1:1 mapped paging is used from VFN
26  * to PFN.
27  *
28  * Author : Arthur Chunqi Li <yzt356@gmail.com>
29  */
30 
31 #include "libcflat.h"
32 #include "processor.h"
33 #include "alloc_page.h"
34 #include "vm.h"
35 #include "desc.h"
36 #include "vmx.h"
37 #include "msr.h"
38 #include "smp.h"
39 
/* VMXON region; test_vmclear() expects VMCLEAR of this address to fail. */
u64 *vmxon_region;
/* VMCS that test_vmclear() uses as its valid VMCLEAR operand. */
struct vmcs *vmcs_root;
/* Pre-incremented by init_vmcs_ctrl() to produce the value written to VPID. */
u32 vpid_cnt;
/* guest_syscall_stack backs GUEST_SYSENTER_ESP (see init_vmcs_guest()). */
void *guest_stack, *guest_syscall_stack;
/* Control values that init_vmcs_ctrl()/init_vmcs_host() write into the VMCS. */
u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
/* Guest general-purpose register image; dumped by print_vmexit_info(). */
struct regs regs;

/* Test being run; consulted by syscall_handler() and guest_main(). */
struct vmx_test *current;

/* Capacity of teardown_steps[]. */
#define MAX_TEST_TEARDOWN_STEPS 10

/* A teardown callback paired with an opaque data pointer. */
struct test_teardown_step {
	test_teardown_func func;
	void *data;
};

/*
 * NOTE(review): presumably teardown_count is the number of entries in use
 * in teardown_steps[] -- confirm against the code that fills the array.
 */
static int teardown_count;
static struct test_teardown_step teardown_steps[MAX_TEST_TEARDOWN_STEPS];

/* Guest entry point that guest_main() calls when current->v2 is set. */
static test_guest_func v2_guest_main;

u64 hypercall_field;
bool launched;
static int matched;
static int guest_finished;
static int in_guest;

/*
 * VMX capability data.  basic.revision seeds each VMCS revision_id and
 * basic.size bounds the VMCS copies in test_vmclear_flushing();
 * ctrl_cpu_rev[] constrains the secondary controls in init_vmcs_ctrl();
 * ept_vpid.val holds the capability bits tested by the ept_*_supported()
 * helpers, ept_sync() and vpid_sync().
 */
union vmx_basic basic;
union vmx_ctrl_msr ctrl_pin_rev;
union vmx_ctrl_msr ctrl_cpu_rev[2];
union vmx_ctrl_msr ctrl_exit_rev;
union vmx_ctrl_msr ctrl_enter_rev;
union vmx_ept_vpid  ept_vpid;

/* Descriptor tables whose bases init_vmcs_host() programs as host state. */
extern struct descriptor_table_ptr gdt64_desc;
extern struct descriptor_table_ptr idt_descr;
extern struct descriptor_table_ptr tss_descr;
/*
 * Code addresses: vmx_return is written to HOST_RIP and entry_sysenter to
 * the SYSENTER EIP fields; guest_entry is the asm stub calling guest_main().
 */
extern void *vmx_return;
extern void *entry_sysenter;
extern void *guest_entry;

/* Test stage counter, read and written via vmx_{set,get,inc}_test_stage(). */
static volatile u32 stage;

static jmp_buf abort_target;
84 
/*
 * Describes one VMCS field for the VMWRITE/VMREAD round-trip tests.
 *	@mask : bits compared after a write/read round trip; 0 marks a
 *		read-only field, for which nothing is compared
 *	@encoding : field encoding passed to vmcs_write() and
 *		vmcs_read_checking()
 */
struct vmcs_field {
	u64 mask;
	u64 encoding;
};

/* Mask covering the low @_bits bits. */
#define MASK(_bits) GENMASK_ULL((_bits) - 1, 0)
/* Mask covering a natural-width (sizeof(unsigned long)) field. */
#define MASK_NATURAL MASK(sizeof(unsigned long) * 8)
92 
/*
 * Every VMCS field exercised by set_all_vmcs_fields() and
 * check_all_vmcs_fields().  Each entry pairs the field encoding with the
 * mask of bits that are compared after a write; entries with a zero mask
 * are read-only fields.  Entries are grouped by field width.
 */
static struct vmcs_field vmcs_fields[] = {
	/* 16-bit fields */
	{ MASK(16), VPID },
	{ MASK(16), PINV },
	{ MASK(16), EPTP_IDX },

	{ MASK(16), GUEST_SEL_ES },
	{ MASK(16), GUEST_SEL_CS },
	{ MASK(16), GUEST_SEL_SS },
	{ MASK(16), GUEST_SEL_DS },
	{ MASK(16), GUEST_SEL_FS },
	{ MASK(16), GUEST_SEL_GS },
	{ MASK(16), GUEST_SEL_LDTR },
	{ MASK(16), GUEST_SEL_TR },
	{ MASK(16), GUEST_INT_STATUS },

	{ MASK(16), HOST_SEL_ES },
	{ MASK(16), HOST_SEL_CS },
	{ MASK(16), HOST_SEL_SS },
	{ MASK(16), HOST_SEL_DS },
	{ MASK(16), HOST_SEL_FS },
	{ MASK(16), HOST_SEL_GS },
	{ MASK(16), HOST_SEL_TR },

	/* 64-bit fields */
	{ MASK(64), IO_BITMAP_A },
	{ MASK(64), IO_BITMAP_B },
	{ MASK(64), MSR_BITMAP },
	{ MASK(64), EXIT_MSR_ST_ADDR },
	{ MASK(64), EXIT_MSR_LD_ADDR },
	{ MASK(64), ENTER_MSR_LD_ADDR },
	{ MASK(64), VMCS_EXEC_PTR },
	{ MASK(64), TSC_OFFSET },
	{ MASK(64), APIC_VIRT_ADDR },
	{ MASK(64), APIC_ACCS_ADDR },
	{ MASK(64), EPTP },

	{ 0 /* read-only */, INFO_PHYS_ADDR },

	{ MASK(64), VMCS_LINK_PTR },
	{ MASK(64), GUEST_DEBUGCTL },
	{ MASK(64), GUEST_EFER },
	{ MASK(64), GUEST_PAT },
	{ MASK(64), GUEST_PERF_GLOBAL_CTRL },
	{ MASK(64), GUEST_PDPTE },

	{ MASK(64), HOST_PAT },
	{ MASK(64), HOST_EFER },
	{ MASK(64), HOST_PERF_GLOBAL_CTRL },

	/* 32-bit fields */
	{ MASK(32), PIN_CONTROLS },
	{ MASK(32), CPU_EXEC_CTRL0 },
	{ MASK(32), EXC_BITMAP },
	{ MASK(32), PF_ERROR_MASK },
	{ MASK(32), PF_ERROR_MATCH },
	{ MASK(32), CR3_TARGET_COUNT },
	{ MASK(32), EXI_CONTROLS },
	{ MASK(32), EXI_MSR_ST_CNT },
	{ MASK(32), EXI_MSR_LD_CNT },
	{ MASK(32), ENT_CONTROLS },
	{ MASK(32), ENT_MSR_LD_CNT },
	{ MASK(32), ENT_INTR_INFO },
	{ MASK(32), ENT_INTR_ERROR },
	{ MASK(32), ENT_INST_LEN },
	{ MASK(32), TPR_THRESHOLD },
	{ MASK(32), CPU_EXEC_CTRL1 },

	{ 0 /* read-only */, VMX_INST_ERROR },
	{ 0 /* read-only */, EXI_REASON },
	{ 0 /* read-only */, EXI_INTR_INFO },
	{ 0 /* read-only */, EXI_INTR_ERROR },
	{ 0 /* read-only */, IDT_VECT_INFO },
	{ 0 /* read-only */, IDT_VECT_ERROR },
	{ 0 /* read-only */, EXI_INST_LEN },
	{ 0 /* read-only */, EXI_INST_INFO },

	{ MASK(32), GUEST_LIMIT_ES },
	{ MASK(32), GUEST_LIMIT_CS },
	{ MASK(32), GUEST_LIMIT_SS },
	{ MASK(32), GUEST_LIMIT_DS },
	{ MASK(32), GUEST_LIMIT_FS },
	{ MASK(32), GUEST_LIMIT_GS },
	{ MASK(32), GUEST_LIMIT_LDTR },
	{ MASK(32), GUEST_LIMIT_TR },
	{ MASK(32), GUEST_LIMIT_GDTR },
	{ MASK(32), GUEST_LIMIT_IDTR },
	/* Access-rights fields: only the bits in these masks are compared. */
	{ 0x1d0ff, GUEST_AR_ES },
	{ 0x1f0ff, GUEST_AR_CS },
	{ 0x1d0ff, GUEST_AR_SS },
	{ 0x1d0ff, GUEST_AR_DS },
	{ 0x1d0ff, GUEST_AR_FS },
	{ 0x1d0ff, GUEST_AR_GS },
	{ 0x1d0ff, GUEST_AR_LDTR },
	{ 0x1d0ff, GUEST_AR_TR },
	{ MASK(32), GUEST_INTR_STATE },
	{ MASK(32), GUEST_ACTV_STATE },
	{ MASK(32), GUEST_SMBASE },
	{ MASK(32), GUEST_SYSENTER_CS },
	{ MASK(32), PREEMPT_TIMER_VALUE },

	{ MASK(32), HOST_SYSENTER_CS },

	/* Natural-width fields */
	{ MASK_NATURAL, CR0_MASK },
	{ MASK_NATURAL, CR4_MASK },
	{ MASK_NATURAL, CR0_READ_SHADOW },
	{ MASK_NATURAL, CR4_READ_SHADOW },
	{ MASK_NATURAL, CR3_TARGET_0 },
	{ MASK_NATURAL, CR3_TARGET_1 },
	{ MASK_NATURAL, CR3_TARGET_2 },
	{ MASK_NATURAL, CR3_TARGET_3 },

	{ 0 /* read-only */, EXI_QUALIFICATION },
	{ 0 /* read-only */, IO_RCX },
	{ 0 /* read-only */, IO_RSI },
	{ 0 /* read-only */, IO_RDI },
	{ 0 /* read-only */, IO_RIP },
	{ 0 /* read-only */, GUEST_LINEAR_ADDRESS },

	{ MASK_NATURAL, GUEST_CR0 },
	{ MASK_NATURAL, GUEST_CR3 },
	{ MASK_NATURAL, GUEST_CR4 },
	{ MASK_NATURAL, GUEST_BASE_ES },
	{ MASK_NATURAL, GUEST_BASE_CS },
	{ MASK_NATURAL, GUEST_BASE_SS },
	{ MASK_NATURAL, GUEST_BASE_DS },
	{ MASK_NATURAL, GUEST_BASE_FS },
	{ MASK_NATURAL, GUEST_BASE_GS },
	{ MASK_NATURAL, GUEST_BASE_LDTR },
	{ MASK_NATURAL, GUEST_BASE_TR },
	{ MASK_NATURAL, GUEST_BASE_GDTR },
	{ MASK_NATURAL, GUEST_BASE_IDTR },
	{ MASK_NATURAL, GUEST_DR7 },
	{ MASK_NATURAL, GUEST_RSP },
	{ MASK_NATURAL, GUEST_RIP },
	{ MASK_NATURAL, GUEST_RFLAGS },
	{ MASK_NATURAL, GUEST_PENDING_DEBUG },
	{ MASK_NATURAL, GUEST_SYSENTER_ESP },
	{ MASK_NATURAL, GUEST_SYSENTER_EIP },

	{ MASK_NATURAL, HOST_CR0 },
	{ MASK_NATURAL, HOST_CR3 },
	{ MASK_NATURAL, HOST_CR4 },
	{ MASK_NATURAL, HOST_BASE_FS },
	{ MASK_NATURAL, HOST_BASE_GS },
	{ MASK_NATURAL, HOST_BASE_TR },
	{ MASK_NATURAL, HOST_BASE_GDTR },
	{ MASK_NATURAL, HOST_BASE_IDTR },
	{ MASK_NATURAL, HOST_SYSENTER_ESP },
	{ MASK_NATURAL, HOST_SYSENTER_EIP },
	{ MASK_NATURAL, HOST_RSP },
	{ MASK_NATURAL, HOST_RIP },
};
243 
244 static inline u64 vmcs_field_value(struct vmcs_field *f, u8 cookie)
245 {
246 	u64 value;
247 
248 	/* Incorporate the cookie and the field encoding into the value. */
249 	value = cookie;
250 	value |= (f->encoding << 8);
251 	value |= 0xdeadbeefull << 32;
252 
253 	return value & f->mask;
254 }
255 
256 static void set_vmcs_field(struct vmcs_field *f, u8 cookie)
257 {
258 	vmcs_write(f->encoding, vmcs_field_value(f, cookie));
259 }
260 
261 static bool check_vmcs_field(struct vmcs_field *f, u8 cookie)
262 {
263 	u64 expected;
264 	u64 actual;
265 	int ret;
266 
267 	ret = vmcs_read_checking(f->encoding, &actual);
268 	assert(!(ret & X86_EFLAGS_CF));
269 	/* Skip VMCS fields that aren't recognized by the CPU */
270 	if (ret & X86_EFLAGS_ZF)
271 		return true;
272 
273 	expected = vmcs_field_value(f, cookie);
274 	actual &= f->mask;
275 
276 	if (expected == actual)
277 		return true;
278 
279 	printf("FAIL: VMWRITE/VMREAD %lx (expected: %lx, actual: %lx)\n",
280 	       f->encoding, (unsigned long) expected, (unsigned long) actual);
281 
282 	return false;
283 }
284 
285 static void set_all_vmcs_fields(u8 cookie)
286 {
287 	int i;
288 
289 	for (i = 0; i < ARRAY_SIZE(vmcs_fields); i++)
290 		set_vmcs_field(&vmcs_fields[i], cookie);
291 }
292 
293 static bool check_all_vmcs_fields(u8 cookie)
294 {
295 	bool pass = true;
296 	int i;
297 
298 	for (i = 0; i < ARRAY_SIZE(vmcs_fields); i++) {
299 		if (!check_vmcs_field(&vmcs_fields[i], cookie))
300 			pass = false;
301 	}
302 
303 	return pass;
304 }
305 
306 void test_vmwrite_vmread(void)
307 {
308 	struct vmcs *vmcs = alloc_page();
309 
310 	memset(vmcs, 0, PAGE_SIZE);
311 	vmcs->revision_id = basic.revision;
312 	assert(!vmcs_clear(vmcs));
313 	assert(!make_vmcs_current(vmcs));
314 
315 	set_all_vmcs_fields(0x42);
316 	report("VMWRITE/VMREAD", check_all_vmcs_fields(0x42));
317 
318 	assert(!vmcs_clear(vmcs));
319 	free_page(vmcs);
320 }
321 
322 void test_vmcs_high(void)
323 {
324 	struct vmcs *vmcs = alloc_page();
325 
326 	memset(vmcs, 0, PAGE_SIZE);
327 	vmcs->revision_id = basic.revision;
328 	assert(!vmcs_clear(vmcs));
329 	assert(!make_vmcs_current(vmcs));
330 
331 	vmcs_write(TSC_OFFSET, 0x0123456789ABCDEFull);
332 	report("VMREAD TSC_OFFSET after VMWRITE TSC_OFFSET",
333 	       vmcs_read(TSC_OFFSET) == 0x0123456789ABCDEFull);
334 	report("VMREAD TSC_OFFSET_HI after VMWRITE TSC_OFFSET",
335 	       vmcs_read(TSC_OFFSET_HI) == 0x01234567ull);
336 	vmcs_write(TSC_OFFSET_HI, 0x76543210ul);
337 	report("VMREAD TSC_OFFSET_HI after VMWRITE TSC_OFFSET_HI",
338 	       vmcs_read(TSC_OFFSET_HI) == 0x76543210ul);
339 	report("VMREAD TSC_OFFSET after VMWRITE TSC_OFFSET_HI",
340 	       vmcs_read(TSC_OFFSET) == 0x7654321089ABCDEFull);
341 
342 	assert(!vmcs_clear(vmcs));
343 	free_page(vmcs);
344 }
345 
/*
 * Walk two VMCSs through sequences of VMCLEAR and VMPTRLD and verify,
 * after each step, that the current VMCS still holds the pattern last
 * written to it.  Each report string names the VMCS expected to be
 * current and the set expected to be active at that point.
 */
void test_vmcs_lifecycle(void)
{
	struct vmcs *vmcs[2] = {};
	int i;

	/* Allocate zeroed VMCS regions stamped with the CPU's revision id. */
	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
		vmcs[i] = alloc_page();
		memset(vmcs[i], 0, PAGE_SIZE);
		vmcs[i]->revision_id = basic.revision;
	}

/* Make vmcs[_i] current, assert success, and log the step. */
#define VMPTRLD(_i) do { \
	assert(_i < ARRAY_SIZE(vmcs)); \
	assert(!make_vmcs_current(vmcs[_i])); \
	printf("VMPTRLD VMCS%d\n", (_i)); \
} while (0)

/* Clear vmcs[_i], assert success, and log the step. */
#define VMCLEAR(_i) do { \
	assert(_i < ARRAY_SIZE(vmcs)); \
	assert(!vmcs_clear(vmcs[_i])); \
	printf("VMCLEAR VMCS%d\n", (_i)); \
} while (0)

	/* Fill VMCS0 with cookie 0 and read it back. */
	VMCLEAR(0);
	VMPTRLD(0);
	set_all_vmcs_fields(0);
	report("current:VMCS0 active:[VMCS0]", check_all_vmcs_fields(0));

	/* The contents must survive a VMCLEAR followed by a reload. */
	VMCLEAR(0);
	VMPTRLD(0);
	report("current:VMCS0 active:[VMCS0]", check_all_vmcs_fields(0));

	/* Clearing the other VMCS must not disturb the current one. */
	VMCLEAR(1);
	report("current:VMCS0 active:[VMCS0]", check_all_vmcs_fields(0));

	/* Fill VMCS1 with cookie 1. */
	VMPTRLD(1);
	set_all_vmcs_fields(1);
	report("current:VMCS1 active:[VMCS0,VCMS1]", check_all_vmcs_fields(1));

	/* Switching back and forth must preserve each VMCS's own contents. */
	VMPTRLD(0);
	report("current:VMCS0 active:[VMCS0,VCMS1]", check_all_vmcs_fields(0));
	VMPTRLD(1);
	report("current:VMCS1 active:[VMCS0,VCMS1]", check_all_vmcs_fields(1));
	VMPTRLD(1);
	report("current:VMCS1 active:[VMCS0,VCMS1]", check_all_vmcs_fields(1));

	/* Clearing the non-current VMCS0 must leave VMCS1 intact. */
	VMCLEAR(0);
	report("current:VMCS1 active:[VCMS1]", check_all_vmcs_fields(1));

	/* VMPTRLD should not erase VMWRITEs to the current VMCS */
	set_all_vmcs_fields(2);
	VMPTRLD(1);
	report("current:VMCS1 active:[VCMS1]", check_all_vmcs_fields(2));

	/* Clear both VMCSs before returning their pages. */
	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
		VMCLEAR(i);
		free_page(vmcs[i]);
	}

#undef VMPTRLD
#undef VMCLEAR
}
408 
/*
 * Store @s in the file-local stage counter.  The store is bracketed by
 * barrier() calls so it stays between the surrounding code.
 */
void vmx_set_test_stage(u32 s)
{
	barrier();
	stage = s;
	barrier();
}
415 
/*
 * Return the current value of the file-local stage counter.  The load is
 * bracketed by barrier() calls, mirroring vmx_set_test_stage().
 */
u32 vmx_get_test_stage(void)
{
	u32 s;

	barrier();
	s = stage;
	barrier();
	return s;
}
425 
/*
 * Increment the file-local stage counter by one, bracketed by barrier()
 * calls like the other stage accessors.
 */
void vmx_inc_test_stage(void)
{
	barrier();
	stage++;
	barrier();
}
432 
/*
 * entry_sysenter: assembly stub whose address init_vmcs_host() and
 * init_vmcs_guest() write to the SYSENTER EIP fields.
 *
 * It masks %rax down to its low four bits, passes the result to
 * syscall_handler() as the first argument (%rdi), and then executes
 * VMRESUME.  SAVE_GPR and LOAD_GPR are macros defined outside this file;
 * presumably they save and restore the general-purpose registers around
 * the C call -- confirm against their definitions.
 */
asm(
	".align	4, 0x90\n\t"
	".globl	entry_sysenter\n\t"
	"entry_sysenter:\n\t"
	SAVE_GPR
	"	and	$0xf, %rax\n\t"		/* keep only the low 4 bits */
	"	mov	%rax, %rdi\n\t"		/* first argument of the call */
	"	call	syscall_handler\n\t"
	LOAD_GPR
	"	vmresume\n\t"
);
445 
446 static void __attribute__((__used__)) syscall_handler(u64 syscall_no)
447 {
448 	if (current->syscall_handler)
449 		current->syscall_handler(syscall_no);
450 }
451 
/*
 * Printable names for VM-exit reasons, indexed by the VMX_* exit reason
 * constant.  Indices without an initializer are NULL;
 * exit_reason_description() maps those to "(unused)".
 */
static const char * const exit_reason_descriptions[] = {
	[VMX_EXC_NMI]		= "VMX_EXC_NMI",
	[VMX_EXTINT]		= "VMX_EXTINT",
	[VMX_TRIPLE_FAULT]	= "VMX_TRIPLE_FAULT",
	[VMX_INIT]		= "VMX_INIT",
	[VMX_SIPI]		= "VMX_SIPI",
	[VMX_SMI_IO]		= "VMX_SMI_IO",
	[VMX_SMI_OTHER]		= "VMX_SMI_OTHER",
	[VMX_INTR_WINDOW]	= "VMX_INTR_WINDOW",
	[VMX_NMI_WINDOW]	= "VMX_NMI_WINDOW",
	[VMX_TASK_SWITCH]	= "VMX_TASK_SWITCH",
	[VMX_CPUID]		= "VMX_CPUID",
	[VMX_GETSEC]		= "VMX_GETSEC",
	[VMX_HLT]		= "VMX_HLT",
	[VMX_INVD]		= "VMX_INVD",
	[VMX_INVLPG]		= "VMX_INVLPG",
	[VMX_RDPMC]		= "VMX_RDPMC",
	[VMX_RDTSC]		= "VMX_RDTSC",
	[VMX_RSM]		= "VMX_RSM",
	[VMX_VMCALL]		= "VMX_VMCALL",
	[VMX_VMCLEAR]		= "VMX_VMCLEAR",
	[VMX_VMLAUNCH]		= "VMX_VMLAUNCH",
	[VMX_VMPTRLD]		= "VMX_VMPTRLD",
	[VMX_VMPTRST]		= "VMX_VMPTRST",
	[VMX_VMREAD]		= "VMX_VMREAD",
	[VMX_VMRESUME]		= "VMX_VMRESUME",
	[VMX_VMWRITE]		= "VMX_VMWRITE",
	[VMX_VMXOFF]		= "VMX_VMXOFF",
	[VMX_VMXON]		= "VMX_VMXON",
	[VMX_CR]		= "VMX_CR",
	[VMX_DR]		= "VMX_DR",
	[VMX_IO]		= "VMX_IO",
	[VMX_RDMSR]		= "VMX_RDMSR",
	[VMX_WRMSR]		= "VMX_WRMSR",
	[VMX_FAIL_STATE]	= "VMX_FAIL_STATE",
	[VMX_FAIL_MSR]		= "VMX_FAIL_MSR",
	[VMX_MWAIT]		= "VMX_MWAIT",
	[VMX_MTF]		= "VMX_MTF",
	[VMX_MONITOR]		= "VMX_MONITOR",
	[VMX_PAUSE]		= "VMX_PAUSE",
	[VMX_FAIL_MCHECK]	= "VMX_FAIL_MCHECK",
	[VMX_TPR_THRESHOLD]	= "VMX_TPR_THRESHOLD",
	[VMX_APIC_ACCESS]	= "VMX_APIC_ACCESS",
	[VMX_EOI_INDUCED]	= "VMX_EOI_INDUCED",
	[VMX_GDTR_IDTR]		= "VMX_GDTR_IDTR",
	[VMX_LDTR_TR]		= "VMX_LDTR_TR",
	[VMX_EPT_VIOLATION]	= "VMX_EPT_VIOLATION",
	[VMX_EPT_MISCONFIG]	= "VMX_EPT_MISCONFIG",
	[VMX_INVEPT]		= "VMX_INVEPT",
	[VMX_PREEMPT]		= "VMX_PREEMPT",
	[VMX_INVVPID]		= "VMX_INVVPID",
	[VMX_WBINVD]		= "VMX_WBINVD",
	[VMX_XSETBV]		= "VMX_XSETBV",
	[VMX_APIC_WRITE]	= "VMX_APIC_WRITE",
	[VMX_RDRAND]		= "VMX_RDRAND",
	[VMX_INVPCID]		= "VMX_INVPCID",
	[VMX_VMFUNC]		= "VMX_VMFUNC",
	[VMX_RDSEED]		= "VMX_RDSEED",
	[VMX_PML_FULL]		= "VMX_PML_FULL",
	[VMX_XSAVES]		= "VMX_XSAVES",
	[VMX_XRSTORS]		= "VMX_XRSTORS",
};
514 
515 const char *exit_reason_description(u64 reason)
516 {
517 	if (reason >= ARRAY_SIZE(exit_reason_descriptions))
518 		return "(unknown)";
519 	return exit_reason_descriptions[reason] ? : "(unused)";
520 }
521 
522 void print_vmexit_info()
523 {
524 	u64 guest_rip, guest_rsp;
525 	ulong reason = vmcs_read(EXI_REASON) & 0xff;
526 	ulong exit_qual = vmcs_read(EXI_QUALIFICATION);
527 	guest_rip = vmcs_read(GUEST_RIP);
528 	guest_rsp = vmcs_read(GUEST_RSP);
529 	printf("VMEXIT info:\n");
530 	printf("\tvmexit reason = %ld\n", reason);
531 	printf("\texit qualification = %#lx\n", exit_qual);
532 	printf("\tBit 31 of reason = %lx\n", (vmcs_read(EXI_REASON) >> 31) & 1);
533 	printf("\tguest_rip = %#lx\n", guest_rip);
534 	printf("\tRAX=%#lx    RBX=%#lx    RCX=%#lx    RDX=%#lx\n",
535 		regs.rax, regs.rbx, regs.rcx, regs.rdx);
536 	printf("\tRSP=%#lx    RBP=%#lx    RSI=%#lx    RDI=%#lx\n",
537 		guest_rsp, regs.rbp, regs.rsi, regs.rdi);
538 	printf("\tR8 =%#lx    R9 =%#lx    R10=%#lx    R11=%#lx\n",
539 		regs.r8, regs.r9, regs.r10, regs.r11);
540 	printf("\tR12=%#lx    R13=%#lx    R14=%#lx    R15=%#lx\n",
541 		regs.r12, regs.r13, regs.r14, regs.r15);
542 }
543 
544 void
545 print_vmentry_failure_info(struct vmentry_failure *failure) {
546 	if (failure->early) {
547 		printf("Early %s failure: ", failure->instr);
548 		switch (failure->flags & VMX_ENTRY_FLAGS) {
549 		case X86_EFLAGS_CF:
550 			printf("current-VMCS pointer is not valid.\n");
551 			break;
552 		case X86_EFLAGS_ZF:
553 			printf("error number is %ld. See Intel 30.4.\n",
554 			       vmcs_read(VMX_INST_ERROR));
555 			break;
556 		default:
557 			printf("unexpected flags %lx!\n", failure->flags);
558 		}
559 	} else {
560 		u64 reason = vmcs_read(EXI_REASON);
561 		u64 qual = vmcs_read(EXI_QUALIFICATION);
562 
563 		printf("Non-early %s failure (reason=%#lx, qual=%#lx): ",
564 			failure->instr, reason, qual);
565 
566 		switch (reason & 0xff) {
567 		case VMX_FAIL_STATE:
568 			printf("invalid guest state\n");
569 			break;
570 		case VMX_FAIL_MSR:
571 			printf("MSR loading\n");
572 			break;
573 		case VMX_FAIL_MCHECK:
574 			printf("machine-check event\n");
575 			break;
576 		default:
577 			printf("unexpected basic exit reason %ld\n",
578 			       reason & 0xff);
579 		}
580 
581 		if (!(reason & VMX_ENTRY_FAILURE))
582 			printf("\tVMX_ENTRY_FAILURE BIT NOT SET!\n");
583 
584 		if (reason & 0x7fff0000)
585 			printf("\tRESERVED BITS SET!\n");
586 	}
587 }
588 
/*
 * VMCLEAR should ensure all VMCS state is flushed to the VMCS
 * region in memory.
 *
 * The test writes a pattern to a VMCS, VMCLEARs it, copies the raw region
 * to another page with memcpy() and checks that the copy holds the same
 * pattern once made current.  This is done both for a VMCS that is
 * current when cleared and for one that is not.
 */
static void test_vmclear_flushing(void)
{
	struct vmcs *vmcs[3] = {};
	int i;

	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
		vmcs[i] = alloc_page();
		memset(vmcs[i], 0, PAGE_SIZE);
	}

	/* Only vmcs[0] is initialized directly; the others are copies of it. */
	vmcs[0]->revision_id = basic.revision;
	assert(!vmcs_clear(vmcs[0]));
	assert(!make_vmcs_current(vmcs[0]));
	set_all_vmcs_fields(0x86);

	/* vmcs[0] is current when cleared; its copy must hold 0x86. */
	assert(!vmcs_clear(vmcs[0]));
	memcpy(vmcs[1], vmcs[0], basic.size);
	assert(!make_vmcs_current(vmcs[1]));
	report("test vmclear flush (current VMCS)", check_all_vmcs_fields(0x86));

	/*
	 * Write 0x87 to vmcs[1], make vmcs[0] current instead, then clear
	 * vmcs[1] while it is not current; its copy must hold 0x87.
	 */
	set_all_vmcs_fields(0x87);
	assert(!make_vmcs_current(vmcs[0]));
	assert(!vmcs_clear(vmcs[1]));
	memcpy(vmcs[2], vmcs[1], basic.size);
	assert(!make_vmcs_current(vmcs[2]));
	report("test vmclear flush (!current VMCS)", check_all_vmcs_fields(0x87));

	/* Clear every VMCS before returning its page. */
	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
		assert(!vmcs_clear(vmcs[i]));
		free_page(vmcs[i]);
	}
}
625 
626 static void test_vmclear(void)
627 {
628 	struct vmcs *tmp_root;
629 	int width = cpuid_maxphyaddr();
630 
631 	/*
632 	 * Note- The tests below do not necessarily have a
633 	 * valid VMCS, but that's ok since the invalid vmcs
634 	 * is only used for a specific test and is discarded
635 	 * without touching its contents
636 	 */
637 
638 	/* Unaligned page access */
639 	tmp_root = (struct vmcs *)((intptr_t)vmcs_root + 1);
640 	report("test vmclear with unaligned vmcs",
641 	       vmcs_clear(tmp_root) == 1);
642 
643 	/* gpa bits beyond physical address width are set*/
644 	tmp_root = (struct vmcs *)((intptr_t)vmcs_root |
645 				   ((u64)1 << (width+1)));
646 	report("test vmclear with vmcs address bits set beyond physical address width",
647 	       vmcs_clear(tmp_root) == 1);
648 
649 	/* Pass VMXON region */
650 	tmp_root = (struct vmcs *)vmxon_region;
651 	report("test vmclear with vmxon region",
652 	       vmcs_clear(tmp_root) == 1);
653 
654 	/* Valid VMCS */
655 	report("test vmclear with valid vmcs region", vmcs_clear(vmcs_root) == 0);
656 
657 	test_vmclear_flushing();
658 }
659 
660 static void __attribute__((__used__)) guest_main(void)
661 {
662 	if (current->v2)
663 		v2_guest_main();
664 	else
665 		current->guest_main();
666 }
667 
668 /* guest_entry */
669 asm(
670 	".align	4, 0x90\n\t"
671 	".globl	entry_guest\n\t"
672 	"guest_entry:\n\t"
673 	"	call guest_main\n\t"
674 	"	mov $1, %edi\n\t"
675 	"	call hypercall\n\t"
676 );
677 
678 /* EPT paging structure related functions */
679 /* split_large_ept_entry: Split a 2M/1G large page into 512 smaller PTEs.
680 		@ptep : large page table entry to split
681 		@level : level of ptep (2 or 3)
682  */
683 static void split_large_ept_entry(unsigned long *ptep, int level)
684 {
685 	unsigned long *new_pt;
686 	unsigned long gpa;
687 	unsigned long pte;
688 	unsigned long prototype;
689 	int i;
690 
691 	pte = *ptep;
692 	assert(pte & EPT_PRESENT);
693 	assert(pte & EPT_LARGE_PAGE);
694 	assert(level == 2 || level == 3);
695 
696 	new_pt = alloc_page();
697 	assert(new_pt);
698 	memset(new_pt, 0, PAGE_SIZE);
699 
700 	prototype = pte & ~EPT_ADDR_MASK;
701 	if (level == 2)
702 		prototype &= ~EPT_LARGE_PAGE;
703 
704 	gpa = pte & EPT_ADDR_MASK;
705 	for (i = 0; i < EPT_PGDIR_ENTRIES; i++) {
706 		new_pt[i] = prototype | gpa;
707 		gpa += 1ul << EPT_LEVEL_SHIFT(level - 1);
708 	}
709 
710 	pte &= ~EPT_LARGE_PAGE;
711 	pte &= ~EPT_ADDR_MASK;
712 	pte |= virt_to_phys(new_pt);
713 
714 	*ptep = pte;
715 }
716 
717 /* install_ept_entry : Install a page to a given level in EPT
718 		@pml4 : addr of pml4 table
719 		@pte_level : level of PTE to set
720 		@guest_addr : physical address of guest
721 		@pte : pte value to set
722 		@pt_page : address of page table, NULL for a new page
723  */
724 void install_ept_entry(unsigned long *pml4,
725 		int pte_level,
726 		unsigned long guest_addr,
727 		unsigned long pte,
728 		unsigned long *pt_page)
729 {
730 	int level;
731 	unsigned long *pt = pml4;
732 	unsigned offset;
733 
734 	/* EPT only uses 48 bits of GPA. */
735 	assert(guest_addr < (1ul << 48));
736 
737 	for (level = EPT_PAGE_LEVEL; level > pte_level; --level) {
738 		offset = (guest_addr >> EPT_LEVEL_SHIFT(level))
739 				& EPT_PGDIR_MASK;
740 		if (!(pt[offset] & (EPT_PRESENT))) {
741 			unsigned long *new_pt = pt_page;
742 			if (!new_pt)
743 				new_pt = alloc_page();
744 			else
745 				pt_page = 0;
746 			memset(new_pt, 0, PAGE_SIZE);
747 			pt[offset] = virt_to_phys(new_pt)
748 					| EPT_RA | EPT_WA | EPT_EA;
749 		} else if (pt[offset] & EPT_LARGE_PAGE)
750 			split_large_ept_entry(&pt[offset], level);
751 		pt = phys_to_virt(pt[offset] & EPT_ADDR_MASK);
752 	}
753 	offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) & EPT_PGDIR_MASK;
754 	pt[offset] = pte;
755 }
756 
757 /* Map a page, @perm is the permission of the page */
758 void install_ept(unsigned long *pml4,
759 		unsigned long phys,
760 		unsigned long guest_addr,
761 		u64 perm)
762 {
763 	install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0);
764 }
765 
766 /* Map a 1G-size page */
767 void install_1g_ept(unsigned long *pml4,
768 		unsigned long phys,
769 		unsigned long guest_addr,
770 		u64 perm)
771 {
772 	install_ept_entry(pml4, 3, guest_addr,
773 			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
774 }
775 
776 /* Map a 2M-size page */
777 void install_2m_ept(unsigned long *pml4,
778 		unsigned long phys,
779 		unsigned long guest_addr,
780 		u64 perm)
781 {
782 	install_ept_entry(pml4, 2, guest_addr,
783 			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
784 }
785 
786 /* setup_ept_range : Setup a range of 1:1 mapped page to EPT paging structure.
787 		@start : start address of guest page
788 		@len : length of address to be mapped
789 		@map_1g : whether 1G page map is used
790 		@map_2m : whether 2M page map is used
791 		@perm : permission for every page
792  */
793 void setup_ept_range(unsigned long *pml4, unsigned long start,
794 		     unsigned long len, int map_1g, int map_2m, u64 perm)
795 {
796 	u64 phys = start;
797 	u64 max = (u64)len + (u64)start;
798 
799 	if (map_1g) {
800 		while (phys + PAGE_SIZE_1G <= max) {
801 			install_1g_ept(pml4, phys, phys, perm);
802 			phys += PAGE_SIZE_1G;
803 		}
804 	}
805 	if (map_2m) {
806 		while (phys + PAGE_SIZE_2M <= max) {
807 			install_2m_ept(pml4, phys, phys, perm);
808 			phys += PAGE_SIZE_2M;
809 		}
810 	}
811 	while (phys + PAGE_SIZE <= max) {
812 		install_ept(pml4, phys, phys, perm);
813 		phys += PAGE_SIZE;
814 	}
815 }
816 
817 /* get_ept_pte : Get the PTE of a given level in EPT,
818     @level == 1 means get the latest level*/
819 bool get_ept_pte(unsigned long *pml4, unsigned long guest_addr, int level,
820 		unsigned long *pte)
821 {
822 	int l;
823 	unsigned long *pt = pml4, iter_pte;
824 	unsigned offset;
825 
826 	assert(level >= 1 && level <= 4);
827 
828 	for (l = EPT_PAGE_LEVEL; ; --l) {
829 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
830 		iter_pte = pt[offset];
831 		if (l == level)
832 			break;
833 		if (l < 4 && (iter_pte & EPT_LARGE_PAGE))
834 			return false;
835 		if (!(iter_pte & (EPT_PRESENT)))
836 			return false;
837 		pt = (unsigned long *)(iter_pte & EPT_ADDR_MASK);
838 	}
839 	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
840 	if (pte)
841 		*pte = pt[offset];
842 	return true;
843 }
844 
845 static void clear_ept_ad_pte(unsigned long *pml4, unsigned long guest_addr)
846 {
847 	int l;
848 	unsigned long *pt = pml4;
849 	u64 pte;
850 	unsigned offset;
851 
852 	for (l = EPT_PAGE_LEVEL; ; --l) {
853 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
854 		pt[offset] &= ~(EPT_ACCESS_FLAG|EPT_DIRTY_FLAG);
855 		pte = pt[offset];
856 		if (l == 1 || (l < 4 && (pte & EPT_LARGE_PAGE)))
857 			break;
858 		pt = (unsigned long *)(pte & EPT_ADDR_MASK);
859 	}
860 }
861 
/* clear_ept_ad : Clear EPT A/D bits for the page table walk and the
   final GPA of a guest address.
		@pml4 : addr of the EPT pml4 table
		@guest_cr3 : root of the guest page tables; used directly
			as a pointer
		@guest_addr : guest address whose translation is walked
   Returns early, without touching the final GPA, if the guest walk meets
   a non-present entry.  */
void clear_ept_ad(unsigned long *pml4, u64 guest_cr3,
		  unsigned long guest_addr)
{
	int l;
	unsigned long *pt = (unsigned long *)guest_cr3, gpa;
	u64 pte, offset_in_page;
	unsigned offset;

	/* Walk the guest page tables, starting at the top level. */
	for (l = EPT_PAGE_LEVEL; ; --l) {
		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;

		/* Clear A/D on the EPT mapping of this guest PTE's own address. */
		clear_ept_ad_pte(pml4, (u64) &pt[offset]);
		pte = pt[offset];
		/* Stop at the last level or at a large guest page. */
		if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK)))
			break;
		if (!(pte & PT_PRESENT_MASK))
			return;
		pt = (unsigned long *)(pte & PT_ADDR_MASK);
	}

	/*
	 * Form the final GPA from the leaf entry's address bits and the
	 * offset within the page.  offset_in_page is already a masked copy
	 * of guest_addr, so the second '&' with guest_addr changes nothing.
	 */
	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
	offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1);
	gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page);
	clear_ept_ad_pte(pml4, gpa);
}
889 
/* check_ept_ad : Check the content of EPT A/D bits for the page table
   walk and the final GPA of a guest address.
		@pml4 : addr of the EPT pml4 table
		@guest_cr3 : root of the guest page tables; used directly
			as a pointer
		@guest_addr : guest address whose translation is walked
		@expected_gpa_ad : A/D flag combination expected on the
			EPT entry mapping the final GPA
		@expected_pt_ad : A/D flag combination expected on the EPT
			entries mapping the guest page tables
   Results are emitted through report().  Only the first guest page-table
   level with unexpected A/D flags is reported as a failure.  */
void check_ept_ad(unsigned long *pml4, u64 guest_cr3,
		  unsigned long guest_addr, int expected_gpa_ad,
		  int expected_pt_ad)
{
	int l;
	unsigned long *pt = (unsigned long *)guest_cr3, gpa;
	u64 ept_pte, pte, offset_in_page;
	unsigned offset;
	bool bad_pt_ad = false;

	/* Walk the guest page tables, starting at the top level. */
	for (l = EPT_PAGE_LEVEL; ; --l) {
		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;

		/* Look up the level-1 EPT entry mapping this guest PTE's address. */
		if (!get_ept_pte(pml4, (u64) &pt[offset], 1, &ept_pte)) {
			printf("EPT - guest level %d page table is not mapped.\n", l);
			return;
		}

		/* Once a mismatch has been reported, later levels are not checked. */
		if (!bad_pt_ad) {
			bad_pt_ad |= (ept_pte & (EPT_ACCESS_FLAG|EPT_DIRTY_FLAG)) != expected_pt_ad;
			if (bad_pt_ad)
				report("EPT - guest level %d page table A=%d/D=%d",
				       false, l,
				       !!(expected_pt_ad & EPT_ACCESS_FLAG),
				       !!(expected_pt_ad & EPT_DIRTY_FLAG));
		}

		pte = pt[offset];
		/* Stop at the last level or at a large guest page. */
		if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK)))
			break;
		if (!(pte & PT_PRESENT_MASK))
			return;
		pt = (unsigned long *)(pte & PT_ADDR_MASK);
	}

	/* Every level matched: emit a single passing report for the walk. */
	if (!bad_pt_ad)
		report("EPT - guest page table structures A=%d/D=%d",
		       true,
		       !!(expected_pt_ad & EPT_ACCESS_FLAG),
		       !!(expected_pt_ad & EPT_DIRTY_FLAG));

	/*
	 * Form the final GPA from the leaf entry's address bits and the
	 * offset within the page.  offset_in_page is already a masked copy
	 * of guest_addr, so the second '&' with guest_addr changes nothing.
	 */
	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
	offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1);
	gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page);

	if (!get_ept_pte(pml4, gpa, 1, &ept_pte)) {
		report("EPT - guest physical address is not mapped", false);
		return;
	}
	report("EPT - guest physical address A=%d/D=%d",
	       (ept_pte & (EPT_ACCESS_FLAG|EPT_DIRTY_FLAG)) == expected_gpa_ad,
	       !!(expected_gpa_ad & EPT_ACCESS_FLAG),
	       !!(expected_gpa_ad & EPT_DIRTY_FLAG));
}
946 
947 
948 void ept_sync(int type, u64 eptp)
949 {
950 	switch (type) {
951 	case INVEPT_SINGLE:
952 		if (ept_vpid.val & EPT_CAP_INVEPT_SINGLE) {
953 			invept(INVEPT_SINGLE, eptp);
954 			break;
955 		}
956 		/* else fall through */
957 	case INVEPT_GLOBAL:
958 		if (ept_vpid.val & EPT_CAP_INVEPT_ALL) {
959 			invept(INVEPT_GLOBAL, eptp);
960 			break;
961 		}
962 		/* else fall through */
963 	default:
964 		printf("WARNING: invept is not supported!\n");
965 	}
966 }
967 
968 void set_ept_pte(unsigned long *pml4, unsigned long guest_addr,
969 		 int level, u64 pte_val)
970 {
971 	int l;
972 	unsigned long *pt = pml4;
973 	unsigned offset;
974 
975 	assert(level >= 1 && level <= 4);
976 
977 	for (l = EPT_PAGE_LEVEL; ; --l) {
978 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
979 		if (l == level)
980 			break;
981 		assert(pt[offset] & EPT_PRESENT);
982 		pt = (unsigned long *)(pt[offset] & EPT_ADDR_MASK);
983 	}
984 	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
985 	pt[offset] = pte_val;
986 }
987 
988 bool ept_2m_supported(void)
989 {
990 	return ept_vpid.val & EPT_CAP_2M_PAGE;
991 }
992 
993 bool ept_1g_supported(void)
994 {
995 	return ept_vpid.val & EPT_CAP_1G_PAGE;
996 }
997 
998 bool ept_huge_pages_supported(int level)
999 {
1000 	if (level == 2)
1001 		return ept_2m_supported();
1002 	else if (level == 3)
1003 		return ept_1g_supported();
1004 	else
1005 		return false;
1006 }
1007 
1008 bool ept_execute_only_supported(void)
1009 {
1010 	return ept_vpid.val & EPT_CAP_WT;
1011 }
1012 
1013 bool ept_ad_bits_supported(void)
1014 {
1015 	return ept_vpid.val & EPT_CAP_AD_FLAG;
1016 }
1017 
1018 void vpid_sync(int type, u16 vpid)
1019 {
1020 	switch(type) {
1021 	case INVVPID_CONTEXT_GLOBAL:
1022 		if (ept_vpid.val & VPID_CAP_INVVPID_CXTGLB) {
1023 			invvpid(INVVPID_CONTEXT_GLOBAL, vpid, 0);
1024 			break;
1025 		}
1026 	case INVVPID_ALL:
1027 		if (ept_vpid.val & VPID_CAP_INVVPID_ALL) {
1028 			invvpid(INVVPID_ALL, vpid, 0);
1029 			break;
1030 		}
1031 	default:
1032 		printf("WARNING: invvpid is not supported\n");
1033 	}
1034 }
1035 
/*
 * Program the execution-control fields of the current VMCS from the
 * global ctrl_pin / ctrl_cpu[] values, zero the CR3-target count, and
 * write a VPID obtained by pre-incrementing vpid_cnt.
 */
static void init_vmcs_ctrl(void)
{
	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
	/* 26.2.1.1 */
	vmcs_write(PIN_CONTROLS, ctrl_pin);
	/* Disable VMEXIT of IO instruction */
	/* NOTE(review): this line only writes ctrl_cpu[0] as already configured. */
	vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
	/*
	 * NOTE(review): this tests the .set half of the capability data;
	 * confirm against union vmx_ctrl_msr whether .clr was intended.
	 */
	if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
		/* Force the bits in .set on and drop bits absent from .clr. */
		ctrl_cpu[1] = (ctrl_cpu[1] | ctrl_cpu_rev[1].set) &
			ctrl_cpu_rev[1].clr;
		vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
	}
	vmcs_write(CR3_TARGET_COUNT, 0);
	vmcs_write(VPID, ++vpid_cnt);
}
1051 
/*
 * Program the host-state area of the current VMCS from the running CPU
 * state (EFER, CR0/CR3/CR4), the kernel selectors and the descriptor
 * table bases, and write the entry/exit controls from ctrl_enter and
 * ctrl_exit.  HOST_RIP is set to vmx_return.
 */
static void init_vmcs_host(void)
{
	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
	/* 26.2.1.2 */
	vmcs_write(HOST_EFER, rdmsr(MSR_EFER));

	/* 26.2.1.3 */
	vmcs_write(ENT_CONTROLS, ctrl_enter);
	vmcs_write(EXI_CONTROLS, ctrl_exit);

	/* 26.2.2 */
	vmcs_write(HOST_CR0, read_cr0());
	vmcs_write(HOST_CR3, read_cr3());
	vmcs_write(HOST_CR4, read_cr4());
	vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
	vmcs_write(HOST_SYSENTER_CS,  KERNEL_CS);

	/* 26.2.3 */
	vmcs_write(HOST_SEL_CS, KERNEL_CS);
	vmcs_write(HOST_SEL_SS, KERNEL_DS);
	vmcs_write(HOST_SEL_DS, KERNEL_DS);
	vmcs_write(HOST_SEL_ES, KERNEL_DS);
	vmcs_write(HOST_SEL_FS, KERNEL_DS);
	vmcs_write(HOST_SEL_GS, KERNEL_DS);
	vmcs_write(HOST_SEL_TR, TSS_MAIN);
	vmcs_write(HOST_BASE_TR, tss_descr.base);
	vmcs_write(HOST_BASE_GDTR, gdt64_desc.base);
	vmcs_write(HOST_BASE_IDTR, idt_descr.base);
	vmcs_write(HOST_BASE_FS, 0);
	vmcs_write(HOST_BASE_GS, 0);

	/* Set other vmcs area */
	vmcs_write(PF_ERROR_MASK, 0);
	vmcs_write(PF_ERROR_MATCH, 0);
	/* Write all-ones to both encodings of the VMCS link pointer. */
	vmcs_write(VMCS_LINK_PTR, ~0ul);
	vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
	vmcs_write(HOST_RIP, (u64)(&vmx_return));
}
1090 
1091 static void init_vmcs_guest(void)
1092 {
1093 	/* 26.3 CHECKING AND LOADING GUEST STATE */
1094 	ulong guest_cr0, guest_cr4, guest_cr3;
1095 	/* 26.3.1.1 */
1096 	guest_cr0 = read_cr0();
1097 	guest_cr4 = read_cr4();
1098 	guest_cr3 = read_cr3();
1099 	if (ctrl_enter & ENT_GUEST_64) {
1100 		guest_cr0 |= X86_CR0_PG;
1101 		guest_cr4 |= X86_CR4_PAE;
1102 	}
1103 	if ((ctrl_enter & ENT_GUEST_64) == 0)
1104 		guest_cr4 &= (~X86_CR4_PCIDE);
1105 	if (guest_cr0 & X86_CR0_PG)
1106 		guest_cr0 |= X86_CR0_PE;
1107 	vmcs_write(GUEST_CR0, guest_cr0);
1108 	vmcs_write(GUEST_CR3, guest_cr3);
1109 	vmcs_write(GUEST_CR4, guest_cr4);
1110 	vmcs_write(GUEST_SYSENTER_CS,  KERNEL_CS);
1111 	vmcs_write(GUEST_SYSENTER_ESP,
1112 		(u64)(guest_syscall_stack + PAGE_SIZE - 1));
1113 	vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
1114 	vmcs_write(GUEST_DR7, 0);
1115 	vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));
1116 
1117 	/* 26.3.1.2 */
1118 	vmcs_write(GUEST_SEL_CS, KERNEL_CS);
1119 	vmcs_write(GUEST_SEL_SS, KERNEL_DS);
1120 	vmcs_write(GUEST_SEL_DS, KERNEL_DS);
1121 	vmcs_write(GUEST_SEL_ES, KERNEL_DS);
1122 	vmcs_write(GUEST_SEL_FS, KERNEL_DS);
1123 	vmcs_write(GUEST_SEL_GS, KERNEL_DS);
1124 	vmcs_write(GUEST_SEL_TR, TSS_MAIN);
1125 	vmcs_write(GUEST_SEL_LDTR, 0);
1126 
1127 	vmcs_write(GUEST_BASE_CS, 0);
1128 	vmcs_write(GUEST_BASE_ES, 0);
1129 	vmcs_write(GUEST_BASE_SS, 0);
1130 	vmcs_write(GUEST_BASE_DS, 0);
1131 	vmcs_write(GUEST_BASE_FS, 0);
1132 	vmcs_write(GUEST_BASE_GS, 0);
1133 	vmcs_write(GUEST_BASE_TR, tss_descr.base);
1134 	vmcs_write(GUEST_BASE_LDTR, 0);
1135 
1136 	vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
1137 	vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
1138 	vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
1139 	vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
1140 	vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
1141 	vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
1142 	vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
1143 	vmcs_write(GUEST_LIMIT_TR, tss_descr.limit);
1144 
1145 	vmcs_write(GUEST_AR_CS, 0xa09b);
1146 	vmcs_write(GUEST_AR_DS, 0xc093);
1147 	vmcs_write(GUEST_AR_ES, 0xc093);
1148 	vmcs_write(GUEST_AR_FS, 0xc093);
1149 	vmcs_write(GUEST_AR_GS, 0xc093);
1150 	vmcs_write(GUEST_AR_SS, 0xc093);
1151 	vmcs_write(GUEST_AR_LDTR, 0x82);
1152 	vmcs_write(GUEST_AR_TR, 0x8b);
1153 
1154 	/* 26.3.1.3 */
1155 	vmcs_write(GUEST_BASE_GDTR, gdt64_desc.base);
1156 	vmcs_write(GUEST_BASE_IDTR, idt_descr.base);
1157 	vmcs_write(GUEST_LIMIT_GDTR, gdt64_desc.limit);
1158 	vmcs_write(GUEST_LIMIT_IDTR, idt_descr.limit);
1159 
1160 	/* 26.3.1.4 */
1161 	vmcs_write(GUEST_RIP, (u64)(&guest_entry));
1162 	vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1));
1163 	vmcs_write(GUEST_RFLAGS, 0x2);
1164 
1165 	/* 26.3.1.5 */
1166 	vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE);
1167 	vmcs_write(GUEST_INTR_STATE, 0);
1168 }
1169 
1170 static int init_vmcs(struct vmcs **vmcs)
1171 {
1172 	*vmcs = alloc_page();
1173 	memset(*vmcs, 0, PAGE_SIZE);
1174 	(*vmcs)->revision_id = basic.revision;
1175 	/* vmclear first to init vmcs */
1176 	if (vmcs_clear(*vmcs)) {
1177 		printf("%s : vmcs_clear error\n", __func__);
1178 		return 1;
1179 	}
1180 
1181 	if (make_vmcs_current(*vmcs)) {
1182 		printf("%s : make_vmcs_current error\n", __func__);
1183 		return 1;
1184 	}
1185 
1186 	/* All settings to pin/exit/enter/cpu
1187 	   control fields should be placed here */
1188 	ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
1189 	ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
1190 	ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
1191 	/* DIsable IO instruction VMEXIT now */
1192 	ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
1193 	ctrl_cpu[1] = 0;
1194 
1195 	ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
1196 	ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
1197 	ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
1198 	ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;
1199 
1200 	init_vmcs_ctrl();
1201 	init_vmcs_host();
1202 	init_vmcs_guest();
1203 	return 0;
1204 }
1205 
1206 static void init_vmx(void)
1207 {
1208 	ulong fix_cr0_set, fix_cr0_clr;
1209 	ulong fix_cr4_set, fix_cr4_clr;
1210 
1211 	vmxon_region = alloc_page();
1212 	memset(vmxon_region, 0, PAGE_SIZE);
1213 
1214 	fix_cr0_set =  rdmsr(MSR_IA32_VMX_CR0_FIXED0);
1215 	fix_cr0_clr =  rdmsr(MSR_IA32_VMX_CR0_FIXED1);
1216 	fix_cr4_set =  rdmsr(MSR_IA32_VMX_CR4_FIXED0);
1217 	fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
1218 	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
1219 	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
1220 			: MSR_IA32_VMX_PINBASED_CTLS);
1221 	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
1222 			: MSR_IA32_VMX_EXIT_CTLS);
1223 	ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
1224 			: MSR_IA32_VMX_ENTRY_CTLS);
1225 	ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
1226 			: MSR_IA32_VMX_PROCBASED_CTLS);
1227 	if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0)
1228 		ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
1229 	else
1230 		ctrl_cpu_rev[1].val = 0;
1231 	if ((ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID)) != 0)
1232 		ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
1233 	else
1234 		ept_vpid.val = 0;
1235 
1236 	write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
1237 	write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);
1238 
1239 	*vmxon_region = basic.revision;
1240 
1241 	guest_stack = alloc_page();
1242 	memset(guest_stack, 0, PAGE_SIZE);
1243 	guest_syscall_stack = alloc_page();
1244 	memset(guest_syscall_stack, 0, PAGE_SIZE);
1245 }
1246 
/*
 * Callback handed to test_for_exception(): turn VMX on and straight off
 * again.  @data is unused.
 */
static void do_vmxon_off(void *data)
{
	vmx_on();
	vmx_off();
}
1252 
1253 static void do_write_feature_control(void *data)
1254 {
1255 	wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
1256 }
1257 
1258 static int test_vmx_feature_control(void)
1259 {
1260 	u64 ia32_feature_control;
1261 	bool vmx_enabled;
1262 
1263 	ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
1264 	vmx_enabled = ((ia32_feature_control & 0x5) == 0x5);
1265 	if ((ia32_feature_control & 0x5) == 0x5) {
1266 		printf("VMX enabled and locked by BIOS\n");
1267 		return 0;
1268 	} else if (ia32_feature_control & 0x1) {
1269 		printf("ERROR: VMX locked out by BIOS!?\n");
1270 		return 1;
1271 	}
1272 
1273 	wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
1274 	report("test vmxon with FEATURE_CONTROL cleared",
1275 	       test_for_exception(GP_VECTOR, &do_vmxon_off, NULL));
1276 
1277 	wrmsr(MSR_IA32_FEATURE_CONTROL, 0x4);
1278 	report("test vmxon without FEATURE_CONTROL lock",
1279 	       test_for_exception(GP_VECTOR, &do_vmxon_off, NULL));
1280 
1281 	wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5);
1282 	vmx_enabled = ((rdmsr(MSR_IA32_FEATURE_CONTROL) & 0x5) == 0x5);
1283 	report("test enable VMX in FEATURE_CONTROL", vmx_enabled);
1284 
1285 	report("test FEATURE_CONTROL lock bit",
1286 	       test_for_exception(GP_VECTOR, &do_write_feature_control, NULL));
1287 
1288 	return !vmx_enabled;
1289 }
1290 
1291 static int test_vmxon(void)
1292 {
1293 	int ret, ret1;
1294 	u64 *tmp_region = vmxon_region;
1295 	int width = cpuid_maxphyaddr();
1296 
1297 	/* Unaligned page access */
1298 	vmxon_region = (u64 *)((intptr_t)vmxon_region + 1);
1299 	ret1 = vmx_on();
1300 	report("test vmxon with unaligned vmxon region", ret1);
1301 	if (!ret1) {
1302 		ret = 1;
1303 		goto out;
1304 	}
1305 
1306 	/* gpa bits beyond physical address width are set*/
1307 	vmxon_region = (u64 *)((intptr_t)tmp_region | ((u64)1 << (width+1)));
1308 	ret1 = vmx_on();
1309 	report("test vmxon with bits set beyond physical address width", ret1);
1310 	if (!ret1) {
1311 		ret = 1;
1312 		goto out;
1313 	}
1314 
1315 	/* invalid revision indentifier */
1316 	vmxon_region = tmp_region;
1317 	*vmxon_region = 0xba9da9;
1318 	ret1 = vmx_on();
1319 	report("test vmxon with invalid revision identifier", ret1);
1320 	if (!ret1) {
1321 		ret = 1;
1322 		goto out;
1323 	}
1324 
1325 	/* and finally a valid region */
1326 	*vmxon_region = basic.revision;
1327 	ret = vmx_on();
1328 	report("test vmxon with valid vmxon region", !ret);
1329 
1330 out:
1331 	return ret;
1332 }
1333 
1334 static void test_vmptrld(void)
1335 {
1336 	struct vmcs *vmcs, *tmp_root;
1337 	int width = cpuid_maxphyaddr();
1338 
1339 	vmcs = alloc_page();
1340 	vmcs->revision_id = basic.revision;
1341 
1342 	/* Unaligned page access */
1343 	tmp_root = (struct vmcs *)((intptr_t)vmcs + 1);
1344 	report("test vmptrld with unaligned vmcs",
1345 	       make_vmcs_current(tmp_root) == 1);
1346 
1347 	/* gpa bits beyond physical address width are set*/
1348 	tmp_root = (struct vmcs *)((intptr_t)vmcs |
1349 				   ((u64)1 << (width+1)));
1350 	report("test vmptrld with vmcs address bits set beyond physical address width",
1351 	       make_vmcs_current(tmp_root) == 1);
1352 
1353 	/* Pass VMXON region */
1354 	make_vmcs_current(vmcs);
1355 	tmp_root = (struct vmcs *)vmxon_region;
1356 	report("test vmptrld with vmxon region",
1357 	       make_vmcs_current(tmp_root) == 1);
1358 	report("test vmptrld with vmxon region vm-instruction error",
1359 	       vmcs_read(VMX_INST_ERROR) == VMXERR_VMPTRLD_VMXON_POINTER);
1360 
1361 	report("test vmptrld with valid vmcs region", make_vmcs_current(vmcs) == 0);
1362 }
1363 
1364 static void test_vmptrst(void)
1365 {
1366 	int ret;
1367 	struct vmcs *vmcs1, *vmcs2;
1368 
1369 	vmcs1 = alloc_page();
1370 	memset(vmcs1, 0, PAGE_SIZE);
1371 	init_vmcs(&vmcs1);
1372 	ret = vmcs_save(&vmcs2);
1373 	report("test vmptrst", (!ret) && (vmcs1 == vmcs2));
1374 }
1375 
1376 struct vmx_ctl_msr {
1377 	const char *name;
1378 	u32 index, true_index;
1379 	u32 default1;
1380 } vmx_ctl_msr[] = {
1381 	{ "MSR_IA32_VMX_PINBASED_CTLS", MSR_IA32_VMX_PINBASED_CTLS,
1382 	  MSR_IA32_VMX_TRUE_PIN, 0x16 },
1383 	{ "MSR_IA32_VMX_PROCBASED_CTLS", MSR_IA32_VMX_PROCBASED_CTLS,
1384 	  MSR_IA32_VMX_TRUE_PROC, 0x401e172 },
1385 	{ "MSR_IA32_VMX_PROCBASED_CTLS2", MSR_IA32_VMX_PROCBASED_CTLS2,
1386 	  MSR_IA32_VMX_PROCBASED_CTLS2, 0 },
1387 	{ "MSR_IA32_VMX_EXIT_CTLS", MSR_IA32_VMX_EXIT_CTLS,
1388 	  MSR_IA32_VMX_TRUE_EXIT, 0x36dff },
1389 	{ "MSR_IA32_VMX_ENTRY_CTLS", MSR_IA32_VMX_ENTRY_CTLS,
1390 	  MSR_IA32_VMX_TRUE_ENTRY, 0x11ff },
1391 };
1392 
1393 static void test_vmx_caps(void)
1394 {
1395 	u64 val, default1, fixed0, fixed1;
1396 	union vmx_ctrl_msr ctrl, true_ctrl;
1397 	unsigned int n;
1398 	bool ok;
1399 
1400 	printf("\nTest suite: VMX capability reporting\n");
1401 
1402 	report("MSR_IA32_VMX_BASIC",
1403 	       (basic.revision & (1ul << 31)) == 0 &&
1404 	       basic.size > 0 && basic.size <= 4096 &&
1405 	       (basic.type == 0 || basic.type == 6) &&
1406 	       basic.reserved1 == 0 && basic.reserved2 == 0);
1407 
1408 	val = rdmsr(MSR_IA32_VMX_MISC);
1409 	report("MSR_IA32_VMX_MISC",
1410 	       (!(ctrl_cpu_rev[1].clr & CPU_URG) || val & (1ul << 5)) &&
1411 	       ((val >> 16) & 0x1ff) <= 256 &&
1412 	       (val & 0xc0007e00) == 0);
1413 
1414 	for (n = 0; n < ARRAY_SIZE(vmx_ctl_msr); n++) {
1415 		ctrl.val = rdmsr(vmx_ctl_msr[n].index);
1416 		default1 = vmx_ctl_msr[n].default1;
1417 		ok = (ctrl.set & default1) == default1;
1418 		ok = ok && (ctrl.set & ~ctrl.clr) == 0;
1419 		if (ok && basic.ctrl) {
1420 			true_ctrl.val = rdmsr(vmx_ctl_msr[n].true_index);
1421 			ok = ctrl.clr == true_ctrl.clr;
1422 			ok = ok && ctrl.set == (true_ctrl.set | default1);
1423 		}
1424 		report("%s", ok, vmx_ctl_msr[n].name);
1425 	}
1426 
1427 	fixed0 = rdmsr(MSR_IA32_VMX_CR0_FIXED0);
1428 	fixed1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1);
1429 	report("MSR_IA32_VMX_IA32_VMX_CR0_FIXED0/1",
1430 	       ((fixed0 ^ fixed1) & ~fixed1) == 0);
1431 
1432 	fixed0 = rdmsr(MSR_IA32_VMX_CR4_FIXED0);
1433 	fixed1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
1434 	report("MSR_IA32_VMX_IA32_VMX_CR4_FIXED0/1",
1435 	       ((fixed0 ^ fixed1) & ~fixed1) == 0);
1436 
1437 	val = rdmsr(MSR_IA32_VMX_VMCS_ENUM);
1438 	report("MSR_IA32_VMX_VMCS_ENUM",
1439 	       (val & 0x3e) >= 0x2a &&
1440 	       (val & 0xfffffffffffffc01Ull) == 0);
1441 
1442 	val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
1443 	report("MSR_IA32_VMX_EPT_VPID_CAP",
1444 	       (val & 0xfffff07ef98cbebeUll) == 0);
1445 }
1446 
1447 /* This function can only be called in guest */
1448 static void __attribute__((__used__)) hypercall(u32 hypercall_no)
1449 {
1450 	u64 val = 0;
1451 	val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT;
1452 	hypercall_field = val;
1453 	asm volatile("vmcall\n\t");
1454 }
1455 
1456 static bool is_hypercall()
1457 {
1458 	ulong reason, hyper_bit;
1459 
1460 	reason = vmcs_read(EXI_REASON) & 0xff;
1461 	hyper_bit = hypercall_field & HYPERCALL_BIT;
1462 	if (reason == VMX_VMCALL && hyper_bit)
1463 		return true;
1464 	return false;
1465 }
1466 
1467 static int handle_hypercall()
1468 {
1469 	ulong hypercall_no;
1470 
1471 	hypercall_no = hypercall_field & HYPERCALL_MASK;
1472 	hypercall_field = 0;
1473 	switch (hypercall_no) {
1474 	case HYPERCALL_VMEXIT:
1475 		return VMX_TEST_VMEXIT;
1476 	case HYPERCALL_VMABORT:
1477 		return VMX_TEST_VMABORT;
1478 	case HYPERCALL_VMSKIP:
1479 		return VMX_TEST_VMSKIP;
1480 	default:
1481 		printf("ERROR : Invalid hypercall number : %ld\n", hypercall_no);
1482 	}
1483 	return VMX_TEST_EXIT;
1484 }
1485 
1486 static void continue_abort(void)
1487 {
1488 	assert(!in_guest);
1489 	printf("Host was here when guest aborted:\n");
1490 	dump_stack();
1491 	longjmp(abort_target, 1);
1492 	abort();
1493 }
1494 
1495 void __abort_test(void)
1496 {
1497 	if (in_guest)
1498 		hypercall(HYPERCALL_VMABORT);
1499 	else
1500 		longjmp(abort_target, 1);
1501 	abort();
1502 }
1503 
1504 static void continue_skip(void)
1505 {
1506 	assert(!in_guest);
1507 	longjmp(abort_target, 1);
1508 	abort();
1509 }
1510 
1511 void test_skip(const char *msg)
1512 {
1513 	printf("%s skipping test: %s\n", in_guest ? "Guest" : "Host", msg);
1514 	if (in_guest)
1515 		hypercall(HYPERCALL_VMABORT);
1516 	else
1517 		longjmp(abort_target, 1);
1518 	abort();
1519 }
1520 
1521 static int exit_handler()
1522 {
1523 	int ret;
1524 
1525 	current->exits++;
1526 	regs.rflags = vmcs_read(GUEST_RFLAGS);
1527 	if (is_hypercall())
1528 		ret = handle_hypercall();
1529 	else
1530 		ret = current->exit_handler();
1531 	vmcs_write(GUEST_RFLAGS, regs.rflags);
1532 
1533 	return ret;
1534 }
1535 
1536 /*
1537  * Called if vmlaunch or vmresume fails.
1538  *	@early    - failure due to "VMX controls and host-state area" (26.2)
1539  *	@vmlaunch - was this a vmlaunch or vmresume
1540  *	@rflags   - host rflags
1541  */
1542 static int
1543 entry_failure_handler(struct vmentry_failure *failure)
1544 {
1545 	if (current->entry_failure_handler)
1546 		return current->entry_failure_handler(failure);
1547 	else
1548 		return VMX_TEST_EXIT;
1549 }
1550 
1551 /*
1552  * Tries to enter the guest. Returns true iff entry succeeded. Otherwise,
1553  * populates @failure.
1554  */
1555 static bool vmx_enter_guest(struct vmentry_failure *failure)
1556 {
1557 	failure->early = 0;
1558 
1559 	in_guest = 1;
1560 	asm volatile (
1561 		"mov %[HOST_RSP], %%rdi\n\t"
1562 		"vmwrite %%rsp, %%rdi\n\t"
1563 		LOAD_GPR_C
1564 		"cmpb $0, %[launched]\n\t"
1565 		"jne 1f\n\t"
1566 		"vmlaunch\n\t"
1567 		"jmp 2f\n\t"
1568 		"1: "
1569 		"vmresume\n\t"
1570 		"2: "
1571 		SAVE_GPR_C
1572 		"pushf\n\t"
1573 		"pop %%rdi\n\t"
1574 		"mov %%rdi, %[failure_flags]\n\t"
1575 		"movl $1, %[failure_flags]\n\t"
1576 		"jmp 3f\n\t"
1577 		"vmx_return:\n\t"
1578 		SAVE_GPR_C
1579 		"3: \n\t"
1580 		: [failure_early]"+m"(failure->early),
1581 		  [failure_flags]"=m"(failure->flags)
1582 		: [launched]"m"(launched), [HOST_RSP]"i"(HOST_RSP)
1583 		: "rdi", "memory", "cc"
1584 	);
1585 	in_guest = 0;
1586 
1587 	failure->vmlaunch = !launched;
1588 	failure->instr = launched ? "vmresume" : "vmlaunch";
1589 
1590 	return !failure->early && !(vmcs_read(EXI_REASON) & VMX_ENTRY_FAILURE);
1591 }
1592 
1593 static int vmx_run()
1594 {
1595 	while (1) {
1596 		u32 ret;
1597 		bool entered;
1598 		struct vmentry_failure failure;
1599 
1600 		entered = vmx_enter_guest(&failure);
1601 
1602 		if (entered) {
1603 			/*
1604 			 * VMCS isn't in "launched" state if there's been any
1605 			 * entry failure (early or otherwise).
1606 			 */
1607 			launched = 1;
1608 			ret = exit_handler();
1609 		} else {
1610 			ret = entry_failure_handler(&failure);
1611 		}
1612 
1613 		switch (ret) {
1614 		case VMX_TEST_RESUME:
1615 			continue;
1616 		case VMX_TEST_VMEXIT:
1617 			guest_finished = 1;
1618 			return 0;
1619 		case VMX_TEST_EXIT:
1620 			break;
1621 		default:
1622 			printf("ERROR : Invalid %s_handler return val %d.\n",
1623 			       entered ? "exit" : "entry_failure",
1624 			       ret);
1625 			break;
1626 		}
1627 
1628 		if (entered)
1629 			print_vmexit_info();
1630 		else
1631 			print_vmentry_failure_info(&failure);
1632 		abort();
1633 	}
1634 }
1635 
1636 static void run_teardown_step(struct test_teardown_step *step)
1637 {
1638 	step->func(step->data);
1639 }
1640 
1641 static int test_run(struct vmx_test *test)
1642 {
1643 	int r;
1644 
1645 	/* Validate V2 interface. */
1646 	if (test->v2) {
1647 		int ret = 0;
1648 		if (test->init || test->guest_main || test->exit_handler ||
1649 		    test->syscall_handler) {
1650 			report("V2 test cannot specify V1 callbacks.", 0);
1651 			ret = 1;
1652 		}
1653 		if (ret)
1654 			return ret;
1655 	}
1656 
1657 	if (test->name == NULL)
1658 		test->name = "(no name)";
1659 	if (vmx_on()) {
1660 		printf("%s : vmxon failed.\n", __func__);
1661 		return 1;
1662 	}
1663 
1664 	init_vmcs(&(test->vmcs));
1665 	/* Directly call test->init is ok here, init_vmcs has done
1666 	   vmcs init, vmclear and vmptrld*/
1667 	if (test->init && test->init(test->vmcs) != VMX_TEST_START)
1668 		goto out;
1669 	teardown_count = 0;
1670 	v2_guest_main = NULL;
1671 	test->exits = 0;
1672 	current = test;
1673 	regs = test->guest_regs;
1674 	vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2);
1675 	launched = 0;
1676 	guest_finished = 0;
1677 	printf("\nTest suite: %s\n", test->name);
1678 
1679 	r = setjmp(abort_target);
1680 	if (r) {
1681 		assert(!in_guest);
1682 		goto out;
1683 	}
1684 
1685 
1686 	if (test->v2)
1687 		test->v2();
1688 	else
1689 		vmx_run();
1690 
1691 	while (teardown_count > 0)
1692 		run_teardown_step(&teardown_steps[--teardown_count]);
1693 
1694 	if (launched && !guest_finished)
1695 		report("Guest didn't run to completion.", 0);
1696 
1697 out:
1698 	if (vmx_off()) {
1699 		printf("%s : vmxoff failed.\n", __func__);
1700 		return 1;
1701 	}
1702 	return 0;
1703 }
1704 
1705 /*
1706  * Add a teardown step. Executed after the test's main function returns.
1707  * Teardown steps executed in reverse order.
1708  */
1709 void test_add_teardown(test_teardown_func func, void *data)
1710 {
1711 	struct test_teardown_step *step;
1712 
1713 	TEST_ASSERT_MSG(teardown_count < MAX_TEST_TEARDOWN_STEPS,
1714 			"There are already %d teardown steps.",
1715 			teardown_count);
1716 	step = &teardown_steps[teardown_count++];
1717 	step->func = func;
1718 	step->data = data;
1719 }
1720 
1721 /*
1722  * Set the target of the first enter_guest call. Can only be called once per
1723  * test. Must be called before first enter_guest call.
1724  */
1725 void test_set_guest(test_guest_func func)
1726 {
1727 	assert(current->v2);
1728 	TEST_ASSERT_MSG(!v2_guest_main, "Already set guest func.");
1729 	v2_guest_main = func;
1730 }
1731 
1732 /*
1733  * Enters the guest (or launches it for the first time). Error to call once the
1734  * guest has returned (i.e., run past the end of its guest() function). Also
1735  * aborts if guest entry fails.
1736  */
1737 void enter_guest(void)
1738 {
1739 	struct vmentry_failure failure;
1740 
1741 	TEST_ASSERT_MSG(v2_guest_main,
1742 			"Never called test_set_guest_func!");
1743 
1744 	TEST_ASSERT_MSG(!guest_finished,
1745 			"Called enter_guest() after guest returned.");
1746 
1747 	if (!vmx_enter_guest(&failure)) {
1748 		print_vmentry_failure_info(&failure);
1749 		abort();
1750 	}
1751 
1752 	launched = 1;
1753 
1754 	if (is_hypercall()) {
1755 		int ret;
1756 
1757 		ret = handle_hypercall();
1758 		switch (ret) {
1759 		case VMX_TEST_VMEXIT:
1760 			guest_finished = 1;
1761 			break;
1762 		case VMX_TEST_VMABORT:
1763 			continue_abort();
1764 			break;
1765 		case VMX_TEST_VMSKIP:
1766 			continue_skip();
1767 			break;
1768 		default:
1769 			printf("ERROR : Invalid handle_hypercall return %d.\n",
1770 			       ret);
1771 			abort();
1772 		}
1773 	}
1774 }
1775 
1776 extern struct vmx_test vmx_tests[];
1777 
1778 static bool
1779 test_wanted(const char *name, const char *filters[], int filter_count)
1780 {
1781 	int i;
1782 	bool positive = false;
1783 	bool match = false;
1784 	char clean_name[strlen(name) + 1];
1785 	char *c;
1786 	const char *n;
1787 
1788 	/* Replace spaces with underscores. */
1789 	n = name;
1790 	c = &clean_name[0];
1791 	do *c++ = (*n == ' ') ? '_' : *n;
1792 	while (*n++);
1793 
1794 	for (i = 0; i < filter_count; i++) {
1795 		const char *filter = filters[i];
1796 
1797 		if (filter[0] == '-') {
1798 			if (simple_glob(clean_name, filter + 1))
1799 				return false;
1800 		} else {
1801 			positive = true;
1802 			match |= simple_glob(clean_name, filter);
1803 		}
1804 	}
1805 
1806 	if (!positive || match) {
1807 		matched++;
1808 		return true;
1809 	} else {
1810 		return false;
1811 	}
1812 }
1813 
1814 int main(int argc, const char *argv[])
1815 {
1816 	int i = 0;
1817 
1818 	setup_vm();
1819 	smp_init();
1820 	hypercall_field = 0;
1821 
1822 	argv++;
1823 	argc--;
1824 
1825 	if (!(cpuid(1).c & (1 << 5))) {
1826 		printf("WARNING: vmx not supported, add '-cpu host'\n");
1827 		goto exit;
1828 	}
1829 	init_vmx();
1830 	if (test_wanted("test_vmx_feature_control", argv, argc)) {
1831 		/* Sets MSR_IA32_FEATURE_CONTROL to 0x5 */
1832 		if (test_vmx_feature_control() != 0)
1833 			goto exit;
1834 	} else {
1835 		if ((rdmsr(MSR_IA32_FEATURE_CONTROL) & 0x5) != 0x5)
1836 			wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5);
1837 	}
1838 
1839 	if (test_wanted("test_vmxon", argv, argc)) {
1840 		/* Enables VMX */
1841 		if (test_vmxon() != 0)
1842 			goto exit;
1843 	} else {
1844 		if (vmx_on()) {
1845 			report("vmxon", 0);
1846 			goto exit;
1847 		}
1848 	}
1849 
1850 	if (test_wanted("test_vmptrld", argv, argc))
1851 		test_vmptrld();
1852 	if (test_wanted("test_vmclear", argv, argc))
1853 		test_vmclear();
1854 	if (test_wanted("test_vmptrst", argv, argc))
1855 		test_vmptrst();
1856 	if (test_wanted("test_vmwrite_vmread", argv, argc))
1857 		test_vmwrite_vmread();
1858 	if (test_wanted("test_vmcs_high", argv, argc))
1859 		test_vmcs_high();
1860 	if (test_wanted("test_vmcs_lifecycle", argv, argc))
1861 		test_vmcs_lifecycle();
1862 	if (test_wanted("test_vmx_caps", argv, argc))
1863 		test_vmx_caps();
1864 
1865 	/* Balance vmxon from test_vmxon. */
1866 	vmx_off();
1867 
1868 	for (; vmx_tests[i].name != NULL; i++) {
1869 		if (!test_wanted(vmx_tests[i].name, argv, argc))
1870 			continue;
1871 		if (test_run(&vmx_tests[i]))
1872 			goto exit;
1873 	}
1874 
1875 	if (!matched)
1876 		report("command line didn't match any tests!", matched);
1877 
1878 exit:
1879 	return report_summary();
1880 }
1881