xref: /kvm-unit-tests/x86/vmx.c (revision 97b5f9553a48a15296462dc4c6170200b0dd70b8)
1 /*
2  * x86/vmx.c : Framework for testing nested virtualization
3  *	This is a framework to test nested VMX for KVM, which
4  * 	started as a project of GSoC 2013. All test cases should
5  *	be located in x86/vmx_tests.c and framework related
6  *	functions should be in this file.
7  *
8  * How to write test cases?
9  *	Add callbacks of test suite in variable "vmx_tests". You can
10  *	write:
11  *		1. init function used for initializing test suite
12  *		2. main function for codes running in L2 guest,
13  *		3. exit_handler to handle vmexit of L2 to L1
14  *		4. syscall handler to handle L2 syscall vmexit
15  *		5. vmenter fail handler to handle direct failure of vmenter
16  *		6. guest_regs is loaded when vmenter and saved when
17  *			vmexit, you can read and set it in exit_handler
18  *	If no special function is needed for a test suite, use
19  *	corresponding basic_* functions as callback. More handlers
20  *	can be added to "vmx_tests", see details of "struct vmx_test"
21  *	and function test_run().
22  *
23  * Currently, vmx test framework only set up one VCPU and one
24  * concurrent guest test environment with same paging for L2 and
25  * L1. For usage of EPT, only 1:1 mapped paging is used from VFN
26  * to PFN.
27  *
28  * Author : Arthur Chunqi Li <yzt356@gmail.com>
29  */
30 
31 #include "libcflat.h"
32 #include "processor.h"
33 #include "alloc_page.h"
34 #include "vm.h"
35 #include "desc.h"
36 #include "vmx.h"
37 #include "msr.h"
38 #include "smp.h"
39 #include "apic.h"
40 
41 u64 *bsp_vmxon_region;
42 struct vmcs *vmcs_root;
43 u32 vpid_cnt;
44 void *guest_stack, *guest_syscall_stack;
45 u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
46 struct regs regs;
47 
48 struct vmx_test *current;
49 
50 #define MAX_TEST_TEARDOWN_STEPS 10
51 
52 struct test_teardown_step {
53 	test_teardown_func func;
54 	void *data;
55 };
56 
57 static int teardown_count;
58 static struct test_teardown_step teardown_steps[MAX_TEST_TEARDOWN_STEPS];
59 
60 static test_guest_func v2_guest_main;
61 
62 u64 hypercall_field;
63 bool launched;
64 static int matched;
65 static int guest_finished;
66 static int in_guest;
67 
68 union vmx_basic basic;
69 union vmx_ctrl_msr ctrl_pin_rev;
70 union vmx_ctrl_msr ctrl_cpu_rev[2];
71 union vmx_ctrl_msr ctrl_exit_rev;
72 union vmx_ctrl_msr ctrl_enter_rev;
73 union vmx_ept_vpid  ept_vpid;
74 
75 extern struct descriptor_table_ptr gdt64_desc;
76 extern struct descriptor_table_ptr idt_descr;
77 extern struct descriptor_table_ptr tss_descr;
78 extern void *vmx_return;
79 extern void *entry_sysenter;
80 extern void *guest_entry;
81 
82 static volatile u32 stage;
83 
84 static jmp_buf abort_target;
85 
86 struct vmcs_field {
87 	u64 mask;
88 	u64 encoding;
89 };
90 
91 #define MASK(_bits) GENMASK_ULL((_bits) - 1, 0)
92 #define MASK_NATURAL MASK(sizeof(unsigned long) * 8)
93 
94 static struct vmcs_field vmcs_fields[] = {
95 	{ MASK(16), VPID },
96 	{ MASK(16), PINV },
97 	{ MASK(16), EPTP_IDX },
98 
99 	{ MASK(16), GUEST_SEL_ES },
100 	{ MASK(16), GUEST_SEL_CS },
101 	{ MASK(16), GUEST_SEL_SS },
102 	{ MASK(16), GUEST_SEL_DS },
103 	{ MASK(16), GUEST_SEL_FS },
104 	{ MASK(16), GUEST_SEL_GS },
105 	{ MASK(16), GUEST_SEL_LDTR },
106 	{ MASK(16), GUEST_SEL_TR },
107 	{ MASK(16), GUEST_INT_STATUS },
108 
109 	{ MASK(16), HOST_SEL_ES },
110 	{ MASK(16), HOST_SEL_CS },
111 	{ MASK(16), HOST_SEL_SS },
112 	{ MASK(16), HOST_SEL_DS },
113 	{ MASK(16), HOST_SEL_FS },
114 	{ MASK(16), HOST_SEL_GS },
115 	{ MASK(16), HOST_SEL_TR },
116 
117 	{ MASK(64), IO_BITMAP_A },
118 	{ MASK(64), IO_BITMAP_B },
119 	{ MASK(64), MSR_BITMAP },
120 	{ MASK(64), EXIT_MSR_ST_ADDR },
121 	{ MASK(64), EXIT_MSR_LD_ADDR },
122 	{ MASK(64), ENTER_MSR_LD_ADDR },
123 	{ MASK(64), VMCS_EXEC_PTR },
124 	{ MASK(64), TSC_OFFSET },
125 	{ MASK(64), APIC_VIRT_ADDR },
126 	{ MASK(64), APIC_ACCS_ADDR },
127 	{ MASK(64), EPTP },
128 
129 	{ MASK(64), INFO_PHYS_ADDR },
130 
131 	{ MASK(64), VMCS_LINK_PTR },
132 	{ MASK(64), GUEST_DEBUGCTL },
133 	{ MASK(64), GUEST_EFER },
134 	{ MASK(64), GUEST_PAT },
135 	{ MASK(64), GUEST_PERF_GLOBAL_CTRL },
136 	{ MASK(64), GUEST_PDPTE },
137 
138 	{ MASK(64), HOST_PAT },
139 	{ MASK(64), HOST_EFER },
140 	{ MASK(64), HOST_PERF_GLOBAL_CTRL },
141 
142 	{ MASK(32), PIN_CONTROLS },
143 	{ MASK(32), CPU_EXEC_CTRL0 },
144 	{ MASK(32), EXC_BITMAP },
145 	{ MASK(32), PF_ERROR_MASK },
146 	{ MASK(32), PF_ERROR_MATCH },
147 	{ MASK(32), CR3_TARGET_COUNT },
148 	{ MASK(32), EXI_CONTROLS },
149 	{ MASK(32), EXI_MSR_ST_CNT },
150 	{ MASK(32), EXI_MSR_LD_CNT },
151 	{ MASK(32), ENT_CONTROLS },
152 	{ MASK(32), ENT_MSR_LD_CNT },
153 	{ MASK(32), ENT_INTR_INFO },
154 	{ MASK(32), ENT_INTR_ERROR },
155 	{ MASK(32), ENT_INST_LEN },
156 	{ MASK(32), TPR_THRESHOLD },
157 	{ MASK(32), CPU_EXEC_CTRL1 },
158 
159 	{ MASK(32), VMX_INST_ERROR },
160 	{ MASK(32), EXI_REASON },
161 	{ MASK(32), EXI_INTR_INFO },
162 	{ MASK(32), EXI_INTR_ERROR },
163 	{ MASK(32), IDT_VECT_INFO },
164 	{ MASK(32), IDT_VECT_ERROR },
165 	{ MASK(32), EXI_INST_LEN },
166 	{ MASK(32), EXI_INST_INFO },
167 
168 	{ MASK(32), GUEST_LIMIT_ES },
169 	{ MASK(32), GUEST_LIMIT_CS },
170 	{ MASK(32), GUEST_LIMIT_SS },
171 	{ MASK(32), GUEST_LIMIT_DS },
172 	{ MASK(32), GUEST_LIMIT_FS },
173 	{ MASK(32), GUEST_LIMIT_GS },
174 	{ MASK(32), GUEST_LIMIT_LDTR },
175 	{ MASK(32), GUEST_LIMIT_TR },
176 	{ MASK(32), GUEST_LIMIT_GDTR },
177 	{ MASK(32), GUEST_LIMIT_IDTR },
178 	{ 0x1d0ff, GUEST_AR_ES },
179 	{ 0x1f0ff, GUEST_AR_CS },
180 	{ 0x1d0ff, GUEST_AR_SS },
181 	{ 0x1d0ff, GUEST_AR_DS },
182 	{ 0x1d0ff, GUEST_AR_FS },
183 	{ 0x1d0ff, GUEST_AR_GS },
184 	{ 0x1d0ff, GUEST_AR_LDTR },
185 	{ 0x1d0ff, GUEST_AR_TR },
186 	{ MASK(32), GUEST_INTR_STATE },
187 	{ MASK(32), GUEST_ACTV_STATE },
188 	{ MASK(32), GUEST_SMBASE },
189 	{ MASK(32), GUEST_SYSENTER_CS },
190 	{ MASK(32), PREEMPT_TIMER_VALUE },
191 
192 	{ MASK(32), HOST_SYSENTER_CS },
193 
194 	{ MASK_NATURAL, CR0_MASK },
195 	{ MASK_NATURAL, CR4_MASK },
196 	{ MASK_NATURAL, CR0_READ_SHADOW },
197 	{ MASK_NATURAL, CR4_READ_SHADOW },
198 	{ MASK_NATURAL, CR3_TARGET_0 },
199 	{ MASK_NATURAL, CR3_TARGET_1 },
200 	{ MASK_NATURAL, CR3_TARGET_2 },
201 	{ MASK_NATURAL, CR3_TARGET_3 },
202 
203 	{ MASK_NATURAL, EXI_QUALIFICATION },
204 	{ MASK_NATURAL, IO_RCX },
205 	{ MASK_NATURAL, IO_RSI },
206 	{ MASK_NATURAL, IO_RDI },
207 	{ MASK_NATURAL, IO_RIP },
208 	{ MASK_NATURAL, GUEST_LINEAR_ADDRESS },
209 
210 	{ MASK_NATURAL, GUEST_CR0 },
211 	{ MASK_NATURAL, GUEST_CR3 },
212 	{ MASK_NATURAL, GUEST_CR4 },
213 	{ MASK_NATURAL, GUEST_BASE_ES },
214 	{ MASK_NATURAL, GUEST_BASE_CS },
215 	{ MASK_NATURAL, GUEST_BASE_SS },
216 	{ MASK_NATURAL, GUEST_BASE_DS },
217 	{ MASK_NATURAL, GUEST_BASE_FS },
218 	{ MASK_NATURAL, GUEST_BASE_GS },
219 	{ MASK_NATURAL, GUEST_BASE_LDTR },
220 	{ MASK_NATURAL, GUEST_BASE_TR },
221 	{ MASK_NATURAL, GUEST_BASE_GDTR },
222 	{ MASK_NATURAL, GUEST_BASE_IDTR },
223 	{ MASK_NATURAL, GUEST_DR7 },
224 	{ MASK_NATURAL, GUEST_RSP },
225 	{ MASK_NATURAL, GUEST_RIP },
226 	{ MASK_NATURAL, GUEST_RFLAGS },
227 	{ MASK_NATURAL, GUEST_PENDING_DEBUG },
228 	{ MASK_NATURAL, GUEST_SYSENTER_ESP },
229 	{ MASK_NATURAL, GUEST_SYSENTER_EIP },
230 
231 	{ MASK_NATURAL, HOST_CR0 },
232 	{ MASK_NATURAL, HOST_CR3 },
233 	{ MASK_NATURAL, HOST_CR4 },
234 	{ MASK_NATURAL, HOST_BASE_FS },
235 	{ MASK_NATURAL, HOST_BASE_GS },
236 	{ MASK_NATURAL, HOST_BASE_TR },
237 	{ MASK_NATURAL, HOST_BASE_GDTR },
238 	{ MASK_NATURAL, HOST_BASE_IDTR },
239 	{ MASK_NATURAL, HOST_SYSENTER_ESP },
240 	{ MASK_NATURAL, HOST_SYSENTER_EIP },
241 	{ MASK_NATURAL, HOST_RSP },
242 	{ MASK_NATURAL, HOST_RIP },
243 };
244 
245 enum vmcs_field_type {
246 	VMCS_FIELD_TYPE_CONTROL = 0,
247 	VMCS_FIELD_TYPE_READ_ONLY_DATA = 1,
248 	VMCS_FIELD_TYPE_GUEST = 2,
249 	VMCS_FIELD_TYPE_HOST = 3,
250 	VMCS_FIELD_TYPES,
251 };
252 
253 static inline int vmcs_field_type(struct vmcs_field *f)
254 {
255 	return (f->encoding >> VMCS_FIELD_TYPE_SHIFT) & 0x3;
256 }
257 
258 static int vmcs_field_readonly(struct vmcs_field *f)
259 {
260 	u64 ia32_vmx_misc;
261 
262 	ia32_vmx_misc = rdmsr(MSR_IA32_VMX_MISC);
263 	return !(ia32_vmx_misc & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS) &&
264 		(vmcs_field_type(f) == VMCS_FIELD_TYPE_READ_ONLY_DATA);
265 }
266 
267 static inline u64 vmcs_field_value(struct vmcs_field *f, u8 cookie)
268 {
269 	u64 value;
270 
271 	/* Incorporate the cookie and the field encoding into the value. */
272 	value = cookie;
273 	value |= (f->encoding << 8);
274 	value |= 0xdeadbeefull << 32;
275 
276 	return value & f->mask;
277 }
278 
279 static void set_vmcs_field(struct vmcs_field *f, u8 cookie)
280 {
281 	vmcs_write(f->encoding, vmcs_field_value(f, cookie));
282 }
283 
284 static bool check_vmcs_field(struct vmcs_field *f, u8 cookie)
285 {
286 	u64 expected;
287 	u64 actual;
288 	int ret;
289 
290 	if (f->encoding == VMX_INST_ERROR) {
291 		printf("Skipping volatile field %lx\n", f->encoding);
292 		return true;
293 	}
294 
295 	ret = vmcs_read_checking(f->encoding, &actual);
296 	assert(!(ret & X86_EFLAGS_CF));
297 	/* Skip VMCS fields that aren't recognized by the CPU */
298 	if (ret & X86_EFLAGS_ZF)
299 		return true;
300 
301 	if (vmcs_field_readonly(f)) {
302 		printf("Skipping read-only field %lx\n", f->encoding);
303 		return true;
304 	}
305 
306 	expected = vmcs_field_value(f, cookie);
307 	actual &= f->mask;
308 
309 	if (expected == actual)
310 		return true;
311 
312 	printf("FAIL: VMWRITE/VMREAD %lx (expected: %lx, actual: %lx)\n",
313 	       f->encoding, (unsigned long) expected, (unsigned long) actual);
314 
315 	return false;
316 }
317 
318 static void set_all_vmcs_fields(u8 cookie)
319 {
320 	int i;
321 
322 	for (i = 0; i < ARRAY_SIZE(vmcs_fields); i++)
323 		set_vmcs_field(&vmcs_fields[i], cookie);
324 }
325 
326 static bool check_all_vmcs_fields(u8 cookie)
327 {
328 	bool pass = true;
329 	int i;
330 
331 	for (i = 0; i < ARRAY_SIZE(vmcs_fields); i++) {
332 		if (!check_vmcs_field(&vmcs_fields[i], cookie))
333 			pass = false;
334 	}
335 
336 	return pass;
337 }
338 
339 static u32 find_vmcs_max_index(void)
340 {
341 	u32 idx, width, type, enc;
342 	u64 actual;
343 	int ret;
344 
345 	/* scan backwards and stop when found */
346 	for (idx = (1 << 9) - 1; idx >= 0; idx--) {
347 
348 		/* try all combinations of width and type */
349 		for (type = 0; type < (1 << 2); type++) {
350 			for (width = 0; width < (1 << 2) ; width++) {
351 				enc = (idx << VMCS_FIELD_INDEX_SHIFT) |
352 				      (type << VMCS_FIELD_TYPE_SHIFT) |
353 				      (width << VMCS_FIELD_WIDTH_SHIFT);
354 
355 				ret = vmcs_read_checking(enc, &actual);
356 				assert(!(ret & X86_EFLAGS_CF));
357 				if (!(ret & X86_EFLAGS_ZF))
358 					return idx;
359 			}
360 		}
361 	}
362 	/* some VMCS fields should exist */
363 	assert(0);
364 	return 0;
365 }
366 
367 static void test_vmwrite_vmread(void)
368 {
369 	struct vmcs *vmcs = alloc_page();
370 	u32 vmcs_enum_max, max_index = 0;
371 
372 	vmcs->hdr.revision_id = basic.revision;
373 	assert(!vmcs_clear(vmcs));
374 	assert(!make_vmcs_current(vmcs));
375 
376 	set_all_vmcs_fields(0x42);
377 	report(check_all_vmcs_fields(0x42), "VMWRITE/VMREAD");
378 
379 	vmcs_enum_max = (rdmsr(MSR_IA32_VMX_VMCS_ENUM) & VMCS_FIELD_INDEX_MASK)
380 			>> VMCS_FIELD_INDEX_SHIFT;
381 	max_index = find_vmcs_max_index();
382 	report(vmcs_enum_max == max_index,
383 	       "VMX_VMCS_ENUM.MAX_INDEX expected: %x, actual: %x",
384 	       max_index, vmcs_enum_max);
385 
386 	assert(!vmcs_clear(vmcs));
387 	free_page(vmcs);
388 }
389 
390 static void test_vmcs_high(void)
391 {
392 	struct vmcs *vmcs = alloc_page();
393 
394 	vmcs->hdr.revision_id = basic.revision;
395 	assert(!vmcs_clear(vmcs));
396 	assert(!make_vmcs_current(vmcs));
397 
398 	vmcs_write(TSC_OFFSET, 0x0123456789ABCDEFull);
399 	report(vmcs_read(TSC_OFFSET) == 0x0123456789ABCDEFull,
400 	       "VMREAD TSC_OFFSET after VMWRITE TSC_OFFSET");
401 	report(vmcs_read(TSC_OFFSET_HI) == 0x01234567ull,
402 	       "VMREAD TSC_OFFSET_HI after VMWRITE TSC_OFFSET");
403 	vmcs_write(TSC_OFFSET_HI, 0x76543210ul);
404 	report(vmcs_read(TSC_OFFSET_HI) == 0x76543210ul,
405 	       "VMREAD TSC_OFFSET_HI after VMWRITE TSC_OFFSET_HI");
406 	report(vmcs_read(TSC_OFFSET) == 0x7654321089ABCDEFull,
407 	       "VMREAD TSC_OFFSET after VMWRITE TSC_OFFSET_HI");
408 
409 	assert(!vmcs_clear(vmcs));
410 	free_page(vmcs);
411 }
412 
413 static void test_vmcs_lifecycle(void)
414 {
415 	struct vmcs *vmcs[2] = {};
416 	int i;
417 
418 	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
419 		vmcs[i] = alloc_page();
420 		vmcs[i]->hdr.revision_id = basic.revision;
421 	}
422 
423 #define VMPTRLD(_i) do { \
424 	assert(_i < ARRAY_SIZE(vmcs)); \
425 	assert(!make_vmcs_current(vmcs[_i])); \
426 	printf("VMPTRLD VMCS%d\n", (_i)); \
427 } while (0)
428 
429 #define VMCLEAR(_i) do { \
430 	assert(_i < ARRAY_SIZE(vmcs)); \
431 	assert(!vmcs_clear(vmcs[_i])); \
432 	printf("VMCLEAR VMCS%d\n", (_i)); \
433 } while (0)
434 
435 	VMCLEAR(0);
436 	VMPTRLD(0);
437 	set_all_vmcs_fields(0);
438 	report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]");
439 
440 	VMCLEAR(0);
441 	VMPTRLD(0);
442 	report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]");
443 
444 	VMCLEAR(1);
445 	report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]");
446 
447 	VMPTRLD(1);
448 	set_all_vmcs_fields(1);
449 	report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VCMS1]");
450 
451 	VMPTRLD(0);
452 	report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0,VCMS1]");
453 	VMPTRLD(1);
454 	report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VCMS1]");
455 	VMPTRLD(1);
456 	report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VCMS1]");
457 
458 	VMCLEAR(0);
459 	report(check_all_vmcs_fields(1), "current:VMCS1 active:[VCMS1]");
460 
461 	/* VMPTRLD should not erase VMWRITEs to the current VMCS */
462 	set_all_vmcs_fields(2);
463 	VMPTRLD(1);
464 	report(check_all_vmcs_fields(2), "current:VMCS1 active:[VCMS1]");
465 
466 	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
467 		VMCLEAR(i);
468 		free_page(vmcs[i]);
469 	}
470 
471 #undef VMPTRLD
472 #undef VMCLEAR
473 }
474 
475 void vmx_set_test_stage(u32 s)
476 {
477 	barrier();
478 	stage = s;
479 	barrier();
480 }
481 
482 u32 vmx_get_test_stage(void)
483 {
484 	u32 s;
485 
486 	barrier();
487 	s = stage;
488 	barrier();
489 	return s;
490 }
491 
492 void vmx_inc_test_stage(void)
493 {
494 	barrier();
495 	stage++;
496 	barrier();
497 }
498 
499 /* entry_sysenter */
500 asm(
501 	".align	4, 0x90\n\t"
502 	".globl	entry_sysenter\n\t"
503 	"entry_sysenter:\n\t"
504 	SAVE_GPR
505 	"	and	$0xf, %rax\n\t"
506 	"	mov	%rax, %rdi\n\t"
507 	"	call	syscall_handler\n\t"
508 	LOAD_GPR
509 	"	vmresume\n\t"
510 );
511 
512 static void __attribute__((__used__)) syscall_handler(u64 syscall_no)
513 {
514 	if (current->syscall_handler)
515 		current->syscall_handler(syscall_no);
516 }
517 
518 static const char * const exit_reason_descriptions[] = {
519 	[VMX_EXC_NMI]		= "VMX_EXC_NMI",
520 	[VMX_EXTINT]		= "VMX_EXTINT",
521 	[VMX_TRIPLE_FAULT]	= "VMX_TRIPLE_FAULT",
522 	[VMX_INIT]		= "VMX_INIT",
523 	[VMX_SIPI]		= "VMX_SIPI",
524 	[VMX_SMI_IO]		= "VMX_SMI_IO",
525 	[VMX_SMI_OTHER]		= "VMX_SMI_OTHER",
526 	[VMX_INTR_WINDOW]	= "VMX_INTR_WINDOW",
527 	[VMX_NMI_WINDOW]	= "VMX_NMI_WINDOW",
528 	[VMX_TASK_SWITCH]	= "VMX_TASK_SWITCH",
529 	[VMX_CPUID]		= "VMX_CPUID",
530 	[VMX_GETSEC]		= "VMX_GETSEC",
531 	[VMX_HLT]		= "VMX_HLT",
532 	[VMX_INVD]		= "VMX_INVD",
533 	[VMX_INVLPG]		= "VMX_INVLPG",
534 	[VMX_RDPMC]		= "VMX_RDPMC",
535 	[VMX_RDTSC]		= "VMX_RDTSC",
536 	[VMX_RSM]		= "VMX_RSM",
537 	[VMX_VMCALL]		= "VMX_VMCALL",
538 	[VMX_VMCLEAR]		= "VMX_VMCLEAR",
539 	[VMX_VMLAUNCH]		= "VMX_VMLAUNCH",
540 	[VMX_VMPTRLD]		= "VMX_VMPTRLD",
541 	[VMX_VMPTRST]		= "VMX_VMPTRST",
542 	[VMX_VMREAD]		= "VMX_VMREAD",
543 	[VMX_VMRESUME]		= "VMX_VMRESUME",
544 	[VMX_VMWRITE]		= "VMX_VMWRITE",
545 	[VMX_VMXOFF]		= "VMX_VMXOFF",
546 	[VMX_VMXON]		= "VMX_VMXON",
547 	[VMX_CR]		= "VMX_CR",
548 	[VMX_DR]		= "VMX_DR",
549 	[VMX_IO]		= "VMX_IO",
550 	[VMX_RDMSR]		= "VMX_RDMSR",
551 	[VMX_WRMSR]		= "VMX_WRMSR",
552 	[VMX_FAIL_STATE]	= "VMX_FAIL_STATE",
553 	[VMX_FAIL_MSR]		= "VMX_FAIL_MSR",
554 	[VMX_MWAIT]		= "VMX_MWAIT",
555 	[VMX_MTF]		= "VMX_MTF",
556 	[VMX_MONITOR]		= "VMX_MONITOR",
557 	[VMX_PAUSE]		= "VMX_PAUSE",
558 	[VMX_FAIL_MCHECK]	= "VMX_FAIL_MCHECK",
559 	[VMX_TPR_THRESHOLD]	= "VMX_TPR_THRESHOLD",
560 	[VMX_APIC_ACCESS]	= "VMX_APIC_ACCESS",
561 	[VMX_EOI_INDUCED]	= "VMX_EOI_INDUCED",
562 	[VMX_GDTR_IDTR]		= "VMX_GDTR_IDTR",
563 	[VMX_LDTR_TR]		= "VMX_LDTR_TR",
564 	[VMX_EPT_VIOLATION]	= "VMX_EPT_VIOLATION",
565 	[VMX_EPT_MISCONFIG]	= "VMX_EPT_MISCONFIG",
566 	[VMX_INVEPT]		= "VMX_INVEPT",
567 	[VMX_PREEMPT]		= "VMX_PREEMPT",
568 	[VMX_INVVPID]		= "VMX_INVVPID",
569 	[VMX_WBINVD]		= "VMX_WBINVD",
570 	[VMX_XSETBV]		= "VMX_XSETBV",
571 	[VMX_APIC_WRITE]	= "VMX_APIC_WRITE",
572 	[VMX_RDRAND]		= "VMX_RDRAND",
573 	[VMX_INVPCID]		= "VMX_INVPCID",
574 	[VMX_VMFUNC]		= "VMX_VMFUNC",
575 	[VMX_RDSEED]		= "VMX_RDSEED",
576 	[VMX_PML_FULL]		= "VMX_PML_FULL",
577 	[VMX_XSAVES]		= "VMX_XSAVES",
578 	[VMX_XRSTORS]		= "VMX_XRSTORS",
579 };
580 
581 const char *exit_reason_description(u64 reason)
582 {
583 	if (reason >= ARRAY_SIZE(exit_reason_descriptions))
584 		return "(unknown)";
585 	return exit_reason_descriptions[reason] ? : "(unused)";
586 }
587 
588 void print_vmexit_info()
589 {
590 	u64 guest_rip, guest_rsp;
591 	ulong reason = vmcs_read(EXI_REASON) & 0xff;
592 	ulong exit_qual = vmcs_read(EXI_QUALIFICATION);
593 	guest_rip = vmcs_read(GUEST_RIP);
594 	guest_rsp = vmcs_read(GUEST_RSP);
595 	printf("VMEXIT info:\n");
596 	printf("\tvmexit reason = %ld\n", reason);
597 	printf("\texit qualification = %#lx\n", exit_qual);
598 	printf("\tBit 31 of reason = %lx\n", (vmcs_read(EXI_REASON) >> 31) & 1);
599 	printf("\tguest_rip = %#lx\n", guest_rip);
600 	printf("\tRAX=%#lx    RBX=%#lx    RCX=%#lx    RDX=%#lx\n",
601 		regs.rax, regs.rbx, regs.rcx, regs.rdx);
602 	printf("\tRSP=%#lx    RBP=%#lx    RSI=%#lx    RDI=%#lx\n",
603 		guest_rsp, regs.rbp, regs.rsi, regs.rdi);
604 	printf("\tR8 =%#lx    R9 =%#lx    R10=%#lx    R11=%#lx\n",
605 		regs.r8, regs.r9, regs.r10, regs.r11);
606 	printf("\tR12=%#lx    R13=%#lx    R14=%#lx    R15=%#lx\n",
607 		regs.r12, regs.r13, regs.r14, regs.r15);
608 }
609 
610 void
611 print_vmentry_failure_info(struct vmentry_failure *failure) {
612 	if (failure->early) {
613 		printf("Early %s failure: ", failure->instr);
614 		switch (failure->flags & VMX_ENTRY_FLAGS) {
615 		case X86_EFLAGS_CF:
616 			printf("current-VMCS pointer is not valid.\n");
617 			break;
618 		case X86_EFLAGS_ZF:
619 			printf("error number is %ld. See Intel 30.4.\n",
620 			       vmcs_read(VMX_INST_ERROR));
621 			break;
622 		default:
623 			printf("unexpected flags %lx!\n", failure->flags);
624 		}
625 	} else {
626 		u64 reason = vmcs_read(EXI_REASON);
627 		u64 qual = vmcs_read(EXI_QUALIFICATION);
628 
629 		printf("Non-early %s failure (reason=%#lx, qual=%#lx): ",
630 			failure->instr, reason, qual);
631 
632 		switch (reason & 0xff) {
633 		case VMX_FAIL_STATE:
634 			printf("invalid guest state\n");
635 			break;
636 		case VMX_FAIL_MSR:
637 			printf("MSR loading\n");
638 			break;
639 		case VMX_FAIL_MCHECK:
640 			printf("machine-check event\n");
641 			break;
642 		default:
643 			printf("unexpected basic exit reason %ld\n",
644 			       reason & 0xff);
645 		}
646 
647 		if (!(reason & VMX_ENTRY_FAILURE))
648 			printf("\tVMX_ENTRY_FAILURE BIT NOT SET!\n");
649 
650 		if (reason & 0x7fff0000)
651 			printf("\tRESERVED BITS SET!\n");
652 	}
653 }
654 
655 /*
656  * VMCLEAR should ensures all VMCS state is flushed to the VMCS
657  * region in memory.
658  */
659 static void test_vmclear_flushing(void)
660 {
661 	struct vmcs *vmcs[3] = {};
662 	int i;
663 
664 	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
665 		vmcs[i] = alloc_page();
666 	}
667 
668 	vmcs[0]->hdr.revision_id = basic.revision;
669 	assert(!vmcs_clear(vmcs[0]));
670 	assert(!make_vmcs_current(vmcs[0]));
671 	set_all_vmcs_fields(0x86);
672 
673 	assert(!vmcs_clear(vmcs[0]));
674 	memcpy(vmcs[1], vmcs[0], basic.size);
675 	assert(!make_vmcs_current(vmcs[1]));
676 	report(check_all_vmcs_fields(0x86),
677 	       "test vmclear flush (current VMCS)");
678 
679 	set_all_vmcs_fields(0x87);
680 	assert(!make_vmcs_current(vmcs[0]));
681 	assert(!vmcs_clear(vmcs[1]));
682 	memcpy(vmcs[2], vmcs[1], basic.size);
683 	assert(!make_vmcs_current(vmcs[2]));
684 	report(check_all_vmcs_fields(0x87),
685 	       "test vmclear flush (!current VMCS)");
686 
687 	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
688 		assert(!vmcs_clear(vmcs[i]));
689 		free_page(vmcs[i]);
690 	}
691 }
692 
693 static void test_vmclear(void)
694 {
695 	struct vmcs *tmp_root;
696 	int width = cpuid_maxphyaddr();
697 
698 	/*
699 	 * Note- The tests below do not necessarily have a
700 	 * valid VMCS, but that's ok since the invalid vmcs
701 	 * is only used for a specific test and is discarded
702 	 * without touching its contents
703 	 */
704 
705 	/* Unaligned page access */
706 	tmp_root = (struct vmcs *)((intptr_t)vmcs_root + 1);
707 	report(vmcs_clear(tmp_root) == 1, "test vmclear with unaligned vmcs");
708 
709 	/* gpa bits beyond physical address width are set*/
710 	tmp_root = (struct vmcs *)((intptr_t)vmcs_root |
711 				   ((u64)1 << (width+1)));
712 	report(vmcs_clear(tmp_root) == 1,
713 	       "test vmclear with vmcs address bits set beyond physical address width");
714 
715 	/* Pass VMXON region */
716 	tmp_root = (struct vmcs *)bsp_vmxon_region;
717 	report(vmcs_clear(tmp_root) == 1, "test vmclear with vmxon region");
718 
719 	/* Valid VMCS */
720 	report(vmcs_clear(vmcs_root) == 0,
721 	       "test vmclear with valid vmcs region");
722 
723 	test_vmclear_flushing();
724 }
725 
726 static void __attribute__((__used__)) guest_main(void)
727 {
728 	if (current->v2)
729 		v2_guest_main();
730 	else
731 		current->guest_main();
732 }
733 
734 /* guest_entry */
735 asm(
736 	".align	4, 0x90\n\t"
737 	".globl	entry_guest\n\t"
738 	"guest_entry:\n\t"
739 	"	call guest_main\n\t"
740 	"	mov $1, %edi\n\t"
741 	"	call hypercall\n\t"
742 );
743 
744 /* EPT paging structure related functions */
745 /* split_large_ept_entry: Split a 2M/1G large page into 512 smaller PTEs.
746 		@ptep : large page table entry to split
747 		@level : level of ptep (2 or 3)
748  */
749 static void split_large_ept_entry(unsigned long *ptep, int level)
750 {
751 	unsigned long *new_pt;
752 	unsigned long gpa;
753 	unsigned long pte;
754 	unsigned long prototype;
755 	int i;
756 
757 	pte = *ptep;
758 	assert(pte & EPT_PRESENT);
759 	assert(pte & EPT_LARGE_PAGE);
760 	assert(level == 2 || level == 3);
761 
762 	new_pt = alloc_page();
763 	assert(new_pt);
764 
765 	prototype = pte & ~EPT_ADDR_MASK;
766 	if (level == 2)
767 		prototype &= ~EPT_LARGE_PAGE;
768 
769 	gpa = pte & EPT_ADDR_MASK;
770 	for (i = 0; i < EPT_PGDIR_ENTRIES; i++) {
771 		new_pt[i] = prototype | gpa;
772 		gpa += 1ul << EPT_LEVEL_SHIFT(level - 1);
773 	}
774 
775 	pte &= ~EPT_LARGE_PAGE;
776 	pte &= ~EPT_ADDR_MASK;
777 	pte |= virt_to_phys(new_pt);
778 
779 	*ptep = pte;
780 }
781 
782 /* install_ept_entry : Install a page to a given level in EPT
783 		@pml4 : addr of pml4 table
784 		@pte_level : level of PTE to set
785 		@guest_addr : physical address of guest
786 		@pte : pte value to set
787 		@pt_page : address of page table, NULL for a new page
788  */
789 void install_ept_entry(unsigned long *pml4,
790 		int pte_level,
791 		unsigned long guest_addr,
792 		unsigned long pte,
793 		unsigned long *pt_page)
794 {
795 	int level;
796 	unsigned long *pt = pml4;
797 	unsigned offset;
798 
799 	/* EPT only uses 48 bits of GPA. */
800 	assert(guest_addr < (1ul << 48));
801 
802 	for (level = EPT_PAGE_LEVEL; level > pte_level; --level) {
803 		offset = (guest_addr >> EPT_LEVEL_SHIFT(level))
804 				& EPT_PGDIR_MASK;
805 		if (!(pt[offset] & (EPT_PRESENT))) {
806 			unsigned long *new_pt = pt_page;
807 			if (!new_pt)
808 				new_pt = alloc_page();
809 			else
810 				pt_page = 0;
811 			memset(new_pt, 0, PAGE_SIZE);
812 			pt[offset] = virt_to_phys(new_pt)
813 					| EPT_RA | EPT_WA | EPT_EA;
814 		} else if (pt[offset] & EPT_LARGE_PAGE)
815 			split_large_ept_entry(&pt[offset], level);
816 		pt = phys_to_virt(pt[offset] & EPT_ADDR_MASK);
817 	}
818 	offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) & EPT_PGDIR_MASK;
819 	pt[offset] = pte;
820 }
821 
822 /* Map a page, @perm is the permission of the page */
823 void install_ept(unsigned long *pml4,
824 		unsigned long phys,
825 		unsigned long guest_addr,
826 		u64 perm)
827 {
828 	install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0);
829 }
830 
831 /* Map a 1G-size page */
832 void install_1g_ept(unsigned long *pml4,
833 		unsigned long phys,
834 		unsigned long guest_addr,
835 		u64 perm)
836 {
837 	install_ept_entry(pml4, 3, guest_addr,
838 			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
839 }
840 
841 /* Map a 2M-size page */
842 void install_2m_ept(unsigned long *pml4,
843 		unsigned long phys,
844 		unsigned long guest_addr,
845 		u64 perm)
846 {
847 	install_ept_entry(pml4, 2, guest_addr,
848 			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
849 }
850 
851 /* setup_ept_range : Setup a range of 1:1 mapped page to EPT paging structure.
852 		@start : start address of guest page
853 		@len : length of address to be mapped
854 		@map_1g : whether 1G page map is used
855 		@map_2m : whether 2M page map is used
856 		@perm : permission for every page
857  */
858 void setup_ept_range(unsigned long *pml4, unsigned long start,
859 		     unsigned long len, int map_1g, int map_2m, u64 perm)
860 {
861 	u64 phys = start;
862 	u64 max = (u64)len + (u64)start;
863 
864 	if (map_1g) {
865 		while (phys + PAGE_SIZE_1G <= max) {
866 			install_1g_ept(pml4, phys, phys, perm);
867 			phys += PAGE_SIZE_1G;
868 		}
869 	}
870 	if (map_2m) {
871 		while (phys + PAGE_SIZE_2M <= max) {
872 			install_2m_ept(pml4, phys, phys, perm);
873 			phys += PAGE_SIZE_2M;
874 		}
875 	}
876 	while (phys + PAGE_SIZE <= max) {
877 		install_ept(pml4, phys, phys, perm);
878 		phys += PAGE_SIZE;
879 	}
880 }
881 
882 /* get_ept_pte : Get the PTE of a given level in EPT,
883     @level == 1 means get the latest level*/
884 bool get_ept_pte(unsigned long *pml4, unsigned long guest_addr, int level,
885 		unsigned long *pte)
886 {
887 	int l;
888 	unsigned long *pt = pml4, iter_pte;
889 	unsigned offset;
890 
891 	assert(level >= 1 && level <= 4);
892 
893 	for (l = EPT_PAGE_LEVEL; ; --l) {
894 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
895 		iter_pte = pt[offset];
896 		if (l == level)
897 			break;
898 		if (l < 4 && (iter_pte & EPT_LARGE_PAGE))
899 			return false;
900 		if (!(iter_pte & (EPT_PRESENT)))
901 			return false;
902 		pt = (unsigned long *)(iter_pte & EPT_ADDR_MASK);
903 	}
904 	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
905 	if (pte)
906 		*pte = pt[offset];
907 	return true;
908 }
909 
910 static void clear_ept_ad_pte(unsigned long *pml4, unsigned long guest_addr)
911 {
912 	int l;
913 	unsigned long *pt = pml4;
914 	u64 pte;
915 	unsigned offset;
916 
917 	for (l = EPT_PAGE_LEVEL; ; --l) {
918 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
919 		pt[offset] &= ~(EPT_ACCESS_FLAG|EPT_DIRTY_FLAG);
920 		pte = pt[offset];
921 		if (l == 1 || (l < 4 && (pte & EPT_LARGE_PAGE)))
922 			break;
923 		pt = (unsigned long *)(pte & EPT_ADDR_MASK);
924 	}
925 }
926 
927 /* clear_ept_ad : Clear EPT A/D bits for the page table walk and the
928    final GPA of a guest address.  */
929 void clear_ept_ad(unsigned long *pml4, u64 guest_cr3,
930 		  unsigned long guest_addr)
931 {
932 	int l;
933 	unsigned long *pt = (unsigned long *)guest_cr3, gpa;
934 	u64 pte, offset_in_page;
935 	unsigned offset;
936 
937 	for (l = EPT_PAGE_LEVEL; ; --l) {
938 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
939 
940 		clear_ept_ad_pte(pml4, (u64) &pt[offset]);
941 		pte = pt[offset];
942 		if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK)))
943 			break;
944 		if (!(pte & PT_PRESENT_MASK))
945 			return;
946 		pt = (unsigned long *)(pte & PT_ADDR_MASK);
947 	}
948 
949 	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
950 	offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1);
951 	gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page);
952 	clear_ept_ad_pte(pml4, gpa);
953 }
954 
955 /* check_ept_ad : Check the content of EPT A/D bits for the page table
956    walk and the final GPA of a guest address.  */
957 void check_ept_ad(unsigned long *pml4, u64 guest_cr3,
958 		  unsigned long guest_addr, int expected_gpa_ad,
959 		  int expected_pt_ad)
960 {
961 	int l;
962 	unsigned long *pt = (unsigned long *)guest_cr3, gpa;
963 	u64 ept_pte, pte, offset_in_page;
964 	unsigned offset;
965 	bool bad_pt_ad = false;
966 
967 	for (l = EPT_PAGE_LEVEL; ; --l) {
968 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
969 
970 		if (!get_ept_pte(pml4, (u64) &pt[offset], 1, &ept_pte)) {
971 			printf("EPT - guest level %d page table is not mapped.\n", l);
972 			return;
973 		}
974 
975 		if (!bad_pt_ad) {
976 			bad_pt_ad |= (ept_pte & (EPT_ACCESS_FLAG|EPT_DIRTY_FLAG)) != expected_pt_ad;
977 			if (bad_pt_ad)
978 				report(false,
979 				       "EPT - guest level %d page table A=%d/D=%d",
980 				       l,
981 				       !!(expected_pt_ad & EPT_ACCESS_FLAG),
982 				       !!(expected_pt_ad & EPT_DIRTY_FLAG));
983 		}
984 
985 		pte = pt[offset];
986 		if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK)))
987 			break;
988 		if (!(pte & PT_PRESENT_MASK))
989 			return;
990 		pt = (unsigned long *)(pte & PT_ADDR_MASK);
991 	}
992 
993 	if (!bad_pt_ad)
994 		report(true, "EPT - guest page table structures A=%d/D=%d",
995 		       !!(expected_pt_ad & EPT_ACCESS_FLAG),
996 		       !!(expected_pt_ad & EPT_DIRTY_FLAG));
997 
998 	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
999 	offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1);
1000 	gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page);
1001 
1002 	if (!get_ept_pte(pml4, gpa, 1, &ept_pte)) {
1003 		report(false, "EPT - guest physical address is not mapped");
1004 		return;
1005 	}
1006 	report((ept_pte & (EPT_ACCESS_FLAG | EPT_DIRTY_FLAG)) == expected_gpa_ad,
1007 	       "EPT - guest physical address A=%d/D=%d",
1008 	       !!(expected_gpa_ad & EPT_ACCESS_FLAG),
1009 	       !!(expected_gpa_ad & EPT_DIRTY_FLAG));
1010 }
1011 
1012 
1013 void ept_sync(int type, u64 eptp)
1014 {
1015 	switch (type) {
1016 	case INVEPT_SINGLE:
1017 		if (ept_vpid.val & EPT_CAP_INVEPT_SINGLE) {
1018 			invept(INVEPT_SINGLE, eptp);
1019 			break;
1020 		}
1021 		/* else fall through */
1022 	case INVEPT_GLOBAL:
1023 		if (ept_vpid.val & EPT_CAP_INVEPT_ALL) {
1024 			invept(INVEPT_GLOBAL, eptp);
1025 			break;
1026 		}
1027 		/* else fall through */
1028 	default:
1029 		printf("WARNING: invept is not supported!\n");
1030 	}
1031 }
1032 
1033 void set_ept_pte(unsigned long *pml4, unsigned long guest_addr,
1034 		 int level, u64 pte_val)
1035 {
1036 	int l;
1037 	unsigned long *pt = pml4;
1038 	unsigned offset;
1039 
1040 	assert(level >= 1 && level <= 4);
1041 
1042 	for (l = EPT_PAGE_LEVEL; ; --l) {
1043 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
1044 		if (l == level)
1045 			break;
1046 		assert(pt[offset] & EPT_PRESENT);
1047 		pt = (unsigned long *)(pt[offset] & EPT_ADDR_MASK);
1048 	}
1049 	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
1050 	pt[offset] = pte_val;
1051 }
1052 
1053 bool ept_2m_supported(void)
1054 {
1055 	return ept_vpid.val & EPT_CAP_2M_PAGE;
1056 }
1057 
1058 bool ept_1g_supported(void)
1059 {
1060 	return ept_vpid.val & EPT_CAP_1G_PAGE;
1061 }
1062 
1063 bool ept_huge_pages_supported(int level)
1064 {
1065 	if (level == 2)
1066 		return ept_2m_supported();
1067 	else if (level == 3)
1068 		return ept_1g_supported();
1069 	else
1070 		return false;
1071 }
1072 
1073 bool ept_execute_only_supported(void)
1074 {
1075 	return ept_vpid.val & EPT_CAP_WT;
1076 }
1077 
1078 bool ept_ad_bits_supported(void)
1079 {
1080 	return ept_vpid.val & EPT_CAP_AD_FLAG;
1081 }
1082 
1083 void vpid_sync(int type, u16 vpid)
1084 {
1085 	switch(type) {
1086 	case INVVPID_CONTEXT_GLOBAL:
1087 		if (ept_vpid.val & VPID_CAP_INVVPID_CXTGLB) {
1088 			invvpid(INVVPID_CONTEXT_GLOBAL, vpid, 0);
1089 			break;
1090 		}
1091 	case INVVPID_ALL:
1092 		if (ept_vpid.val & VPID_CAP_INVVPID_ALL) {
1093 			invvpid(INVVPID_ALL, vpid, 0);
1094 			break;
1095 		}
1096 	default:
1097 		printf("WARNING: invvpid is not supported\n");
1098 	}
1099 }
1100 
1101 static void init_vmcs_ctrl(void)
1102 {
1103 	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
1104 	/* 26.2.1.1 */
1105 	vmcs_write(PIN_CONTROLS, ctrl_pin);
1106 	/* Disable VMEXIT of IO instruction */
1107 	vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
1108 	if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
1109 		ctrl_cpu[1] = (ctrl_cpu[1] | ctrl_cpu_rev[1].set) &
1110 			ctrl_cpu_rev[1].clr;
1111 		vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
1112 	}
1113 	vmcs_write(CR3_TARGET_COUNT, 0);
1114 	vmcs_write(VPID, ++vpid_cnt);
1115 }
1116 
1117 static void init_vmcs_host(void)
1118 {
1119 	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
1120 	/* 26.2.1.2 */
1121 	vmcs_write(HOST_EFER, rdmsr(MSR_EFER));
1122 
1123 	/* 26.2.1.3 */
1124 	vmcs_write(ENT_CONTROLS, ctrl_enter);
1125 	vmcs_write(EXI_CONTROLS, ctrl_exit);
1126 
1127 	/* 26.2.2 */
1128 	vmcs_write(HOST_CR0, read_cr0());
1129 	vmcs_write(HOST_CR3, read_cr3());
1130 	vmcs_write(HOST_CR4, read_cr4());
1131 	vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
1132 	vmcs_write(HOST_SYSENTER_CS,  KERNEL_CS);
1133 
1134 	/* 26.2.3 */
1135 	vmcs_write(HOST_SEL_CS, KERNEL_CS);
1136 	vmcs_write(HOST_SEL_SS, KERNEL_DS);
1137 	vmcs_write(HOST_SEL_DS, KERNEL_DS);
1138 	vmcs_write(HOST_SEL_ES, KERNEL_DS);
1139 	vmcs_write(HOST_SEL_FS, KERNEL_DS);
1140 	vmcs_write(HOST_SEL_GS, KERNEL_DS);
1141 	vmcs_write(HOST_SEL_TR, TSS_MAIN);
1142 	vmcs_write(HOST_BASE_TR, tss_descr.base);
1143 	vmcs_write(HOST_BASE_GDTR, gdt64_desc.base);
1144 	vmcs_write(HOST_BASE_IDTR, idt_descr.base);
1145 	vmcs_write(HOST_BASE_FS, 0);
1146 	vmcs_write(HOST_BASE_GS, 0);
1147 
1148 	/* Set other vmcs area */
1149 	vmcs_write(PF_ERROR_MASK, 0);
1150 	vmcs_write(PF_ERROR_MATCH, 0);
1151 	vmcs_write(VMCS_LINK_PTR, ~0ul);
1152 	vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
1153 	vmcs_write(HOST_RIP, (u64)(&vmx_return));
1154 }
1155 
1156 static void init_vmcs_guest(void)
1157 {
1158 	/* 26.3 CHECKING AND LOADING GUEST STATE */
1159 	ulong guest_cr0, guest_cr4, guest_cr3;
1160 	/* 26.3.1.1 */
1161 	guest_cr0 = read_cr0();
1162 	guest_cr4 = read_cr4();
1163 	guest_cr3 = read_cr3();
1164 	if (ctrl_enter & ENT_GUEST_64) {
1165 		guest_cr0 |= X86_CR0_PG;
1166 		guest_cr4 |= X86_CR4_PAE;
1167 	}
1168 	if ((ctrl_enter & ENT_GUEST_64) == 0)
1169 		guest_cr4 &= (~X86_CR4_PCIDE);
1170 	if (guest_cr0 & X86_CR0_PG)
1171 		guest_cr0 |= X86_CR0_PE;
1172 	vmcs_write(GUEST_CR0, guest_cr0);
1173 	vmcs_write(GUEST_CR3, guest_cr3);
1174 	vmcs_write(GUEST_CR4, guest_cr4);
1175 	vmcs_write(GUEST_SYSENTER_CS,  KERNEL_CS);
1176 	vmcs_write(GUEST_SYSENTER_ESP,
1177 		(u64)(guest_syscall_stack + PAGE_SIZE - 1));
1178 	vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
1179 	vmcs_write(GUEST_DR7, 0);
1180 	vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));
1181 
1182 	/* 26.3.1.2 */
1183 	vmcs_write(GUEST_SEL_CS, KERNEL_CS);
1184 	vmcs_write(GUEST_SEL_SS, KERNEL_DS);
1185 	vmcs_write(GUEST_SEL_DS, KERNEL_DS);
1186 	vmcs_write(GUEST_SEL_ES, KERNEL_DS);
1187 	vmcs_write(GUEST_SEL_FS, KERNEL_DS);
1188 	vmcs_write(GUEST_SEL_GS, KERNEL_DS);
1189 	vmcs_write(GUEST_SEL_TR, TSS_MAIN);
1190 	vmcs_write(GUEST_SEL_LDTR, 0);
1191 
1192 	vmcs_write(GUEST_BASE_CS, 0);
1193 	vmcs_write(GUEST_BASE_ES, 0);
1194 	vmcs_write(GUEST_BASE_SS, 0);
1195 	vmcs_write(GUEST_BASE_DS, 0);
1196 	vmcs_write(GUEST_BASE_FS, 0);
1197 	vmcs_write(GUEST_BASE_GS, 0);
1198 	vmcs_write(GUEST_BASE_TR, tss_descr.base);
1199 	vmcs_write(GUEST_BASE_LDTR, 0);
1200 
1201 	vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
1202 	vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
1203 	vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
1204 	vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
1205 	vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
1206 	vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
1207 	vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
1208 	vmcs_write(GUEST_LIMIT_TR, tss_descr.limit);
1209 
1210 	vmcs_write(GUEST_AR_CS, 0xa09b);
1211 	vmcs_write(GUEST_AR_DS, 0xc093);
1212 	vmcs_write(GUEST_AR_ES, 0xc093);
1213 	vmcs_write(GUEST_AR_FS, 0xc093);
1214 	vmcs_write(GUEST_AR_GS, 0xc093);
1215 	vmcs_write(GUEST_AR_SS, 0xc093);
1216 	vmcs_write(GUEST_AR_LDTR, 0x82);
1217 	vmcs_write(GUEST_AR_TR, 0x8b);
1218 
1219 	/* 26.3.1.3 */
1220 	vmcs_write(GUEST_BASE_GDTR, gdt64_desc.base);
1221 	vmcs_write(GUEST_BASE_IDTR, idt_descr.base);
1222 	vmcs_write(GUEST_LIMIT_GDTR, gdt64_desc.limit);
1223 	vmcs_write(GUEST_LIMIT_IDTR, idt_descr.limit);
1224 
1225 	/* 26.3.1.4 */
1226 	vmcs_write(GUEST_RIP, (u64)(&guest_entry));
1227 	vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1));
1228 	vmcs_write(GUEST_RFLAGS, X86_EFLAGS_FIXED);
1229 
1230 	/* 26.3.1.5 */
1231 	vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE);
1232 	vmcs_write(GUEST_INTR_STATE, 0);
1233 }
1234 
1235 static int init_vmcs(struct vmcs **vmcs)
1236 {
1237 	*vmcs = alloc_page();
1238 	(*vmcs)->hdr.revision_id = basic.revision;
1239 	/* vmclear first to init vmcs */
1240 	if (vmcs_clear(*vmcs)) {
1241 		printf("%s : vmcs_clear error\n", __func__);
1242 		return 1;
1243 	}
1244 
1245 	if (make_vmcs_current(*vmcs)) {
1246 		printf("%s : make_vmcs_current error\n", __func__);
1247 		return 1;
1248 	}
1249 
1250 	/* All settings to pin/exit/enter/cpu
1251 	   control fields should be placed here */
1252 	ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
1253 	ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
1254 	ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
1255 	/* DIsable IO instruction VMEXIT now */
1256 	ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
1257 	ctrl_cpu[1] = 0;
1258 
1259 	ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
1260 	ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
1261 	ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
1262 	ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;
1263 
1264 	init_vmcs_ctrl();
1265 	init_vmcs_host();
1266 	init_vmcs_guest();
1267 	return 0;
1268 }
1269 
1270 void enable_vmx(void)
1271 {
1272 	bool vmx_enabled =
1273 		rdmsr(MSR_IA32_FEATURE_CONTROL) &
1274 		FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
1275 
1276 	if (!vmx_enabled) {
1277 		wrmsr(MSR_IA32_FEATURE_CONTROL,
1278 				FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX |
1279 				FEATURE_CONTROL_LOCKED);
1280 	}
1281 }
1282 
1283 static void init_vmx_caps(void)
1284 {
1285 	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
1286 	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
1287 			: MSR_IA32_VMX_PINBASED_CTLS);
1288 	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
1289 			: MSR_IA32_VMX_EXIT_CTLS);
1290 	ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
1291 			: MSR_IA32_VMX_ENTRY_CTLS);
1292 	ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
1293 			: MSR_IA32_VMX_PROCBASED_CTLS);
1294 	if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0)
1295 		ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
1296 	else
1297 		ctrl_cpu_rev[1].val = 0;
1298 	if ((ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID)) != 0)
1299 		ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
1300 	else
1301 		ept_vpid.val = 0;
1302 }
1303 
1304 void init_vmx(u64 *vmxon_region)
1305 {
1306 	ulong fix_cr0_set, fix_cr0_clr;
1307 	ulong fix_cr4_set, fix_cr4_clr;
1308 
1309 	fix_cr0_set =  rdmsr(MSR_IA32_VMX_CR0_FIXED0);
1310 	fix_cr0_clr =  rdmsr(MSR_IA32_VMX_CR0_FIXED1);
1311 	fix_cr4_set =  rdmsr(MSR_IA32_VMX_CR4_FIXED0);
1312 	fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
1313 
1314 	write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
1315 	write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);
1316 
1317 	*vmxon_region = basic.revision;
1318 }
1319 
1320 static void alloc_bsp_vmx_pages(void)
1321 {
1322 	bsp_vmxon_region = alloc_page();
1323 	guest_stack = alloc_page();
1324 	guest_syscall_stack = alloc_page();
1325 	vmcs_root = alloc_page();
1326 }
1327 
1328 static void init_bsp_vmx(void)
1329 {
1330 	init_vmx_caps();
1331 	alloc_bsp_vmx_pages();
1332 	init_vmx(bsp_vmxon_region);
1333 }
1334 
/*
 * Exception-probe callback: run vmx_on() followed by vmx_off().
 * @data is unused and both return values are ignored.
 */
static void do_vmxon_off(void *data)
{
	(void)data;

	vmx_on();
	vmx_off();
}
1340 
1341 static void do_write_feature_control(void *data)
1342 {
1343 	wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
1344 }
1345 
1346 static int test_vmx_feature_control(void)
1347 {
1348 	u64 ia32_feature_control;
1349 	bool vmx_enabled;
1350 	bool feature_control_locked;
1351 
1352 	ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
1353 	vmx_enabled =
1354 		ia32_feature_control & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
1355 	feature_control_locked =
1356 		ia32_feature_control & FEATURE_CONTROL_LOCKED;
1357 
1358 	if (vmx_enabled && feature_control_locked) {
1359 		printf("VMX enabled and locked by BIOS\n");
1360 		return 0;
1361 	} else if (feature_control_locked) {
1362 		printf("ERROR: VMX locked out by BIOS!?\n");
1363 		return 1;
1364 	}
1365 
1366 	wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
1367 	report(test_for_exception(GP_VECTOR, &do_vmxon_off, NULL),
1368 	       "test vmxon with FEATURE_CONTROL cleared");
1369 
1370 	wrmsr(MSR_IA32_FEATURE_CONTROL, FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX);
1371 	report(test_for_exception(GP_VECTOR, &do_vmxon_off, NULL),
1372 	       "test vmxon without FEATURE_CONTROL lock");
1373 
1374 	wrmsr(MSR_IA32_FEATURE_CONTROL,
1375 		  FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX |
1376 		  FEATURE_CONTROL_LOCKED);
1377 
1378 	ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
1379 	vmx_enabled =
1380 		ia32_feature_control & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
1381 	report(vmx_enabled, "test enable VMX in FEATURE_CONTROL");
1382 
1383 	report(test_for_exception(GP_VECTOR, &do_write_feature_control, NULL),
1384 	       "test FEATURE_CONTROL lock bit");
1385 
1386 	return !vmx_enabled;
1387 }
1388 
1389 static int test_vmxon(void)
1390 {
1391 	int ret, ret1;
1392 	u64 *vmxon_region;
1393 	int width = cpuid_maxphyaddr();
1394 
1395 	/* Unaligned page access */
1396 	vmxon_region = (u64 *)((intptr_t)bsp_vmxon_region + 1);
1397 	ret1 = _vmx_on(vmxon_region);
1398 	report(ret1, "test vmxon with unaligned vmxon region");
1399 	if (!ret1) {
1400 		ret = 1;
1401 		goto out;
1402 	}
1403 
1404 	/* gpa bits beyond physical address width are set*/
1405 	vmxon_region = (u64 *)((intptr_t)bsp_vmxon_region | ((u64)1 << (width+1)));
1406 	ret1 = _vmx_on(vmxon_region);
1407 	report(ret1, "test vmxon with bits set beyond physical address width");
1408 	if (!ret1) {
1409 		ret = 1;
1410 		goto out;
1411 	}
1412 
1413 	/* invalid revision indentifier */
1414 	*bsp_vmxon_region = 0xba9da9;
1415 	ret1 = vmx_on();
1416 	report(ret1, "test vmxon with invalid revision identifier");
1417 	if (!ret1) {
1418 		ret = 1;
1419 		goto out;
1420 	}
1421 
1422 	/* and finally a valid region */
1423 	*bsp_vmxon_region = basic.revision;
1424 	ret = vmx_on();
1425 	report(!ret, "test vmxon with valid vmxon region");
1426 
1427 out:
1428 	return ret;
1429 }
1430 
1431 static void test_vmptrld(void)
1432 {
1433 	struct vmcs *vmcs, *tmp_root;
1434 	int width = cpuid_maxphyaddr();
1435 
1436 	vmcs = alloc_page();
1437 	vmcs->hdr.revision_id = basic.revision;
1438 
1439 	/* Unaligned page access */
1440 	tmp_root = (struct vmcs *)((intptr_t)vmcs + 1);
1441 	report(make_vmcs_current(tmp_root) == 1,
1442 	       "test vmptrld with unaligned vmcs");
1443 
1444 	/* gpa bits beyond physical address width are set*/
1445 	tmp_root = (struct vmcs *)((intptr_t)vmcs |
1446 				   ((u64)1 << (width+1)));
1447 	report(make_vmcs_current(tmp_root) == 1,
1448 	       "test vmptrld with vmcs address bits set beyond physical address width");
1449 
1450 	/* Pass VMXON region */
1451 	assert(!vmcs_clear(vmcs));
1452 	assert(!make_vmcs_current(vmcs));
1453 	tmp_root = (struct vmcs *)bsp_vmxon_region;
1454 	report(make_vmcs_current(tmp_root) == 1,
1455 	       "test vmptrld with vmxon region");
1456 	report(vmcs_read(VMX_INST_ERROR) == VMXERR_VMPTRLD_VMXON_POINTER,
1457 	       "test vmptrld with vmxon region vm-instruction error");
1458 
1459 	report(make_vmcs_current(vmcs) == 0,
1460 	       "test vmptrld with valid vmcs region");
1461 }
1462 
1463 static void test_vmptrst(void)
1464 {
1465 	int ret;
1466 	struct vmcs *vmcs1, *vmcs2;
1467 
1468 	vmcs1 = alloc_page();
1469 	init_vmcs(&vmcs1);
1470 	ret = vmcs_save(&vmcs2);
1471 	report((!ret) && (vmcs1 == vmcs2), "test vmptrst");
1472 }
1473 
1474 struct vmx_ctl_msr {
1475 	const char *name;
1476 	u32 index, true_index;
1477 	u32 default1;
1478 } vmx_ctl_msr[] = {
1479 	{ "MSR_IA32_VMX_PINBASED_CTLS", MSR_IA32_VMX_PINBASED_CTLS,
1480 	  MSR_IA32_VMX_TRUE_PIN, 0x16 },
1481 	{ "MSR_IA32_VMX_PROCBASED_CTLS", MSR_IA32_VMX_PROCBASED_CTLS,
1482 	  MSR_IA32_VMX_TRUE_PROC, 0x401e172 },
1483 	{ "MSR_IA32_VMX_PROCBASED_CTLS2", MSR_IA32_VMX_PROCBASED_CTLS2,
1484 	  MSR_IA32_VMX_PROCBASED_CTLS2, 0 },
1485 	{ "MSR_IA32_VMX_EXIT_CTLS", MSR_IA32_VMX_EXIT_CTLS,
1486 	  MSR_IA32_VMX_TRUE_EXIT, 0x36dff },
1487 	{ "MSR_IA32_VMX_ENTRY_CTLS", MSR_IA32_VMX_ENTRY_CTLS,
1488 	  MSR_IA32_VMX_TRUE_ENTRY, 0x11ff },
1489 };
1490 
1491 static void test_vmx_caps(void)
1492 {
1493 	u64 val, default1, fixed0, fixed1;
1494 	union vmx_ctrl_msr ctrl, true_ctrl;
1495 	unsigned int n;
1496 	bool ok;
1497 
1498 	printf("\nTest suite: VMX capability reporting\n");
1499 
1500 	report((basic.revision & (1ul << 31)) == 0 &&
1501 	       basic.size > 0 && basic.size <= 4096 &&
1502 	       (basic.type == 0 || basic.type == 6) &&
1503 	       basic.reserved1 == 0 && basic.reserved2 == 0,
1504 	       "MSR_IA32_VMX_BASIC");
1505 
1506 	val = rdmsr(MSR_IA32_VMX_MISC);
1507 	report((!(ctrl_cpu_rev[1].clr & CPU_URG) || val & (1ul << 5)) &&
1508 	       ((val >> 16) & 0x1ff) <= 256 &&
1509 	       (val & 0x80007e00) == 0,
1510 	       "MSR_IA32_VMX_MISC");
1511 
1512 	for (n = 0; n < ARRAY_SIZE(vmx_ctl_msr); n++) {
1513 		ctrl.val = rdmsr(vmx_ctl_msr[n].index);
1514 		default1 = vmx_ctl_msr[n].default1;
1515 		ok = (ctrl.set & default1) == default1;
1516 		ok = ok && (ctrl.set & ~ctrl.clr) == 0;
1517 		if (ok && basic.ctrl) {
1518 			true_ctrl.val = rdmsr(vmx_ctl_msr[n].true_index);
1519 			ok = ctrl.clr == true_ctrl.clr;
1520 			ok = ok && ctrl.set == (true_ctrl.set | default1);
1521 		}
1522 		report(ok, "%s", vmx_ctl_msr[n].name);
1523 	}
1524 
1525 	fixed0 = rdmsr(MSR_IA32_VMX_CR0_FIXED0);
1526 	fixed1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1);
1527 	report(((fixed0 ^ fixed1) & ~fixed1) == 0,
1528 	       "MSR_IA32_VMX_IA32_VMX_CR0_FIXED0/1");
1529 
1530 	fixed0 = rdmsr(MSR_IA32_VMX_CR4_FIXED0);
1531 	fixed1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
1532 	report(((fixed0 ^ fixed1) & ~fixed1) == 0,
1533 	       "MSR_IA32_VMX_IA32_VMX_CR4_FIXED0/1");
1534 
1535 	val = rdmsr(MSR_IA32_VMX_VMCS_ENUM);
1536 	report((val & VMCS_FIELD_INDEX_MASK) >= 0x2a &&
1537 	       (val & 0xfffffffffffffc01Ull) == 0,
1538 	       "MSR_IA32_VMX_VMCS_ENUM");
1539 
1540 	val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
1541 	report((val & 0xfffff07ef98cbebeUll) == 0,
1542 	       "MSR_IA32_VMX_EPT_VPID_CAP");
1543 }
1544 
1545 /* This function can only be called in guest */
1546 static void __attribute__((__used__)) hypercall(u32 hypercall_no)
1547 {
1548 	u64 val = 0;
1549 	val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT;
1550 	hypercall_field = val;
1551 	asm volatile("vmcall\n\t");
1552 }
1553 
1554 static bool is_hypercall(void)
1555 {
1556 	ulong reason, hyper_bit;
1557 
1558 	reason = vmcs_read(EXI_REASON) & 0xff;
1559 	hyper_bit = hypercall_field & HYPERCALL_BIT;
1560 	if (reason == VMX_VMCALL && hyper_bit)
1561 		return true;
1562 	return false;
1563 }
1564 
1565 static int handle_hypercall(void)
1566 {
1567 	ulong hypercall_no;
1568 
1569 	hypercall_no = hypercall_field & HYPERCALL_MASK;
1570 	hypercall_field = 0;
1571 	switch (hypercall_no) {
1572 	case HYPERCALL_VMEXIT:
1573 		return VMX_TEST_VMEXIT;
1574 	case HYPERCALL_VMABORT:
1575 		return VMX_TEST_VMABORT;
1576 	case HYPERCALL_VMSKIP:
1577 		return VMX_TEST_VMSKIP;
1578 	default:
1579 		printf("ERROR : Invalid hypercall number : %ld\n", hypercall_no);
1580 	}
1581 	return VMX_TEST_EXIT;
1582 }
1583 
1584 static void continue_abort(void)
1585 {
1586 	assert(!in_guest);
1587 	printf("Host was here when guest aborted:\n");
1588 	dump_stack();
1589 	longjmp(abort_target, 1);
1590 	abort();
1591 }
1592 
1593 void __abort_test(void)
1594 {
1595 	if (in_guest)
1596 		hypercall(HYPERCALL_VMABORT);
1597 	else
1598 		longjmp(abort_target, 1);
1599 	abort();
1600 }
1601 
1602 static void continue_skip(void)
1603 {
1604 	assert(!in_guest);
1605 	longjmp(abort_target, 1);
1606 	abort();
1607 }
1608 
1609 void test_skip(const char *msg)
1610 {
1611 	printf("%s skipping test: %s\n", in_guest ? "Guest" : "Host", msg);
1612 	if (in_guest)
1613 		hypercall(HYPERCALL_VMABORT);
1614 	else
1615 		longjmp(abort_target, 1);
1616 	abort();
1617 }
1618 
1619 static int exit_handler(void)
1620 {
1621 	int ret;
1622 
1623 	current->exits++;
1624 	regs.rflags = vmcs_read(GUEST_RFLAGS);
1625 	if (is_hypercall())
1626 		ret = handle_hypercall();
1627 	else
1628 		ret = current->exit_handler();
1629 	vmcs_write(GUEST_RFLAGS, regs.rflags);
1630 
1631 	return ret;
1632 }
1633 
1634 /*
1635  * Called if vmlaunch or vmresume fails.
1636  *	@early    - failure due to "VMX controls and host-state area" (26.2)
1637  *	@vmlaunch - was this a vmlaunch or vmresume
1638  *	@rflags   - host rflags
1639  */
1640 static int
1641 entry_failure_handler(struct vmentry_failure *failure)
1642 {
1643 	if (current->entry_failure_handler)
1644 		return current->entry_failure_handler(failure);
1645 	else
1646 		return VMX_TEST_EXIT;
1647 }
1648 
1649 /*
1650  * Tries to enter the guest. Returns true if entry succeeded. Otherwise,
1651  * populates @failure.
1652  */
1653 static void vmx_enter_guest(struct vmentry_failure *failure)
1654 {
1655 	failure->early = 0;
1656 
1657 	in_guest = 1;
1658 	asm volatile (
1659 		"mov %[HOST_RSP], %%rdi\n\t"
1660 		"vmwrite %%rsp, %%rdi\n\t"
1661 		LOAD_GPR_C
1662 		"cmpb $0, %[launched]\n\t"
1663 		"jne 1f\n\t"
1664 		"vmlaunch\n\t"
1665 		"jmp 2f\n\t"
1666 		"1: "
1667 		"vmresume\n\t"
1668 		"2: "
1669 		SAVE_GPR_C
1670 		"pushf\n\t"
1671 		"pop %%rdi\n\t"
1672 		"mov %%rdi, %[failure_flags]\n\t"
1673 		"movl $1, %[failure_early]\n\t"
1674 		"jmp 3f\n\t"
1675 		"vmx_return:\n\t"
1676 		SAVE_GPR_C
1677 		"3: \n\t"
1678 		: [failure_early]"+m"(failure->early),
1679 		  [failure_flags]"=m"(failure->flags)
1680 		: [launched]"m"(launched), [HOST_RSP]"i"(HOST_RSP)
1681 		: "rdi", "memory", "cc"
1682 	);
1683 	in_guest = 0;
1684 
1685 	failure->vmlaunch = !launched;
1686 	failure->instr = launched ? "vmresume" : "vmlaunch";
1687 }
1688 
1689 static int vmx_run(void)
1690 {
1691 	while (1) {
1692 		u32 ret;
1693 		bool entered;
1694 		struct vmentry_failure failure;
1695 
1696 		vmx_enter_guest(&failure);
1697 		entered = !failure.early &&
1698 			  !(vmcs_read(EXI_REASON) & VMX_ENTRY_FAILURE);
1699 
1700 		if (entered) {
1701 			/*
1702 			 * VMCS isn't in "launched" state if there's been any
1703 			 * entry failure (early or otherwise).
1704 			 */
1705 			launched = 1;
1706 			ret = exit_handler();
1707 		} else {
1708 			ret = entry_failure_handler(&failure);
1709 		}
1710 
1711 		switch (ret) {
1712 		case VMX_TEST_RESUME:
1713 			continue;
1714 		case VMX_TEST_VMEXIT:
1715 			guest_finished = 1;
1716 			return 0;
1717 		case VMX_TEST_EXIT:
1718 			break;
1719 		default:
1720 			printf("ERROR : Invalid %s_handler return val %d.\n",
1721 			       entered ? "exit" : "entry_failure",
1722 			       ret);
1723 			break;
1724 		}
1725 
1726 		if (entered)
1727 			print_vmexit_info();
1728 		else
1729 			print_vmentry_failure_info(&failure);
1730 		abort();
1731 	}
1732 }
1733 
1734 static void run_teardown_step(struct test_teardown_step *step)
1735 {
1736 	step->func(step->data);
1737 }
1738 
1739 static int test_run(struct vmx_test *test)
1740 {
1741 	int r;
1742 
1743 	/* Validate V2 interface. */
1744 	if (test->v2) {
1745 		int ret = 0;
1746 		if (test->init || test->guest_main || test->exit_handler ||
1747 		    test->syscall_handler) {
1748 			report(0, "V2 test cannot specify V1 callbacks.");
1749 			ret = 1;
1750 		}
1751 		if (ret)
1752 			return ret;
1753 	}
1754 
1755 	if (test->name == NULL)
1756 		test->name = "(no name)";
1757 	if (vmx_on()) {
1758 		printf("%s : vmxon failed.\n", __func__);
1759 		return 1;
1760 	}
1761 
1762 	init_vmcs(&(test->vmcs));
1763 	/* Directly call test->init is ok here, init_vmcs has done
1764 	   vmcs init, vmclear and vmptrld*/
1765 	if (test->init && test->init(test->vmcs) != VMX_TEST_START)
1766 		goto out;
1767 	teardown_count = 0;
1768 	v2_guest_main = NULL;
1769 	test->exits = 0;
1770 	current = test;
1771 	regs = test->guest_regs;
1772 	vmcs_write(GUEST_RFLAGS, regs.rflags | X86_EFLAGS_FIXED);
1773 	launched = 0;
1774 	guest_finished = 0;
1775 	printf("\nTest suite: %s\n", test->name);
1776 
1777 	r = setjmp(abort_target);
1778 	if (r) {
1779 		assert(!in_guest);
1780 		goto out;
1781 	}
1782 
1783 
1784 	if (test->v2)
1785 		test->v2();
1786 	else
1787 		vmx_run();
1788 
1789 	while (teardown_count > 0)
1790 		run_teardown_step(&teardown_steps[--teardown_count]);
1791 
1792 	if (launched && !guest_finished)
1793 		report(0, "Guest didn't run to completion.");
1794 
1795 out:
1796 	if (vmx_off()) {
1797 		printf("%s : vmxoff failed.\n", __func__);
1798 		return 1;
1799 	}
1800 	return 0;
1801 }
1802 
1803 /*
1804  * Add a teardown step. Executed after the test's main function returns.
1805  * Teardown steps executed in reverse order.
1806  */
1807 void test_add_teardown(test_teardown_func func, void *data)
1808 {
1809 	struct test_teardown_step *step;
1810 
1811 	TEST_ASSERT_MSG(teardown_count < MAX_TEST_TEARDOWN_STEPS,
1812 			"There are already %d teardown steps.",
1813 			teardown_count);
1814 	step = &teardown_steps[teardown_count++];
1815 	step->func = func;
1816 	step->data = data;
1817 }
1818 
1819 /*
1820  * Set the target of the first enter_guest call. Can only be called once per
1821  * test. Must be called before first enter_guest call.
1822  */
1823 void test_set_guest(test_guest_func func)
1824 {
1825 	assert(current->v2);
1826 	TEST_ASSERT_MSG(!v2_guest_main, "Already set guest func.");
1827 	v2_guest_main = func;
1828 }
1829 
1830 static void check_for_guest_termination(void)
1831 {
1832 	if (is_hypercall()) {
1833 		int ret;
1834 
1835 		ret = handle_hypercall();
1836 		switch (ret) {
1837 		case VMX_TEST_VMEXIT:
1838 			guest_finished = 1;
1839 			break;
1840 		case VMX_TEST_VMABORT:
1841 			continue_abort();
1842 			break;
1843 		case VMX_TEST_VMSKIP:
1844 			continue_skip();
1845 			break;
1846 		default:
1847 			printf("ERROR : Invalid handle_hypercall return %d.\n",
1848 			       ret);
1849 			abort();
1850 		}
1851 	}
1852 }
1853 
1854 #define        ABORT_ON_EARLY_VMENTRY_FAIL     0x1
1855 #define        ABORT_ON_INVALID_GUEST_STATE    0x2
1856 
1857 /*
1858  * Enters the guest (or launches it for the first time). Error to call once the
1859  * guest has returned (i.e., run past the end of its guest() function).
1860  */
1861 static void __enter_guest(u8 abort_flag, struct vmentry_failure *failure)
1862 {
1863 	TEST_ASSERT_MSG(v2_guest_main,
1864 			"Never called test_set_guest_func!");
1865 
1866 	TEST_ASSERT_MSG(!guest_finished,
1867 			"Called enter_guest() after guest returned.");
1868 
1869 	vmx_enter_guest(failure);
1870 	if ((abort_flag & ABORT_ON_EARLY_VMENTRY_FAIL && failure->early) ||
1871 	    (abort_flag & ABORT_ON_INVALID_GUEST_STATE &&
1872 	    vmcs_read(EXI_REASON) & VMX_ENTRY_FAILURE)) {
1873 
1874 		print_vmentry_failure_info(failure);
1875 		abort();
1876 	}
1877 
1878 	if (!failure->early && !(vmcs_read(EXI_REASON) & VMX_ENTRY_FAILURE)) {
1879 		launched = 1;
1880 		check_for_guest_termination();
1881 	}
1882 }
1883 
1884 void enter_guest_with_bad_controls(void)
1885 {
1886 	struct vmentry_failure failure = {0};
1887 
1888 	TEST_ASSERT_MSG(v2_guest_main,
1889 			"Never called test_set_guest_func!");
1890 
1891 	TEST_ASSERT_MSG(!guest_finished,
1892 			"Called enter_guest() after guest returned.");
1893 
1894 	__enter_guest(ABORT_ON_INVALID_GUEST_STATE, &failure);
1895 	report(failure.early, "failure occurred early");
1896 	report((failure.flags & VMX_ENTRY_FLAGS) == X86_EFLAGS_ZF,
1897                "FLAGS set correctly");
1898 	report(vmcs_read(VMX_INST_ERROR) == VMXERR_ENTRY_INVALID_CONTROL_FIELD,
1899 	       "VM-Inst Error # is %d (VM entry with invalid control field(s))",
1900 	       VMXERR_ENTRY_INVALID_CONTROL_FIELD);
1901 
1902 	/*
1903 	 * This if statement shouldn't fire, as the entire premise of this
1904 	 * function is that VM entry is expected to fail, rather than succeed
1905 	 * and execute to termination. However, if the VM entry does
1906 	 * unexpectedly succeed, it's nice to check whether the guest has
1907 	 * terminated, to reduce the number of error messages.
1908 	 */
1909 	if (!failure.early)
1910 		check_for_guest_termination();
1911 }
1912 
1913 void enter_guest(void)
1914 {
1915 	struct vmentry_failure failure = {0};
1916 
1917 	__enter_guest(ABORT_ON_EARLY_VMENTRY_FAIL |
1918 		      ABORT_ON_INVALID_GUEST_STATE, &failure);
1919 }
1920 
1921 void enter_guest_with_invalid_guest_state(void)
1922 {
1923 	struct vmentry_failure failure = {0};
1924 
1925 	__enter_guest(ABORT_ON_EARLY_VMENTRY_FAIL, &failure);
1926 }
1927 
1928 extern struct vmx_test vmx_tests[];
1929 
1930 static bool
1931 test_wanted(const char *name, const char *filters[], int filter_count)
1932 {
1933 	int i;
1934 	bool positive = false;
1935 	bool match = false;
1936 	char clean_name[strlen(name) + 1];
1937 	char *c;
1938 	const char *n;
1939 
1940 	/* Replace spaces with underscores. */
1941 	n = name;
1942 	c = &clean_name[0];
1943 	do *c++ = (*n == ' ') ? '_' : *n;
1944 	while (*n++);
1945 
1946 	for (i = 0; i < filter_count; i++) {
1947 		const char *filter = filters[i];
1948 
1949 		if (filter[0] == '-') {
1950 			if (simple_glob(clean_name, filter + 1))
1951 				return false;
1952 		} else {
1953 			positive = true;
1954 			match |= simple_glob(clean_name, filter);
1955 		}
1956 	}
1957 
1958 	if (!positive || match) {
1959 		matched++;
1960 		return true;
1961 	} else {
1962 		return false;
1963 	}
1964 }
1965 
1966 int main(int argc, const char *argv[])
1967 {
1968 	int i = 0;
1969 
1970 	setup_vm();
1971 	smp_init();
1972 	hypercall_field = 0;
1973 
1974 	/* We want xAPIC mode to test MMIO passthrough from L1 (us) to L2.  */
1975 	reset_apic();
1976 
1977 	argv++;
1978 	argc--;
1979 
1980 	if (!this_cpu_has(X86_FEATURE_VMX)) {
1981 		printf("WARNING: vmx not supported, add '-cpu host'\n");
1982 		goto exit;
1983 	}
1984 	init_bsp_vmx();
1985 	if (test_wanted("test_vmx_feature_control", argv, argc)) {
1986 		/* Sets MSR_IA32_FEATURE_CONTROL to 0x5 */
1987 		if (test_vmx_feature_control() != 0)
1988 			goto exit;
1989 	} else {
1990 		enable_vmx();
1991 	}
1992 
1993 	if (test_wanted("test_vmxon", argv, argc)) {
1994 		/* Enables VMX */
1995 		if (test_vmxon() != 0)
1996 			goto exit;
1997 	} else {
1998 		if (vmx_on()) {
1999 			report(0, "vmxon");
2000 			goto exit;
2001 		}
2002 	}
2003 
2004 	if (test_wanted("test_vmptrld", argv, argc))
2005 		test_vmptrld();
2006 	if (test_wanted("test_vmclear", argv, argc))
2007 		test_vmclear();
2008 	if (test_wanted("test_vmptrst", argv, argc))
2009 		test_vmptrst();
2010 	if (test_wanted("test_vmwrite_vmread", argv, argc))
2011 		test_vmwrite_vmread();
2012 	if (test_wanted("test_vmcs_high", argv, argc))
2013 		test_vmcs_high();
2014 	if (test_wanted("test_vmcs_lifecycle", argv, argc))
2015 		test_vmcs_lifecycle();
2016 	if (test_wanted("test_vmx_caps", argv, argc))
2017 		test_vmx_caps();
2018 
2019 	/* Balance vmxon from test_vmxon. */
2020 	vmx_off();
2021 
2022 	for (; vmx_tests[i].name != NULL; i++) {
2023 		if (!test_wanted(vmx_tests[i].name, argv, argc))
2024 			continue;
2025 		if (test_run(&vmx_tests[i]))
2026 			goto exit;
2027 	}
2028 
2029 	if (!matched)
2030 		report(matched, "command line didn't match any tests!");
2031 
2032 exit:
2033 	return report_summary();
2034 }
2035