xref: /kvm-unit-tests/x86/vmx.c (revision e0e5509bc97838de4d74a15aa7137d61d0b7a162)
1 /*
2  * x86/vmx.c : Framework for testing nested virtualization
3  *	This is a framework to test nested VMX for KVM, which
4  * 	started as a project of GSoC 2013. All test cases should
5  *	be located in x86/vmx_tests.c and framework related
6  *	functions should be in this file.
7  *
8  * How to write test cases?
9  *	Add callbacks of test suite in variant "vmx_tests". You can
10  *	write:
11  *		1. init function used for initializing test suite
12  *		2. main function for codes running in L2 guest,
13  *		3. exit_handler to handle vmexit of L2 to L1
14  *		4. syscall handler to handle L2 syscall vmexit
15  *		5. vmenter fail handler to handle direct failure of vmenter
16  *		6. guest_regs is loaded when vmenter and saved when
17  *			vmexit, you can read and set it in exit_handler
 *	If no special function is needed for a test suite, use
 *	corresponding basic_* functions as callback. More handlers
20  *	can be added to "vmx_tests", see details of "struct vmx_test"
21  *	and function test_run().
22  *
23  * Currently, vmx test framework only set up one VCPU and one
24  * concurrent guest test environment with same paging for L2 and
25  * L1. For usage of EPT, only 1:1 mapped paging is used from VFN
26  * to PFN.
27  *
28  * Author : Arthur Chunqi Li <yzt356@gmail.com>
29  */
30 
31 #include "libcflat.h"
32 #include "processor.h"
33 #include "alloc_page.h"
34 #include "vm.h"
35 #include "desc.h"
36 #include "vmx.h"
37 #include "msr.h"
38 #include "smp.h"
39 #include "apic.h"
40 
/* Framework-wide state; non-static, so visible to other translation units. */
u64 *vmxon_region;	/* passed to VMCLEAR as a deliberately wrong operand in test_vmclear() */
struct vmcs *vmcs_root;	/* used by test_vmclear() as the "valid VMCS" operand */
u32 vpid_cnt;		/* pre-incremented by init_vmcs_ctrl() to produce the VPID field value */
void *guest_stack, *guest_syscall_stack;
/* Control values written into the VMCS by init_vmcs_ctrl()/init_vmcs_host(). */
u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
struct regs regs;	/* register values dumped by print_vmexit_info() */

/* Test whose callbacks guest_main() and syscall_handler() dispatch to. */
struct vmx_test *current;
49 
/* Capacity of teardown_steps[]. */
#define MAX_TEST_TEARDOWN_STEPS 10

/* One teardown callback together with the opaque pointer stored next to it. */
struct test_teardown_step {
	test_teardown_func func;
	void *data;
};

/*
 * Presumably the number of valid entries in teardown_steps[]; the code that
 * fills and drains the array is outside this part of the file -- TODO confirm.
 */
static int teardown_count;
static struct test_teardown_step teardown_steps[MAX_TEST_TEARDOWN_STEPS];

/* Called by guest_main() instead of current->guest_main when current->v2 is set. */
static test_guest_func v2_guest_main;
61 
u64 hypercall_field;
bool launched;
static int matched;
static int guest_finished;
static int in_guest;

/*
 * VMX capability values.  In this file, basic.revision and basic.size are
 * used when preparing and copying VMCS regions, ctrl_cpu_rev[] .set/.clr
 * constrain ctrl_cpu[1] in init_vmcs_ctrl(), and ept_vpid.val is tested by
 * the ept_*_supported(), ept_sync() and vpid_sync() helpers.
 * NOTE(review): presumably populated from the VMX capability MSRs by code
 * outside this view -- confirm.
 */
union vmx_basic basic;
union vmx_ctrl_msr ctrl_pin_rev;
union vmx_ctrl_msr ctrl_cpu_rev[2];
union vmx_ctrl_msr ctrl_exit_rev;
union vmx_ctrl_msr ctrl_enter_rev;
union vmx_ept_vpid  ept_vpid;
74 
/* Descriptor-table pointers whose .base values init_vmcs_host() writes to the VMCS. */
extern struct descriptor_table_ptr gdt64_desc;
extern struct descriptor_table_ptr idt_descr;
extern struct descriptor_table_ptr tss_descr;
extern void *vmx_return;
/* Labels defined by the top-level asm() blocks in this file. */
extern void *entry_sysenter;
extern void *guest_entry;

/* Test stage counter; access it through vmx_{set,get,inc}_test_stage(). */
static volatile u32 stage;

static jmp_buf abort_target;
85 
/*
 * Descriptor of one VMCS field for the VMWRITE/VMREAD tests.
 *	@mask : bits of the field that are compared; applied to both the
 *		written pattern and the value read back
 *	@encoding : field encoding passed to vmcs_write()/vmcs_read_checking()
 */
struct vmcs_field {
	u64 mask;
	u64 encoding;
};

/* GENMASK_ULL() over bits [_bits - 1 : 0]. */
#define MASK(_bits) GENMASK_ULL((_bits) - 1, 0)
/* MASK() sized to the bit width of unsigned long. */
#define MASK_NATURAL MASK(sizeof(unsigned long) * 8)
93 
/*
 * Every VMCS field exercised by set_all_vmcs_fields() and
 * __check_all_vmcs_fields(), paired with the mask of bits that are compared.
 * Entries are grouped by mask width.
 */
static struct vmcs_field vmcs_fields[] = {
	/* Fields compared over 16 bits. */
	{ MASK(16), VPID },
	{ MASK(16), PINV },
	{ MASK(16), EPTP_IDX },

	{ MASK(16), GUEST_SEL_ES },
	{ MASK(16), GUEST_SEL_CS },
	{ MASK(16), GUEST_SEL_SS },
	{ MASK(16), GUEST_SEL_DS },
	{ MASK(16), GUEST_SEL_FS },
	{ MASK(16), GUEST_SEL_GS },
	{ MASK(16), GUEST_SEL_LDTR },
	{ MASK(16), GUEST_SEL_TR },
	{ MASK(16), GUEST_INT_STATUS },

	{ MASK(16), HOST_SEL_ES },
	{ MASK(16), HOST_SEL_CS },
	{ MASK(16), HOST_SEL_SS },
	{ MASK(16), HOST_SEL_DS },
	{ MASK(16), HOST_SEL_FS },
	{ MASK(16), HOST_SEL_GS },
	{ MASK(16), HOST_SEL_TR },

	/* Fields compared over 64 bits. */
	{ MASK(64), IO_BITMAP_A },
	{ MASK(64), IO_BITMAP_B },
	{ MASK(64), MSR_BITMAP },
	{ MASK(64), EXIT_MSR_ST_ADDR },
	{ MASK(64), EXIT_MSR_LD_ADDR },
	{ MASK(64), ENTER_MSR_LD_ADDR },
	{ MASK(64), VMCS_EXEC_PTR },
	{ MASK(64), TSC_OFFSET },
	{ MASK(64), APIC_VIRT_ADDR },
	{ MASK(64), APIC_ACCS_ADDR },
	{ MASK(64), EPTP },

	{ MASK(64), INFO_PHYS_ADDR },

	{ MASK(64), VMCS_LINK_PTR },
	{ MASK(64), GUEST_DEBUGCTL },
	{ MASK(64), GUEST_EFER },
	{ MASK(64), GUEST_PAT },
	{ MASK(64), GUEST_PERF_GLOBAL_CTRL },
	{ MASK(64), GUEST_PDPTE },

	{ MASK(64), HOST_PAT },
	{ MASK(64), HOST_EFER },
	{ MASK(64), HOST_PERF_GLOBAL_CTRL },

	/* Fields compared over 32 bits (or less, for the access-rights fields). */
	{ MASK(32), PIN_CONTROLS },
	{ MASK(32), CPU_EXEC_CTRL0 },
	{ MASK(32), EXC_BITMAP },
	{ MASK(32), PF_ERROR_MASK },
	{ MASK(32), PF_ERROR_MATCH },
	{ MASK(32), CR3_TARGET_COUNT },
	{ MASK(32), EXI_CONTROLS },
	{ MASK(32), EXI_MSR_ST_CNT },
	{ MASK(32), EXI_MSR_LD_CNT },
	{ MASK(32), ENT_CONTROLS },
	{ MASK(32), ENT_MSR_LD_CNT },
	{ MASK(32), ENT_INTR_INFO },
	{ MASK(32), ENT_INTR_ERROR },
	{ MASK(32), ENT_INST_LEN },
	{ MASK(32), TPR_THRESHOLD },
	{ MASK(32), CPU_EXEC_CTRL1 },

	{ MASK(32), VMX_INST_ERROR },
	{ MASK(32), EXI_REASON },
	{ MASK(32), EXI_INTR_INFO },
	{ MASK(32), EXI_INTR_ERROR },
	{ MASK(32), IDT_VECT_INFO },
	{ MASK(32), IDT_VECT_ERROR },
	{ MASK(32), EXI_INST_LEN },
	{ MASK(32), EXI_INST_INFO },

	{ MASK(32), GUEST_LIMIT_ES },
	{ MASK(32), GUEST_LIMIT_CS },
	{ MASK(32), GUEST_LIMIT_SS },
	{ MASK(32), GUEST_LIMIT_DS },
	{ MASK(32), GUEST_LIMIT_FS },
	{ MASK(32), GUEST_LIMIT_GS },
	{ MASK(32), GUEST_LIMIT_LDTR },
	{ MASK(32), GUEST_LIMIT_TR },
	{ MASK(32), GUEST_LIMIT_GDTR },
	{ MASK(32), GUEST_LIMIT_IDTR },
	/*
	 * The access-rights fields use explicit masks narrower than MASK(32);
	 * presumably the excluded bits are reserved or not reliably preserved
	 * -- TODO confirm.  Note that the CS mask differs from the others.
	 */
	{ 0x1d0ff, GUEST_AR_ES },
	{ 0x1f0ff, GUEST_AR_CS },
	{ 0x1d0ff, GUEST_AR_SS },
	{ 0x1d0ff, GUEST_AR_DS },
	{ 0x1d0ff, GUEST_AR_FS },
	{ 0x1d0ff, GUEST_AR_GS },
	{ 0x1d0ff, GUEST_AR_LDTR },
	{ 0x1d0ff, GUEST_AR_TR },
	{ MASK(32), GUEST_INTR_STATE },
	{ MASK(32), GUEST_ACTV_STATE },
	{ MASK(32), GUEST_SMBASE },
	{ MASK(32), GUEST_SYSENTER_CS },
	{ MASK(32), PREEMPT_TIMER_VALUE },

	{ MASK(32), HOST_SYSENTER_CS },

	/* Fields compared over the width of unsigned long. */
	{ MASK_NATURAL, CR0_MASK },
	{ MASK_NATURAL, CR4_MASK },
	{ MASK_NATURAL, CR0_READ_SHADOW },
	{ MASK_NATURAL, CR4_READ_SHADOW },
	{ MASK_NATURAL, CR3_TARGET_0 },
	{ MASK_NATURAL, CR3_TARGET_1 },
	{ MASK_NATURAL, CR3_TARGET_2 },
	{ MASK_NATURAL, CR3_TARGET_3 },

	{ MASK_NATURAL, EXI_QUALIFICATION },
	{ MASK_NATURAL, IO_RCX },
	{ MASK_NATURAL, IO_RSI },
	{ MASK_NATURAL, IO_RDI },
	{ MASK_NATURAL, IO_RIP },
	{ MASK_NATURAL, GUEST_LINEAR_ADDRESS },

	{ MASK_NATURAL, GUEST_CR0 },
	{ MASK_NATURAL, GUEST_CR3 },
	{ MASK_NATURAL, GUEST_CR4 },
	{ MASK_NATURAL, GUEST_BASE_ES },
	{ MASK_NATURAL, GUEST_BASE_CS },
	{ MASK_NATURAL, GUEST_BASE_SS },
	{ MASK_NATURAL, GUEST_BASE_DS },
	{ MASK_NATURAL, GUEST_BASE_FS },
	{ MASK_NATURAL, GUEST_BASE_GS },
	{ MASK_NATURAL, GUEST_BASE_LDTR },
	{ MASK_NATURAL, GUEST_BASE_TR },
	{ MASK_NATURAL, GUEST_BASE_GDTR },
	{ MASK_NATURAL, GUEST_BASE_IDTR },
	{ MASK_NATURAL, GUEST_DR7 },
	{ MASK_NATURAL, GUEST_RSP },
	{ MASK_NATURAL, GUEST_RIP },
	{ MASK_NATURAL, GUEST_RFLAGS },
	{ MASK_NATURAL, GUEST_PENDING_DEBUG },
	{ MASK_NATURAL, GUEST_SYSENTER_ESP },
	{ MASK_NATURAL, GUEST_SYSENTER_EIP },

	{ MASK_NATURAL, HOST_CR0 },
	{ MASK_NATURAL, HOST_CR3 },
	{ MASK_NATURAL, HOST_CR4 },
	{ MASK_NATURAL, HOST_BASE_FS },
	{ MASK_NATURAL, HOST_BASE_GS },
	{ MASK_NATURAL, HOST_BASE_TR },
	{ MASK_NATURAL, HOST_BASE_GDTR },
	{ MASK_NATURAL, HOST_BASE_IDTR },
	{ MASK_NATURAL, HOST_SYSENTER_ESP },
	{ MASK_NATURAL, HOST_SYSENTER_EIP },
	{ MASK_NATURAL, HOST_RSP },
	{ MASK_NATURAL, HOST_RIP },
};
244 
/*
 * Values of the 2-bit type component that vmcs_field_type() extracts from a
 * field encoding.  VMCS_FIELD_TYPES is the number of distinct types.
 */
enum vmcs_field_type {
	VMCS_FIELD_TYPE_CONTROL = 0,
	VMCS_FIELD_TYPE_READ_ONLY_DATA = 1,
	VMCS_FIELD_TYPE_GUEST = 2,
	VMCS_FIELD_TYPE_HOST = 3,
	VMCS_FIELD_TYPES,
};
252 
253 static inline int vmcs_field_type(struct vmcs_field *f)
254 {
255 	return (f->encoding >> VMCS_FIELD_TYPE_SHIFT) & 0x3;
256 }
257 
258 static int vmcs_field_readonly(struct vmcs_field *f)
259 {
260 	u64 ia32_vmx_misc;
261 
262 	ia32_vmx_misc = rdmsr(MSR_IA32_VMX_MISC);
263 	return !(ia32_vmx_misc & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS) &&
264 		(vmcs_field_type(f) == VMCS_FIELD_TYPE_READ_ONLY_DATA);
265 }
266 
267 static inline u64 vmcs_field_value(struct vmcs_field *f, u8 cookie)
268 {
269 	u64 value;
270 
271 	/* Incorporate the cookie and the field encoding into the value. */
272 	value = cookie;
273 	value |= (f->encoding << 8);
274 	value |= 0xdeadbeefull << 32;
275 
276 	return value & f->mask;
277 }
278 
279 static void set_vmcs_field(struct vmcs_field *f, u8 cookie)
280 {
281 	vmcs_write(f->encoding, vmcs_field_value(f, cookie));
282 }
283 
284 static bool check_vmcs_field(struct vmcs_field *f, u8 cookie, u32 *max_index)
285 {
286 	u64 expected;
287 	u64 actual;
288 	u32 index;
289 	int ret;
290 
291 	if (f->encoding == VMX_INST_ERROR) {
292 		printf("Skipping volatile field %lx\n", f->encoding);
293 		return true;
294 	}
295 
296 	ret = vmcs_read_checking(f->encoding, &actual);
297 	assert(!(ret & X86_EFLAGS_CF));
298 	/* Skip VMCS fields that aren't recognized by the CPU */
299 	if (ret & X86_EFLAGS_ZF)
300 		return true;
301 
302 	if (max_index) {
303 		index = f->encoding & VMCS_FIELD_INDEX_MASK;
304 		if (index > *max_index)
305 			*max_index = index;
306 	}
307 
308 	if (vmcs_field_readonly(f)) {
309 		printf("Skipping read-only field %lx\n", f->encoding);
310 		return true;
311 	}
312 
313 	expected = vmcs_field_value(f, cookie);
314 	actual &= f->mask;
315 
316 	if (expected == actual)
317 		return true;
318 
319 	printf("FAIL: VMWRITE/VMREAD %lx (expected: %lx, actual: %lx)\n",
320 	       f->encoding, (unsigned long) expected, (unsigned long) actual);
321 
322 	return false;
323 }
324 
325 static void set_all_vmcs_fields(u8 cookie)
326 {
327 	int i;
328 
329 	for (i = 0; i < ARRAY_SIZE(vmcs_fields); i++)
330 		set_vmcs_field(&vmcs_fields[i], cookie);
331 }
332 
333 static bool __check_all_vmcs_fields(u8 cookie, u32 *max_index)
334 {
335 	bool pass = true;
336 	int i;
337 
338 	for (i = 0; i < ARRAY_SIZE(vmcs_fields); i++) {
339 		if (!check_vmcs_field(&vmcs_fields[i], cookie, max_index))
340 			pass = false;
341 	}
342 
343 	return pass;
344 }
345 
346 static bool check_all_vmcs_fields(u8 cookie)
347 {
348 	return __check_all_vmcs_fields(cookie, NULL);
349 }
350 
351 static void test_vmwrite_vmread(void)
352 {
353 	struct vmcs *vmcs = alloc_page();
354 	u32 vmcs_enum_max, max_index = 0;
355 
356 	memset(vmcs, 0, PAGE_SIZE);
357 	vmcs->hdr.revision_id = basic.revision;
358 	assert(!vmcs_clear(vmcs));
359 	assert(!make_vmcs_current(vmcs));
360 
361 	set_all_vmcs_fields(0x42);
362 	report("VMWRITE/VMREAD", __check_all_vmcs_fields(0x42, &max_index));
363 
364 	vmcs_enum_max = rdmsr(MSR_IA32_VMX_VMCS_ENUM) & VMCS_FIELD_INDEX_MASK;
365 	report("VMX_VMCS_ENUM.MAX_INDEX expected: %x, actual: %x",
366 		vmcs_enum_max == max_index, max_index, vmcs_enum_max);
367 
368 	assert(!vmcs_clear(vmcs));
369 	free_page(vmcs);
370 }
371 
372 static void test_vmcs_high(void)
373 {
374 	struct vmcs *vmcs = alloc_page();
375 
376 	memset(vmcs, 0, PAGE_SIZE);
377 	vmcs->hdr.revision_id = basic.revision;
378 	assert(!vmcs_clear(vmcs));
379 	assert(!make_vmcs_current(vmcs));
380 
381 	vmcs_write(TSC_OFFSET, 0x0123456789ABCDEFull);
382 	report("VMREAD TSC_OFFSET after VMWRITE TSC_OFFSET",
383 	       vmcs_read(TSC_OFFSET) == 0x0123456789ABCDEFull);
384 	report("VMREAD TSC_OFFSET_HI after VMWRITE TSC_OFFSET",
385 	       vmcs_read(TSC_OFFSET_HI) == 0x01234567ull);
386 	vmcs_write(TSC_OFFSET_HI, 0x76543210ul);
387 	report("VMREAD TSC_OFFSET_HI after VMWRITE TSC_OFFSET_HI",
388 	       vmcs_read(TSC_OFFSET_HI) == 0x76543210ul);
389 	report("VMREAD TSC_OFFSET after VMWRITE TSC_OFFSET_HI",
390 	       vmcs_read(TSC_OFFSET) == 0x7654321089ABCDEFull);
391 
392 	assert(!vmcs_clear(vmcs));
393 	free_page(vmcs);
394 }
395 
396 static void test_vmcs_lifecycle(void)
397 {
398 	struct vmcs *vmcs[2] = {};
399 	int i;
400 
401 	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
402 		vmcs[i] = alloc_page();
403 		memset(vmcs[i], 0, PAGE_SIZE);
404 		vmcs[i]->hdr.revision_id = basic.revision;
405 	}
406 
407 #define VMPTRLD(_i) do { \
408 	assert(_i < ARRAY_SIZE(vmcs)); \
409 	assert(!make_vmcs_current(vmcs[_i])); \
410 	printf("VMPTRLD VMCS%d\n", (_i)); \
411 } while (0)
412 
413 #define VMCLEAR(_i) do { \
414 	assert(_i < ARRAY_SIZE(vmcs)); \
415 	assert(!vmcs_clear(vmcs[_i])); \
416 	printf("VMCLEAR VMCS%d\n", (_i)); \
417 } while (0)
418 
419 	VMCLEAR(0);
420 	VMPTRLD(0);
421 	set_all_vmcs_fields(0);
422 	report("current:VMCS0 active:[VMCS0]", check_all_vmcs_fields(0));
423 
424 	VMCLEAR(0);
425 	VMPTRLD(0);
426 	report("current:VMCS0 active:[VMCS0]", check_all_vmcs_fields(0));
427 
428 	VMCLEAR(1);
429 	report("current:VMCS0 active:[VMCS0]", check_all_vmcs_fields(0));
430 
431 	VMPTRLD(1);
432 	set_all_vmcs_fields(1);
433 	report("current:VMCS1 active:[VMCS0,VCMS1]", check_all_vmcs_fields(1));
434 
435 	VMPTRLD(0);
436 	report("current:VMCS0 active:[VMCS0,VCMS1]", check_all_vmcs_fields(0));
437 	VMPTRLD(1);
438 	report("current:VMCS1 active:[VMCS0,VCMS1]", check_all_vmcs_fields(1));
439 	VMPTRLD(1);
440 	report("current:VMCS1 active:[VMCS0,VCMS1]", check_all_vmcs_fields(1));
441 
442 	VMCLEAR(0);
443 	report("current:VMCS1 active:[VCMS1]", check_all_vmcs_fields(1));
444 
445 	/* VMPTRLD should not erase VMWRITEs to the current VMCS */
446 	set_all_vmcs_fields(2);
447 	VMPTRLD(1);
448 	report("current:VMCS1 active:[VCMS1]", check_all_vmcs_fields(2));
449 
450 	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
451 		VMCLEAR(i);
452 		free_page(vmcs[i]);
453 	}
454 
455 #undef VMPTRLD
456 #undef VMCLEAR
457 }
458 
459 void vmx_set_test_stage(u32 s)
460 {
461 	barrier();
462 	stage = s;
463 	barrier();
464 }
465 
466 u32 vmx_get_test_stage(void)
467 {
468 	u32 s;
469 
470 	barrier();
471 	s = stage;
472 	barrier();
473 	return s;
474 }
475 
476 void vmx_inc_test_stage(void)
477 {
478 	barrier();
479 	stage++;
480 	barrier();
481 }
482 
/*
 * entry_sysenter : assembly stub whose address init_vmcs_host() writes to
 * HOST_SYSENTER_EIP.
 * It saves the general-purpose registers with SAVE_GPR, passes the low four
 * bits of RAX to syscall_handler() as its first argument (RDI), restores the
 * registers with LOAD_GPR and executes VMRESUME.
 */
asm(
	".align	4, 0x90\n\t"
	".globl	entry_sysenter\n\t"
	"entry_sysenter:\n\t"
	SAVE_GPR
	"	and	$0xf, %rax\n\t"
	"	mov	%rax, %rdi\n\t"
	"	call	syscall_handler\n\t"
	LOAD_GPR
	"	vmresume\n\t"
);
495 
496 static void __attribute__((__used__)) syscall_handler(u64 syscall_no)
497 {
498 	if (current->syscall_handler)
499 		current->syscall_handler(syscall_no);
500 }
501 
/*
 * Printable names indexed by VMX_* exit reason value.  Indices without a
 * designated initializer are NULL; exit_reason_description() reports those
 * as "(unused)".
 */
static const char * const exit_reason_descriptions[] = {
	[VMX_EXC_NMI]		= "VMX_EXC_NMI",
	[VMX_EXTINT]		= "VMX_EXTINT",
	[VMX_TRIPLE_FAULT]	= "VMX_TRIPLE_FAULT",
	[VMX_INIT]		= "VMX_INIT",
	[VMX_SIPI]		= "VMX_SIPI",
	[VMX_SMI_IO]		= "VMX_SMI_IO",
	[VMX_SMI_OTHER]		= "VMX_SMI_OTHER",
	[VMX_INTR_WINDOW]	= "VMX_INTR_WINDOW",
	[VMX_NMI_WINDOW]	= "VMX_NMI_WINDOW",
	[VMX_TASK_SWITCH]	= "VMX_TASK_SWITCH",
	[VMX_CPUID]		= "VMX_CPUID",
	[VMX_GETSEC]		= "VMX_GETSEC",
	[VMX_HLT]		= "VMX_HLT",
	[VMX_INVD]		= "VMX_INVD",
	[VMX_INVLPG]		= "VMX_INVLPG",
	[VMX_RDPMC]		= "VMX_RDPMC",
	[VMX_RDTSC]		= "VMX_RDTSC",
	[VMX_RSM]		= "VMX_RSM",
	[VMX_VMCALL]		= "VMX_VMCALL",
	[VMX_VMCLEAR]		= "VMX_VMCLEAR",
	[VMX_VMLAUNCH]		= "VMX_VMLAUNCH",
	[VMX_VMPTRLD]		= "VMX_VMPTRLD",
	[VMX_VMPTRST]		= "VMX_VMPTRST",
	[VMX_VMREAD]		= "VMX_VMREAD",
	[VMX_VMRESUME]		= "VMX_VMRESUME",
	[VMX_VMWRITE]		= "VMX_VMWRITE",
	[VMX_VMXOFF]		= "VMX_VMXOFF",
	[VMX_VMXON]		= "VMX_VMXON",
	[VMX_CR]		= "VMX_CR",
	[VMX_DR]		= "VMX_DR",
	[VMX_IO]		= "VMX_IO",
	[VMX_RDMSR]		= "VMX_RDMSR",
	[VMX_WRMSR]		= "VMX_WRMSR",
	[VMX_FAIL_STATE]	= "VMX_FAIL_STATE",
	[VMX_FAIL_MSR]		= "VMX_FAIL_MSR",
	[VMX_MWAIT]		= "VMX_MWAIT",
	[VMX_MTF]		= "VMX_MTF",
	[VMX_MONITOR]		= "VMX_MONITOR",
	[VMX_PAUSE]		= "VMX_PAUSE",
	[VMX_FAIL_MCHECK]	= "VMX_FAIL_MCHECK",
	[VMX_TPR_THRESHOLD]	= "VMX_TPR_THRESHOLD",
	[VMX_APIC_ACCESS]	= "VMX_APIC_ACCESS",
	[VMX_EOI_INDUCED]	= "VMX_EOI_INDUCED",
	[VMX_GDTR_IDTR]		= "VMX_GDTR_IDTR",
	[VMX_LDTR_TR]		= "VMX_LDTR_TR",
	[VMX_EPT_VIOLATION]	= "VMX_EPT_VIOLATION",
	[VMX_EPT_MISCONFIG]	= "VMX_EPT_MISCONFIG",
	[VMX_INVEPT]		= "VMX_INVEPT",
	[VMX_PREEMPT]		= "VMX_PREEMPT",
	[VMX_INVVPID]		= "VMX_INVVPID",
	[VMX_WBINVD]		= "VMX_WBINVD",
	[VMX_XSETBV]		= "VMX_XSETBV",
	[VMX_APIC_WRITE]	= "VMX_APIC_WRITE",
	[VMX_RDRAND]		= "VMX_RDRAND",
	[VMX_INVPCID]		= "VMX_INVPCID",
	[VMX_VMFUNC]		= "VMX_VMFUNC",
	[VMX_RDSEED]		= "VMX_RDSEED",
	[VMX_PML_FULL]		= "VMX_PML_FULL",
	[VMX_XSAVES]		= "VMX_XSAVES",
	[VMX_XRSTORS]		= "VMX_XRSTORS",
};
564 
565 const char *exit_reason_description(u64 reason)
566 {
567 	if (reason >= ARRAY_SIZE(exit_reason_descriptions))
568 		return "(unknown)";
569 	return exit_reason_descriptions[reason] ? : "(unused)";
570 }
571 
572 void print_vmexit_info()
573 {
574 	u64 guest_rip, guest_rsp;
575 	ulong reason = vmcs_read(EXI_REASON) & 0xff;
576 	ulong exit_qual = vmcs_read(EXI_QUALIFICATION);
577 	guest_rip = vmcs_read(GUEST_RIP);
578 	guest_rsp = vmcs_read(GUEST_RSP);
579 	printf("VMEXIT info:\n");
580 	printf("\tvmexit reason = %ld\n", reason);
581 	printf("\texit qualification = %#lx\n", exit_qual);
582 	printf("\tBit 31 of reason = %lx\n", (vmcs_read(EXI_REASON) >> 31) & 1);
583 	printf("\tguest_rip = %#lx\n", guest_rip);
584 	printf("\tRAX=%#lx    RBX=%#lx    RCX=%#lx    RDX=%#lx\n",
585 		regs.rax, regs.rbx, regs.rcx, regs.rdx);
586 	printf("\tRSP=%#lx    RBP=%#lx    RSI=%#lx    RDI=%#lx\n",
587 		guest_rsp, regs.rbp, regs.rsi, regs.rdi);
588 	printf("\tR8 =%#lx    R9 =%#lx    R10=%#lx    R11=%#lx\n",
589 		regs.r8, regs.r9, regs.r10, regs.r11);
590 	printf("\tR12=%#lx    R13=%#lx    R14=%#lx    R15=%#lx\n",
591 		regs.r12, regs.r13, regs.r14, regs.r15);
592 }
593 
594 void
595 print_vmentry_failure_info(struct vmentry_failure *failure) {
596 	if (failure->early) {
597 		printf("Early %s failure: ", failure->instr);
598 		switch (failure->flags & VMX_ENTRY_FLAGS) {
599 		case X86_EFLAGS_CF:
600 			printf("current-VMCS pointer is not valid.\n");
601 			break;
602 		case X86_EFLAGS_ZF:
603 			printf("error number is %ld. See Intel 30.4.\n",
604 			       vmcs_read(VMX_INST_ERROR));
605 			break;
606 		default:
607 			printf("unexpected flags %lx!\n", failure->flags);
608 		}
609 	} else {
610 		u64 reason = vmcs_read(EXI_REASON);
611 		u64 qual = vmcs_read(EXI_QUALIFICATION);
612 
613 		printf("Non-early %s failure (reason=%#lx, qual=%#lx): ",
614 			failure->instr, reason, qual);
615 
616 		switch (reason & 0xff) {
617 		case VMX_FAIL_STATE:
618 			printf("invalid guest state\n");
619 			break;
620 		case VMX_FAIL_MSR:
621 			printf("MSR loading\n");
622 			break;
623 		case VMX_FAIL_MCHECK:
624 			printf("machine-check event\n");
625 			break;
626 		default:
627 			printf("unexpected basic exit reason %ld\n",
628 			       reason & 0xff);
629 		}
630 
631 		if (!(reason & VMX_ENTRY_FAILURE))
632 			printf("\tVMX_ENTRY_FAILURE BIT NOT SET!\n");
633 
634 		if (reason & 0x7fff0000)
635 			printf("\tRESERVED BITS SET!\n");
636 	}
637 }
638 
639 /*
640  * VMCLEAR should ensures all VMCS state is flushed to the VMCS
641  * region in memory.
642  */
643 static void test_vmclear_flushing(void)
644 {
645 	struct vmcs *vmcs[3] = {};
646 	int i;
647 
648 	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
649 		vmcs[i] = alloc_page();
650 		memset(vmcs[i], 0, PAGE_SIZE);
651 	}
652 
653 	vmcs[0]->hdr.revision_id = basic.revision;
654 	assert(!vmcs_clear(vmcs[0]));
655 	assert(!make_vmcs_current(vmcs[0]));
656 	set_all_vmcs_fields(0x86);
657 
658 	assert(!vmcs_clear(vmcs[0]));
659 	memcpy(vmcs[1], vmcs[0], basic.size);
660 	assert(!make_vmcs_current(vmcs[1]));
661 	report("test vmclear flush (current VMCS)", check_all_vmcs_fields(0x86));
662 
663 	set_all_vmcs_fields(0x87);
664 	assert(!make_vmcs_current(vmcs[0]));
665 	assert(!vmcs_clear(vmcs[1]));
666 	memcpy(vmcs[2], vmcs[1], basic.size);
667 	assert(!make_vmcs_current(vmcs[2]));
668 	report("test vmclear flush (!current VMCS)", check_all_vmcs_fields(0x87));
669 
670 	for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
671 		assert(!vmcs_clear(vmcs[i]));
672 		free_page(vmcs[i]);
673 	}
674 }
675 
676 static void test_vmclear(void)
677 {
678 	struct vmcs *tmp_root;
679 	int width = cpuid_maxphyaddr();
680 
681 	/*
682 	 * Note- The tests below do not necessarily have a
683 	 * valid VMCS, but that's ok since the invalid vmcs
684 	 * is only used for a specific test and is discarded
685 	 * without touching its contents
686 	 */
687 
688 	/* Unaligned page access */
689 	tmp_root = (struct vmcs *)((intptr_t)vmcs_root + 1);
690 	report("test vmclear with unaligned vmcs",
691 	       vmcs_clear(tmp_root) == 1);
692 
693 	/* gpa bits beyond physical address width are set*/
694 	tmp_root = (struct vmcs *)((intptr_t)vmcs_root |
695 				   ((u64)1 << (width+1)));
696 	report("test vmclear with vmcs address bits set beyond physical address width",
697 	       vmcs_clear(tmp_root) == 1);
698 
699 	/* Pass VMXON region */
700 	tmp_root = (struct vmcs *)vmxon_region;
701 	report("test vmclear with vmxon region",
702 	       vmcs_clear(tmp_root) == 1);
703 
704 	/* Valid VMCS */
705 	report("test vmclear with valid vmcs region", vmcs_clear(vmcs_root) == 0);
706 
707 	test_vmclear_flushing();
708 }
709 
710 static void __attribute__((__used__)) guest_main(void)
711 {
712 	if (current->v2)
713 		v2_guest_main();
714 	else
715 		current->guest_main();
716 }
717 
718 /* guest_entry */
719 asm(
720 	".align	4, 0x90\n\t"
721 	".globl	entry_guest\n\t"
722 	"guest_entry:\n\t"
723 	"	call guest_main\n\t"
724 	"	mov $1, %edi\n\t"
725 	"	call hypercall\n\t"
726 );
727 
728 /* EPT paging structure related functions */
729 /* split_large_ept_entry: Split a 2M/1G large page into 512 smaller PTEs.
730 		@ptep : large page table entry to split
731 		@level : level of ptep (2 or 3)
732  */
733 static void split_large_ept_entry(unsigned long *ptep, int level)
734 {
735 	unsigned long *new_pt;
736 	unsigned long gpa;
737 	unsigned long pte;
738 	unsigned long prototype;
739 	int i;
740 
741 	pte = *ptep;
742 	assert(pte & EPT_PRESENT);
743 	assert(pte & EPT_LARGE_PAGE);
744 	assert(level == 2 || level == 3);
745 
746 	new_pt = alloc_page();
747 	assert(new_pt);
748 	memset(new_pt, 0, PAGE_SIZE);
749 
750 	prototype = pte & ~EPT_ADDR_MASK;
751 	if (level == 2)
752 		prototype &= ~EPT_LARGE_PAGE;
753 
754 	gpa = pte & EPT_ADDR_MASK;
755 	for (i = 0; i < EPT_PGDIR_ENTRIES; i++) {
756 		new_pt[i] = prototype | gpa;
757 		gpa += 1ul << EPT_LEVEL_SHIFT(level - 1);
758 	}
759 
760 	pte &= ~EPT_LARGE_PAGE;
761 	pte &= ~EPT_ADDR_MASK;
762 	pte |= virt_to_phys(new_pt);
763 
764 	*ptep = pte;
765 }
766 
767 /* install_ept_entry : Install a page to a given level in EPT
768 		@pml4 : addr of pml4 table
769 		@pte_level : level of PTE to set
770 		@guest_addr : physical address of guest
771 		@pte : pte value to set
772 		@pt_page : address of page table, NULL for a new page
773  */
774 void install_ept_entry(unsigned long *pml4,
775 		int pte_level,
776 		unsigned long guest_addr,
777 		unsigned long pte,
778 		unsigned long *pt_page)
779 {
780 	int level;
781 	unsigned long *pt = pml4;
782 	unsigned offset;
783 
784 	/* EPT only uses 48 bits of GPA. */
785 	assert(guest_addr < (1ul << 48));
786 
787 	for (level = EPT_PAGE_LEVEL; level > pte_level; --level) {
788 		offset = (guest_addr >> EPT_LEVEL_SHIFT(level))
789 				& EPT_PGDIR_MASK;
790 		if (!(pt[offset] & (EPT_PRESENT))) {
791 			unsigned long *new_pt = pt_page;
792 			if (!new_pt)
793 				new_pt = alloc_page();
794 			else
795 				pt_page = 0;
796 			memset(new_pt, 0, PAGE_SIZE);
797 			pt[offset] = virt_to_phys(new_pt)
798 					| EPT_RA | EPT_WA | EPT_EA;
799 		} else if (pt[offset] & EPT_LARGE_PAGE)
800 			split_large_ept_entry(&pt[offset], level);
801 		pt = phys_to_virt(pt[offset] & EPT_ADDR_MASK);
802 	}
803 	offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) & EPT_PGDIR_MASK;
804 	pt[offset] = pte;
805 }
806 
807 /* Map a page, @perm is the permission of the page */
808 void install_ept(unsigned long *pml4,
809 		unsigned long phys,
810 		unsigned long guest_addr,
811 		u64 perm)
812 {
813 	install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0);
814 }
815 
816 /* Map a 1G-size page */
817 void install_1g_ept(unsigned long *pml4,
818 		unsigned long phys,
819 		unsigned long guest_addr,
820 		u64 perm)
821 {
822 	install_ept_entry(pml4, 3, guest_addr,
823 			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
824 }
825 
826 /* Map a 2M-size page */
827 void install_2m_ept(unsigned long *pml4,
828 		unsigned long phys,
829 		unsigned long guest_addr,
830 		u64 perm)
831 {
832 	install_ept_entry(pml4, 2, guest_addr,
833 			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
834 }
835 
836 /* setup_ept_range : Setup a range of 1:1 mapped page to EPT paging structure.
837 		@start : start address of guest page
838 		@len : length of address to be mapped
839 		@map_1g : whether 1G page map is used
840 		@map_2m : whether 2M page map is used
841 		@perm : permission for every page
842  */
843 void setup_ept_range(unsigned long *pml4, unsigned long start,
844 		     unsigned long len, int map_1g, int map_2m, u64 perm)
845 {
846 	u64 phys = start;
847 	u64 max = (u64)len + (u64)start;
848 
849 	if (map_1g) {
850 		while (phys + PAGE_SIZE_1G <= max) {
851 			install_1g_ept(pml4, phys, phys, perm);
852 			phys += PAGE_SIZE_1G;
853 		}
854 	}
855 	if (map_2m) {
856 		while (phys + PAGE_SIZE_2M <= max) {
857 			install_2m_ept(pml4, phys, phys, perm);
858 			phys += PAGE_SIZE_2M;
859 		}
860 	}
861 	while (phys + PAGE_SIZE <= max) {
862 		install_ept(pml4, phys, phys, perm);
863 		phys += PAGE_SIZE;
864 	}
865 }
866 
867 /* get_ept_pte : Get the PTE of a given level in EPT,
868     @level == 1 means get the latest level*/
869 bool get_ept_pte(unsigned long *pml4, unsigned long guest_addr, int level,
870 		unsigned long *pte)
871 {
872 	int l;
873 	unsigned long *pt = pml4, iter_pte;
874 	unsigned offset;
875 
876 	assert(level >= 1 && level <= 4);
877 
878 	for (l = EPT_PAGE_LEVEL; ; --l) {
879 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
880 		iter_pte = pt[offset];
881 		if (l == level)
882 			break;
883 		if (l < 4 && (iter_pte & EPT_LARGE_PAGE))
884 			return false;
885 		if (!(iter_pte & (EPT_PRESENT)))
886 			return false;
887 		pt = (unsigned long *)(iter_pte & EPT_ADDR_MASK);
888 	}
889 	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
890 	if (pte)
891 		*pte = pt[offset];
892 	return true;
893 }
894 
895 static void clear_ept_ad_pte(unsigned long *pml4, unsigned long guest_addr)
896 {
897 	int l;
898 	unsigned long *pt = pml4;
899 	u64 pte;
900 	unsigned offset;
901 
902 	for (l = EPT_PAGE_LEVEL; ; --l) {
903 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
904 		pt[offset] &= ~(EPT_ACCESS_FLAG|EPT_DIRTY_FLAG);
905 		pte = pt[offset];
906 		if (l == 1 || (l < 4 && (pte & EPT_LARGE_PAGE)))
907 			break;
908 		pt = (unsigned long *)(pte & EPT_ADDR_MASK);
909 	}
910 }
911 
912 /* clear_ept_ad : Clear EPT A/D bits for the page table walk and the
913    final GPA of a guest address.  */
914 void clear_ept_ad(unsigned long *pml4, u64 guest_cr3,
915 		  unsigned long guest_addr)
916 {
917 	int l;
918 	unsigned long *pt = (unsigned long *)guest_cr3, gpa;
919 	u64 pte, offset_in_page;
920 	unsigned offset;
921 
922 	for (l = EPT_PAGE_LEVEL; ; --l) {
923 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
924 
925 		clear_ept_ad_pte(pml4, (u64) &pt[offset]);
926 		pte = pt[offset];
927 		if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK)))
928 			break;
929 		if (!(pte & PT_PRESENT_MASK))
930 			return;
931 		pt = (unsigned long *)(pte & PT_ADDR_MASK);
932 	}
933 
934 	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
935 	offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1);
936 	gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page);
937 	clear_ept_ad_pte(pml4, gpa);
938 }
939 
940 /* check_ept_ad : Check the content of EPT A/D bits for the page table
941    walk and the final GPA of a guest address.  */
942 void check_ept_ad(unsigned long *pml4, u64 guest_cr3,
943 		  unsigned long guest_addr, int expected_gpa_ad,
944 		  int expected_pt_ad)
945 {
946 	int l;
947 	unsigned long *pt = (unsigned long *)guest_cr3, gpa;
948 	u64 ept_pte, pte, offset_in_page;
949 	unsigned offset;
950 	bool bad_pt_ad = false;
951 
952 	for (l = EPT_PAGE_LEVEL; ; --l) {
953 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
954 
955 		if (!get_ept_pte(pml4, (u64) &pt[offset], 1, &ept_pte)) {
956 			printf("EPT - guest level %d page table is not mapped.\n", l);
957 			return;
958 		}
959 
960 		if (!bad_pt_ad) {
961 			bad_pt_ad |= (ept_pte & (EPT_ACCESS_FLAG|EPT_DIRTY_FLAG)) != expected_pt_ad;
962 			if (bad_pt_ad)
963 				report("EPT - guest level %d page table A=%d/D=%d",
964 				       false, l,
965 				       !!(expected_pt_ad & EPT_ACCESS_FLAG),
966 				       !!(expected_pt_ad & EPT_DIRTY_FLAG));
967 		}
968 
969 		pte = pt[offset];
970 		if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK)))
971 			break;
972 		if (!(pte & PT_PRESENT_MASK))
973 			return;
974 		pt = (unsigned long *)(pte & PT_ADDR_MASK);
975 	}
976 
977 	if (!bad_pt_ad)
978 		report("EPT - guest page table structures A=%d/D=%d",
979 		       true,
980 		       !!(expected_pt_ad & EPT_ACCESS_FLAG),
981 		       !!(expected_pt_ad & EPT_DIRTY_FLAG));
982 
983 	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
984 	offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1);
985 	gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page);
986 
987 	if (!get_ept_pte(pml4, gpa, 1, &ept_pte)) {
988 		report("EPT - guest physical address is not mapped", false);
989 		return;
990 	}
991 	report("EPT - guest physical address A=%d/D=%d",
992 	       (ept_pte & (EPT_ACCESS_FLAG|EPT_DIRTY_FLAG)) == expected_gpa_ad,
993 	       !!(expected_gpa_ad & EPT_ACCESS_FLAG),
994 	       !!(expected_gpa_ad & EPT_DIRTY_FLAG));
995 }
996 
997 
998 void ept_sync(int type, u64 eptp)
999 {
1000 	switch (type) {
1001 	case INVEPT_SINGLE:
1002 		if (ept_vpid.val & EPT_CAP_INVEPT_SINGLE) {
1003 			invept(INVEPT_SINGLE, eptp);
1004 			break;
1005 		}
1006 		/* else fall through */
1007 	case INVEPT_GLOBAL:
1008 		if (ept_vpid.val & EPT_CAP_INVEPT_ALL) {
1009 			invept(INVEPT_GLOBAL, eptp);
1010 			break;
1011 		}
1012 		/* else fall through */
1013 	default:
1014 		printf("WARNING: invept is not supported!\n");
1015 	}
1016 }
1017 
1018 void set_ept_pte(unsigned long *pml4, unsigned long guest_addr,
1019 		 int level, u64 pte_val)
1020 {
1021 	int l;
1022 	unsigned long *pt = pml4;
1023 	unsigned offset;
1024 
1025 	assert(level >= 1 && level <= 4);
1026 
1027 	for (l = EPT_PAGE_LEVEL; ; --l) {
1028 		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
1029 		if (l == level)
1030 			break;
1031 		assert(pt[offset] & EPT_PRESENT);
1032 		pt = (unsigned long *)(pt[offset] & EPT_ADDR_MASK);
1033 	}
1034 	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
1035 	pt[offset] = pte_val;
1036 }
1037 
1038 bool ept_2m_supported(void)
1039 {
1040 	return ept_vpid.val & EPT_CAP_2M_PAGE;
1041 }
1042 
1043 bool ept_1g_supported(void)
1044 {
1045 	return ept_vpid.val & EPT_CAP_1G_PAGE;
1046 }
1047 
1048 bool ept_huge_pages_supported(int level)
1049 {
1050 	if (level == 2)
1051 		return ept_2m_supported();
1052 	else if (level == 3)
1053 		return ept_1g_supported();
1054 	else
1055 		return false;
1056 }
1057 
1058 bool ept_execute_only_supported(void)
1059 {
1060 	return ept_vpid.val & EPT_CAP_WT;
1061 }
1062 
1063 bool ept_ad_bits_supported(void)
1064 {
1065 	return ept_vpid.val & EPT_CAP_AD_FLAG;
1066 }
1067 
1068 void vpid_sync(int type, u16 vpid)
1069 {
1070 	switch(type) {
1071 	case INVVPID_CONTEXT_GLOBAL:
1072 		if (ept_vpid.val & VPID_CAP_INVVPID_CXTGLB) {
1073 			invvpid(INVVPID_CONTEXT_GLOBAL, vpid, 0);
1074 			break;
1075 		}
1076 	case INVVPID_ALL:
1077 		if (ept_vpid.val & VPID_CAP_INVVPID_ALL) {
1078 			invvpid(INVVPID_ALL, vpid, 0);
1079 			break;
1080 		}
1081 	default:
1082 		printf("WARNING: invvpid is not supported\n");
1083 	}
1084 }
1085 
1086 static void init_vmcs_ctrl(void)
1087 {
1088 	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
1089 	/* 26.2.1.1 */
1090 	vmcs_write(PIN_CONTROLS, ctrl_pin);
1091 	/* Disable VMEXIT of IO instruction */
1092 	vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
1093 	if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
1094 		ctrl_cpu[1] = (ctrl_cpu[1] | ctrl_cpu_rev[1].set) &
1095 			ctrl_cpu_rev[1].clr;
1096 		vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
1097 	}
1098 	vmcs_write(CR3_TARGET_COUNT, 0);
1099 	vmcs_write(VPID, ++vpid_cnt);
1100 }
1101 
/*
 * Fill the host-state area of the current VMCS, plus the entry/exit
 * controls and a few miscellaneous fields.
 *
 * Host control registers and EFER are snapshotted from the running CPU.
 * HOST_RIP is set to the vmx_return label, so that is where execution
 * continues after a VM exit.
 */
static void init_vmcs_host(void)
{
	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
	/* 26.2.1.2 */
	vmcs_write(HOST_EFER, rdmsr(MSR_EFER));

	/* 26.2.1.3 */
	vmcs_write(ENT_CONTROLS, ctrl_enter);
	vmcs_write(EXI_CONTROLS, ctrl_exit);

	/* 26.2.2 */
	vmcs_write(HOST_CR0, read_cr0());
	vmcs_write(HOST_CR3, read_cr3());
	vmcs_write(HOST_CR4, read_cr4());
	vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
	vmcs_write(HOST_SYSENTER_CS,  KERNEL_CS);

	/* 26.2.3 */
	vmcs_write(HOST_SEL_CS, KERNEL_CS);
	vmcs_write(HOST_SEL_SS, KERNEL_DS);
	vmcs_write(HOST_SEL_DS, KERNEL_DS);
	vmcs_write(HOST_SEL_ES, KERNEL_DS);
	vmcs_write(HOST_SEL_FS, KERNEL_DS);
	vmcs_write(HOST_SEL_GS, KERNEL_DS);
	vmcs_write(HOST_SEL_TR, TSS_MAIN);
	vmcs_write(HOST_BASE_TR, tss_descr.base);
	vmcs_write(HOST_BASE_GDTR, gdt64_desc.base);
	vmcs_write(HOST_BASE_IDTR, idt_descr.base);
	vmcs_write(HOST_BASE_FS, 0);
	vmcs_write(HOST_BASE_GS, 0);

	/* Set other vmcs area */
	vmcs_write(PF_ERROR_MASK, 0);
	vmcs_write(PF_ERROR_MATCH, 0);
	/* All-ones link pointer (both halves). */
	vmcs_write(VMCS_LINK_PTR, ~0ul);
	vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
	vmcs_write(HOST_RIP, (u64)(&vmx_return));
}
1140 
/*
 * Fill the guest-state area of the current VMCS.
 *
 * The guest starts from a copy of the host's CR0/CR3/CR4 and EFER, adjusted
 * according to ctrl_enter, with flat segments, the host's GDT/IDT/TSS,
 * GUEST_RIP at guest_entry and GUEST_RSP near the top of guest_stack.
 */
static void init_vmcs_guest(void)
{
	/* 26.3 CHECKING AND LOADING GUEST STATE */
	ulong guest_cr0, guest_cr4, guest_cr3;
	/* 26.3.1.1 */
	guest_cr0 = read_cr0();
	guest_cr4 = read_cr4();
	guest_cr3 = read_cr3();
	/* A 64-bit guest is given paging and PAE. */
	if (ctrl_enter & ENT_GUEST_64) {
		guest_cr0 |= X86_CR0_PG;
		guest_cr4 |= X86_CR4_PAE;
	}
	/* PCIDE is cleared when the guest is not entered in 64-bit mode. */
	if ((ctrl_enter & ENT_GUEST_64) == 0)
		guest_cr4 &= (~X86_CR4_PCIDE);
	/* Paging is always paired with protected mode. */
	if (guest_cr0 & X86_CR0_PG)
		guest_cr0 |= X86_CR0_PE;
	vmcs_write(GUEST_CR0, guest_cr0);
	vmcs_write(GUEST_CR3, guest_cr3);
	vmcs_write(GUEST_CR4, guest_cr4);
	vmcs_write(GUEST_SYSENTER_CS,  KERNEL_CS);
	vmcs_write(GUEST_SYSENTER_ESP,
		(u64)(guest_syscall_stack + PAGE_SIZE - 1));
	vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
	vmcs_write(GUEST_DR7, 0);
	vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));

	/* 26.3.1.2 */
	vmcs_write(GUEST_SEL_CS, KERNEL_CS);
	vmcs_write(GUEST_SEL_SS, KERNEL_DS);
	vmcs_write(GUEST_SEL_DS, KERNEL_DS);
	vmcs_write(GUEST_SEL_ES, KERNEL_DS);
	vmcs_write(GUEST_SEL_FS, KERNEL_DS);
	vmcs_write(GUEST_SEL_GS, KERNEL_DS);
	vmcs_write(GUEST_SEL_TR, TSS_MAIN);
	vmcs_write(GUEST_SEL_LDTR, 0);

	vmcs_write(GUEST_BASE_CS, 0);
	vmcs_write(GUEST_BASE_ES, 0);
	vmcs_write(GUEST_BASE_SS, 0);
	vmcs_write(GUEST_BASE_DS, 0);
	vmcs_write(GUEST_BASE_FS, 0);
	vmcs_write(GUEST_BASE_GS, 0);
	vmcs_write(GUEST_BASE_TR, tss_descr.base);
	vmcs_write(GUEST_BASE_LDTR, 0);

	vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
	vmcs_write(GUEST_LIMIT_TR, tss_descr.limit);

	/* Access-rights values for each segment register. */
	vmcs_write(GUEST_AR_CS, 0xa09b);
	vmcs_write(GUEST_AR_DS, 0xc093);
	vmcs_write(GUEST_AR_ES, 0xc093);
	vmcs_write(GUEST_AR_FS, 0xc093);
	vmcs_write(GUEST_AR_GS, 0xc093);
	vmcs_write(GUEST_AR_SS, 0xc093);
	vmcs_write(GUEST_AR_LDTR, 0x82);
	vmcs_write(GUEST_AR_TR, 0x8b);

	/* 26.3.1.3 */
	vmcs_write(GUEST_BASE_GDTR, gdt64_desc.base);
	vmcs_write(GUEST_BASE_IDTR, idt_descr.base);
	vmcs_write(GUEST_LIMIT_GDTR, gdt64_desc.limit);
	vmcs_write(GUEST_LIMIT_IDTR, idt_descr.limit);

	/* 26.3.1.4 */
	vmcs_write(GUEST_RIP, (u64)(&guest_entry));
	vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1));
	vmcs_write(GUEST_RFLAGS, 0x2);

	/* 26.3.1.5 */
	vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE);
	vmcs_write(GUEST_INTR_STATE, 0);
}
1219 
1220 static int init_vmcs(struct vmcs **vmcs)
1221 {
1222 	*vmcs = alloc_page();
1223 	memset(*vmcs, 0, PAGE_SIZE);
1224 	(*vmcs)->hdr.revision_id = basic.revision;
1225 	/* vmclear first to init vmcs */
1226 	if (vmcs_clear(*vmcs)) {
1227 		printf("%s : vmcs_clear error\n", __func__);
1228 		return 1;
1229 	}
1230 
1231 	if (make_vmcs_current(*vmcs)) {
1232 		printf("%s : make_vmcs_current error\n", __func__);
1233 		return 1;
1234 	}
1235 
1236 	/* All settings to pin/exit/enter/cpu
1237 	   control fields should be placed here */
1238 	ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
1239 	ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
1240 	ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
1241 	/* DIsable IO instruction VMEXIT now */
1242 	ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
1243 	ctrl_cpu[1] = 0;
1244 
1245 	ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
1246 	ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
1247 	ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
1248 	ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;
1249 
1250 	init_vmcs_ctrl();
1251 	init_vmcs_host();
1252 	init_vmcs_guest();
1253 	return 0;
1254 }
1255 
1256 static void init_vmx(void)
1257 {
1258 	ulong fix_cr0_set, fix_cr0_clr;
1259 	ulong fix_cr4_set, fix_cr4_clr;
1260 
1261 	vmxon_region = alloc_page();
1262 	memset(vmxon_region, 0, PAGE_SIZE);
1263 
1264 	vmcs_root = alloc_page();
1265 
1266 	fix_cr0_set =  rdmsr(MSR_IA32_VMX_CR0_FIXED0);
1267 	fix_cr0_clr =  rdmsr(MSR_IA32_VMX_CR0_FIXED1);
1268 	fix_cr4_set =  rdmsr(MSR_IA32_VMX_CR4_FIXED0);
1269 	fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
1270 	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
1271 	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
1272 			: MSR_IA32_VMX_PINBASED_CTLS);
1273 	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
1274 			: MSR_IA32_VMX_EXIT_CTLS);
1275 	ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
1276 			: MSR_IA32_VMX_ENTRY_CTLS);
1277 	ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
1278 			: MSR_IA32_VMX_PROCBASED_CTLS);
1279 	if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0)
1280 		ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
1281 	else
1282 		ctrl_cpu_rev[1].val = 0;
1283 	if ((ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID)) != 0)
1284 		ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
1285 	else
1286 		ept_vpid.val = 0;
1287 
1288 	write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
1289 	write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);
1290 
1291 	*vmxon_region = basic.revision;
1292 
1293 	guest_stack = alloc_page();
1294 	memset(guest_stack, 0, PAGE_SIZE);
1295 	guest_syscall_stack = alloc_page();
1296 	memset(guest_syscall_stack, 0, PAGE_SIZE);
1297 }
1298 
/*
 * Callback for test_for_exception(): enter and immediately leave VMX
 * operation. @data is unused.
 */
static void do_vmxon_off(void *data)
{
	vmx_on();
	vmx_off();
}
1304 
/*
 * Callback for test_for_exception(): attempt to clear
 * MSR_IA32_FEATURE_CONTROL. @data is unused.
 */
static void do_write_feature_control(void *data)
{
	wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
}
1309 
1310 static int test_vmx_feature_control(void)
1311 {
1312 	u64 ia32_feature_control;
1313 	bool vmx_enabled;
1314 
1315 	ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
1316 	vmx_enabled = ((ia32_feature_control & 0x5) == 0x5);
1317 	if ((ia32_feature_control & 0x5) == 0x5) {
1318 		printf("VMX enabled and locked by BIOS\n");
1319 		return 0;
1320 	} else if (ia32_feature_control & 0x1) {
1321 		printf("ERROR: VMX locked out by BIOS!?\n");
1322 		return 1;
1323 	}
1324 
1325 	wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
1326 	report("test vmxon with FEATURE_CONTROL cleared",
1327 	       test_for_exception(GP_VECTOR, &do_vmxon_off, NULL));
1328 
1329 	wrmsr(MSR_IA32_FEATURE_CONTROL, 0x4);
1330 	report("test vmxon without FEATURE_CONTROL lock",
1331 	       test_for_exception(GP_VECTOR, &do_vmxon_off, NULL));
1332 
1333 	wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5);
1334 	vmx_enabled = ((rdmsr(MSR_IA32_FEATURE_CONTROL) & 0x5) == 0x5);
1335 	report("test enable VMX in FEATURE_CONTROL", vmx_enabled);
1336 
1337 	report("test FEATURE_CONTROL lock bit",
1338 	       test_for_exception(GP_VECTOR, &do_write_feature_control, NULL));
1339 
1340 	return !vmx_enabled;
1341 }
1342 
1343 static int test_vmxon(void)
1344 {
1345 	int ret, ret1;
1346 	u64 *tmp_region = vmxon_region;
1347 	int width = cpuid_maxphyaddr();
1348 
1349 	/* Unaligned page access */
1350 	vmxon_region = (u64 *)((intptr_t)vmxon_region + 1);
1351 	ret1 = vmx_on();
1352 	report("test vmxon with unaligned vmxon region", ret1);
1353 	if (!ret1) {
1354 		ret = 1;
1355 		goto out;
1356 	}
1357 
1358 	/* gpa bits beyond physical address width are set*/
1359 	vmxon_region = (u64 *)((intptr_t)tmp_region | ((u64)1 << (width+1)));
1360 	ret1 = vmx_on();
1361 	report("test vmxon with bits set beyond physical address width", ret1);
1362 	if (!ret1) {
1363 		ret = 1;
1364 		goto out;
1365 	}
1366 
1367 	/* invalid revision indentifier */
1368 	vmxon_region = tmp_region;
1369 	*vmxon_region = 0xba9da9;
1370 	ret1 = vmx_on();
1371 	report("test vmxon with invalid revision identifier", ret1);
1372 	if (!ret1) {
1373 		ret = 1;
1374 		goto out;
1375 	}
1376 
1377 	/* and finally a valid region */
1378 	*vmxon_region = basic.revision;
1379 	ret = vmx_on();
1380 	report("test vmxon with valid vmxon region", !ret);
1381 
1382 out:
1383 	return ret;
1384 }
1385 
1386 static void test_vmptrld(void)
1387 {
1388 	struct vmcs *vmcs, *tmp_root;
1389 	int width = cpuid_maxphyaddr();
1390 
1391 	vmcs = alloc_page();
1392 	vmcs->hdr.revision_id = basic.revision;
1393 
1394 	/* Unaligned page access */
1395 	tmp_root = (struct vmcs *)((intptr_t)vmcs + 1);
1396 	report("test vmptrld with unaligned vmcs",
1397 	       make_vmcs_current(tmp_root) == 1);
1398 
1399 	/* gpa bits beyond physical address width are set*/
1400 	tmp_root = (struct vmcs *)((intptr_t)vmcs |
1401 				   ((u64)1 << (width+1)));
1402 	report("test vmptrld with vmcs address bits set beyond physical address width",
1403 	       make_vmcs_current(tmp_root) == 1);
1404 
1405 	/* Pass VMXON region */
1406 	assert(!vmcs_clear(vmcs));
1407 	assert(!make_vmcs_current(vmcs));
1408 	tmp_root = (struct vmcs *)vmxon_region;
1409 	report("test vmptrld with vmxon region",
1410 	       make_vmcs_current(tmp_root) == 1);
1411 	report("test vmptrld with vmxon region vm-instruction error",
1412 	       vmcs_read(VMX_INST_ERROR) == VMXERR_VMPTRLD_VMXON_POINTER);
1413 
1414 	report("test vmptrld with valid vmcs region", make_vmcs_current(vmcs) == 0);
1415 }
1416 
/*
 * Check that vmcs_save() (vmptrst) reports the VMCS that init_vmcs() just
 * made current.
 *
 * init_vmcs() allocates and zeroes the VMCS page itself, so no page is
 * allocated here; allocating one would only leak it when init_vmcs()
 * overwrites the pointer. A failing init_vmcs() fails the test.
 */
static void test_vmptrst(void)
{
	int init_ret, ret;
	struct vmcs *vmcs1 = NULL, *vmcs2 = NULL;

	init_ret = init_vmcs(&vmcs1);
	ret = vmcs_save(&vmcs2);
	report("test vmptrst", !init_ret && !ret && (vmcs1 == vmcs2));
}
1428 
1429 struct vmx_ctl_msr {
1430 	const char *name;
1431 	u32 index, true_index;
1432 	u32 default1;
1433 } vmx_ctl_msr[] = {
1434 	{ "MSR_IA32_VMX_PINBASED_CTLS", MSR_IA32_VMX_PINBASED_CTLS,
1435 	  MSR_IA32_VMX_TRUE_PIN, 0x16 },
1436 	{ "MSR_IA32_VMX_PROCBASED_CTLS", MSR_IA32_VMX_PROCBASED_CTLS,
1437 	  MSR_IA32_VMX_TRUE_PROC, 0x401e172 },
1438 	{ "MSR_IA32_VMX_PROCBASED_CTLS2", MSR_IA32_VMX_PROCBASED_CTLS2,
1439 	  MSR_IA32_VMX_PROCBASED_CTLS2, 0 },
1440 	{ "MSR_IA32_VMX_EXIT_CTLS", MSR_IA32_VMX_EXIT_CTLS,
1441 	  MSR_IA32_VMX_TRUE_EXIT, 0x36dff },
1442 	{ "MSR_IA32_VMX_ENTRY_CTLS", MSR_IA32_VMX_ENTRY_CTLS,
1443 	  MSR_IA32_VMX_TRUE_ENTRY, 0x11ff },
1444 };
1445 
1446 static void test_vmx_caps(void)
1447 {
1448 	u64 val, default1, fixed0, fixed1;
1449 	union vmx_ctrl_msr ctrl, true_ctrl;
1450 	unsigned int n;
1451 	bool ok;
1452 
1453 	printf("\nTest suite: VMX capability reporting\n");
1454 
1455 	report("MSR_IA32_VMX_BASIC",
1456 	       (basic.revision & (1ul << 31)) == 0 &&
1457 	       basic.size > 0 && basic.size <= 4096 &&
1458 	       (basic.type == 0 || basic.type == 6) &&
1459 	       basic.reserved1 == 0 && basic.reserved2 == 0);
1460 
1461 	val = rdmsr(MSR_IA32_VMX_MISC);
1462 	report("MSR_IA32_VMX_MISC",
1463 	       (!(ctrl_cpu_rev[1].clr & CPU_URG) || val & (1ul << 5)) &&
1464 	       ((val >> 16) & 0x1ff) <= 256 &&
1465 	       (val & 0xc0007e00) == 0);
1466 
1467 	for (n = 0; n < ARRAY_SIZE(vmx_ctl_msr); n++) {
1468 		ctrl.val = rdmsr(vmx_ctl_msr[n].index);
1469 		default1 = vmx_ctl_msr[n].default1;
1470 		ok = (ctrl.set & default1) == default1;
1471 		ok = ok && (ctrl.set & ~ctrl.clr) == 0;
1472 		if (ok && basic.ctrl) {
1473 			true_ctrl.val = rdmsr(vmx_ctl_msr[n].true_index);
1474 			ok = ctrl.clr == true_ctrl.clr;
1475 			ok = ok && ctrl.set == (true_ctrl.set | default1);
1476 		}
1477 		report("%s", ok, vmx_ctl_msr[n].name);
1478 	}
1479 
1480 	fixed0 = rdmsr(MSR_IA32_VMX_CR0_FIXED0);
1481 	fixed1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1);
1482 	report("MSR_IA32_VMX_IA32_VMX_CR0_FIXED0/1",
1483 	       ((fixed0 ^ fixed1) & ~fixed1) == 0);
1484 
1485 	fixed0 = rdmsr(MSR_IA32_VMX_CR4_FIXED0);
1486 	fixed1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
1487 	report("MSR_IA32_VMX_IA32_VMX_CR4_FIXED0/1",
1488 	       ((fixed0 ^ fixed1) & ~fixed1) == 0);
1489 
1490 	val = rdmsr(MSR_IA32_VMX_VMCS_ENUM);
1491 	report("MSR_IA32_VMX_VMCS_ENUM",
1492 	       (val & VMCS_FIELD_INDEX_MASK) >= 0x2a &&
1493 	       (val & 0xfffffffffffffc01Ull) == 0);
1494 
1495 	val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
1496 	report("MSR_IA32_VMX_EPT_VPID_CAP",
1497 	       (val & 0xfffff07ef98cbebeUll) == 0);
1498 }
1499 
1500 /* This function can only be called in guest */
1501 static void __attribute__((__used__)) hypercall(u32 hypercall_no)
1502 {
1503 	u64 val = 0;
1504 	val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT;
1505 	hypercall_field = val;
1506 	asm volatile("vmcall\n\t");
1507 }
1508 
1509 static bool is_hypercall(void)
1510 {
1511 	ulong reason, hyper_bit;
1512 
1513 	reason = vmcs_read(EXI_REASON) & 0xff;
1514 	hyper_bit = hypercall_field & HYPERCALL_BIT;
1515 	if (reason == VMX_VMCALL && hyper_bit)
1516 		return true;
1517 	return false;
1518 }
1519 
1520 static int handle_hypercall(void)
1521 {
1522 	ulong hypercall_no;
1523 
1524 	hypercall_no = hypercall_field & HYPERCALL_MASK;
1525 	hypercall_field = 0;
1526 	switch (hypercall_no) {
1527 	case HYPERCALL_VMEXIT:
1528 		return VMX_TEST_VMEXIT;
1529 	case HYPERCALL_VMABORT:
1530 		return VMX_TEST_VMABORT;
1531 	case HYPERCALL_VMSKIP:
1532 		return VMX_TEST_VMSKIP;
1533 	default:
1534 		printf("ERROR : Invalid hypercall number : %ld\n", hypercall_no);
1535 	}
1536 	return VMX_TEST_EXIT;
1537 }
1538 
/*
 * Host-side completion of a guest-requested abort: dump the host stack and
 * jump back to abort_target. Must not be called while in_guest is set.
 */
static void continue_abort(void)
{
	assert(!in_guest);
	printf("Host was here when guest aborted:\n");
	dump_stack();
	longjmp(abort_target, 1);
	/* Only reached if longjmp() unexpectedly returns. */
	abort();
}
1547 
1548 void __abort_test(void)
1549 {
1550 	if (in_guest)
1551 		hypercall(HYPERCALL_VMABORT);
1552 	else
1553 		longjmp(abort_target, 1);
1554 	abort();
1555 }
1556 
/*
 * Host-side completion of a guest-requested skip: jump back to
 * abort_target. Must not be called while in_guest is set.
 */
static void continue_skip(void)
{
	assert(!in_guest);
	longjmp(abort_target, 1);
	/* Only reached if longjmp() unexpectedly returns. */
	abort();
}
1563 
1564 void test_skip(const char *msg)
1565 {
1566 	printf("%s skipping test: %s\n", in_guest ? "Guest" : "Host", msg);
1567 	if (in_guest)
1568 		hypercall(HYPERCALL_VMABORT);
1569 	else
1570 		longjmp(abort_target, 1);
1571 	abort();
1572 }
1573 
1574 static int exit_handler(void)
1575 {
1576 	int ret;
1577 
1578 	current->exits++;
1579 	regs.rflags = vmcs_read(GUEST_RFLAGS);
1580 	if (is_hypercall())
1581 		ret = handle_hypercall();
1582 	else
1583 		ret = current->exit_handler();
1584 	vmcs_write(GUEST_RFLAGS, regs.rflags);
1585 
1586 	return ret;
1587 }
1588 
1589 /*
1590  * Called if vmlaunch or vmresume fails.
1591  *	@early    - failure due to "VMX controls and host-state area" (26.2)
1592  *	@vmlaunch - was this a vmlaunch or vmresume
1593  *	@rflags   - host rflags
1594  */
1595 static int
1596 entry_failure_handler(struct vmentry_failure *failure)
1597 {
1598 	if (current->entry_failure_handler)
1599 		return current->entry_failure_handler(failure);
1600 	else
1601 		return VMX_TEST_EXIT;
1602 }
1603 
/*
 * Tries to enter the guest with vmlaunch (when 'launched' is 0) or vmresume
 * (otherwise) and returns once control is back on the host.
 *
 * Nothing is returned; the outcome is reported through @failure:
 *	early    - 1 if execution fell through the vmlaunch/vmresume
 *		   instruction instead of resuming at vmx_return, else 0
 *	flags    - host RFLAGS captured right after the failed instruction
 *		   (only written when early is 1)
 *	vmlaunch - non-zero if the instruction used was vmlaunch
 *	instr    - name of the instruction used
 *
 * in_guest is set for the duration of the attempt.
 */
static void vmx_enter_guest(struct vmentry_failure *failure)
{
	failure->early = 0;

	in_guest = 1;
	/*
	 * The current stack pointer is stored in the HOST_RSP field so the
	 * host resumes on this stack. LOAD_GPR_C / SAVE_GPR_C presumably
	 * swap the general-purpose registers with the 'regs' global (macros
	 * defined elsewhere -- confirm). vmx_return is the address
	 * programmed into HOST_RIP by init_vmcs_host().
	 */
	asm volatile (
		"mov %[HOST_RSP], %%rdi\n\t"
		"vmwrite %%rsp, %%rdi\n\t"
		LOAD_GPR_C
		"cmpb $0, %[launched]\n\t"
		"jne 1f\n\t"
		"vmlaunch\n\t"
		"jmp 2f\n\t"
		"1: "
		"vmresume\n\t"
		"2: "
		SAVE_GPR_C
		"pushf\n\t"
		"pop %%rdi\n\t"
		"mov %%rdi, %[failure_flags]\n\t"
		"movl $1, %[failure_early]\n\t"
		"jmp 3f\n\t"
		"vmx_return:\n\t"
		SAVE_GPR_C
		"3: \n\t"
		: [failure_early]"+m"(failure->early),
		  [failure_flags]"=m"(failure->flags)
		: [launched]"m"(launched), [HOST_RSP]"i"(HOST_RSP)
		: "rdi", "memory", "cc"
	);
	in_guest = 0;

	failure->vmlaunch = !launched;
	failure->instr = launched ? "vmresume" : "vmlaunch";
}
1643 
1644 static int vmx_run(void)
1645 {
1646 	while (1) {
1647 		u32 ret;
1648 		bool entered;
1649 		struct vmentry_failure failure;
1650 
1651 		vmx_enter_guest(&failure);
1652 		entered = !failure.early &&
1653 			  !(vmcs_read(EXI_REASON) & VMX_ENTRY_FAILURE);
1654 
1655 		if (entered) {
1656 			/*
1657 			 * VMCS isn't in "launched" state if there's been any
1658 			 * entry failure (early or otherwise).
1659 			 */
1660 			launched = 1;
1661 			ret = exit_handler();
1662 		} else {
1663 			ret = entry_failure_handler(&failure);
1664 		}
1665 
1666 		switch (ret) {
1667 		case VMX_TEST_RESUME:
1668 			continue;
1669 		case VMX_TEST_VMEXIT:
1670 			guest_finished = 1;
1671 			return 0;
1672 		case VMX_TEST_EXIT:
1673 			break;
1674 		default:
1675 			printf("ERROR : Invalid %s_handler return val %d.\n",
1676 			       entered ? "exit" : "entry_failure",
1677 			       ret);
1678 			break;
1679 		}
1680 
1681 		if (entered)
1682 			print_vmexit_info();
1683 		else
1684 			print_vmentry_failure_info(&failure);
1685 		abort();
1686 	}
1687 }
1688 
/* Invoke one registered teardown callback with its saved argument. */
static void run_teardown_step(struct test_teardown_step *step)
{
	step->func(step->data);
}
1693 
1694 static int test_run(struct vmx_test *test)
1695 {
1696 	int r;
1697 
1698 	/* Validate V2 interface. */
1699 	if (test->v2) {
1700 		int ret = 0;
1701 		if (test->init || test->guest_main || test->exit_handler ||
1702 		    test->syscall_handler) {
1703 			report("V2 test cannot specify V1 callbacks.", 0);
1704 			ret = 1;
1705 		}
1706 		if (ret)
1707 			return ret;
1708 	}
1709 
1710 	if (test->name == NULL)
1711 		test->name = "(no name)";
1712 	if (vmx_on()) {
1713 		printf("%s : vmxon failed.\n", __func__);
1714 		return 1;
1715 	}
1716 
1717 	init_vmcs(&(test->vmcs));
1718 	/* Directly call test->init is ok here, init_vmcs has done
1719 	   vmcs init, vmclear and vmptrld*/
1720 	if (test->init && test->init(test->vmcs) != VMX_TEST_START)
1721 		goto out;
1722 	teardown_count = 0;
1723 	v2_guest_main = NULL;
1724 	test->exits = 0;
1725 	current = test;
1726 	regs = test->guest_regs;
1727 	vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2);
1728 	launched = 0;
1729 	guest_finished = 0;
1730 	printf("\nTest suite: %s\n", test->name);
1731 
1732 	r = setjmp(abort_target);
1733 	if (r) {
1734 		assert(!in_guest);
1735 		goto out;
1736 	}
1737 
1738 
1739 	if (test->v2)
1740 		test->v2();
1741 	else
1742 		vmx_run();
1743 
1744 	while (teardown_count > 0)
1745 		run_teardown_step(&teardown_steps[--teardown_count]);
1746 
1747 	if (launched && !guest_finished)
1748 		report("Guest didn't run to completion.", 0);
1749 
1750 out:
1751 	if (vmx_off()) {
1752 		printf("%s : vmxoff failed.\n", __func__);
1753 		return 1;
1754 	}
1755 	return 0;
1756 }
1757 
1758 /*
1759  * Add a teardown step. Executed after the test's main function returns.
1760  * Teardown steps executed in reverse order.
1761  */
1762 void test_add_teardown(test_teardown_func func, void *data)
1763 {
1764 	struct test_teardown_step *step;
1765 
1766 	TEST_ASSERT_MSG(teardown_count < MAX_TEST_TEARDOWN_STEPS,
1767 			"There are already %d teardown steps.",
1768 			teardown_count);
1769 	step = &teardown_steps[teardown_count++];
1770 	step->func = func;
1771 	step->data = data;
1772 }
1773 
/*
 * Set the target of the first enter_guest call. Can only be called once per
 * test. Must be called before first enter_guest call.
 *
 * Only valid for V2 tests; both conditions are asserted.
 */
void test_set_guest(test_guest_func func)
{
	assert(current->v2);
	TEST_ASSERT_MSG(!v2_guest_main, "Already set guest func.");
	v2_guest_main = func;
}
1784 
1785 static void check_for_guest_termination(void)
1786 {
1787 	if (is_hypercall()) {
1788 		int ret;
1789 
1790 		ret = handle_hypercall();
1791 		switch (ret) {
1792 		case VMX_TEST_VMEXIT:
1793 			guest_finished = 1;
1794 			break;
1795 		case VMX_TEST_VMABORT:
1796 			continue_abort();
1797 			break;
1798 		case VMX_TEST_VMSKIP:
1799 			continue_skip();
1800 			break;
1801 		default:
1802 			printf("ERROR : Invalid handle_hypercall return %d.\n",
1803 			       ret);
1804 			abort();
1805 		}
1806 	}
1807 }
1808 
1809 #define        ABORT_ON_EARLY_VMENTRY_FAIL     0x1
1810 #define        ABORT_ON_INVALID_GUEST_STATE    0x2
1811 
1812 /*
1813  * Enters the guest (or launches it for the first time). Error to call once the
1814  * guest has returned (i.e., run past the end of its guest() function).
1815  */
1816 static void __enter_guest(u8 abort_flag, struct vmentry_failure *failure)
1817 {
1818 	TEST_ASSERT_MSG(v2_guest_main,
1819 			"Never called test_set_guest_func!");
1820 
1821 	TEST_ASSERT_MSG(!guest_finished,
1822 			"Called enter_guest() after guest returned.");
1823 
1824 	vmx_enter_guest(failure);
1825 	if ((abort_flag & ABORT_ON_EARLY_VMENTRY_FAIL && failure->early) ||
1826 	    (abort_flag & ABORT_ON_INVALID_GUEST_STATE &&
1827 	    vmcs_read(EXI_REASON) & VMX_ENTRY_FAILURE)) {
1828 
1829 		print_vmentry_failure_info(failure);
1830 		abort();
1831 	}
1832 
1833 	if (!failure->early) {
1834 		launched = 1;
1835 		check_for_guest_termination();
1836 	}
1837 }
1838 
1839 void enter_guest_with_bad_controls(void)
1840 {
1841 	struct vmentry_failure failure = {0};
1842 
1843 	TEST_ASSERT_MSG(v2_guest_main,
1844 			"Never called test_set_guest_func!");
1845 
1846 	TEST_ASSERT_MSG(!guest_finished,
1847 			"Called enter_guest() after guest returned.");
1848 
1849 	__enter_guest(ABORT_ON_INVALID_GUEST_STATE, &failure);
1850 	report("failure occurred early", failure.early);
1851 	report("FLAGS set correctly",
1852 	       (failure.flags & VMX_ENTRY_FLAGS) == X86_EFLAGS_ZF);
1853 	report("VM-Inst Error # is %d (VM entry with invalid control field(s))",
1854 	       vmcs_read(VMX_INST_ERROR) == VMXERR_ENTRY_INVALID_CONTROL_FIELD,
1855 	       VMXERR_ENTRY_INVALID_CONTROL_FIELD);
1856 
1857 	/*
1858 	 * This if statement shouldn't fire, as the entire premise of this
1859 	 * function is that VM entry is expected to fail, rather than succeed
1860 	 * and execute to termination. However, if the VM entry does
1861 	 * unexpectedly succeed, it's nice to check whether the guest has
1862 	 * terminated, to reduce the number of error messages.
1863 	 */
1864 	if (!failure.early)
1865 		check_for_guest_termination();
1866 }
1867 
/*
 * Enter the guest and abort the test on any kind of VM-entry failure
 * (early failure or a failure flagged in the exit reason).
 */
void enter_guest(void)
{
	struct vmentry_failure failure = {0};

	__enter_guest(ABORT_ON_EARLY_VMENTRY_FAIL |
		      ABORT_ON_INVALID_GUEST_STATE, &failure);
}
1875 
/*
 * Enter the guest when a VM-entry failure flagged in the exit reason is
 * tolerated: only an early failure aborts the test.
 */
void enter_guest_with_invalid_guest_state(void)
{
	struct vmentry_failure failure = {0};

	__enter_guest(ABORT_ON_EARLY_VMENTRY_FAIL, &failure);
}
1882 
1883 extern struct vmx_test vmx_tests[];
1884 
1885 static bool
1886 test_wanted(const char *name, const char *filters[], int filter_count)
1887 {
1888 	int i;
1889 	bool positive = false;
1890 	bool match = false;
1891 	char clean_name[strlen(name) + 1];
1892 	char *c;
1893 	const char *n;
1894 
1895 	/* Replace spaces with underscores. */
1896 	n = name;
1897 	c = &clean_name[0];
1898 	do *c++ = (*n == ' ') ? '_' : *n;
1899 	while (*n++);
1900 
1901 	for (i = 0; i < filter_count; i++) {
1902 		const char *filter = filters[i];
1903 
1904 		if (filter[0] == '-') {
1905 			if (simple_glob(clean_name, filter + 1))
1906 				return false;
1907 		} else {
1908 			positive = true;
1909 			match |= simple_glob(clean_name, filter);
1910 		}
1911 	}
1912 
1913 	if (!positive || match) {
1914 		matched++;
1915 		return true;
1916 	} else {
1917 		return false;
1918 	}
1919 }
1920 
1921 int main(int argc, const char *argv[])
1922 {
1923 	int i = 0;
1924 
1925 	setup_vm();
1926 	smp_init();
1927 	hypercall_field = 0;
1928 
1929 	/* We want xAPIC mode to test MMIO passthrough from L1 (us) to L2.  */
1930 	reset_apic();
1931 
1932 	argv++;
1933 	argc--;
1934 
1935 	if (!(cpuid(1).c & (1 << 5))) {
1936 		printf("WARNING: vmx not supported, add '-cpu host'\n");
1937 		goto exit;
1938 	}
1939 	init_vmx();
1940 	if (test_wanted("test_vmx_feature_control", argv, argc)) {
1941 		/* Sets MSR_IA32_FEATURE_CONTROL to 0x5 */
1942 		if (test_vmx_feature_control() != 0)
1943 			goto exit;
1944 	} else {
1945 		if ((rdmsr(MSR_IA32_FEATURE_CONTROL) & 0x5) != 0x5)
1946 			wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5);
1947 	}
1948 
1949 	if (test_wanted("test_vmxon", argv, argc)) {
1950 		/* Enables VMX */
1951 		if (test_vmxon() != 0)
1952 			goto exit;
1953 	} else {
1954 		if (vmx_on()) {
1955 			report("vmxon", 0);
1956 			goto exit;
1957 		}
1958 	}
1959 
1960 	if (test_wanted("test_vmptrld", argv, argc))
1961 		test_vmptrld();
1962 	if (test_wanted("test_vmclear", argv, argc))
1963 		test_vmclear();
1964 	if (test_wanted("test_vmptrst", argv, argc))
1965 		test_vmptrst();
1966 	if (test_wanted("test_vmwrite_vmread", argv, argc))
1967 		test_vmwrite_vmread();
1968 	if (test_wanted("test_vmcs_high", argv, argc))
1969 		test_vmcs_high();
1970 	if (test_wanted("test_vmcs_lifecycle", argv, argc))
1971 		test_vmcs_lifecycle();
1972 	if (test_wanted("test_vmx_caps", argv, argc))
1973 		test_vmx_caps();
1974 
1975 	/* Balance vmxon from test_vmxon. */
1976 	vmx_off();
1977 
1978 	for (; vmx_tests[i].name != NULL; i++) {
1979 		if (!test_wanted(vmx_tests[i].name, argv, argc))
1980 			continue;
1981 		if (test_run(&vmx_tests[i]))
1982 			goto exit;
1983 	}
1984 
1985 	if (!matched)
1986 		report("command line didn't match any tests!", matched);
1987 
1988 exit:
1989 	return report_summary();
1990 }
1991