/*
 * x86/vmx.c : Framework for testing nested virtualization
 *	This is a framework to test nested VMX for KVM, which
 *	started as a project of GSoC 2013. All test cases should
 *	be located in x86/vmx_tests.c and framework-related
 *	functions should be in this file.
 *
 * How to write test cases?
 *	Add the callbacks of your test suite to the array "vmx_tests".
 *	You can provide:
 *		1. an init function, used to initialize the test suite
 *		2. a main function, for code running in the L2 guest
 *		3. an exit_handler, to handle VM-exits from L2 to L1
 *		4. a syscall handler, to handle L2 syscall VM-exits
 *		5. a vmenter fail handler, to handle direct failures of
 *			VM-entry
 *		6. guest_regs, which is loaded on VM-entry and saved on
 *			VM-exit; you can read and modify it in exit_handler
 *	If a test suite needs no special function, use the
 *	corresponding basic_* functions as callbacks. More handlers
 *	can be added to "vmx_tests"; see "struct vmx_test" and
 *	function test_run() for details. (A sketch of such an entry
 *	follows this comment.)
 *
 * Currently the vmx test framework only sets up one VCPU and one
 * concurrent guest test environment, with the same paging for L2
 * and L1. When EPT is used, only 1:1 (identity) mappings from
 * guest-physical to host-physical frames are installed.
 *
 * Author : Arthur Chunqi Li <yzt356@gmail.com>
 */
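
/*
 * Illustrative sketch of a test-suite entry (not part of the framework;
 * real entries live in x86/vmx_tests.c and the exact layout of struct
 * vmx_test is in vmx.h -- EXI_INST_LEN and the basic_* callbacks are
 * assumed from there). Only callbacks this file actually invokes are
 * shown:
 *
 *	static void demo_guest_main(void)
 *	{
 *		vmx_set_test_stage(1);
 *		asm volatile("vmcall");		// VM-exit to L1
 *		report("demo stage", vmx_get_test_stage() == 2);
 *	}
 *
 *	static int demo_exit_handler(void)
 *	{
 *		if ((vmcs_read(EXI_REASON) & 0xff) == VMX_VMCALL) {
 *			vmx_inc_test_stage();
 *			// skip the vmcall and re-enter L2
 *			vmcs_write(GUEST_RIP, vmcs_read(GUEST_RIP) +
 *					      vmcs_read(EXI_INST_LEN));
 *			return VMX_TEST_RESUME;
 *		}
 *		return VMX_TEST_VMEXIT;
 *	}
 *
 * together with an entry such as { "demo", basic_init, demo_guest_main,
 * demo_exit_handler, basic_syscall_handler, {0} } appended to vmx_tests[]
 * (field order assumed from the basic_* usage described above).
 */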
#include "libcflat.h"
#include "processor.h"
#include "vm.h"
#include "desc.h"
#include "vmx.h"
#include "msr.h"
#include "smp.h"
#include "io.h"

u64 *vmxon_region;
struct vmcs *vmcs_root;
u32 vpid_cnt;
void *guest_stack, *guest_syscall_stack;
u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
struct regs regs;
struct vmx_test *current;
u64 hypercall_field;
bool launched;
u64 host_rflags;

union vmx_basic basic;
union vmx_ctrl_msr ctrl_pin_rev;
union vmx_ctrl_msr ctrl_cpu_rev[2];
union vmx_ctrl_msr ctrl_exit_rev;
union vmx_ctrl_msr ctrl_enter_rev;
union vmx_ept_vpid  ept_vpid;

extern struct descriptor_table_ptr gdt64_desc;
extern struct descriptor_table_ptr idt_descr;
extern struct descriptor_table_ptr tss_descr;
extern void *vmx_return;
extern void *entry_sysenter;
extern void *guest_entry;

static volatile u32 stage;

void vmx_set_test_stage(u32 s)
{
	barrier();
	stage = s;
	barrier();
}

u32 vmx_get_test_stage(void)
{
	u32 s;

	barrier();
	s = stage;
	barrier();
	return s;
}

void vmx_inc_test_stage(void)
{
	barrier();
	stage++;
	barrier();
}

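/*
 * The helpers below (make_vmcs_current, vmx_on, vmx_off) pre-load RFLAGS
 * with CF and ZF set before executing the VMX instruction: VMsucceed
 * clears both flags, VMfailInvalid sets CF and VMfailValid sets ZF, so
 * "setbe" afterwards yields nonzero exactly on failure -- and a buggy
 * hypervisor that leaves RFLAGS untouched is also reported as a failure.
 */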
static int make_vmcs_current(struct vmcs *vmcs)
{
	bool ret;
	u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;

	asm volatile ("push %1; popf; vmptrld %2; setbe %0"
		      : "=q" (ret) : "q" (rflags), "m" (vmcs) : "cc");
	return ret;
}

/* entry_sysenter */
asm(
	".align	4, 0x90\n\t"
	".globl	entry_sysenter\n\t"
	"entry_sysenter:\n\t"
	SAVE_GPR
	"	and	$0xf, %rax\n\t"
	"	mov	%rax, %rdi\n\t"
	"	call	syscall_handler\n\t"
	LOAD_GPR
	"	vmresume\n\t"
);

static void __attribute__((__used__)) syscall_handler(u64 syscall_no)
{
	if (current->syscall_handler)
		current->syscall_handler(syscall_no);
}

static inline int vmx_on()
{
	bool ret;
	u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
	asm volatile ("push %1; popf; vmxon %2; setbe %0\n\t"
		      : "=q" (ret) : "q" (rflags), "m" (vmxon_region) : "cc");
	return ret;
}

static inline int vmx_off()
{
	bool ret;
	u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;

	asm volatile("push %1; popf; vmxoff; setbe %0\n\t"
		     : "=q"(ret) : "q" (rflags) : "cc");
	return ret;
}

void print_vmexit_info()
{
	u64 guest_rip, guest_rsp;
	ulong reason = vmcs_read(EXI_REASON) & 0xff;
	ulong exit_qual = vmcs_read(EXI_QUALIFICATION);
	guest_rip = vmcs_read(GUEST_RIP);
	guest_rsp = vmcs_read(GUEST_RSP);
	printf("VMEXIT info:\n");
	printf("\tvmexit reason = %lu\n", reason);
	printf("\texit qualification = 0x%lx\n", exit_qual);
	printf("\tBit 31 of reason = %llx\n", (vmcs_read(EXI_REASON) >> 31) & 1);
	printf("\tguest_rip = 0x%llx\n", guest_rip);
	printf("\tRAX=0x%llx    RBX=0x%llx    RCX=0x%llx    RDX=0x%llx\n",
		regs.rax, regs.rbx, regs.rcx, regs.rdx);
	printf("\tRSP=0x%llx    RBP=0x%llx    RSI=0x%llx    RDI=0x%llx\n",
		guest_rsp, regs.rbp, regs.rsi, regs.rdi);
	printf("\tR8 =0x%llx    R9 =0x%llx    R10=0x%llx    R11=0x%llx\n",
		regs.r8, regs.r9, regs.r10, regs.r11);
	printf("\tR12=0x%llx    R13=0x%llx    R14=0x%llx    R15=0x%llx\n",
		regs.r12, regs.r13, regs.r14, regs.r15);
}

static void test_vmclear(void)
{
	struct vmcs *tmp_root;
	int width = cpuid_maxphyaddr();

	/*
	 * Note- The tests below do not necessarily have a
	 * valid VMCS, but that's ok since the invalid vmcs
	 * is only used for a specific test and is discarded
	 * without touching its contents
	 */

	/* Unaligned page access */
	tmp_root = (struct vmcs *)((intptr_t)vmcs_root + 1);
	report("test vmclear with unaligned vmcs",
	       vmcs_clear(tmp_root) == 1);

	/* gpa bits beyond physical address width are set */
	tmp_root = (struct vmcs *)((intptr_t)vmcs_root |
				   ((u64)1 << (width+1)));
	report("test vmclear with vmcs address bits set beyond physical address width",
	       vmcs_clear(tmp_root) == 1);

	/* Pass VMXON region */
	tmp_root = (struct vmcs *)vmxon_region;
	report("test vmclear with vmxon region",
	       vmcs_clear(tmp_root) == 1);

	/* Valid VMCS */
	report("test vmclear with valid vmcs region", vmcs_clear(vmcs_root) == 0);
}

static void test_vmxoff(void)
{
	int ret;

	ret = vmx_off();
	report("test vmxoff", !ret);
}

static void __attribute__((__used__)) guest_main(void)
{
	current->guest_main();
}

/* guest_entry */
asm(
	".align	4, 0x90\n\t"
	".globl	guest_entry\n\t"
	"guest_entry:\n\t"
	"	call guest_main\n\t"
	"	mov $1, %edi\n\t"
	"	call hypercall\n\t"
);

/* EPT paging structure related functions */
/* install_ept_entry : Install a PTE at a given level of the EPT
		@pml4 : addr of the pml4 table
		@pte_level : level of the PTE to set
		@guest_addr : guest physical address to map
		@pte : pte value to set
		@pt_page : address of the page table, NULL for a new page
 */
void install_ept_entry(unsigned long *pml4,
		int pte_level,
		unsigned long guest_addr,
		unsigned long pte,
		unsigned long *pt_page)
{
	int level;
	unsigned long *pt = pml4;
	unsigned offset;

	for (level = EPT_PAGE_LEVEL; level > pte_level; --level) {
		offset = (guest_addr >> ((level-1) * EPT_PGDIR_WIDTH + 12))
				& EPT_PGDIR_MASK;
		if (!(pt[offset] & (EPT_PRESENT))) {
			unsigned long *new_pt = pt_page;
			if (!new_pt)
				new_pt = alloc_page();
			else
				pt_page = 0;
			memset(new_pt, 0, PAGE_SIZE);
			pt[offset] = virt_to_phys(new_pt)
					| EPT_RA | EPT_WA | EPT_EA;
		} else
			pt[offset] &= ~EPT_LARGE_PAGE;
		pt = phys_to_virt(pt[offset] & 0xffffffffff000ull);
	}
	offset = ((unsigned long)guest_addr >> ((level-1) *
			EPT_PGDIR_WIDTH + 12)) & EPT_PGDIR_MASK;
	pt[offset] = pte;
}

/* Map a page, @perm is the permission of the page */
void install_ept(unsigned long *pml4,
		unsigned long phys,
		unsigned long guest_addr,
		u64 perm)
{
	install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0);
}

/* Map a 1G-size page */
void install_1g_ept(unsigned long *pml4,
		unsigned long phys,
		unsigned long guest_addr,
		u64 perm)
{
	install_ept_entry(pml4, 3, guest_addr,
			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
}

/* Map a 2M-size page */
void install_2m_ept(unsigned long *pml4,
		unsigned long phys,
		unsigned long guest_addr,
		u64 perm)
{
	install_ept_entry(pml4, 2, guest_addr,
			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
}

/* setup_ept_range : Set up a range of 1:1 mapped pages in the EPT paging
   structure.
		@start : start address of the guest pages
		@len : length of the address range to be mapped
		@map_1g : whether 1G page mapping is used
		@map_2m : whether 2M page mapping is used
		@perm : permission for every page
 */
void setup_ept_range(unsigned long *pml4, unsigned long start,
		     unsigned long len, int map_1g, int map_2m, u64 perm)
{
	u64 phys = start;
	u64 max = (u64)len + (u64)start;

	if (map_1g) {
		while (phys + PAGE_SIZE_1G <= max) {
			install_1g_ept(pml4, phys, phys, perm);
			phys += PAGE_SIZE_1G;
		}
	}
	if (map_2m) {
		while (phys + PAGE_SIZE_2M <= max) {
			install_2m_ept(pml4, phys, phys, perm);
			phys += PAGE_SIZE_2M;
		}
	}
	while (phys + PAGE_SIZE <= max) {
		install_ept(pml4, phys, phys, perm);
		phys += PAGE_SIZE;
	}
}
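
/*
 * Usage sketch (illustrative values only): identity-map the first 2 GB
 * through a fresh PML4, preferring 2M mappings, with read/write/execute
 * permission:
 *
 *	unsigned long *pml4 = alloc_page();
 *	memset(pml4, 0, PAGE_SIZE);
 *	setup_ept_range(pml4, 0, 2ul << 30, 0, 1, EPT_RA | EPT_WA | EPT_EA);
 */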

/* get_ept_pte : Get the PTE of a given level in EPT,
    @level == 1 means get the PTE at the lowest (4K) level */
unsigned long get_ept_pte(unsigned long *pml4,
		unsigned long guest_addr, int level)
{
	int l;
	unsigned long *pt = pml4, pte;
	unsigned offset;

	if (level < 1 || level > 3)
		return -1;
	for (l = EPT_PAGE_LEVEL; ; --l) {
		offset = (guest_addr >> (((l-1) * EPT_PGDIR_WIDTH) + 12))
				& EPT_PGDIR_MASK;
		pte = pt[offset];
		if (!(pte & (EPT_PRESENT)))
			return 0;
		if (l == level)
			break;
		if (l < 4 && (pte & EPT_LARGE_PAGE))
			return pte;
		pt = (unsigned long *)(pte & 0xffffffffff000ull);
	}
	offset = (guest_addr >> (((l-1) * EPT_PGDIR_WIDTH) + 12))
			& EPT_PGDIR_MASK;
	pte = pt[offset];
	return pte;
}

void ept_sync(int type, u64 eptp)
{
	switch (type) {
	case INVEPT_SINGLE:
		if (ept_vpid.val & EPT_CAP_INVEPT_SINGLE) {
			invept(INVEPT_SINGLE, eptp);
			break;
		}
		/* else fall through */
	case INVEPT_GLOBAL:
		if (ept_vpid.val & EPT_CAP_INVEPT_ALL) {
			invept(INVEPT_GLOBAL, eptp);
			break;
		}
		/* else fall through */
	default:
		printf("WARNING: invept is not supported!\n");
	}
}

int set_ept_pte(unsigned long *pml4, unsigned long guest_addr,
		int level, u64 pte_val)
{
	int l;
	unsigned long *pt = pml4;
	unsigned offset;

	if (level < 1 || level > 3)
		return -1;
	for (l = EPT_PAGE_LEVEL; ; --l) {
		offset = (guest_addr >> (((l-1) * EPT_PGDIR_WIDTH) + 12))
				& EPT_PGDIR_MASK;
		if (l == level)
			break;
		if (!(pt[offset] & (EPT_PRESENT)))
			return -1;
		pt = (unsigned long *)(pt[offset] & 0xffffffffff000ull);
	}
	offset = (guest_addr >> (((l-1) * EPT_PGDIR_WIDTH) + 12))
			& EPT_PGDIR_MASK;
	pt[offset] = pte_val;
	return 0;
}
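
/*
 * Typical read-modify-write pattern in EPT tests (sketch; "gpa" and
 * "eptp" stand for whatever the test configured): fetch the 4K-level
 * PTE, revoke write access, then invalidate the stale translation:
 *
 *	unsigned long pte = get_ept_pte(pml4, gpa, 1);
 *	set_ept_pte(pml4, gpa, 1, pte & ~EPT_WA);
 *	ept_sync(INVEPT_SINGLE, eptp);
 */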

void vpid_sync(int type, u16 vpid)
{
	switch (type) {
	case INVVPID_SINGLE:
		if (ept_vpid.val & VPID_CAP_INVVPID_SINGLE) {
			invvpid(INVVPID_SINGLE, vpid, 0);
			break;
		}
		/* else fall through */
	case INVVPID_ALL:
		if (ept_vpid.val & VPID_CAP_INVVPID_ALL) {
			invvpid(INVVPID_ALL, vpid, 0);
			break;
		}
		/* else fall through */
	default:
		printf("WARNING: invvpid is not supported\n");
	}
}

static void init_vmcs_ctrl(void)
{
	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
	/* 26.2.1.1 */
	vmcs_write(PIN_CONTROLS, ctrl_pin);
	/* Primary processor-based controls (I/O exiting disabled in init_vmcs) */
	vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
	if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
		ctrl_cpu[1] = (ctrl_cpu[1] | ctrl_cpu_rev[1].set) &
			ctrl_cpu_rev[1].clr;
		vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
	}
	vmcs_write(CR3_TARGET_COUNT, 0);
	vmcs_write(VPID, ++vpid_cnt);
}

static void init_vmcs_host(void)
{
	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
	/* 26.2.1.2 */
	vmcs_write(HOST_EFER, rdmsr(MSR_EFER));

	/* 26.2.1.3 */
	vmcs_write(ENT_CONTROLS, ctrl_enter);
	vmcs_write(EXI_CONTROLS, ctrl_exit);

	/* 26.2.2 */
	vmcs_write(HOST_CR0, read_cr0());
	vmcs_write(HOST_CR3, read_cr3());
	vmcs_write(HOST_CR4, read_cr4());
	vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
	vmcs_write(HOST_SYSENTER_CS,  KERNEL_CS);

	/* 26.2.3 */
	vmcs_write(HOST_SEL_CS, KERNEL_CS);
	vmcs_write(HOST_SEL_SS, KERNEL_DS);
	vmcs_write(HOST_SEL_DS, KERNEL_DS);
	vmcs_write(HOST_SEL_ES, KERNEL_DS);
	vmcs_write(HOST_SEL_FS, KERNEL_DS);
	vmcs_write(HOST_SEL_GS, KERNEL_DS);
	vmcs_write(HOST_SEL_TR, TSS_MAIN);
	vmcs_write(HOST_BASE_TR, tss_descr.base);
	vmcs_write(HOST_BASE_GDTR, gdt64_desc.base);
	vmcs_write(HOST_BASE_IDTR, idt_descr.base);
	vmcs_write(HOST_BASE_FS, 0);
	vmcs_write(HOST_BASE_GS, 0);

	/* Set other vmcs area */
	vmcs_write(PF_ERROR_MASK, 0);
	vmcs_write(PF_ERROR_MATCH, 0);
	vmcs_write(VMCS_LINK_PTR, ~0ul);
	vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
	vmcs_write(HOST_RIP, (u64)(&vmx_return));
}

static void init_vmcs_guest(void)
{
	/* 26.3 CHECKING AND LOADING GUEST STATE */
	ulong guest_cr0, guest_cr4, guest_cr3;
	/* 26.3.1.1 */
	guest_cr0 = read_cr0();
	guest_cr4 = read_cr4();
	guest_cr3 = read_cr3();
	if (ctrl_enter & ENT_GUEST_64) {
		guest_cr0 |= X86_CR0_PG;
		guest_cr4 |= X86_CR4_PAE;
	}
	if ((ctrl_enter & ENT_GUEST_64) == 0)
		guest_cr4 &= (~X86_CR4_PCIDE);
	if (guest_cr0 & X86_CR0_PG)
		guest_cr0 |= X86_CR0_PE;
	vmcs_write(GUEST_CR0, guest_cr0);
	vmcs_write(GUEST_CR3, guest_cr3);
	vmcs_write(GUEST_CR4, guest_cr4);
	vmcs_write(GUEST_SYSENTER_CS,  KERNEL_CS);
	vmcs_write(GUEST_SYSENTER_ESP,
		(u64)(guest_syscall_stack + PAGE_SIZE - 1));
	vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
	vmcs_write(GUEST_DR7, 0);
	vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));

	/* 26.3.1.2 */
	vmcs_write(GUEST_SEL_CS, KERNEL_CS);
	vmcs_write(GUEST_SEL_SS, KERNEL_DS);
	vmcs_write(GUEST_SEL_DS, KERNEL_DS);
	vmcs_write(GUEST_SEL_ES, KERNEL_DS);
	vmcs_write(GUEST_SEL_FS, KERNEL_DS);
	vmcs_write(GUEST_SEL_GS, KERNEL_DS);
	vmcs_write(GUEST_SEL_TR, TSS_MAIN);
	vmcs_write(GUEST_SEL_LDTR, 0);

	vmcs_write(GUEST_BASE_CS, 0);
	vmcs_write(GUEST_BASE_ES, 0);
	vmcs_write(GUEST_BASE_SS, 0);
	vmcs_write(GUEST_BASE_DS, 0);
	vmcs_write(GUEST_BASE_FS, 0);
	vmcs_write(GUEST_BASE_GS, 0);
	vmcs_write(GUEST_BASE_TR, tss_descr.base);
	vmcs_write(GUEST_BASE_LDTR, 0);

	vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
	vmcs_write(GUEST_LIMIT_TR, tss_descr.limit);

	vmcs_write(GUEST_AR_CS, 0xa09b);
	vmcs_write(GUEST_AR_DS, 0xc093);
	vmcs_write(GUEST_AR_ES, 0xc093);
	vmcs_write(GUEST_AR_FS, 0xc093);
	vmcs_write(GUEST_AR_GS, 0xc093);
	vmcs_write(GUEST_AR_SS, 0xc093);
	vmcs_write(GUEST_AR_LDTR, 0x82);
	vmcs_write(GUEST_AR_TR, 0x8b);

	/* 26.3.1.3 */
	vmcs_write(GUEST_BASE_GDTR, gdt64_desc.base);
	vmcs_write(GUEST_BASE_IDTR, idt_descr.base);
	vmcs_write(GUEST_LIMIT_GDTR, gdt64_desc.limit);
	vmcs_write(GUEST_LIMIT_IDTR, idt_descr.limit);

	/* 26.3.1.4 */
	vmcs_write(GUEST_RIP, (u64)(&guest_entry));
	vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1));
	vmcs_write(GUEST_RFLAGS, 0x2);

	/* 26.3.1.5 */
	vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE);
	vmcs_write(GUEST_INTR_STATE, 0);
}

static int init_vmcs(struct vmcs **vmcs)
{
	*vmcs = alloc_page();
	memset(*vmcs, 0, PAGE_SIZE);
	(*vmcs)->revision_id = basic.revision;
	/* vmclear first to init vmcs */
	if (vmcs_clear(*vmcs)) {
		printf("%s : vmcs_clear error\n", __func__);
		return 1;
	}

	if (make_vmcs_current(*vmcs)) {
		printf("%s : make_vmcs_current error\n", __func__);
		return 1;
	}

	/* All settings of the pin/exit/enter/cpu
	   control fields should be placed here */
	ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
	ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
	ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
	/* Disable I/O instruction VM-exits for now */
	ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
	ctrl_cpu[1] = 0;

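	/*
	 * Adjust each requested control word against its capability MSR:
	 * the MSR's low dword (rev.set) holds bits that must be 1, the
	 * high dword (rev.clr) holds bits that may be 1, so
	 * (ctrl | rev.set) & rev.clr forces required bits on and drops
	 * unsupported ones. Worked example with made-up values:
	 * rev.set = 0x16, rev.clr = 0xff and a requested ctrl = 0x100
	 * yield (0x100 | 0x16) & 0xff = 0x16, i.e. the unsupported
	 * bit 8 is silently cleared.
	 */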
	ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
	ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
	ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
	ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;

	init_vmcs_ctrl();
	init_vmcs_host();
	init_vmcs_guest();
	return 0;
}

static void init_vmx(void)
{
	ulong fix_cr0_set, fix_cr0_clr;
	ulong fix_cr4_set, fix_cr4_clr;

	vmxon_region = alloc_page();
	memset(vmxon_region, 0, PAGE_SIZE);

	fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0);
	fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1);
	fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0);
	fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
			: MSR_IA32_VMX_PINBASED_CTLS);
	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
			: MSR_IA32_VMX_EXIT_CTLS);
	ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
			: MSR_IA32_VMX_ENTRY_CTLS);
	ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
			: MSR_IA32_VMX_PROCBASED_CTLS);
	if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0)
		ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
	else
		ctrl_cpu_rev[1].val = 0;
	if ((ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID)) != 0)
		ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
	else
		ept_vpid.val = 0;

	write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
	write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);

	*vmxon_region = basic.revision;

	guest_stack = alloc_page();
	memset(guest_stack, 0, PAGE_SIZE);
	guest_syscall_stack = alloc_page();
	memset(guest_syscall_stack, 0, PAGE_SIZE);
}

static void do_vmxon_off(void *data)
{
	jmp_buf jmpbuf;
	if (set_exception_jmpbuf(jmpbuf) == 0) {
		vmx_on();
		vmx_off();
	}
}

static void do_write_feature_control(void *data)
{
	jmp_buf jmpbuf;
	if (set_exception_jmpbuf(jmpbuf) == 0) {
		wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
	}
}
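
/*
 * For the checks below: in MSR_IA32_FEATURE_CONTROL, bit 0 is the lock
 * bit and bit 2 enables VMXON outside SMX operation, so 0x5 means "VMX
 * usable and the MSR locked", 0x1 means "locked with VMX disabled", and
 * 0x4 means "VMX enabled but not yet locked".
 */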

static int test_vmx_feature_control(void)
{
	u64 ia32_feature_control;
	bool vmx_enabled;

	ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
	vmx_enabled = ((ia32_feature_control & 0x5) == 0x5);
	if (vmx_enabled) {
		printf("VMX enabled and locked by BIOS\n");
		return 0;
	} else if (ia32_feature_control & 0x1) {
		printf("ERROR: VMX locked out by BIOS!?\n");
		return 1;
	}

	wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
	report("test vmxon with FEATURE_CONTROL cleared",
	       test_for_exception(GP_VECTOR, &do_vmxon_off, NULL));

	wrmsr(MSR_IA32_FEATURE_CONTROL, 0x4);
	report("test vmxon without FEATURE_CONTROL lock",
	       test_for_exception(GP_VECTOR, &do_vmxon_off, NULL));

	wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5);
	vmx_enabled = ((rdmsr(MSR_IA32_FEATURE_CONTROL) & 0x5) == 0x5);
	report("test enable VMX in FEATURE_CONTROL", vmx_enabled);

	report("test FEATURE_CONTROL lock bit",
	       test_for_exception(GP_VECTOR, &do_write_feature_control, NULL));

	return !vmx_enabled;
}

static int test_vmxon(void)
{
	int ret, ret1;
	u64 *tmp_region = vmxon_region;
	int width = cpuid_maxphyaddr();

	/* Unaligned page access */
	vmxon_region = (u64 *)((intptr_t)vmxon_region + 1);
	ret1 = vmx_on();
	report("test vmxon with unaligned vmxon region", ret1);
	if (!ret1) {
		ret = 1;
		goto out;
	}

	/* gpa bits beyond physical address width are set */
	vmxon_region = (u64 *)((intptr_t)tmp_region | ((u64)1 << (width+1)));
	ret1 = vmx_on();
	report("test vmxon with bits set beyond physical address width", ret1);
	if (!ret1) {
		ret = 1;
		goto out;
	}

	/* invalid revision identifier */
	vmxon_region = tmp_region;
	*vmxon_region = 0xba9da9;
	ret1 = vmx_on();
	report("test vmxon with invalid revision identifier", ret1);
	if (!ret1) {
		ret = 1;
		goto out;
	}

	/* and finally a valid region */
	*vmxon_region = basic.revision;
	ret = vmx_on();
	report("test vmxon with valid vmxon region", !ret);

out:
	return ret;
}

static void test_vmptrld(void)
{
	struct vmcs *vmcs, *tmp_root;
	int width = cpuid_maxphyaddr();

	vmcs = alloc_page();
	vmcs->revision_id = basic.revision;

	/* Unaligned page access */
	tmp_root = (struct vmcs *)((intptr_t)vmcs + 1);
	report("test vmptrld with unaligned vmcs",
	       make_vmcs_current(tmp_root) == 1);

	/* gpa bits beyond physical address width are set */
	tmp_root = (struct vmcs *)((intptr_t)vmcs |
				   ((u64)1 << (width+1)));
	report("test vmptrld with vmcs address bits set beyond physical address width",
	       make_vmcs_current(tmp_root) == 1);

	/* Pass VMXON region */
	tmp_root = (struct vmcs *)vmxon_region;
	report("test vmptrld with vmxon region",
	       make_vmcs_current(tmp_root) == 1);

	report("test vmptrld with valid vmcs region", make_vmcs_current(vmcs) == 0);
}

static void test_vmptrst(void)
{
	int ret;
	struct vmcs *vmcs1, *vmcs2;

	vmcs1 = alloc_page();
	memset(vmcs1, 0, PAGE_SIZE);
	init_vmcs(&vmcs1);
	ret = vmcs_save(&vmcs2);
	report("test vmptrst", (!ret) && (vmcs1 == vmcs2));
}

struct vmx_ctl_msr {
	const char *name;
	u32 index, true_index;
	u32 default1;
} vmx_ctl_msr[] = {
	{ "MSR_IA32_VMX_PINBASED_CTLS", MSR_IA32_VMX_PINBASED_CTLS,
	  MSR_IA32_VMX_TRUE_PIN, 0x16 },
	{ "MSR_IA32_VMX_PROCBASED_CTLS", MSR_IA32_VMX_PROCBASED_CTLS,
	  MSR_IA32_VMX_TRUE_PROC, 0x401e172 },
	{ "MSR_IA32_VMX_PROCBASED_CTLS2", MSR_IA32_VMX_PROCBASED_CTLS2,
	  MSR_IA32_VMX_PROCBASED_CTLS2, 0 },
	{ "MSR_IA32_VMX_EXIT_CTLS", MSR_IA32_VMX_EXIT_CTLS,
	  MSR_IA32_VMX_TRUE_EXIT, 0x36dff },
	{ "MSR_IA32_VMX_ENTRY_CTLS", MSR_IA32_VMX_ENTRY_CTLS,
	  MSR_IA32_VMX_TRUE_ENTRY, 0x11ff },
};

static void test_vmx_caps(void)
{
	u64 val, default1, fixed0, fixed1;
	union vmx_ctrl_msr ctrl, true_ctrl;
	unsigned int n;
	bool ok;

	printf("\nTest suite: VMX capability reporting\n");

	report("MSR_IA32_VMX_BASIC",
	       (basic.revision & (1ul << 31)) == 0 &&
	       basic.size > 0 && basic.size <= 4096 &&
	       (basic.type == 0 || basic.type == 6) &&
	       basic.reserved1 == 0 && basic.reserved2 == 0);

	val = rdmsr(MSR_IA32_VMX_MISC);
	report("MSR_IA32_VMX_MISC",
	       (!(ctrl_cpu_rev[1].clr & CPU_URG) || val & (1ul << 5)) &&
	       ((val >> 16) & 0x1ff) <= 256 &&
	       (val & 0xc0007e00) == 0);

	for (n = 0; n < ARRAY_SIZE(vmx_ctl_msr); n++) {
		ctrl.val = rdmsr(vmx_ctl_msr[n].index);
		default1 = vmx_ctl_msr[n].default1;
		ok = (ctrl.set & default1) == default1;
		ok = ok && (ctrl.set & ~ctrl.clr) == 0;
		if (ok && basic.ctrl) {
			true_ctrl.val = rdmsr(vmx_ctl_msr[n].true_index);
			ok = ctrl.clr == true_ctrl.clr;
			ok = ok && ctrl.set == (true_ctrl.set | default1);
		}
		report(vmx_ctl_msr[n].name, ok);
	}

	fixed0 = rdmsr(MSR_IA32_VMX_CR0_FIXED0);
	fixed1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1);
	report("MSR_IA32_VMX_CR0_FIXED0/1",
	       ((fixed0 ^ fixed1) & ~fixed1) == 0);

	fixed0 = rdmsr(MSR_IA32_VMX_CR4_FIXED0);
	fixed1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
	report("MSR_IA32_VMX_CR4_FIXED0/1",
	       ((fixed0 ^ fixed1) & ~fixed1) == 0);

	val = rdmsr(MSR_IA32_VMX_VMCS_ENUM);
	report("MSR_IA32_VMX_VMCS_ENUM",
	       (val & 0x3e) >= 0x2a &&
	       (val & 0xfffffffffffffc01Ull) == 0);

	val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
	report("MSR_IA32_VMX_EPT_VPID_CAP",
	       (val & 0xfffff07ef9eebebeUll) == 0);
}

/* This function can only be called in guest */
static void __attribute__((__used__)) hypercall(u32 hypercall_no)
{
	u64 val = 0;
	val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT;
	hypercall_field = val;
	asm volatile("vmcall\n\t");
}

static bool is_hypercall()
{
	ulong reason, hyper_bit;

	reason = vmcs_read(EXI_REASON) & 0xff;
	hyper_bit = hypercall_field & HYPERCALL_BIT;
	if (reason == VMX_VMCALL && hyper_bit)
		return true;
	return false;
}

static int handle_hypercall()
{
	ulong hypercall_no;

	hypercall_no = hypercall_field & HYPERCALL_MASK;
	hypercall_field = 0;
	switch (hypercall_no) {
	case HYPERCALL_VMEXIT:
		return VMX_TEST_VMEXIT;
	default:
		printf("ERROR : Invalid hypercall number : %lu\n", hypercall_no);
	}
	return VMX_TEST_EXIT;
}
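
/*
 * Hypercall protocol between L2 and L1, as implemented above: the guest
 * stores the hypercall number in the shared variable hypercall_field
 * with HYPERCALL_BIT set and executes vmcall; on the resulting
 * VMX_VMCALL exit, is_hypercall() matches and handle_hypercall()
 * dispatches on the number. E.g. the guest_entry stub calls hypercall(1)
 * (HYPERCALL_VMEXIT) after guest_main returns, which makes vmx_run()
 * terminate the test.
 */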

static int exit_handler()
{
	int ret;

	current->exits++;
	regs.rflags = vmcs_read(GUEST_RFLAGS);
	if (is_hypercall())
		ret = handle_hypercall();
	else
		ret = current->exit_handler();
	vmcs_write(GUEST_RFLAGS, regs.rflags);
	switch (ret) {
	case VMX_TEST_VMEXIT:
	case VMX_TEST_RESUME:
		return ret;
	case VMX_TEST_EXIT:
		break;
	default:
		printf("ERROR : Invalid exit_handler return val %d.\n", ret);
	}
	print_vmexit_info();
	exit(-1);
	return 0;
}

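/*
 * vmx_run() stores the current RSP into the VMCS HOST_RSP field
 * immediately before vmlaunch/vmresume, so that on every VM-exit the
 * CPU resumes at vmx_return (installed as HOST_RIP in init_vmcs_host())
 * with the same host stack it left with.
 */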
static int vmx_run()
{
	u32 ret = 0, fail = 0;

	while (1) {
		asm volatile (
			"mov %%rsp, %%rsi\n\t"
			"mov %2, %%rdi\n\t"
			"vmwrite %%rsi, %%rdi\n\t"

			LOAD_GPR_C
			"cmpl $0, %1\n\t"
			"jne 1f\n\t"
			LOAD_RFLAGS
			"vmlaunch\n\t"
			"jmp 2f\n\t"
			"1: "
			"vmresume\n\t"
			"2: "
			"setbe %0\n\t"
			"vmx_return:\n\t"
			SAVE_GPR_C
			SAVE_RFLAGS
			: "=m"(fail)
			: "m"(launched), "i"(HOST_RSP)
			: "rdi", "rsi", "memory", "cc"

		);
		if (fail)
			ret = launched ? VMX_TEST_RESUME_ERR :
				VMX_TEST_LAUNCH_ERR;
		else {
			launched = 1;
			ret = exit_handler();
		}
		if (ret != VMX_TEST_RESUME)
			break;
	}
	launched = 0;
	switch (ret) {
	case VMX_TEST_VMEXIT:
		return 0;
	case VMX_TEST_LAUNCH_ERR:
		printf("%s : vmlaunch failed.\n", __func__);
		if ((!(host_rflags & X86_EFLAGS_CF) && !(host_rflags & X86_EFLAGS_ZF))
			|| ((host_rflags & X86_EFLAGS_CF) && (host_rflags & X86_EFLAGS_ZF)))
			printf("\tvmlaunch set wrong flags\n");
		report("test vmlaunch", 0);
		break;
	case VMX_TEST_RESUME_ERR:
		printf("%s : vmresume failed.\n", __func__);
		if ((!(host_rflags & X86_EFLAGS_CF) && !(host_rflags & X86_EFLAGS_ZF))
			|| ((host_rflags & X86_EFLAGS_CF) && (host_rflags & X86_EFLAGS_ZF)))
			printf("\tvmresume set wrong flags\n");
		report("test vmresume", 0);
		break;
	default:
		printf("%s : unhandled ret from exit_handler, ret=%d.\n", __func__, ret);
		break;
	}
	return 1;
}

static int test_run(struct vmx_test *test)
{
	if (test->name == NULL)
		test->name = "(no name)";
	if (vmx_on()) {
		printf("%s : vmxon failed.\n", __func__);
		return 1;
	}
	init_vmcs(&(test->vmcs));
	/* Calling test->init directly is fine here: init_vmcs has already
	   done the vmcs init, vmclear and vmptrld. */
	if (test->init && test->init(test->vmcs) != VMX_TEST_START)
		goto out;
	test->exits = 0;
	current = test;
	regs = test->guest_regs;
	vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2);
	launched = 0;
	printf("\nTest suite: %s\n", test->name);
	vmx_run();
out:
	if (vmx_off()) {
		printf("%s : vmxoff failed.\n", __func__);
		return 1;
	}
	return 0;
}

extern struct vmx_test vmx_tests[];

int main(void)
{
	int i = 0;

	setup_vm();
	setup_idt();
	hypercall_field = 0;

	if (!(cpuid(1).c & (1 << 5))) {
		printf("WARNING: vmx not supported, add '-cpu host'\n");
		goto exit;
	}
	init_vmx();
	if (test_vmx_feature_control() != 0)
		goto exit;
	/* Use the "null" test (vmx_tests[0]) as context for the basic tests */
	current = &vmx_tests[0];
	if (test_vmxon() != 0)
		goto exit;
	test_vmptrld();
	test_vmclear();
	test_vmptrst();
	init_vmcs(&vmcs_root);
	if (vmx_run()) {
		report("test vmlaunch", 0);
		goto exit;
	}
	test_vmxoff();
	test_vmx_caps();

	while (vmx_tests[++i].name != NULL)
		if (test_run(&vmx_tests[i]))
			goto exit;

exit:
	return report_summary();
}