xref: /kvm-unit-tests/x86/vmx.c (revision a2b7c4999fec95bc48e27d9ed48627a47216fc98)
/*
 * x86/vmx.c : Framework for testing nested virtualization
 *	This is a framework to test nested VMX for KVM, which
 *	started as a project of GSoC 2013. All test cases should
 *	be located in x86/vmx_tests.c and framework-related
 *	functions should be in this file.
 *
 * How to write test cases?
 *	Add the callbacks of a test suite to the array "vmx_tests".
 *	You can write:
 *		1. an init function used for initializing the test suite,
 *		2. a main function for code running in the L2 guest,
 *		3. an exit_handler to handle vmexits from L2 to L1,
 *		4. a syscall handler to handle L2 syscall vmexits,
 *		5. a vmenter fail handler to handle direct failures of vmenter,
 *		6. guest_regs, which is loaded on vmenter and saved on
 *			vmexit; you can read and set it in exit_handler.
 *	If no special function is needed for a test suite, use the
 *	corresponding basic_* functions as callbacks. More handlers
 *	can be added to "vmx_tests"; see the details of "struct vmx_test"
 *	and the function test_run(). A minimal suite entry is sketched
 *	right after this comment.
 *
 * Currently, the vmx test framework only sets up one VCPU and one
 * concurrent guest test environment, with the same paging for L2 and
 * L1. When EPT is used, only 1:1 mapped paging from VFN to PFN is
 * supported.
 *
 * Author : Arthur Chunqi Li <yzt356@gmail.com>
 */
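
/*
 * Example (a sketch, kept as a comment; the basic_* callbacks and the
 * exact field order of "struct vmx_test" are assumed from vmx.h and
 * x86/vmx_tests.c, so check them before copying this):
 *
 *	static void demo_guest_main(void)
 *	{
 *		// runs in L2; on return, guest_entry issues the
 *		// HYPERCALL_VMEXIT hypercall to finish the test
 *		report("demo: reached L2", 1);
 *	}
 *
 *	// entry in vmx_tests[]:
 *	//	{ "demo", basic_init, demo_guest_main,
 *	//	  basic_exit_handler, basic_syscall_handler, {0} },
 */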

#include "libcflat.h"
#include "processor.h"
#include "vm.h"
#include "desc.h"
#include "vmx.h"
#include "msr.h"
#include "smp.h"
#include "io.h"

u64 *vmxon_region;
struct vmcs *vmcs_root;
u32 vpid_cnt;
void *guest_stack, *guest_syscall_stack;
u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
struct regs regs;
struct vmx_test *current;
u64 hypercall_field;
bool launched;
u64 host_rflags;

union vmx_basic basic;
union vmx_ctrl_msr ctrl_pin_rev;
union vmx_ctrl_msr ctrl_cpu_rev[2];
union vmx_ctrl_msr ctrl_exit_rev;
union vmx_ctrl_msr ctrl_enter_rev;
union vmx_ept_vpid ept_vpid;

extern struct descriptor_table_ptr gdt64_desc;
extern struct descriptor_table_ptr idt_descr;
extern struct descriptor_table_ptr tss_descr;
extern void *vmx_return;
extern void *entry_sysenter;
extern void *guest_entry;

static volatile u32 stage;

void vmx_set_test_stage(u32 s)
{
	barrier();
	stage = s;
	barrier();
}

u32 vmx_get_test_stage(void)
{
	u32 s;

	barrier();
	s = stage;
	barrier();
	return s;
}

void vmx_inc_test_stage(void)
{
	barrier();
	stage++;
	barrier();
}

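/*
 * Typical usage (a sketch): the stage counter is the handshake between
 * guest code running in L2 and the test's exit handler in L1:
 *
 *	// in guest_main (L2):
 *	vmx_set_test_stage(0);
 *	asm volatile("vmcall");		// force a vmexit to L1
 *	report("stage advanced by L1", vmx_get_test_stage() == 1);
 *
 *	// in exit_handler (L1), which must also advance GUEST_RIP
 *	// past the vmcall before returning VMX_TEST_RESUME:
 *	if (vmx_get_test_stage() == 0)
 *		vmx_inc_test_stage();
 */
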
static int make_vmcs_current(struct vmcs *vmcs)
{
	bool ret;
	u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;

	asm volatile ("push %1; popf; vmptrld %2; setbe %0"
		      : "=q" (ret) : "q" (rflags), "m" (vmcs) : "cc");
	return ret;
}

/* entry_sysenter */
asm(
	".align	4, 0x90\n\t"
	".globl	entry_sysenter\n\t"
	"entry_sysenter:\n\t"
	SAVE_GPR
	"	and	$0xf, %rax\n\t"
	"	mov	%rax, %rdi\n\t"
	"	call	syscall_handler\n\t"
	LOAD_GPR
	"	vmresume\n\t"
);

static void __attribute__((__used__)) syscall_handler(u64 syscall_no)
{
	if (current->syscall_handler)
		current->syscall_handler(syscall_no);
}

static inline int vmx_on(void)
{
	bool ret;
	u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;

	asm volatile ("push %1; popf; vmxon %2; setbe %0\n\t"
		      : "=q" (ret) : "q" (rflags), "m" (vmxon_region) : "cc");
	return ret;
}

static inline int vmx_off(void)
{
	bool ret;
	u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;

	asm volatile("push %1; popf; vmxoff; setbe %0\n\t"
		     : "=q"(ret) : "q" (rflags) : "cc");
	return ret;
}

void print_vmexit_info(void)
{
	u64 guest_rip, guest_rsp;
	ulong reason = vmcs_read(EXI_REASON) & 0xff;
	ulong exit_qual = vmcs_read(EXI_QUALIFICATION);

	guest_rip = vmcs_read(GUEST_RIP);
	guest_rsp = vmcs_read(GUEST_RSP);
	printf("VMEXIT info:\n");
	printf("\tvmexit reason = %ld\n", reason);
	printf("\texit qualification = 0x%lx\n", exit_qual);
	printf("\tBit 31 of reason = %d\n",
		(int)((vmcs_read(EXI_REASON) >> 31) & 1));
	printf("\tguest_rip = 0x%llx\n", guest_rip);
	printf("\tRAX=0x%llx    RBX=0x%llx    RCX=0x%llx    RDX=0x%llx\n",
		regs.rax, regs.rbx, regs.rcx, regs.rdx);
	printf("\tRSP=0x%llx    RBP=0x%llx    RSI=0x%llx    RDI=0x%llx\n",
		guest_rsp, regs.rbp, regs.rsi, regs.rdi);
	printf("\tR8 =0x%llx    R9 =0x%llx    R10=0x%llx    R11=0x%llx\n",
		regs.r8, regs.r9, regs.r10, regs.r11);
	printf("\tR12=0x%llx    R13=0x%llx    R14=0x%llx    R15=0x%llx\n",
		regs.r12, regs.r13, regs.r14, regs.r15);
}

static void test_vmclear(void)
{
	struct vmcs *tmp_root;
	int width = cpuid_maxphyaddr();

	/*
	 * Note- The tests below do not necessarily have a
	 * valid VMCS, but that's OK since the invalid VMCS
	 * is only used for a specific test and is discarded
	 * without touching its contents.
	 */

	/* Unaligned page access */
	tmp_root = (struct vmcs *)((intptr_t)vmcs_root + 1);
	report("test vmclear with unaligned vmcs",
	       vmcs_clear(tmp_root) == 1);

	/* gpa bits beyond physical address width are set */
	tmp_root = (struct vmcs *)((intptr_t)vmcs_root |
				   ((u64)1 << (width+1)));
	report("test vmclear with vmcs address bits set beyond physical address width",
	       vmcs_clear(tmp_root) == 1);

	/* Pass VMXON region */
	tmp_root = (struct vmcs *)vmxon_region;
	report("test vmclear with vmxon region",
	       vmcs_clear(tmp_root) == 1);

	/* Valid VMCS */
	report("test vmclear with valid vmcs region", vmcs_clear(vmcs_root) == 0);
}

static void test_vmxoff(void)
{
	int ret;

	ret = vmx_off();
	report("test vmxoff", !ret);
}

static void __attribute__((__used__)) guest_main(void)
{
	current->guest_main();
}

/* guest_entry */
asm(
	".align	4, 0x90\n\t"
	".globl	guest_entry\n\t"
	"guest_entry:\n\t"
	"	call guest_main\n\t"
	"	mov $1, %edi\n\t"
	"	call hypercall\n\t"
);

/* EPT paging structure related functions */
/* split_large_ept_entry: Split a 2M/1G large page into 512 smaller PTEs.
		@ptep : large page table entry to split
		@level : level of ptep (2 or 3)
 */
static void split_large_ept_entry(unsigned long *ptep, int level)
{
	unsigned long *new_pt;
	unsigned long gpa;
	unsigned long pte;
	unsigned long prototype;
	int i;

	pte = *ptep;
	assert(pte & EPT_PRESENT);
	assert(pte & EPT_LARGE_PAGE);
	assert(level == 2 || level == 3);

	new_pt = alloc_page();
	assert(new_pt);
	memset(new_pt, 0, PAGE_SIZE);

	prototype = pte & ~EPT_ADDR_MASK;
	if (level == 2)
		prototype &= ~EPT_LARGE_PAGE;

	gpa = pte & EPT_ADDR_MASK;
	for (i = 0; i < EPT_PGDIR_ENTRIES; i++) {
		new_pt[i] = prototype | gpa;
		gpa += 1ul << EPT_LEVEL_SHIFT(level - 1);
	}

	pte &= ~EPT_LARGE_PAGE;
	pte &= ~EPT_ADDR_MASK;
	pte |= virt_to_phys(new_pt);

	*ptep = pte;
}

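/*
 * For example (a worked sketch of the arithmetic above): splitting a
 * 2M mapping at level 2 copies the permission bits into 512 4K PTEs
 * whose addresses advance by 1 << EPT_LEVEL_SHIFT(1) = 4K, i.e. a
 * large page at gpa 0x40000000 becomes PTEs for 0x40000000,
 * 0x40001000, ..., 0x401ff000.
 */
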
/* install_ept_entry : Install a page to a given level in EPT
		@pml4 : addr of pml4 table
		@pte_level : level of PTE to set
		@guest_addr : guest physical address to map
		@pte : pte value to set
		@pt_page : address of page table, NULL for a new page
 */
void install_ept_entry(unsigned long *pml4,
		int pte_level,
		unsigned long guest_addr,
		unsigned long pte,
		unsigned long *pt_page)
{
	int level;
	unsigned long *pt = pml4;
	unsigned offset;

	for (level = EPT_PAGE_LEVEL; level > pte_level; --level) {
		offset = (guest_addr >> EPT_LEVEL_SHIFT(level))
				& EPT_PGDIR_MASK;
		if (!(pt[offset] & (EPT_PRESENT))) {
			unsigned long *new_pt = pt_page;
			if (!new_pt)
				new_pt = alloc_page();
			else
				pt_page = NULL;
			memset(new_pt, 0, PAGE_SIZE);
			pt[offset] = virt_to_phys(new_pt)
					| EPT_RA | EPT_WA | EPT_EA;
		} else if (pt[offset] & EPT_LARGE_PAGE)
			split_large_ept_entry(&pt[offset], level);
		pt = phys_to_virt(pt[offset] & EPT_ADDR_MASK);
	}
	offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) & EPT_PGDIR_MASK;
	pt[offset] = pte;
}

/* Map a page, @perm is the permission of the page */
void install_ept(unsigned long *pml4,
		unsigned long phys,
		unsigned long guest_addr,
		u64 perm)
{
	install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0);
}

/* Map a 1G-size page */
void install_1g_ept(unsigned long *pml4,
		unsigned long phys,
		unsigned long guest_addr,
		u64 perm)
{
	install_ept_entry(pml4, 3, guest_addr,
			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
}

/* Map a 2M-size page */
void install_2m_ept(unsigned long *pml4,
		unsigned long phys,
		unsigned long guest_addr,
		u64 perm)
{
	install_ept_entry(pml4, 2, guest_addr,
			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
}

/* setup_ept_range : Set up a range of 1:1 mapped pages in the EPT paging structure.
		@start : start address of the guest page range
		@len : length of the address range to be mapped
		@map_1g : whether 1G page mapping is used
		@map_2m : whether 2M page mapping is used
		@perm : permission for every page
 */
void setup_ept_range(unsigned long *pml4, unsigned long start,
		     unsigned long len, int map_1g, int map_2m, u64 perm)
{
	u64 phys = start;
	u64 max = (u64)len + (u64)start;

	if (map_1g) {
		while (phys + PAGE_SIZE_1G <= max) {
			install_1g_ept(pml4, phys, phys, perm);
			phys += PAGE_SIZE_1G;
		}
	}
	if (map_2m) {
		while (phys + PAGE_SIZE_2M <= max) {
			install_2m_ept(pml4, phys, phys, perm);
			phys += PAGE_SIZE_2M;
		}
	}
	while (phys + PAGE_SIZE <= max) {
		install_ept(pml4, phys, phys, perm);
		phys += PAGE_SIZE;
	}
}

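/*
 * Typical usage in a test's init callback (a sketch; the EPTP layout
 * follows the SDM: bits 2:0 = memory type, 6 = write-back, bits 5:3 =
 * page-walk length minus 1, and vmx.h may provide named constants for
 * both; the EPTP field encoding is assumed here as "EPTP"):
 *
 *	unsigned long *pml4 = alloc_page();
 *
 *	memset(pml4, 0, PAGE_SIZE);
 *	// identity-map the low 1G, read/write/execute, 4K pages
 *	setup_ept_range(pml4, 0, 1ul << 30, 0, 0,
 *			EPT_RA | EPT_WA | EPT_EA);
 *	vmcs_write(EPTP, virt_to_phys(pml4) | 6 | (3 << 3));
 *	ctrl_cpu[0] |= CPU_SECONDARY;
 *	ctrl_cpu[1] |= CPU_EPT;
 *	vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
 *	vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
 */
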
/* get_ept_pte : Get the PTE of a given level in EPT,
    @level == 1 means get the deepest (last) level */
unsigned long get_ept_pte(unsigned long *pml4,
		unsigned long guest_addr, int level)
{
	int l;
	unsigned long *pt = pml4, pte;
	unsigned offset;

	if (level < 1 || level > 3)
		return -1;
	for (l = EPT_PAGE_LEVEL; ; --l) {
		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
		pte = pt[offset];
		if (!(pte & (EPT_PRESENT)))
			return 0;
		if (l == level)
			break;
		if (l < 4 && (pte & EPT_LARGE_PAGE))
			return pte;
		pt = (unsigned long *)(pte & EPT_ADDR_MASK);
	}
	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
	pte = pt[offset];
	return pte;
}

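/*
 * E.g. an exit handler can verify that a 4K mapping it installed is
 * still present (a sketch; pml4 is whatever table the test set up as
 * its EPT root):
 *
 *	unsigned long pte = get_ept_pte(pml4, guest_addr, 1);
 *	report("ept: mapping present", pte & EPT_PRESENT);
 */
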
void ept_sync(int type, u64 eptp)
{
	switch (type) {
	case INVEPT_SINGLE:
		if (ept_vpid.val & EPT_CAP_INVEPT_SINGLE) {
			invept(INVEPT_SINGLE, eptp);
			break;
		}
		/* else fall through */
	case INVEPT_GLOBAL:
		if (ept_vpid.val & EPT_CAP_INVEPT_ALL) {
			invept(INVEPT_GLOBAL, eptp);
			break;
		}
		/* else fall through */
	default:
		printf("WARNING: invept is not supported!\n");
	}
}

int set_ept_pte(unsigned long *pml4, unsigned long guest_addr,
		int level, u64 pte_val)
{
	int l;
	unsigned long *pt = pml4;
	unsigned offset;

	if (level < 1 || level > 3)
		return -1;
	for (l = EPT_PAGE_LEVEL; ; --l) {
		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
		if (l == level)
			break;
		if (!(pt[offset] & (EPT_PRESENT)))
			return -1;
		pt = (unsigned long *)(pt[offset] & EPT_ADDR_MASK);
	}
	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
	pt[offset] = pte_val;
	return 0;
}

void vpid_sync(int type, u16 vpid)
{
	switch (type) {
	case INVVPID_SINGLE:
		if (ept_vpid.val & VPID_CAP_INVVPID_SINGLE) {
			invvpid(INVVPID_SINGLE, vpid, 0);
			break;
		}
		/* else fall through */
	case INVVPID_ALL:
		if (ept_vpid.val & VPID_CAP_INVVPID_ALL) {
			invvpid(INVVPID_ALL, vpid, 0);
			break;
		}
		/* else fall through */
	default:
		printf("WARNING: invvpid is not supported\n");
	}
}

static void init_vmcs_ctrl(void)
{
	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
	/* 26.2.1.1 */
	vmcs_write(PIN_CONTROLS, ctrl_pin);
	/* ctrl_cpu[0] already has I/O-instruction exiting disabled
	   (see init_vmcs) */
	vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
	if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
		ctrl_cpu[1] = (ctrl_cpu[1] | ctrl_cpu_rev[1].set) &
			ctrl_cpu_rev[1].clr;
		vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
	}
	vmcs_write(CR3_TARGET_COUNT, 0);
	vmcs_write(VPID, ++vpid_cnt);
}

static void init_vmcs_host(void)
{
	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
	/* 26.2.1.2 */
	vmcs_write(HOST_EFER, rdmsr(MSR_EFER));

	/* 26.2.1.3 */
	vmcs_write(ENT_CONTROLS, ctrl_enter);
	vmcs_write(EXI_CONTROLS, ctrl_exit);

	/* 26.2.2 */
	vmcs_write(HOST_CR0, read_cr0());
	vmcs_write(HOST_CR3, read_cr3());
	vmcs_write(HOST_CR4, read_cr4());
	vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
	vmcs_write(HOST_SYSENTER_CS,  KERNEL_CS);

	/* 26.2.3 */
	vmcs_write(HOST_SEL_CS, KERNEL_CS);
	vmcs_write(HOST_SEL_SS, KERNEL_DS);
	vmcs_write(HOST_SEL_DS, KERNEL_DS);
	vmcs_write(HOST_SEL_ES, KERNEL_DS);
	vmcs_write(HOST_SEL_FS, KERNEL_DS);
	vmcs_write(HOST_SEL_GS, KERNEL_DS);
	vmcs_write(HOST_SEL_TR, TSS_MAIN);
	vmcs_write(HOST_BASE_TR, tss_descr.base);
	vmcs_write(HOST_BASE_GDTR, gdt64_desc.base);
	vmcs_write(HOST_BASE_IDTR, idt_descr.base);
	vmcs_write(HOST_BASE_FS, 0);
	vmcs_write(HOST_BASE_GS, 0);

	/* Set other vmcs area */
	vmcs_write(PF_ERROR_MASK, 0);
	vmcs_write(PF_ERROR_MATCH, 0);
	vmcs_write(VMCS_LINK_PTR, ~0ul);
	vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
	vmcs_write(HOST_RIP, (u64)(&vmx_return));
}

static void init_vmcs_guest(void)
{
	/* 26.3 CHECKING AND LOADING GUEST STATE */
	ulong guest_cr0, guest_cr4, guest_cr3;
	/* 26.3.1.1 */
	guest_cr0 = read_cr0();
	guest_cr4 = read_cr4();
	guest_cr3 = read_cr3();
	if (ctrl_enter & ENT_GUEST_64) {
		guest_cr0 |= X86_CR0_PG;
		guest_cr4 |= X86_CR4_PAE;
	}
	if ((ctrl_enter & ENT_GUEST_64) == 0)
		guest_cr4 &= (~X86_CR4_PCIDE);
	if (guest_cr0 & X86_CR0_PG)
		guest_cr0 |= X86_CR0_PE;
	vmcs_write(GUEST_CR0, guest_cr0);
	vmcs_write(GUEST_CR3, guest_cr3);
	vmcs_write(GUEST_CR4, guest_cr4);
	vmcs_write(GUEST_SYSENTER_CS,  KERNEL_CS);
	vmcs_write(GUEST_SYSENTER_ESP,
		(u64)(guest_syscall_stack + PAGE_SIZE - 1));
	vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
	vmcs_write(GUEST_DR7, 0);
	vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));

	/* 26.3.1.2 */
	vmcs_write(GUEST_SEL_CS, KERNEL_CS);
	vmcs_write(GUEST_SEL_SS, KERNEL_DS);
	vmcs_write(GUEST_SEL_DS, KERNEL_DS);
	vmcs_write(GUEST_SEL_ES, KERNEL_DS);
	vmcs_write(GUEST_SEL_FS, KERNEL_DS);
	vmcs_write(GUEST_SEL_GS, KERNEL_DS);
	vmcs_write(GUEST_SEL_TR, TSS_MAIN);
	vmcs_write(GUEST_SEL_LDTR, 0);

	vmcs_write(GUEST_BASE_CS, 0);
	vmcs_write(GUEST_BASE_ES, 0);
	vmcs_write(GUEST_BASE_SS, 0);
	vmcs_write(GUEST_BASE_DS, 0);
	vmcs_write(GUEST_BASE_FS, 0);
	vmcs_write(GUEST_BASE_GS, 0);
	vmcs_write(GUEST_BASE_TR, tss_descr.base);
	vmcs_write(GUEST_BASE_LDTR, 0);

	vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
	vmcs_write(GUEST_LIMIT_TR, tss_descr.limit);

	vmcs_write(GUEST_AR_CS, 0xa09b);
	vmcs_write(GUEST_AR_DS, 0xc093);
	vmcs_write(GUEST_AR_ES, 0xc093);
	vmcs_write(GUEST_AR_FS, 0xc093);
	vmcs_write(GUEST_AR_GS, 0xc093);
	vmcs_write(GUEST_AR_SS, 0xc093);
	vmcs_write(GUEST_AR_LDTR, 0x82);
	vmcs_write(GUEST_AR_TR, 0x8b);

	/* 26.3.1.3 */
	vmcs_write(GUEST_BASE_GDTR, gdt64_desc.base);
	vmcs_write(GUEST_BASE_IDTR, idt_descr.base);
	vmcs_write(GUEST_LIMIT_GDTR, gdt64_desc.limit);
	vmcs_write(GUEST_LIMIT_IDTR, idt_descr.limit);

	/* 26.3.1.4 */
	vmcs_write(GUEST_RIP, (u64)(&guest_entry));
	vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1));
	vmcs_write(GUEST_RFLAGS, 0x2);

	/* 26.3.1.5 */
	vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE);
	vmcs_write(GUEST_INTR_STATE, 0);
}

static int init_vmcs(struct vmcs **vmcs)
{
	*vmcs = alloc_page();
	memset(*vmcs, 0, PAGE_SIZE);
	(*vmcs)->revision_id = basic.revision;
	/* vmclear first to init vmcs */
	if (vmcs_clear(*vmcs)) {
		printf("%s : vmcs_clear error\n", __func__);
		return 1;
	}

	if (make_vmcs_current(*vmcs)) {
		printf("%s : make_vmcs_current error\n", __func__);
		return 1;
	}

	/* All settings to pin/exit/enter/cpu
	   control fields should be placed here */
	ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
	ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
	ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
	/* Disable I/O-instruction VMEXITs for now */
	ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
	ctrl_cpu[1] = 0;

	ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
	ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
	ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
	ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;

	init_vmcs_ctrl();
	init_vmcs_host();
	init_vmcs_guest();
	return 0;
}

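/*
 * A worked example of the (ctrl | rev.set) & rev.clr adjustment used
 * in init_vmcs (the numbers are illustrative): rev.set, the low 32
 * bits of the capability MSR, holds the bits that must be 1; rev.clr,
 * the high 32 bits, holds the bits that are allowed to be 1. So with
 * rev.set = 0x16 and rev.clr = 0xffff, a requested value of 0x8001
 * becomes (0x8001 | 0x16) & 0xffff = 0x8017: the mandatory bits are
 * forced on and nothing outside the allowed mask survives.
 */
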
static void init_vmx(void)
{
	ulong fix_cr0_set, fix_cr0_clr;
	ulong fix_cr4_set, fix_cr4_clr;

	vmxon_region = alloc_page();
	memset(vmxon_region, 0, PAGE_SIZE);

	fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0);
	fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1);
	fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0);
	fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
			: MSR_IA32_VMX_PINBASED_CTLS);
	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
			: MSR_IA32_VMX_EXIT_CTLS);
	ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
			: MSR_IA32_VMX_ENTRY_CTLS);
	ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
			: MSR_IA32_VMX_PROCBASED_CTLS);
	if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0)
		ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
	else
		ctrl_cpu_rev[1].val = 0;
	if ((ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID)) != 0)
		ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
	else
		ept_vpid.val = 0;

	write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
	write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);

	*vmxon_region = basic.revision;

	guest_stack = alloc_page();
	memset(guest_stack, 0, PAGE_SIZE);
	guest_syscall_stack = alloc_page();
	memset(guest_syscall_stack, 0, PAGE_SIZE);
}

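/*
 * The CR0/CR4 fixups above implement the SDM rule that, while VMX is
 * enabled, every bit set in IA32_VMX_CRx_FIXED0 must be 1 and every
 * bit clear in IA32_VMX_CRx_FIXED1 must be 0. E.g. with a typical
 * CR0 FIXED0 of 0x80000021 and FIXED1 of 0xffffffff (illustrative
 * values), (read_cr0() & fix_cr0_clr) | fix_cr0_set forces PE, NE
 * and PG on and constrains nothing else.
 */
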
static void do_vmxon_off(void *data)
{
	vmx_on();
	vmx_off();
}

static void do_write_feature_control(void *data)
{
	wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
}

static int test_vmx_feature_control(void)
{
	u64 ia32_feature_control;
	bool vmx_enabled;

	/* bit 0 is the lock bit, bit 2 enables VMX outside SMX */
	ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
	vmx_enabled = ((ia32_feature_control & 0x5) == 0x5);
	if (vmx_enabled) {
		printf("VMX enabled and locked by BIOS\n");
		return 0;
	} else if (ia32_feature_control & 0x1) {
		printf("ERROR: VMX locked out by BIOS!?\n");
		return 1;
	}

	wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
	report("test vmxon with FEATURE_CONTROL cleared",
	       test_for_exception(GP_VECTOR, &do_vmxon_off, NULL));

	wrmsr(MSR_IA32_FEATURE_CONTROL, 0x4);
	report("test vmxon without FEATURE_CONTROL lock",
	       test_for_exception(GP_VECTOR, &do_vmxon_off, NULL));

	wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5);
	vmx_enabled = ((rdmsr(MSR_IA32_FEATURE_CONTROL) & 0x5) == 0x5);
	report("test enable VMX in FEATURE_CONTROL", vmx_enabled);

	report("test FEATURE_CONTROL lock bit",
	       test_for_exception(GP_VECTOR, &do_write_feature_control, NULL));

	return !vmx_enabled;
}

static int test_vmxon(void)
{
	int ret, ret1;
	u64 *tmp_region = vmxon_region;
	int width = cpuid_maxphyaddr();

	/* Unaligned page access */
	vmxon_region = (u64 *)((intptr_t)vmxon_region + 1);
	ret1 = vmx_on();
	report("test vmxon with unaligned vmxon region", ret1);
	if (!ret1) {
		ret = 1;
		goto out;
	}

	/* gpa bits beyond physical address width are set */
	vmxon_region = (u64 *)((intptr_t)tmp_region | ((u64)1 << (width+1)));
	ret1 = vmx_on();
	report("test vmxon with bits set beyond physical address width", ret1);
	if (!ret1) {
		ret = 1;
		goto out;
	}

	/* invalid revision identifier */
	vmxon_region = tmp_region;
	*vmxon_region = 0xba9da9;
	ret1 = vmx_on();
	report("test vmxon with invalid revision identifier", ret1);
	if (!ret1) {
		ret = 1;
		goto out;
	}

	/* and finally a valid region */
	*vmxon_region = basic.revision;
	ret = vmx_on();
	report("test vmxon with valid vmxon region", !ret);

out:
	return ret;
}

static void test_vmptrld(void)
{
	struct vmcs *vmcs, *tmp_root;
	int width = cpuid_maxphyaddr();

	vmcs = alloc_page();
	vmcs->revision_id = basic.revision;

	/* Unaligned page access */
	tmp_root = (struct vmcs *)((intptr_t)vmcs + 1);
	report("test vmptrld with unaligned vmcs",
	       make_vmcs_current(tmp_root) == 1);

	/* gpa bits beyond physical address width are set */
	tmp_root = (struct vmcs *)((intptr_t)vmcs |
				   ((u64)1 << (width+1)));
	report("test vmptrld with vmcs address bits set beyond physical address width",
	       make_vmcs_current(tmp_root) == 1);

	/* Pass VMXON region */
	tmp_root = (struct vmcs *)vmxon_region;
	report("test vmptrld with vmxon region",
	       make_vmcs_current(tmp_root) == 1);

	report("test vmptrld with valid vmcs region", make_vmcs_current(vmcs) == 0);
}

static void test_vmptrst(void)
{
	int ret;
	struct vmcs *vmcs1, *vmcs2;

	/* init_vmcs allocates and initializes vmcs1 itself */
	init_vmcs(&vmcs1);
	ret = vmcs_save(&vmcs2);
	report("test vmptrst", (!ret) && (vmcs1 == vmcs2));
}

struct vmx_ctl_msr {
	const char *name;
	u32 index, true_index;
	u32 default1;
} vmx_ctl_msr[] = {
	{ "MSR_IA32_VMX_PINBASED_CTLS", MSR_IA32_VMX_PINBASED_CTLS,
	  MSR_IA32_VMX_TRUE_PIN, 0x16 },
	{ "MSR_IA32_VMX_PROCBASED_CTLS", MSR_IA32_VMX_PROCBASED_CTLS,
	  MSR_IA32_VMX_TRUE_PROC, 0x401e172 },
	{ "MSR_IA32_VMX_PROCBASED_CTLS2", MSR_IA32_VMX_PROCBASED_CTLS2,
	  MSR_IA32_VMX_PROCBASED_CTLS2, 0 },
	{ "MSR_IA32_VMX_EXIT_CTLS", MSR_IA32_VMX_EXIT_CTLS,
	  MSR_IA32_VMX_TRUE_EXIT, 0x36dff },
	{ "MSR_IA32_VMX_ENTRY_CTLS", MSR_IA32_VMX_ENTRY_CTLS,
	  MSR_IA32_VMX_TRUE_ENTRY, 0x11ff },
};

static void test_vmx_caps(void)
{
	u64 val, default1, fixed0, fixed1;
	union vmx_ctrl_msr ctrl, true_ctrl;
	unsigned int n;
	bool ok;

	printf("\nTest suite: VMX capability reporting\n");

	report("MSR_IA32_VMX_BASIC",
	       (basic.revision & (1ul << 31)) == 0 &&
	       basic.size > 0 && basic.size <= 4096 &&
	       (basic.type == 0 || basic.type == 6) &&
	       basic.reserved1 == 0 && basic.reserved2 == 0);

	val = rdmsr(MSR_IA32_VMX_MISC);
	report("MSR_IA32_VMX_MISC",
	       (!(ctrl_cpu_rev[1].clr & CPU_URG) || val & (1ul << 5)) &&
	       ((val >> 16) & 0x1ff) <= 256 &&
	       (val & 0xc0007e00) == 0);

	for (n = 0; n < ARRAY_SIZE(vmx_ctl_msr); n++) {
		ctrl.val = rdmsr(vmx_ctl_msr[n].index);
		default1 = vmx_ctl_msr[n].default1;
		ok = (ctrl.set & default1) == default1;
		ok = ok && (ctrl.set & ~ctrl.clr) == 0;
		if (ok && basic.ctrl) {
			true_ctrl.val = rdmsr(vmx_ctl_msr[n].true_index);
			ok = ctrl.clr == true_ctrl.clr;
			ok = ok && ctrl.set == (true_ctrl.set | default1);
		}
		report(vmx_ctl_msr[n].name, ok);
	}

	fixed0 = rdmsr(MSR_IA32_VMX_CR0_FIXED0);
	fixed1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1);
	report("MSR_IA32_VMX_CR0_FIXED0/1",
	       ((fixed0 ^ fixed1) & ~fixed1) == 0);

	fixed0 = rdmsr(MSR_IA32_VMX_CR4_FIXED0);
	fixed1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
	report("MSR_IA32_VMX_CR4_FIXED0/1",
	       ((fixed0 ^ fixed1) & ~fixed1) == 0);

	val = rdmsr(MSR_IA32_VMX_VMCS_ENUM);
	report("MSR_IA32_VMX_VMCS_ENUM",
	       (val & 0x3e) >= 0x2a &&
	       (val & 0xfffffffffffffc01ull) == 0);

	val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
	report("MSR_IA32_VMX_EPT_VPID_CAP",
	       (val & 0xfffff07ef9eebebeull) == 0);
}

/* This function can only be called from the guest */
static void __attribute__((__used__)) hypercall(u32 hypercall_no)
{
	u64 val = 0;
	val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT;
	hypercall_field = val;
	asm volatile("vmcall\n\t");
}

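/*
 * Usage from L2 (a sketch): guest code ends a test by requesting a
 * full VMEXIT back to test_run(); this is how guest_entry finishes a
 * test after guest_main returns:
 *
 *	hypercall(HYPERCALL_VMEXIT);
 */
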
static bool is_hypercall(void)
{
	ulong reason, hyper_bit;

	reason = vmcs_read(EXI_REASON) & 0xff;
	hyper_bit = hypercall_field & HYPERCALL_BIT;
	if (reason == VMX_VMCALL && hyper_bit)
		return true;
	return false;
}

static int handle_hypercall(void)
{
	ulong hypercall_no;

	hypercall_no = hypercall_field & HYPERCALL_MASK;
	hypercall_field = 0;
	switch (hypercall_no) {
	case HYPERCALL_VMEXIT:
		return VMX_TEST_VMEXIT;
	default:
		printf("ERROR : Invalid hypercall number : %ld\n",
		       hypercall_no);
	}
	return VMX_TEST_EXIT;
}

static int exit_handler(void)
{
	int ret;

	current->exits++;
	regs.rflags = vmcs_read(GUEST_RFLAGS);
	if (is_hypercall())
		ret = handle_hypercall();
	else
		ret = current->exit_handler();
	vmcs_write(GUEST_RFLAGS, regs.rflags);
	switch (ret) {
	case VMX_TEST_VMEXIT:
	case VMX_TEST_RESUME:
		return ret;
	case VMX_TEST_EXIT:
		break;
	default:
		printf("ERROR : Invalid exit_handler return val %d.\n", ret);
	}
	print_vmexit_info();
	abort();
	return 0;
}

static int vmx_run(void)
{
	u32 ret = 0, fail = 0;

	while (1) {
		asm volatile (
			"mov %%rsp, %%rsi\n\t"
			"mov %2, %%rdi\n\t"
			"vmwrite %%rsi, %%rdi\n\t"

			LOAD_GPR_C
			"cmpl $0, %1\n\t"
			"jne 1f\n\t"
			LOAD_RFLAGS
			"vmlaunch\n\t"
			"jmp 2f\n\t"
			"1: "
			"vmresume\n\t"
			"2: "
			"setbe %0\n\t"
			"vmx_return:\n\t"
			SAVE_GPR_C
			SAVE_RFLAGS
			: "=m"(fail)
			: "m"(launched), "i"(HOST_RSP)
			: "rdi", "rsi", "memory", "cc"
		);
		if (fail)
			ret = launched ? VMX_TEST_RESUME_ERR :
				VMX_TEST_LAUNCH_ERR;
		else {
			launched = 1;
			ret = exit_handler();
		}
		if (ret != VMX_TEST_RESUME)
			break;
	}
	launched = 0;
	switch (ret) {
	case VMX_TEST_VMEXIT:
		return 0;
	case VMX_TEST_LAUNCH_ERR:
		printf("%s : vmlaunch failed.\n", __func__);
		if ((!(host_rflags & X86_EFLAGS_CF) && !(host_rflags & X86_EFLAGS_ZF))
			|| ((host_rflags & X86_EFLAGS_CF) && (host_rflags & X86_EFLAGS_ZF)))
			printf("\tvmlaunch set wrong flags\n");
		report("test vmlaunch", 0);
		break;
	case VMX_TEST_RESUME_ERR:
		printf("%s : vmresume failed.\n", __func__);
		if ((!(host_rflags & X86_EFLAGS_CF) && !(host_rflags & X86_EFLAGS_ZF))
			|| ((host_rflags & X86_EFLAGS_CF) && (host_rflags & X86_EFLAGS_ZF)))
			printf("\tvmresume set wrong flags\n");
		report("test vmresume", 0);
		break;
	default:
		printf("%s : unhandled ret from exit_handler, ret=%d.\n", __func__, ret);
		break;
	}
	return 1;
}

static int test_run(struct vmx_test *test)
{
	if (test->name == NULL)
		test->name = "(no name)";
	if (vmx_on()) {
		printf("%s : vmxon failed.\n", __func__);
		return 1;
	}
	init_vmcs(&(test->vmcs));
	/* Calling test->init directly is fine here; init_vmcs has
	   already done the vmcs init, vmclear and vmptrld. */
	if (test->init && test->init(test->vmcs) != VMX_TEST_START)
		goto out;
	test->exits = 0;
	current = test;
	regs = test->guest_regs;
	vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2);
	launched = 0;
	printf("\nTest suite: %s\n", test->name);
	vmx_run();
out:
	if (vmx_off()) {
		printf("%s : vmxoff failed.\n", __func__);
		return 1;
	}
	return 0;
}

extern struct vmx_test vmx_tests[];

int main(void)
{
	int i = 0;

	setup_vm();
	setup_idt();
	hypercall_field = 0;

	/* CPUID.1:ECX.VMX[bit 5] */
	if (!(cpuid(1).c & (1 << 5))) {
		printf("WARNING: vmx not supported, add '-cpu host'\n");
		goto exit;
	}
	init_vmx();
	if (test_vmx_feature_control() != 0)
		goto exit;
	/* Set the basic test context, the same as the "null" test */
	current = &vmx_tests[0];
	if (test_vmxon() != 0)
		goto exit;
	test_vmptrld();
	test_vmclear();
	test_vmptrst();
	init_vmcs(&vmcs_root);
	if (vmx_run()) {
		report("test vmlaunch", 0);
		goto exit;
	}
	test_vmxoff();
	test_vmx_caps();

	/* vmx_tests[0] ("null") already provided the context above */
	while (vmx_tests[++i].name != NULL)
		if (test_run(&vmx_tests[i]))
			goto exit;

exit:
	return report_summary();
}
1039