/*
 * x86/vmx.c : Framework for testing nested virtualization
 *	This is a framework to test nested VMX for KVM, which
 *	started as a project of GSoC 2013. All test cases should
 *	be located in x86/vmx_tests.c and framework-related
 *	functions should be in this file.
 *
 * How to write test cases?
 *	Add the test suite's callbacks to the global array "vmx_tests".
 *	You can provide:
 *		1. an init function, used to initialize the test suite
 *		2. a main function, for code running in the L2 guest
 *		3. an exit_handler, to handle vmexits from L2 to L1
 *		4. a syscall handler, to handle L2 syscall vmexits
 *		5. a vmenter fail handler, to handle direct failures of vmenter
 *		6. guest_regs, which is loaded on vmenter and saved on
 *			vmexit; you can read and modify it in exit_handler
 *	If no special function is needed for a test suite, use the
 *	corresponding basic_* functions as callbacks. More handlers
 *	can be added to "vmx_tests"; see "struct vmx_test" and
 *	test_run() for details. A sketch of a minimal entry follows
 *	this comment.
 *
 * Currently, the vmx test framework only sets up one VCPU and one
 * concurrent guest test environment, with identical paging for L2
 * and L1. When EPT is used, only a 1:1 mapping from guest-physical
 * frames to host-physical frames is installed.
 *
 * Author : Arthur Chunqi Li <yzt356@gmail.com>
 */
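/*
 * For illustration only (a sketch, not framework code): a minimal test
 * suite entry might look like the following. The exact field order is
 * defined by struct vmx_test in vmx.h; the basic_* fallback handlers
 * are assumed to be declared there as well.
 *
 *	static void demo_guest_main(void)
 *	{
 *		report("ran in L2", true);
 *	}
 *
 *	static int demo_exit_handler(void)
 *	{
 *		return VMX_TEST_VMEXIT;
 *	}
 *
 *	// in vmx_tests[] (x86/vmx_tests.c):
 *	// { "demo", NULL, demo_guest_main, demo_exit_handler,
 *	//   NULL, {0} },
 */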

#include "libcflat.h"
#include "processor.h"
#include "vm.h"
#include "desc.h"
#include "vmx.h"
#include "msr.h"
#include "smp.h"
#include "io.h"

u64 *vmxon_region;
struct vmcs *vmcs_root;
u32 vpid_cnt;
void *guest_stack, *guest_syscall_stack;
u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
struct regs regs;
struct vmx_test *current;
u64 hypercall_field;
bool launched;

union vmx_basic basic;
union vmx_ctrl_msr ctrl_pin_rev;
union vmx_ctrl_msr ctrl_cpu_rev[2];
union vmx_ctrl_msr ctrl_exit_rev;
union vmx_ctrl_msr ctrl_enter_rev;
union vmx_ept_vpid ept_vpid;

extern struct descriptor_table_ptr gdt64_desc;
extern struct descriptor_table_ptr idt_descr;
extern struct descriptor_table_ptr tss_descr;
extern void *vmx_return;
extern void *entry_sysenter;
extern void *guest_entry;

static volatile u32 stage;

void vmx_set_test_stage(u32 s)
{
	barrier();
	stage = s;
	barrier();
}

u32 vmx_get_test_stage(void)
{
	u32 s;

	barrier();
	s = stage;
	barrier();
	return s;
}

void vmx_inc_test_stage(void)
{
	barrier();
	stage++;
	barrier();
}
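
/*
 * Illustrative handshake through the stage counter (a sketch under
 * assumed names, not framework code): a test's guest_main and its
 * exit_handler can coordinate progress like this. Note that a real
 * exit_handler must also advance GUEST_RIP past the vmcall before
 * returning VMX_TEST_RESUME.
 *
 *	// in guest_main, running in L2:
 *	vmx_set_test_stage(0);
 *	asm volatile("vmcall");		// provoke a vmexit
 *	report("stage advanced by L1", vmx_get_test_stage() == 1);
 *
 *	// in the matching exit_handler, running in L1:
 *	if (vmx_get_test_stage() == 0)
 *		vmx_inc_test_stage();
 */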

static int make_vmcs_current(struct vmcs *vmcs)
{
	bool ret;
	u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;

	/*
	 * Prime CF and ZF before vmptrld: a successful vmptrld clears
	 * both, so setbe reliably distinguishes success from failure.
	 */
	asm volatile ("push %1; popf; vmptrld %2; setbe %0"
		      : "=q" (ret) : "q" (rflags), "m" (vmcs) : "cc");
	return ret;
}

/* entry_sysenter */
asm(
	".align	4, 0x90\n\t"
	".globl	entry_sysenter\n\t"
	"entry_sysenter:\n\t"
	SAVE_GPR
	/* the guest passes its syscall number in the low nibble of RAX */
	"	and	$0xf, %rax\n\t"
	"	mov	%rax, %rdi\n\t"
	"	call	syscall_handler\n\t"
	LOAD_GPR
	"	vmresume\n\t"
);

static void __attribute__((__used__)) syscall_handler(u64 syscall_no)
{
	if (current->syscall_handler)
		current->syscall_handler(syscall_no);
}

static inline int vmx_on()
{
	bool ret;
	u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;

	asm volatile ("push %1; popf; vmxon %2; setbe %0\n\t"
		      : "=q" (ret) : "q" (rflags), "m" (vmxon_region) : "cc");
	return ret;
}

static inline int vmx_off()
{
	bool ret;
	u64 rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;

	asm volatile("push %1; popf; vmxoff; setbe %0\n\t"
		     : "=q"(ret) : "q" (rflags) : "cc");
	return ret;
}

void print_vmexit_info()
{
	u64 guest_rip, guest_rsp;
	ulong reason = vmcs_read(EXI_REASON) & 0xff;
	ulong exit_qual = vmcs_read(EXI_QUALIFICATION);

	guest_rip = vmcs_read(GUEST_RIP);
	guest_rsp = vmcs_read(GUEST_RSP);
	printf("VMEXIT info:\n");
	printf("\tvmexit reason = %ld\n", reason);
	printf("\texit qualification = 0x%lx\n", exit_qual);
	printf("\tBit 31 of reason = %lx\n", (vmcs_read(EXI_REASON) >> 31) & 1);
	printf("\tguest_rip = 0x%lx\n", guest_rip);
	printf("\tRAX=0x%lx    RBX=0x%lx    RCX=0x%lx    RDX=0x%lx\n",
		regs.rax, regs.rbx, regs.rcx, regs.rdx);
	printf("\tRSP=0x%lx    RBP=0x%lx    RSI=0x%lx    RDI=0x%lx\n",
		guest_rsp, regs.rbp, regs.rsi, regs.rdi);
	printf("\tR8 =0x%lx    R9 =0x%lx    R10=0x%lx    R11=0x%lx\n",
		regs.r8, regs.r9, regs.r10, regs.r11);
	printf("\tR12=0x%lx    R13=0x%lx    R14=0x%lx    R15=0x%lx\n",
		regs.r12, regs.r13, regs.r14, regs.r15);
}

void
print_vmentry_failure_info(struct vmentry_failure *failure)
{
	if (failure->early) {
		printf("Early %s failure: ", failure->instr);
		switch (failure->flags & VMX_ENTRY_FLAGS) {
		case X86_EFLAGS_ZF:
			printf("current-VMCS pointer is not valid.\n");
			break;
		case X86_EFLAGS_CF:
			printf("error number is %ld. See Intel 30.4.\n",
			       vmcs_read(VMX_INST_ERROR));
			break;
		default:
			printf("unexpected flags %lx!\n", failure->flags);
		}
	} else {
		u64 reason = vmcs_read(EXI_REASON);
		u64 qual = vmcs_read(EXI_QUALIFICATION);

		printf("Non-early %s failure (reason=0x%lx, qual=0x%lx): ",
			failure->instr, reason, qual);

		switch (reason & 0xff) {
		case VMX_FAIL_STATE:
			printf("invalid guest state\n");
			break;
		case VMX_FAIL_MSR:
			printf("MSR loading\n");
			break;
		case VMX_FAIL_MCHECK:
			printf("machine-check event\n");
			break;
		default:
			printf("unexpected basic exit reason %ld\n",
			       reason & 0xff);
		}

		if (!(reason & VMX_ENTRY_FAILURE))
			printf("\tVMX_ENTRY_FAILURE BIT NOT SET!\n");

		if (reason & 0x7fff0000)
			printf("\tRESERVED BITS SET!\n");
	}
}

static void test_vmclear(void)
{
	struct vmcs *tmp_root;
	int width = cpuid_maxphyaddr();

	/*
	 * Note: the tests below do not necessarily have a
	 * valid VMCS, but that's OK since the invalid VMCS
	 * is only used for a specific test and is discarded
	 * without touching its contents.
	 */

	/* Unaligned page access */
	tmp_root = (struct vmcs *)((intptr_t)vmcs_root + 1);
	report("test vmclear with unaligned vmcs",
	       vmcs_clear(tmp_root) == 1);

	/* gpa bits beyond physical address width are set */
	tmp_root = (struct vmcs *)((intptr_t)vmcs_root |
				   ((u64)1 << (width+1)));
	report("test vmclear with vmcs address bits set beyond physical address width",
	       vmcs_clear(tmp_root) == 1);

	/* Pass VMXON region */
	tmp_root = (struct vmcs *)vmxon_region;
	report("test vmclear with vmxon region",
	       vmcs_clear(tmp_root) == 1);

	/* Valid VMCS */
	report("test vmclear with valid vmcs region", vmcs_clear(vmcs_root) == 0);
}

static void test_vmxoff(void)
{
	int ret;

	ret = vmx_off();
	report("test vmxoff", !ret);
}

static void __attribute__((__used__)) guest_main(void)
{
	current->guest_main();
}

/* guest_entry */
asm(
	".align	4, 0x90\n\t"
	".globl	guest_entry\n\t"
	"guest_entry:\n\t"
	"	call guest_main\n\t"
	"	mov $1, %edi\n\t"
	"	call hypercall\n\t"
);

/* EPT paging structure related functions */
/* split_large_ept_entry: Split a 2M/1G large page into 512 smaller PTEs.
		@ptep : large page table entry to split
		@level : level of ptep (2 or 3)
 */
static void split_large_ept_entry(unsigned long *ptep, int level)
{
	unsigned long *new_pt;
	unsigned long gpa;
	unsigned long pte;
	unsigned long prototype;
	int i;

	pte = *ptep;
	assert(pte & EPT_PRESENT);
	assert(pte & EPT_LARGE_PAGE);
	assert(level == 2 || level == 3);

	new_pt = alloc_page();
	assert(new_pt);
	memset(new_pt, 0, PAGE_SIZE);

	prototype = pte & ~EPT_ADDR_MASK;
	if (level == 2)
		prototype &= ~EPT_LARGE_PAGE;

	gpa = pte & EPT_ADDR_MASK;
	for (i = 0; i < EPT_PGDIR_ENTRIES; i++) {
		new_pt[i] = prototype | gpa;
		gpa += 1ul << EPT_LEVEL_SHIFT(level - 1);
	}

	pte &= ~EPT_LARGE_PAGE;
	pte &= ~EPT_ADDR_MASK;
	pte |= virt_to_phys(new_pt);

	*ptep = pte;
}
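
/*
 * For example, splitting the 2M entry that maps gpa 0x200000 replaces it
 * with a page table of 512 4K entries covering 0x200000, 0x201000, ...,
 * 0x3ff000, each inheriting the permission bits of the original entry.
 */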

/* install_ept_entry : Install a page to a given level in EPT
		@pml4 : addr of pml4 table
		@pte_level : level of PTE to set
		@guest_addr : physical address of guest
		@pte : pte value to set
		@pt_page : address of page table, NULL for a new page
 */
void install_ept_entry(unsigned long *pml4,
		int pte_level,
		unsigned long guest_addr,
		unsigned long pte,
		unsigned long *pt_page)
{
	int level;
	unsigned long *pt = pml4;
	unsigned offset;

	for (level = EPT_PAGE_LEVEL; level > pte_level; --level) {
		offset = (guest_addr >> EPT_LEVEL_SHIFT(level))
				& EPT_PGDIR_MASK;
		if (!(pt[offset] & (EPT_PRESENT))) {
			unsigned long *new_pt = pt_page;

			if (!new_pt)
				new_pt = alloc_page();
			else
				pt_page = NULL;
			memset(new_pt, 0, PAGE_SIZE);
			pt[offset] = virt_to_phys(new_pt)
					| EPT_RA | EPT_WA | EPT_EA;
		} else if (pt[offset] & EPT_LARGE_PAGE)
			split_large_ept_entry(&pt[offset], level);
		pt = phys_to_virt(pt[offset] & EPT_ADDR_MASK);
	}
	offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) & EPT_PGDIR_MASK;
	pt[offset] = pte;
}

/* Map a page, @perm is the permission of the page */
void install_ept(unsigned long *pml4,
		unsigned long phys,
		unsigned long guest_addr,
		u64 perm)
{
	install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0);
}

/* Map a 1G-size page */
void install_1g_ept(unsigned long *pml4,
		unsigned long phys,
		unsigned long guest_addr,
		u64 perm)
{
	install_ept_entry(pml4, 3, guest_addr,
			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
}

/* Map a 2M-size page */
void install_2m_ept(unsigned long *pml4,
		unsigned long phys,
		unsigned long guest_addr,
		u64 perm)
{
	install_ept_entry(pml4, 2, guest_addr,
			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
}
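
/*
 * Usage sketch (for illustration; pml4 is a hypothetical caller-owned
 * table): identity-map the first 2M of guest-physical memory with full
 * permissions, using the EPT_RA/EPT_WA/EPT_EA bits from vmx.h:
 *
 *	unsigned long *pml4 = alloc_page();
 *	memset(pml4, 0, PAGE_SIZE);
 *	install_2m_ept(pml4, 0, 0, EPT_RA | EPT_WA | EPT_EA);
 */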

/* setup_ept_range : Set up a range of 1:1 mapped pages in the EPT
   paging structure.
		@start : start address of guest page
		@len : length of the address range to be mapped
		@map_1g : whether 1G page mapping is used
		@map_2m : whether 2M page mapping is used
		@perm : permission for every page
 */
void setup_ept_range(unsigned long *pml4, unsigned long start,
		     unsigned long len, int map_1g, int map_2m, u64 perm)
{
	u64 phys = start;
	u64 max = (u64)len + (u64)start;

	if (map_1g) {
		while (phys + PAGE_SIZE_1G <= max) {
			install_1g_ept(pml4, phys, phys, perm);
			phys += PAGE_SIZE_1G;
		}
	}
	if (map_2m) {
		while (phys + PAGE_SIZE_2M <= max) {
			install_2m_ept(pml4, phys, phys, perm);
			phys += PAGE_SIZE_2M;
		}
	}
	while (phys + PAGE_SIZE <= max) {
		install_ept(pml4, phys, phys, perm);
		phys += PAGE_SIZE;
	}
}

/* get_ept_pte : Get the PTE of a given level in EPT;
   @level == 1 means get the PTE at the last (4K) level. */
unsigned long get_ept_pte(unsigned long *pml4,
		unsigned long guest_addr, int level)
{
	int l;
	unsigned long *pt = pml4, pte;
	unsigned offset;

	if (level < 1 || level > 3)
		return -1;
	for (l = EPT_PAGE_LEVEL; ; --l) {
		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
		pte = pt[offset];
		if (!(pte & (EPT_PRESENT)))
			return 0;
		if (l == level)
			break;
		if (l < 4 && (pte & EPT_LARGE_PAGE))
			return pte;
		pt = (unsigned long *)(pte & EPT_ADDR_MASK);
	}
	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
	pte = pt[offset];
	return pte;
}
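
/*
 * A sketch of typical use: inspect the final 4K translation for a
 * guest-physical address by asking for level 1, or the PDE covering
 * it by asking for level 2 (guest_addr is hypothetical):
 *
 *	unsigned long pte = get_ept_pte(pml4, guest_addr, 1);
 *	if (pte & EPT_PRESENT)
 *		printf("mapped to 0x%lx\n", pte & EPT_ADDR_MASK);
 */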

void ept_sync(int type, u64 eptp)
{
	switch (type) {
	case INVEPT_SINGLE:
		if (ept_vpid.val & EPT_CAP_INVEPT_SINGLE) {
			invept(INVEPT_SINGLE, eptp);
			break;
		}
		/* else fall through */
	case INVEPT_GLOBAL:
		if (ept_vpid.val & EPT_CAP_INVEPT_ALL) {
			invept(INVEPT_GLOBAL, eptp);
			break;
		}
		/* else fall through */
	default:
		printf("WARNING: invept is not supported!\n");
	}
}

int set_ept_pte(unsigned long *pml4, unsigned long guest_addr,
		int level, u64 pte_val)
{
	int l;
	unsigned long *pt = pml4;
	unsigned offset;

	if (level < 1 || level > 3)
		return -1;
	for (l = EPT_PAGE_LEVEL; ; --l) {
		offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
		if (l == level)
			break;
		if (!(pt[offset] & (EPT_PRESENT)))
			return -1;
		pt = (unsigned long *)(pt[offset] & EPT_ADDR_MASK);
	}
	offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
	pt[offset] = pte_val;
	return 0;
}

void vpid_sync(int type, u16 vpid)
{
	switch (type) {
	case INVVPID_SINGLE:
		if (ept_vpid.val & VPID_CAP_INVVPID_SINGLE) {
			invvpid(INVVPID_SINGLE, vpid, 0);
			break;
		}
		/* else fall through */
	case INVVPID_ALL:
		if (ept_vpid.val & VPID_CAP_INVVPID_ALL) {
			invvpid(INVVPID_ALL, vpid, 0);
			break;
		}
		/* else fall through */
	default:
		printf("WARNING: invvpid is not supported\n");
	}
}

static void init_vmcs_ctrl(void)
{
	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
	/* 26.2.1.1 */
	vmcs_write(PIN_CONTROLS, ctrl_pin);
	/* ctrl_cpu[0] already has IO-instruction vmexits disabled (see init_vmcs) */
	vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
	if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
		ctrl_cpu[1] = (ctrl_cpu[1] | ctrl_cpu_rev[1].set) &
			ctrl_cpu_rev[1].clr;
		vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
	}
	vmcs_write(CR3_TARGET_COUNT, 0);
	vmcs_write(VPID, ++vpid_cnt);
}

static void init_vmcs_host(void)
{
	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
	/* 26.2.1.2 */
	vmcs_write(HOST_EFER, rdmsr(MSR_EFER));

	/* 26.2.1.3 */
	vmcs_write(ENT_CONTROLS, ctrl_enter);
	vmcs_write(EXI_CONTROLS, ctrl_exit);

	/* 26.2.2 */
	vmcs_write(HOST_CR0, read_cr0());
	vmcs_write(HOST_CR3, read_cr3());
	vmcs_write(HOST_CR4, read_cr4());
	vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
	vmcs_write(HOST_SYSENTER_CS, KERNEL_CS);

	/* 26.2.3 */
	vmcs_write(HOST_SEL_CS, KERNEL_CS);
	vmcs_write(HOST_SEL_SS, KERNEL_DS);
	vmcs_write(HOST_SEL_DS, KERNEL_DS);
	vmcs_write(HOST_SEL_ES, KERNEL_DS);
	vmcs_write(HOST_SEL_FS, KERNEL_DS);
	vmcs_write(HOST_SEL_GS, KERNEL_DS);
	vmcs_write(HOST_SEL_TR, TSS_MAIN);
	vmcs_write(HOST_BASE_TR, tss_descr.base);
	vmcs_write(HOST_BASE_GDTR, gdt64_desc.base);
	vmcs_write(HOST_BASE_IDTR, idt_descr.base);
	vmcs_write(HOST_BASE_FS, 0);
	vmcs_write(HOST_BASE_GS, 0);

	/* Set other vmcs fields */
	vmcs_write(PF_ERROR_MASK, 0);
	vmcs_write(PF_ERROR_MATCH, 0);
	vmcs_write(VMCS_LINK_PTR, ~0ul);
	vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
	vmcs_write(HOST_RIP, (u64)(&vmx_return));
}

static void init_vmcs_guest(void)
{
	/* 26.3 CHECKING AND LOADING GUEST STATE */
	ulong guest_cr0, guest_cr4, guest_cr3;

	/* 26.3.1.1 */
	guest_cr0 = read_cr0();
	guest_cr4 = read_cr4();
	guest_cr3 = read_cr3();
	if (ctrl_enter & ENT_GUEST_64) {
		guest_cr0 |= X86_CR0_PG;
		guest_cr4 |= X86_CR4_PAE;
	}
	if ((ctrl_enter & ENT_GUEST_64) == 0)
		guest_cr4 &= (~X86_CR4_PCIDE);
	if (guest_cr0 & X86_CR0_PG)
		guest_cr0 |= X86_CR0_PE;
	vmcs_write(GUEST_CR0, guest_cr0);
	vmcs_write(GUEST_CR3, guest_cr3);
	vmcs_write(GUEST_CR4, guest_cr4);
	vmcs_write(GUEST_SYSENTER_CS, KERNEL_CS);
	vmcs_write(GUEST_SYSENTER_ESP,
		(u64)(guest_syscall_stack + PAGE_SIZE - 1));
	vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
	vmcs_write(GUEST_DR7, 0);
	vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));

	/* 26.3.1.2 */
	vmcs_write(GUEST_SEL_CS, KERNEL_CS);
	vmcs_write(GUEST_SEL_SS, KERNEL_DS);
	vmcs_write(GUEST_SEL_DS, KERNEL_DS);
	vmcs_write(GUEST_SEL_ES, KERNEL_DS);
	vmcs_write(GUEST_SEL_FS, KERNEL_DS);
	vmcs_write(GUEST_SEL_GS, KERNEL_DS);
	vmcs_write(GUEST_SEL_TR, TSS_MAIN);
	vmcs_write(GUEST_SEL_LDTR, 0);

	vmcs_write(GUEST_BASE_CS, 0);
	vmcs_write(GUEST_BASE_ES, 0);
	vmcs_write(GUEST_BASE_SS, 0);
	vmcs_write(GUEST_BASE_DS, 0);
	vmcs_write(GUEST_BASE_FS, 0);
	vmcs_write(GUEST_BASE_GS, 0);
	vmcs_write(GUEST_BASE_TR, tss_descr.base);
	vmcs_write(GUEST_BASE_LDTR, 0);

	vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
	vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
	vmcs_write(GUEST_LIMIT_TR, tss_descr.limit);

	vmcs_write(GUEST_AR_CS, 0xa09b);
	vmcs_write(GUEST_AR_DS, 0xc093);
	vmcs_write(GUEST_AR_ES, 0xc093);
	vmcs_write(GUEST_AR_FS, 0xc093);
	vmcs_write(GUEST_AR_GS, 0xc093);
	vmcs_write(GUEST_AR_SS, 0xc093);
	vmcs_write(GUEST_AR_LDTR, 0x82);
	vmcs_write(GUEST_AR_TR, 0x8b);

	/* 26.3.1.3 */
	vmcs_write(GUEST_BASE_GDTR, gdt64_desc.base);
	vmcs_write(GUEST_BASE_IDTR, idt_descr.base);
	vmcs_write(GUEST_LIMIT_GDTR, gdt64_desc.limit);
	vmcs_write(GUEST_LIMIT_IDTR, idt_descr.limit);

	/* 26.3.1.4 */
	vmcs_write(GUEST_RIP, (u64)(&guest_entry));
	vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1));
	vmcs_write(GUEST_RFLAGS, 0x2);

	/* 26.3.1.5 */
	vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE);
	vmcs_write(GUEST_INTR_STATE, 0);
}

static int init_vmcs(struct vmcs **vmcs)
{
	*vmcs = alloc_page();
	memset(*vmcs, 0, PAGE_SIZE);
	(*vmcs)->revision_id = basic.revision;
	/* vmclear first to init vmcs */
	if (vmcs_clear(*vmcs)) {
		printf("%s : vmcs_clear error\n", __func__);
		return 1;
	}

	if (make_vmcs_current(*vmcs)) {
		printf("%s : make_vmcs_current error\n", __func__);
		return 1;
	}

	/* All settings of the pin/exit/enter/cpu
	   control fields should be placed here */
	ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
	ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
	ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
	/* Disable IO-instruction vmexits for now */
	ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
	ctrl_cpu[1] = 0;

	ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
	ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
	ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
	ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;

	init_vmcs_ctrl();
	init_vmcs_host();
	init_vmcs_guest();
	return 0;
}
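
/*
 * The "(ctrl | rev.set) & rev.clr" adjustment above follows the
 * allowed-settings convention of the VMX capability MSRs: rev.set
 * holds the bits that must be 1, rev.clr the bits that may be 1.
 * A worked example with made-up values:
 *
 *	ctrl    = 0x16                     requested controls
 *	rev.set = 0x01                     bit 0 is mandatory
 *	rev.clr = 0x1f                     only bits 0-4 may be set
 *	result  = (0x16 | 0x01) & 0x1f = 0x17
 */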

static void init_vmx(void)
{
	ulong fix_cr0_set, fix_cr0_clr;
	ulong fix_cr4_set, fix_cr4_clr;

	vmxon_region = alloc_page();
	memset(vmxon_region, 0, PAGE_SIZE);

	fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0);
	fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1);
	fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0);
	fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
			: MSR_IA32_VMX_PINBASED_CTLS);
	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
			: MSR_IA32_VMX_EXIT_CTLS);
	ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
			: MSR_IA32_VMX_ENTRY_CTLS);
	ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
			: MSR_IA32_VMX_PROCBASED_CTLS);
	if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0)
		ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
	else
		ctrl_cpu_rev[1].val = 0;
	if ((ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID)) != 0)
		ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
	else
		ept_vpid.val = 0;

	write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
	write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);

	*vmxon_region = basic.revision;

	guest_stack = alloc_page();
	memset(guest_stack, 0, PAGE_SIZE);
	guest_syscall_stack = alloc_page();
	memset(guest_syscall_stack, 0, PAGE_SIZE);
}

static void do_vmxon_off(void *data)
{
	vmx_on();
	vmx_off();
}

static void do_write_feature_control(void *data)
{
	wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
}

static int test_vmx_feature_control(void)
{
	u64 ia32_feature_control;
	bool vmx_enabled;

	/* Bit 0 is the lock bit, bit 2 enables VMXON outside SMX. */
	ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
	vmx_enabled = ((ia32_feature_control & 0x5) == 0x5);
	if (vmx_enabled) {
		printf("VMX enabled and locked by BIOS\n");
		return 0;
	} else if (ia32_feature_control & 0x1) {
		printf("ERROR: VMX locked out by BIOS!?\n");
		return 1;
	}

	wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
	report("test vmxon with FEATURE_CONTROL cleared",
	       test_for_exception(GP_VECTOR, &do_vmxon_off, NULL));

	wrmsr(MSR_IA32_FEATURE_CONTROL, 0x4);
	report("test vmxon without FEATURE_CONTROL lock",
	       test_for_exception(GP_VECTOR, &do_vmxon_off, NULL));

	wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5);
	vmx_enabled = ((rdmsr(MSR_IA32_FEATURE_CONTROL) & 0x5) == 0x5);
	report("test enable VMX in FEATURE_CONTROL", vmx_enabled);

	report("test FEATURE_CONTROL lock bit",
	       test_for_exception(GP_VECTOR, &do_write_feature_control, NULL));

	return !vmx_enabled;
}

static int test_vmxon(void)
{
	int ret, ret1;
	u64 *tmp_region = vmxon_region;
	int width = cpuid_maxphyaddr();

	/* Unaligned page access */
	vmxon_region = (u64 *)((intptr_t)vmxon_region + 1);
	ret1 = vmx_on();
	report("test vmxon with unaligned vmxon region", ret1);
	if (!ret1) {
		ret = 1;
		goto out;
	}

	/* gpa bits beyond physical address width are set */
	vmxon_region = (u64 *)((intptr_t)tmp_region | ((u64)1 << (width+1)));
	ret1 = vmx_on();
	report("test vmxon with bits set beyond physical address width", ret1);
	if (!ret1) {
		ret = 1;
		goto out;
	}

	/* invalid revision identifier */
	vmxon_region = tmp_region;
	*vmxon_region = 0xba9da9;
	ret1 = vmx_on();
	report("test vmxon with invalid revision identifier", ret1);
	if (!ret1) {
		ret = 1;
		goto out;
	}

	/* and finally a valid region */
	*vmxon_region = basic.revision;
	ret = vmx_on();
	report("test vmxon with valid vmxon region", !ret);

out:
	return ret;
}

static void test_vmptrld(void)
{
	struct vmcs *vmcs, *tmp_root;
	int width = cpuid_maxphyaddr();

	vmcs = alloc_page();
	memset(vmcs, 0, PAGE_SIZE);
	vmcs->revision_id = basic.revision;

	/* Unaligned page access */
	tmp_root = (struct vmcs *)((intptr_t)vmcs + 1);
	report("test vmptrld with unaligned vmcs",
	       make_vmcs_current(tmp_root) == 1);

	/* gpa bits beyond physical address width are set */
	tmp_root = (struct vmcs *)((intptr_t)vmcs |
				   ((u64)1 << (width+1)));
	report("test vmptrld with vmcs address bits set beyond physical address width",
	       make_vmcs_current(tmp_root) == 1);

	/* Pass VMXON region */
	tmp_root = (struct vmcs *)vmxon_region;
	report("test vmptrld with vmxon region",
	       make_vmcs_current(tmp_root) == 1);

	report("test vmptrld with valid vmcs region", make_vmcs_current(vmcs) == 0);
}

static void test_vmptrst(void)
{
	int ret;
	struct vmcs *vmcs1, *vmcs2;

	vmcs1 = alloc_page();
	memset(vmcs1, 0, PAGE_SIZE);
	init_vmcs(&vmcs1);
	ret = vmcs_save(&vmcs2);
	report("test vmptrst", (!ret) && (vmcs1 == vmcs2));
}

struct vmx_ctl_msr {
	const char *name;
	u32 index, true_index;
	u32 default1;
} vmx_ctl_msr[] = {
	{ "MSR_IA32_VMX_PINBASED_CTLS", MSR_IA32_VMX_PINBASED_CTLS,
	  MSR_IA32_VMX_TRUE_PIN, 0x16 },
	{ "MSR_IA32_VMX_PROCBASED_CTLS", MSR_IA32_VMX_PROCBASED_CTLS,
	  MSR_IA32_VMX_TRUE_PROC, 0x401e172 },
	{ "MSR_IA32_VMX_PROCBASED_CTLS2", MSR_IA32_VMX_PROCBASED_CTLS2,
	  MSR_IA32_VMX_PROCBASED_CTLS2, 0 },
	{ "MSR_IA32_VMX_EXIT_CTLS", MSR_IA32_VMX_EXIT_CTLS,
	  MSR_IA32_VMX_TRUE_EXIT, 0x36dff },
	{ "MSR_IA32_VMX_ENTRY_CTLS", MSR_IA32_VMX_ENTRY_CTLS,
	  MSR_IA32_VMX_TRUE_ENTRY, 0x11ff },
};

static void test_vmx_caps(void)
{
	u64 val, default1, fixed0, fixed1;
	union vmx_ctrl_msr ctrl, true_ctrl;
	unsigned int n;
	bool ok;

	printf("\nTest suite: VMX capability reporting\n");

	report("MSR_IA32_VMX_BASIC",
	       (basic.revision & (1ul << 31)) == 0 &&
	       basic.size > 0 && basic.size <= 4096 &&
	       (basic.type == 0 || basic.type == 6) &&
	       basic.reserved1 == 0 && basic.reserved2 == 0);

	val = rdmsr(MSR_IA32_VMX_MISC);
	report("MSR_IA32_VMX_MISC",
	       (!(ctrl_cpu_rev[1].clr & CPU_URG) || val & (1ul << 5)) &&
	       ((val >> 16) & 0x1ff) <= 256 &&
	       (val & 0xc0007e00) == 0);

	for (n = 0; n < ARRAY_SIZE(vmx_ctl_msr); n++) {
		ctrl.val = rdmsr(vmx_ctl_msr[n].index);
		default1 = vmx_ctl_msr[n].default1;
		ok = (ctrl.set & default1) == default1;
		ok = ok && (ctrl.set & ~ctrl.clr) == 0;
		if (ok && basic.ctrl) {
			true_ctrl.val = rdmsr(vmx_ctl_msr[n].true_index);
			ok = ctrl.clr == true_ctrl.clr;
			ok = ok && ctrl.set == (true_ctrl.set | default1);
		}
		report(vmx_ctl_msr[n].name, ok);
	}

	fixed0 = rdmsr(MSR_IA32_VMX_CR0_FIXED0);
	fixed1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1);
	report("MSR_IA32_VMX_CR0_FIXED0/1",
	       ((fixed0 ^ fixed1) & ~fixed1) == 0);

	fixed0 = rdmsr(MSR_IA32_VMX_CR4_FIXED0);
	fixed1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
	report("MSR_IA32_VMX_CR4_FIXED0/1",
	       ((fixed0 ^ fixed1) & ~fixed1) == 0);

	val = rdmsr(MSR_IA32_VMX_VMCS_ENUM);
	report("MSR_IA32_VMX_VMCS_ENUM",
	       (val & 0x3e) >= 0x2a &&
	       (val & 0xfffffffffffffc01ULL) == 0);

	val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
	report("MSR_IA32_VMX_EPT_VPID_CAP",
	       (val & 0xfffff07ef9eebebeULL) == 0);
}

/* This function can only be called in guest */
static void __attribute__((__used__)) hypercall(u32 hypercall_no)
{
	u64 val = 0;

	val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT;
	hypercall_field = val;
	asm volatile("vmcall\n\t");
}

static bool is_hypercall()
{
	ulong reason, hyper_bit;

	reason = vmcs_read(EXI_REASON) & 0xff;
	hyper_bit = hypercall_field & HYPERCALL_BIT;
	if (reason == VMX_VMCALL && hyper_bit)
		return true;
	return false;
}

static int handle_hypercall()
{
	ulong hypercall_no;

	hypercall_no = hypercall_field & HYPERCALL_MASK;
	hypercall_field = 0;
	switch (hypercall_no) {
	case HYPERCALL_VMEXIT:
		return VMX_TEST_VMEXIT;
	default:
		printf("ERROR : Invalid hypercall number : %ld\n", hypercall_no);
	}
	return VMX_TEST_EXIT;
}
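
/*
 * Illustrative flow (a sketch, not framework code): a guest that wants
 * to end its test issues the exit hypercall, which handle_hypercall()
 * above turns into VMX_TEST_VMEXIT. Returning from guest_main has the
 * same effect, since guest_entry then calls hypercall(1), assumed here
 * to equal HYPERCALL_VMEXIT as defined in vmx.h:
 *
 *	static void my_guest_main(void)
 *	{
 *		report("ran in L2", true);
 *		hypercall(HYPERCALL_VMEXIT);	// never returns
 *	}
 */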

static int exit_handler()
{
	int ret;

	current->exits++;
	regs.rflags = vmcs_read(GUEST_RFLAGS);
	if (is_hypercall())
		ret = handle_hypercall();
	else
		ret = current->exit_handler();
	vmcs_write(GUEST_RFLAGS, regs.rflags);

	return ret;
}

/*
 * Called if vmlaunch or vmresume fails. The fields of @failure describe
 * what went wrong:
 *	early    - failure in the "VMX controls and host-state area"
 *		   checks (26.2), i.e. the instruction itself failed
 *	vmlaunch - whether the failing instruction was vmlaunch
 *	instr    - "vmlaunch" or "vmresume"
 *	flags    - host rflags at the point of failure
 */
static int
entry_failure_handler(struct vmentry_failure *failure)
{
	if (current->entry_failure_handler)
		return current->entry_failure_handler(failure);
	else
		return VMX_TEST_EXIT;
}

static int vmx_run()
{
	unsigned long host_rflags;

	while (1) {
		u32 ret;
		u32 fail = 0;
		bool entered;
		struct vmentry_failure failure;

		asm volatile (
			"mov %[HOST_RSP], %%rdi\n\t"
			"vmwrite %%rsp, %%rdi\n\t"
			LOAD_GPR_C
			"cmpl $0, %[launched]\n\t"
			"jne 1f\n\t"
			"vmlaunch\n\t"
			"jmp 2f\n\t"
			"1: "
			"vmresume\n\t"
			"2: "
			/* Reached only if vmlaunch/vmresume itself failed. */
			SAVE_GPR_C
			"pushf\n\t"
			"pop %%rdi\n\t"
			"mov %%rdi, %[host_rflags]\n\t"
			"movl $1, %[fail]\n\t"
			"jmp 3f\n\t"
			/* On a normal vmexit, HOST_RIP points here. */
			"vmx_return:\n\t"
			SAVE_GPR_C
			"3: \n\t"
			: [fail]"+m"(fail), [host_rflags]"=m"(host_rflags)
			: [launched]"m"(launched), [HOST_RSP]"i"(HOST_RSP)
			: "rdi", "memory", "cc"
		);

		entered = !fail && !(vmcs_read(EXI_REASON) & VMX_ENTRY_FAILURE);

		if (entered) {
			/*
			 * The VMCS isn't in "launched" state if there's been
			 * any entry failure (early or otherwise).
			 */
			launched = 1;
			ret = exit_handler();
		} else {
			failure.flags = host_rflags;
			failure.vmlaunch = !launched;
			failure.instr = launched ? "vmresume" : "vmlaunch";
			failure.early = fail;
			ret = entry_failure_handler(&failure);
		}

		switch (ret) {
		case VMX_TEST_RESUME:
			continue;
		case VMX_TEST_VMEXIT:
			return 0;
		case VMX_TEST_EXIT:
			break;
		default:
			printf("ERROR : Invalid %s_handler return val %d.\n",
			       entered ? "exit" : "entry_failure",
			       ret);
			break;
		}

		if (entered)
			print_vmexit_info();
		else
			print_vmentry_failure_info(&failure);
		abort();
	}
}

static int test_run(struct vmx_test *test)
{
	if (test->name == NULL)
		test->name = "(no name)";
	if (vmx_on()) {
		printf("%s : vmxon failed.\n", __func__);
		return 1;
	}
	init_vmcs(&(test->vmcs));
	/* Calling test->init directly is OK here; init_vmcs has already
	   done the vmcs init, vmclear and vmptrld. */
	if (test->init && test->init(test->vmcs) != VMX_TEST_START)
		goto out;
	test->exits = 0;
	current = test;
	regs = test->guest_regs;
	vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2);
	launched = 0;
	printf("\nTest suite: %s\n", test->name);
	vmx_run();
out:
	if (vmx_off()) {
		printf("%s : vmxoff failed.\n", __func__);
		return 1;
	}
	return 0;
}

extern struct vmx_test vmx_tests[];

int main(void)
{
	int i = 0;

	setup_vm();
	setup_idt();
	hypercall_field = 0;

	if (!(cpuid(1).c & (1 << 5))) {
		printf("WARNING: vmx not supported, add '-cpu host'\n");
		goto exit;
	}
	init_vmx();
	if (test_vmx_feature_control() != 0)
		goto exit;
	/* Set the basic test context, same as the "null" test */
	current = &vmx_tests[0];
	if (test_vmxon() != 0)
		goto exit;
	test_vmptrld();
	test_vmclear();
	test_vmptrst();
	init_vmcs(&vmcs_root);
	if (vmx_run()) {
		report("test vmlaunch", 0);
		goto exit;
	}
	test_vmxoff();
	test_vmx_caps();

	/* Entry 0 is the "null" context above; run the real tests. */
	while (vmx_tests[++i].name != NULL)
		if (test_run(&vmx_tests[i]))
			goto exit;

exit:
	return report_summary();
}