1 /*
2 * x86/vmx.c : Framework for testing nested virtualization
3 * This is a framework to test nested VMX for KVM, which
4 * started as a project of GSoC 2013. All test cases should
5 * be located in x86/vmx_tests.c and framework related
6 * functions should be in this file.
7 *
8 * How to write test cases?
9 * Add callbacks of test suite in variant "vmx_tests". You can
10 * write:
11 * 1. init function used for initializing test suite
12 * 2. main function for codes running in L2 guest,
13 * 3. exit_handler to handle vmexit of L2 to L1
14 * 4. syscall handler to handle L2 syscall vmexit
15 * 5. vmenter fail handler to handle direct failure of vmenter
16 * 6. guest_regs is loaded when vmenter and saved when
17 * vmexit, you can read and set it in exit_handler
 * If no special function is needed for a test suite, use the
 * corresponding basic_* functions as callbacks. More handlers
 * can be added to "vmx_tests", see details of "struct vmx_test"
 * and function test_run().
22 *
23 * Currently, vmx test framework only set up one VCPU and one
24 * concurrent guest test environment with same paging for L2 and
25 * L1. For usage of EPT, only 1:1 mapped paging is used from VFN
26 * to PFN.
27 *
28 * Author : Arthur Chunqi Li <yzt356@gmail.com>
29 */
30
31 #include "libcflat.h"
32 #include "processor.h"
33 #include "alloc_page.h"
34 #include "vm.h"
35 #include "vmalloc.h"
36 #include "desc.h"
37 #include "vmx.h"
38 #include "msr.h"
39 #include "smp.h"
40 #include "apic.h"
41
/* Region that test_vmclear() passes to VMCLEAR as the "vmxon region". */
u64 *bsp_vmxon_region;
/* VMCS that test_vmclear() treats as the valid VMCS region. */
struct vmcs *vmcs_root;
/* Incremented and written to the VPID field by init_vmcs_ctrl(). */
u32 vpid_cnt;
u64 guest_stack_top, guest_syscall_stack_top;
/*
 * Control values; init_vmcs_ctrl() writes ctrl_pin and ctrl_cpu[] into the
 * VMCS.  NOTE(review): ctrl_enter/ctrl_exit are presumably written by code
 * outside this chunk -- confirm.
 */
u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
/* General-purpose register values printed by print_vmexit_info(). */
struct regs regs;

/* Test whose callbacks (guest_main, syscall_handler, ...) are dispatched. */
struct vmx_test *current;

#define MAX_TEST_TEARDOWN_STEPS 10

/* One teardown callback together with the opaque argument stored for it. */
struct test_teardown_step {
	test_teardown_func func;
	void *data;
};

static int teardown_count;
static struct test_teardown_step teardown_steps[MAX_TEST_TEARDOWN_STEPS];

/* Guest entry point invoked by guest_main() when current->v2 is set. */
static test_guest_func v2_guest_main;

u64 hypercall_field;
bool launched;
static int matched;
static int guest_finished;
static int in_guest;

/*
 * VMX capability values.  basic_msr.revision is used as the VMCS revision id
 * and basic_msr.size as the VMCS copy size by the tests in this file;
 * ctrl_cpu_rev[] bounds the controls written by init_vmcs_ctrl().
 */
union vmx_basic_msr basic_msr;
union vmx_ctrl_msr ctrl_pin_rev;
union vmx_ctrl_msr ctrl_cpu_rev[2];
union vmx_ctrl_msr ctrl_exit_rev;
union vmx_ctrl_msr ctrl_enter_rev;
union vmx_ept_vpid ept_vpid;

extern struct descriptor_table_ptr gdt_descr;
extern struct descriptor_table_ptr idt_descr;
extern void *vmx_return;
/* entry_sysenter and guest_entry are labels defined by asm() in this file. */
extern void *entry_sysenter;
extern void *guest_entry;

/* Test stage counter; accessed through vmx_{set,get,inc}_test_stage(). */
static volatile u32 stage;

static jmp_buf abort_target;
85
/*
 * Describes one VMCS field for the VMWRITE/VMREAD tests.
 * @mask:     bits of the field that are compared after a write/read round trip
 * @encoding: field encoding passed to vmcs_write()/vmcs_read_safe()
 */
struct vmcs_field {
	u64 mask;
	u64 encoding;
};

/* Mask with the low @_bits bits set. */
#define MASK(_bits) GENMASK_ULL((_bits) - 1, 0)
/* Mask covering every bit of an unsigned long. */
#define MASK_NATURAL MASK(sizeof(unsigned long) * 8)
93
/*
 * Table of VMCS fields exercised by set_all_vmcs_fields() and
 * check_all_vmcs_fields().  Each entry pairs the mask of bits that are
 * compared with the field encoding.  Entries are grouped by mask width
 * (16-bit, 64-bit, 32-bit, natural width).
 */
static struct vmcs_field vmcs_fields[] = {
	/* 16-bit fields */
	{ MASK(16), VPID },
	{ MASK(16), PINV },
	{ MASK(16), EPTP_IDX },

	{ MASK(16), GUEST_SEL_ES },
	{ MASK(16), GUEST_SEL_CS },
	{ MASK(16), GUEST_SEL_SS },
	{ MASK(16), GUEST_SEL_DS },
	{ MASK(16), GUEST_SEL_FS },
	{ MASK(16), GUEST_SEL_GS },
	{ MASK(16), GUEST_SEL_LDTR },
	{ MASK(16), GUEST_SEL_TR },
	{ MASK(16), GUEST_INT_STATUS },

	{ MASK(16), HOST_SEL_ES },
	{ MASK(16), HOST_SEL_CS },
	{ MASK(16), HOST_SEL_SS },
	{ MASK(16), HOST_SEL_DS },
	{ MASK(16), HOST_SEL_FS },
	{ MASK(16), HOST_SEL_GS },
	{ MASK(16), HOST_SEL_TR },

	/* 64-bit fields */
	{ MASK(64), IO_BITMAP_A },
	{ MASK(64), IO_BITMAP_B },
	{ MASK(64), MSR_BITMAP },
	{ MASK(64), EXIT_MSR_ST_ADDR },
	{ MASK(64), EXIT_MSR_LD_ADDR },
	{ MASK(64), ENTER_MSR_LD_ADDR },
	{ MASK(64), VMCS_EXEC_PTR },
	{ MASK(64), TSC_OFFSET },
	{ MASK(64), APIC_VIRT_ADDR },
	{ MASK(64), APIC_ACCS_ADDR },
	{ MASK(64), EPTP },

	{ MASK(64), INFO_PHYS_ADDR },

	{ MASK(64), VMCS_LINK_PTR },
	{ MASK(64), GUEST_DEBUGCTL },
	{ MASK(64), GUEST_EFER },
	{ MASK(64), GUEST_PAT },
	{ MASK(64), GUEST_PERF_GLOBAL_CTRL },
	{ MASK(64), GUEST_PDPTE },

	{ MASK(64), HOST_PAT },
	{ MASK(64), HOST_EFER },
	{ MASK(64), HOST_PERF_GLOBAL_CTRL },

	/* 32-bit fields */
	{ MASK(32), PIN_CONTROLS },
	{ MASK(32), CPU_EXEC_CTRL0 },
	{ MASK(32), EXC_BITMAP },
	{ MASK(32), PF_ERROR_MASK },
	{ MASK(32), PF_ERROR_MATCH },
	{ MASK(32), CR3_TARGET_COUNT },
	{ MASK(32), EXI_CONTROLS },
	{ MASK(32), EXI_MSR_ST_CNT },
	{ MASK(32), EXI_MSR_LD_CNT },
	{ MASK(32), ENT_CONTROLS },
	{ MASK(32), ENT_MSR_LD_CNT },
	{ MASK(32), ENT_INTR_INFO },
	{ MASK(32), ENT_INTR_ERROR },
	{ MASK(32), ENT_INST_LEN },
	{ MASK(32), TPR_THRESHOLD },
	{ MASK(32), CPU_EXEC_CTRL1 },

	/* VMX_INST_ERROR is skipped as volatile by check_vmcs_field(). */
	{ MASK(32), VMX_INST_ERROR },
	{ MASK(32), EXI_REASON },
	{ MASK(32), EXI_INTR_INFO },
	{ MASK(32), EXI_INTR_ERROR },
	{ MASK(32), IDT_VECT_INFO },
	{ MASK(32), IDT_VECT_ERROR },
	{ MASK(32), EXI_INST_LEN },
	{ MASK(32), EXI_INST_INFO },

	{ MASK(32), GUEST_LIMIT_ES },
	{ MASK(32), GUEST_LIMIT_CS },
	{ MASK(32), GUEST_LIMIT_SS },
	{ MASK(32), GUEST_LIMIT_DS },
	{ MASK(32), GUEST_LIMIT_FS },
	{ MASK(32), GUEST_LIMIT_GS },
	{ MASK(32), GUEST_LIMIT_LDTR },
	{ MASK(32), GUEST_LIMIT_TR },
	{ MASK(32), GUEST_LIMIT_GDTR },
	{ MASK(32), GUEST_LIMIT_IDTR },
	/*
	 * Access-rights fields use narrower masks than MASK(32).
	 * NOTE(review): presumably the excluded bits are reserved or not
	 * preserved by hardware -- confirm against the SDM.
	 */
	{ 0x1d0ff, GUEST_AR_ES },
	{ 0x1f0ff, GUEST_AR_CS },
	{ 0x1d0ff, GUEST_AR_SS },
	{ 0x1d0ff, GUEST_AR_DS },
	{ 0x1d0ff, GUEST_AR_FS },
	{ 0x1d0ff, GUEST_AR_GS },
	{ 0x1d0ff, GUEST_AR_LDTR },
	{ 0x1d0ff, GUEST_AR_TR },
	{ MASK(32), GUEST_INTR_STATE },
	{ MASK(32), GUEST_ACTV_STATE },
	{ MASK(32), GUEST_SMBASE },
	{ MASK(32), GUEST_SYSENTER_CS },
	{ MASK(32), PREEMPT_TIMER_VALUE },

	{ MASK(32), HOST_SYSENTER_CS },

	/* Natural-width fields */
	{ MASK_NATURAL, CR0_MASK },
	{ MASK_NATURAL, CR4_MASK },
	{ MASK_NATURAL, CR0_READ_SHADOW },
	{ MASK_NATURAL, CR4_READ_SHADOW },
	{ MASK_NATURAL, CR3_TARGET_0 },
	{ MASK_NATURAL, CR3_TARGET_1 },
	{ MASK_NATURAL, CR3_TARGET_2 },
	{ MASK_NATURAL, CR3_TARGET_3 },

	{ MASK_NATURAL, EXI_QUALIFICATION },
	{ MASK_NATURAL, IO_RCX },
	{ MASK_NATURAL, IO_RSI },
	{ MASK_NATURAL, IO_RDI },
	{ MASK_NATURAL, IO_RIP },
	{ MASK_NATURAL, GUEST_LINEAR_ADDRESS },

	{ MASK_NATURAL, GUEST_CR0 },
	{ MASK_NATURAL, GUEST_CR3 },
	{ MASK_NATURAL, GUEST_CR4 },
	{ MASK_NATURAL, GUEST_BASE_ES },
	{ MASK_NATURAL, GUEST_BASE_CS },
	{ MASK_NATURAL, GUEST_BASE_SS },
	{ MASK_NATURAL, GUEST_BASE_DS },
	{ MASK_NATURAL, GUEST_BASE_FS },
	{ MASK_NATURAL, GUEST_BASE_GS },
	{ MASK_NATURAL, GUEST_BASE_LDTR },
	{ MASK_NATURAL, GUEST_BASE_TR },
	{ MASK_NATURAL, GUEST_BASE_GDTR },
	{ MASK_NATURAL, GUEST_BASE_IDTR },
	{ MASK_NATURAL, GUEST_DR7 },
	{ MASK_NATURAL, GUEST_RSP },
	{ MASK_NATURAL, GUEST_RIP },
	{ MASK_NATURAL, GUEST_RFLAGS },
	{ MASK_NATURAL, GUEST_PENDING_DEBUG },
	{ MASK_NATURAL, GUEST_SYSENTER_ESP },
	{ MASK_NATURAL, GUEST_SYSENTER_EIP },

	{ MASK_NATURAL, HOST_CR0 },
	{ MASK_NATURAL, HOST_CR3 },
	{ MASK_NATURAL, HOST_CR4 },
	{ MASK_NATURAL, HOST_BASE_FS },
	{ MASK_NATURAL, HOST_BASE_GS },
	{ MASK_NATURAL, HOST_BASE_TR },
	{ MASK_NATURAL, HOST_BASE_GDTR },
	{ MASK_NATURAL, HOST_BASE_IDTR },
	{ MASK_NATURAL, HOST_SYSENTER_ESP },
	{ MASK_NATURAL, HOST_SYSENTER_EIP },
	{ MASK_NATURAL, HOST_RSP },
	{ MASK_NATURAL, HOST_RIP },
};
244
/*
 * Values of the 2-bit "type" component of a VMCS field encoding, as
 * extracted by vmcs_field_type().  VMCS_FIELD_TYPES is the number of types.
 */
enum vmcs_field_type {
	VMCS_FIELD_TYPE_CONTROL = 0,
	VMCS_FIELD_TYPE_READ_ONLY_DATA = 1,
	VMCS_FIELD_TYPE_GUEST = 2,
	VMCS_FIELD_TYPE_HOST = 3,
	VMCS_FIELD_TYPES,
};
252
vmcs_field_type(struct vmcs_field * f)253 static inline int vmcs_field_type(struct vmcs_field *f)
254 {
255 return (f->encoding >> VMCS_FIELD_TYPE_SHIFT) & 0x3;
256 }
257
vmcs_field_readonly(struct vmcs_field * f)258 static int vmcs_field_readonly(struct vmcs_field *f)
259 {
260 u64 ia32_vmx_misc;
261
262 ia32_vmx_misc = rdmsr(MSR_IA32_VMX_MISC);
263 return !(ia32_vmx_misc & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS) &&
264 (vmcs_field_type(f) == VMCS_FIELD_TYPE_READ_ONLY_DATA);
265 }
266
vmcs_field_value(struct vmcs_field * f,u8 cookie)267 static inline u64 vmcs_field_value(struct vmcs_field *f, u8 cookie)
268 {
269 u64 value;
270
271 /* Incorporate the cookie and the field encoding into the value. */
272 value = cookie;
273 value |= (f->encoding << 8);
274 value |= 0xdeadbeefull << 32;
275
276 return value & f->mask;
277 }
278
/* VMWRITE the cookie-derived test pattern into field @f. */
static void set_vmcs_field(struct vmcs_field *f, u8 cookie)
{
	u64 pattern = vmcs_field_value(f, cookie);

	vmcs_write(f->encoding, pattern);
}
283
/*
 * VMREAD field @f and compare it (under the field's mask) against the
 * pattern derived from @cookie.
 *
 * Returns true when the field matches or is skipped (volatile field, field
 * not recognized by the CPU, or read-only field); false on a mismatch.
 */
static bool check_vmcs_field(struct vmcs_field *f, u8 cookie)
{
	u64 want, got;
	int flags;

	if (f->encoding == VMX_INST_ERROR) {
		printf("Skipping volatile field %lx\n", f->encoding);
		return true;
	}

	flags = vmcs_read_safe(f->encoding, &got);
	assert(!(flags & X86_EFLAGS_CF));

	/* Skip VMCS fields that aren't recognized by the CPU */
	if (flags & X86_EFLAGS_ZF)
		return true;

	if (vmcs_field_readonly(f)) {
		printf("Skipping read-only field %lx\n", f->encoding);
		return true;
	}

	want = vmcs_field_value(f, cookie);
	got &= f->mask;

	if (want != got) {
		printf("FAIL: VMWRITE/VMREAD %lx (expected: %lx, actual: %lx)\n",
		       f->encoding, (unsigned long) want, (unsigned long) got);
		return false;
	}

	return true;
}
317
/* Write the @cookie-derived pattern into every field of vmcs_fields[]. */
static void set_all_vmcs_fields(u8 cookie)
{
	struct vmcs_field *f;
	struct vmcs_field *end = vmcs_fields + ARRAY_SIZE(vmcs_fields);

	for (f = vmcs_fields; f < end; f++)
		set_vmcs_field(f, cookie);
}
325
/*
 * Check every field of vmcs_fields[] against the @cookie-derived pattern.
 * All fields are checked even after a failure so each mismatch is printed.
 */
static bool check_all_vmcs_fields(u8 cookie)
{
	struct vmcs_field *f;
	struct vmcs_field *end = vmcs_fields + ARRAY_SIZE(vmcs_fields);
	bool all_ok = true;

	for (f = vmcs_fields; f < end; f++) {
		/* Call first so the check is never short-circuited. */
		all_ok = check_vmcs_field(f, cookie) && all_ok;
	}

	return all_ok;
}
338
find_vmcs_max_index(void)339 static u32 find_vmcs_max_index(void)
340 {
341 u32 idx, width, type, enc;
342 u64 actual;
343 int ret;
344
345 /* scan backwards and stop when found */
346 for (idx = (1 << 9) - 1; idx >= 0; idx--) {
347
348 /* try all combinations of width and type */
349 for (type = 0; type < (1 << 2); type++) {
350 for (width = 0; width < (1 << 2) ; width++) {
351 enc = (idx << VMCS_FIELD_INDEX_SHIFT) |
352 (type << VMCS_FIELD_TYPE_SHIFT) |
353 (width << VMCS_FIELD_WIDTH_SHIFT);
354
355 ret = vmcs_read_safe(enc, &actual);
356 assert(!(ret & X86_EFLAGS_CF));
357 if (!(ret & X86_EFLAGS_ZF))
358 return idx;
359 }
360 }
361 }
362 /* some VMCS fields should exist */
363 assert(0);
364 return 0;
365 }
366
test_vmwrite_vmread(void)367 static void test_vmwrite_vmread(void)
368 {
369 struct vmcs *vmcs = alloc_page();
370 u32 vmcs_enum_max, max_index = 0;
371
372 vmcs->hdr.revision_id = basic_msr.revision;
373 assert(!vmcs_clear(vmcs));
374 assert(!make_vmcs_current(vmcs));
375
376 set_all_vmcs_fields(0x42);
377 report(check_all_vmcs_fields(0x42), "VMWRITE/VMREAD");
378
379 vmcs_enum_max = (rdmsr(MSR_IA32_VMX_VMCS_ENUM) & VMCS_FIELD_INDEX_MASK)
380 >> VMCS_FIELD_INDEX_SHIFT;
381 max_index = find_vmcs_max_index();
382 report(vmcs_enum_max == max_index,
383 "VMX_VMCS_ENUM.MAX_INDEX expected: %x, actual: %x",
384 max_index, vmcs_enum_max);
385
386 assert(!vmcs_clear(vmcs));
387 free_page(vmcs);
388 }
389
/*
 * Execute VMREAD/VMWRITE with a not-PRESENT memory operand, and verify
 * a #PF occurred and RFLAGS were not modified.
 *
 * @vmread:   true to test VMREAD, false to test VMWRITE
 * @val:      memory operand; the caller passes an unmapped virtual page
 * @sentinel: flag pattern loaded with SAHF before the instruction
 *
 * SAHF and LAHF transfer SF/ZF/AF/PF/CF through AH (bits 15:8 of RAX), not
 * AL, so the sentinel must be placed in, and read back from, bits 15:8.
 * LAHF also forces bit 1 to 1 and bits 3 and 5 to 0, hence only the five
 * real flag bits are compared.
 */
static void __test_vmread_vmwrite_pf(bool vmread, u64 *val, u8 sentinel)
{
	/* SF (7), ZF (6), AF (4), PF (2) and CF (0) as laid out in AH. */
	const u8 lahf_mask = 0xd5;
	unsigned long flags = (unsigned long)sentinel << 8;
	unsigned int vector;
	u8 expected, observed;

	if (vmread)
		asm volatile ("sahf\n\t"
			      ASM_TRY("1f")
			      "vmread %[enc], %[val]\n\t"
			      "1: lahf"
			      : [val] "=m" (*val),
				[flags] "+a" (flags)
			      : [enc] "r" ((u64)GUEST_SEL_SS)
			      : "cc");
	else
		/* VMWRITE reads its memory operand, so it is an input. */
		asm volatile ("sahf\n\t"
			      ASM_TRY("1f")
			      "vmwrite %[val], %[enc]\n\t"
			      "1: lahf"
			      : [flags] "+a" (flags)
			      : [val] "m" (*val),
				[enc] "r" ((u64)GUEST_SEL_SS)
			      : "cc");

	vector = exception_vector();
	report(vector == PF_VECTOR,
	       "Expected #PF on %s, got exception '0x%x'",
	       vmread ? "VMREAD" : "VMWRITE", vector);

	expected = sentinel & lahf_mask;
	observed = (u8)(flags >> 8) & lahf_mask;
	report(observed == expected,
	       "Expected RFLAGS 0x%x, got 0x%x", expected, observed);
}
426
test_vmread_vmwrite_pf(bool vmread)427 static void test_vmread_vmwrite_pf(bool vmread)
428 {
429 struct vmcs *vmcs = alloc_page();
430 void *vpage = alloc_vpage();
431
432 memset(vmcs, 0, PAGE_SIZE);
433 vmcs->hdr.revision_id = basic_msr.revision;
434 assert(!vmcs_clear(vmcs));
435 assert(!make_vmcs_current(vmcs));
436
437 /*
438 * Test with two values to candy-stripe the 5 flags stored/loaded by
439 * SAHF/LAHF.
440 */
441 __test_vmread_vmwrite_pf(vmread, vpage, 0x91);
442 __test_vmread_vmwrite_pf(vmread, vpage, 0x45);
443 }
444
/* Verify that a faulting VMREAD raises #PF without modifying RFLAGS. */
static void test_vmread_flags_touch(void)
{
	const bool use_vmread = true;

	test_vmread_vmwrite_pf(use_vmread);
}
449
/* Verify that a faulting VMWRITE raises #PF without modifying RFLAGS. */
static void test_vmwrite_flags_touch(void)
{
	const bool use_vmread = false;

	test_vmread_vmwrite_pf(use_vmread);
}
454
test_vmcs_high(void)455 static void test_vmcs_high(void)
456 {
457 struct vmcs *vmcs = alloc_page();
458
459 vmcs->hdr.revision_id = basic_msr.revision;
460 assert(!vmcs_clear(vmcs));
461 assert(!make_vmcs_current(vmcs));
462
463 vmcs_write(TSC_OFFSET, 0x0123456789ABCDEFull);
464 report(vmcs_read(TSC_OFFSET) == 0x0123456789ABCDEFull,
465 "VMREAD TSC_OFFSET after VMWRITE TSC_OFFSET");
466 report(vmcs_read(TSC_OFFSET_HI) == 0x01234567ull,
467 "VMREAD TSC_OFFSET_HI after VMWRITE TSC_OFFSET");
468 vmcs_write(TSC_OFFSET_HI, 0x76543210ul);
469 report(vmcs_read(TSC_OFFSET_HI) == 0x76543210ul,
470 "VMREAD TSC_OFFSET_HI after VMWRITE TSC_OFFSET_HI");
471 report(vmcs_read(TSC_OFFSET) == 0x7654321089ABCDEFull,
472 "VMREAD TSC_OFFSET after VMWRITE TSC_OFFSET_HI");
473
474 assert(!vmcs_clear(vmcs));
475 free_page(vmcs);
476 }
477
test_vmcs_lifecycle(void)478 static void test_vmcs_lifecycle(void)
479 {
480 struct vmcs *vmcs[2] = {};
481 int i;
482
483 for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
484 vmcs[i] = alloc_page();
485 vmcs[i]->hdr.revision_id = basic_msr.revision;
486 }
487
488 #define VMPTRLD(_i) do { \
489 assert(_i < ARRAY_SIZE(vmcs)); \
490 assert(!make_vmcs_current(vmcs[_i])); \
491 printf("VMPTRLD VMCS%d\n", (_i)); \
492 } while (0)
493
494 #define VMCLEAR(_i) do { \
495 assert(_i < ARRAY_SIZE(vmcs)); \
496 assert(!vmcs_clear(vmcs[_i])); \
497 printf("VMCLEAR VMCS%d\n", (_i)); \
498 } while (0)
499
500 VMCLEAR(0);
501 VMPTRLD(0);
502 set_all_vmcs_fields(0);
503 report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]");
504
505 VMCLEAR(0);
506 VMPTRLD(0);
507 report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]");
508
509 VMCLEAR(1);
510 report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0]");
511
512 VMPTRLD(1);
513 set_all_vmcs_fields(1);
514 report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VCMS1]");
515
516 VMPTRLD(0);
517 report(check_all_vmcs_fields(0), "current:VMCS0 active:[VMCS0,VCMS1]");
518 VMPTRLD(1);
519 report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VCMS1]");
520 VMPTRLD(1);
521 report(check_all_vmcs_fields(1), "current:VMCS1 active:[VMCS0,VCMS1]");
522
523 VMCLEAR(0);
524 report(check_all_vmcs_fields(1), "current:VMCS1 active:[VCMS1]");
525
526 /* VMPTRLD should not erase VMWRITEs to the current VMCS */
527 set_all_vmcs_fields(2);
528 VMPTRLD(1);
529 report(check_all_vmcs_fields(2), "current:VMCS1 active:[VCMS1]");
530
531 for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
532 VMCLEAR(i);
533 free_page(vmcs[i]);
534 }
535
536 #undef VMPTRLD
537 #undef VMCLEAR
538 }
539
/* Set the test stage to @s, fenced by compiler barriers on both sides. */
void vmx_set_test_stage(u32 s)
{
	const u32 new_stage = s;

	barrier();
	stage = new_stage;
	barrier();
}
546
vmx_get_test_stage(void)547 u32 vmx_get_test_stage(void)
548 {
549 u32 s;
550
551 barrier();
552 s = stage;
553 barrier();
554 return s;
555 }
556
vmx_inc_test_stage(void)557 void vmx_inc_test_stage(void)
558 {
559 barrier();
560 stage++;
561 barrier();
562 }
563
/*
 * entry_sysenter: assembly entry point that saves the general-purpose
 * registers (SAVE_GPR), passes the low 4 bits of RAX in RDI to
 * syscall_handler(), restores the registers (LOAD_GPR) and executes
 * VMRESUME.
 */
asm(
	".align 4, 0x90\n\t"
	".globl entry_sysenter\n\t"
	"entry_sysenter:\n\t"
	SAVE_GPR
	/* Only the low 4 bits of RAX are passed on as the syscall number. */
	" and $0xf, %rax\n\t"
	" mov %rax, %rdi\n\t"
	" call syscall_handler\n\t"
	LOAD_GPR
	" vmresume\n\t"
);
576
/*
 * Called from the entry_sysenter assembly stub: forward @syscall_no to the
 * current test's syscall_handler callback, if it has one.
 */
static void __attribute__((__used__)) syscall_handler(u64 syscall_no)
{
	if (!current->syscall_handler)
		return;

	current->syscall_handler(syscall_no);
}
582
/*
 * Names of basic VM-exit reasons, indexed by exit reason number.  Reasons
 * without an entry are NULL; exit_reason_description() reports those as
 * "(unused)".
 */
static const char * const exit_reason_descriptions[] = {
	[VMX_EXC_NMI] = "VMX_EXC_NMI",
	[VMX_EXTINT] = "VMX_EXTINT",
	[VMX_TRIPLE_FAULT] = "VMX_TRIPLE_FAULT",
	[VMX_INIT] = "VMX_INIT",
	[VMX_SIPI] = "VMX_SIPI",
	[VMX_SMI_IO] = "VMX_SMI_IO",
	[VMX_SMI_OTHER] = "VMX_SMI_OTHER",
	[VMX_INTR_WINDOW] = "VMX_INTR_WINDOW",
	[VMX_NMI_WINDOW] = "VMX_NMI_WINDOW",
	[VMX_TASK_SWITCH] = "VMX_TASK_SWITCH",
	[VMX_CPUID] = "VMX_CPUID",
	[VMX_GETSEC] = "VMX_GETSEC",
	[VMX_HLT] = "VMX_HLT",
	[VMX_INVD] = "VMX_INVD",
	[VMX_INVLPG] = "VMX_INVLPG",
	[VMX_RDPMC] = "VMX_RDPMC",
	[VMX_RDTSC] = "VMX_RDTSC",
	[VMX_RSM] = "VMX_RSM",
	[VMX_VMCALL] = "VMX_VMCALL",
	[VMX_VMCLEAR] = "VMX_VMCLEAR",
	[VMX_VMLAUNCH] = "VMX_VMLAUNCH",
	[VMX_VMPTRLD] = "VMX_VMPTRLD",
	[VMX_VMPTRST] = "VMX_VMPTRST",
	[VMX_VMREAD] = "VMX_VMREAD",
	[VMX_VMRESUME] = "VMX_VMRESUME",
	[VMX_VMWRITE] = "VMX_VMWRITE",
	[VMX_VMXOFF] = "VMX_VMXOFF",
	[VMX_VMXON] = "VMX_VMXON",
	[VMX_CR] = "VMX_CR",
	[VMX_DR] = "VMX_DR",
	[VMX_IO] = "VMX_IO",
	[VMX_RDMSR] = "VMX_RDMSR",
	[VMX_WRMSR] = "VMX_WRMSR",
	[VMX_FAIL_STATE] = "VMX_FAIL_STATE",
	[VMX_FAIL_MSR] = "VMX_FAIL_MSR",
	[VMX_MWAIT] = "VMX_MWAIT",
	[VMX_MTF] = "VMX_MTF",
	[VMX_MONITOR] = "VMX_MONITOR",
	[VMX_PAUSE] = "VMX_PAUSE",
	[VMX_FAIL_MCHECK] = "VMX_FAIL_MCHECK",
	[VMX_TPR_THRESHOLD] = "VMX_TPR_THRESHOLD",
	[VMX_APIC_ACCESS] = "VMX_APIC_ACCESS",
	[VMX_EOI_INDUCED] = "VMX_EOI_INDUCED",
	[VMX_GDTR_IDTR] = "VMX_GDTR_IDTR",
	[VMX_LDTR_TR] = "VMX_LDTR_TR",
	[VMX_EPT_VIOLATION] = "VMX_EPT_VIOLATION",
	[VMX_EPT_MISCONFIG] = "VMX_EPT_MISCONFIG",
	[VMX_INVEPT] = "VMX_INVEPT",
	[VMX_PREEMPT] = "VMX_PREEMPT",
	[VMX_INVVPID] = "VMX_INVVPID",
	[VMX_WBINVD] = "VMX_WBINVD",
	[VMX_XSETBV] = "VMX_XSETBV",
	[VMX_APIC_WRITE] = "VMX_APIC_WRITE",
	[VMX_RDRAND] = "VMX_RDRAND",
	[VMX_INVPCID] = "VMX_INVPCID",
	[VMX_VMFUNC] = "VMX_VMFUNC",
	[VMX_RDSEED] = "VMX_RDSEED",
	[VMX_PML_FULL] = "VMX_PML_FULL",
	[VMX_XSAVES] = "VMX_XSAVES",
	[VMX_XRSTORS] = "VMX_XRSTORS",
};
645
/*
 * Map a basic exit reason number to its name.
 *
 * Returns "(unknown)" when @reason lies beyond the description table and
 * "(unused)" when the table has no entry for it.
 */
const char *exit_reason_description(u64 reason)
{
	const char *desc;

	if (reason >= ARRAY_SIZE(exit_reason_descriptions))
		return "(unknown)";

	desc = exit_reason_descriptions[reason];
	if (!desc)
		return "(unused)";

	return desc;
}
652
print_vmexit_info(union exit_reason exit_reason)653 void print_vmexit_info(union exit_reason exit_reason)
654 {
655 u64 guest_rip, guest_rsp;
656 ulong exit_qual = vmcs_read(EXI_QUALIFICATION);
657 guest_rip = vmcs_read(GUEST_RIP);
658 guest_rsp = vmcs_read(GUEST_RSP);
659 printf("VMEXIT info:\n");
660 printf("\tvmexit reason = %u\n", exit_reason.basic);
661 printf("\tfailed vmentry = %u\n", !!exit_reason.failed_vmentry);
662 printf("\texit qualification = %#lx\n", exit_qual);
663 printf("\tguest_rip = %#lx\n", guest_rip);
664 printf("\tRAX=%#lx RBX=%#lx RCX=%#lx RDX=%#lx\n",
665 regs.rax, regs.rbx, regs.rcx, regs.rdx);
666 printf("\tRSP=%#lx RBP=%#lx RSI=%#lx RDI=%#lx\n",
667 guest_rsp, regs.rbp, regs.rsi, regs.rdi);
668 printf("\tR8 =%#lx R9 =%#lx R10=%#lx R11=%#lx\n",
669 regs.r8, regs.r9, regs.r10, regs.r11);
670 printf("\tR12=%#lx R13=%#lx R14=%#lx R15=%#lx\n",
671 regs.r12, regs.r13, regs.r14, regs.r15);
672 }
673
print_vmentry_failure_info(struct vmentry_result * result)674 void print_vmentry_failure_info(struct vmentry_result *result)
675 {
676 if (result->entered)
677 return;
678
679 if (result->vm_fail) {
680 printf("VM-Fail on %s: ", result->instr);
681 switch (result->flags & VMX_ENTRY_FLAGS) {
682 case X86_EFLAGS_CF:
683 printf("current-VMCS pointer is not valid.\n");
684 break;
685 case X86_EFLAGS_ZF:
686 printf("error number is %ld. See Intel 30.4.\n",
687 vmcs_read(VMX_INST_ERROR));
688 break;
689 default:
690 printf("unexpected flags %lx!\n", result->flags);
691 }
692 } else {
693 u64 qual = vmcs_read(EXI_QUALIFICATION);
694
695 printf("VM-Exit failure on %s (reason=%#x, qual=%#lx): ",
696 result->instr, result->exit_reason.full, qual);
697
698 switch (result->exit_reason.basic) {
699 case VMX_FAIL_STATE:
700 printf("invalid guest state\n");
701 break;
702 case VMX_FAIL_MSR:
703 printf("MSR loading\n");
704 break;
705 case VMX_FAIL_MCHECK:
706 printf("machine-check event\n");
707 break;
708 default:
709 printf("unexpected basic exit reason %u\n",
710 result->exit_reason.basic);
711 }
712
713 if (!result->exit_reason.failed_vmentry)
714 printf("\tVMX_ENTRY_FAILURE BIT NOT SET!\n");
715
716 if (result->exit_reason.full & 0x7fff0000)
717 printf("\tRESERVED BITS SET!\n");
718 }
719 }
720
721 /*
722 * VMCLEAR should ensures all VMCS state is flushed to the VMCS
723 * region in memory.
724 */
test_vmclear_flushing(void)725 static void test_vmclear_flushing(void)
726 {
727 struct vmcs *vmcs[3] = {};
728 int i;
729
730 for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
731 vmcs[i] = alloc_page();
732 }
733
734 vmcs[0]->hdr.revision_id = basic_msr.revision;
735 assert(!vmcs_clear(vmcs[0]));
736 assert(!make_vmcs_current(vmcs[0]));
737 set_all_vmcs_fields(0x86);
738
739 assert(!vmcs_clear(vmcs[0]));
740 memcpy(vmcs[1], vmcs[0], basic_msr.size);
741 assert(!make_vmcs_current(vmcs[1]));
742 report(check_all_vmcs_fields(0x86),
743 "test vmclear flush (current VMCS)");
744
745 set_all_vmcs_fields(0x87);
746 assert(!make_vmcs_current(vmcs[0]));
747 assert(!vmcs_clear(vmcs[1]));
748 memcpy(vmcs[2], vmcs[1], basic_msr.size);
749 assert(!make_vmcs_current(vmcs[2]));
750 report(check_all_vmcs_fields(0x87),
751 "test vmclear flush (!current VMCS)");
752
753 for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
754 assert(!vmcs_clear(vmcs[i]));
755 free_page(vmcs[i]);
756 }
757 }
758
test_vmclear(void)759 static void test_vmclear(void)
760 {
761 struct vmcs *tmp_root;
762 int width = cpuid_maxphyaddr();
763
764 /*
765 * Note- The tests below do not necessarily have a
766 * valid VMCS, but that's ok since the invalid vmcs
767 * is only used for a specific test and is discarded
768 * without touching its contents
769 */
770
771 /* Unaligned page access */
772 tmp_root = (struct vmcs *)((intptr_t)vmcs_root + 1);
773 report(vmcs_clear(tmp_root) == 1, "test vmclear with unaligned vmcs");
774
775 /* gpa bits beyond physical address width are set*/
776 tmp_root = (struct vmcs *)((intptr_t)vmcs_root |
777 ((u64)1 << (width+1)));
778 report(vmcs_clear(tmp_root) == 1,
779 "test vmclear with vmcs address bits set beyond physical address width");
780
781 /* Pass VMXON region */
782 tmp_root = (struct vmcs *)bsp_vmxon_region;
783 report(vmcs_clear(tmp_root) == 1, "test vmclear with vmxon region");
784
785 /* Valid VMCS */
786 report(vmcs_clear(vmcs_root) == 0,
787 "test vmclear with valid vmcs region");
788
789 test_vmclear_flushing();
790 }
791
guest_main(void)792 static void __attribute__((__used__)) guest_main(void)
793 {
794 if (current->v2)
795 v2_guest_main();
796 else
797 current->guest_main();
798 }
799
800 /* guest_entry */
801 asm(
802 ".align 4, 0x90\n\t"
803 ".globl entry_guest\n\t"
804 "guest_entry:\n\t"
805 " call guest_main\n\t"
806 " mov $1, %edi\n\t"
807 " call hypercall\n\t"
808 );
809
810 /* EPT paging structure related functions */
811 /* split_large_ept_entry: Split a 2M/1G large page into 512 smaller PTEs.
812 @ptep : large page table entry to split
813 @level : level of ptep (2 or 3)
814 */
split_large_ept_entry(unsigned long * ptep,int level)815 static void split_large_ept_entry(unsigned long *ptep, int level)
816 {
817 unsigned long *new_pt;
818 unsigned long gpa;
819 unsigned long pte;
820 unsigned long prototype;
821 int i;
822
823 pte = *ptep;
824 assert(pte & EPT_PRESENT);
825 assert(pte & EPT_LARGE_PAGE);
826 assert(level == 2 || level == 3);
827
828 new_pt = alloc_page();
829 assert(new_pt);
830
831 prototype = pte & ~EPT_ADDR_MASK;
832 if (level == 2)
833 prototype &= ~EPT_LARGE_PAGE;
834
835 gpa = pte & EPT_ADDR_MASK;
836 for (i = 0; i < EPT_PGDIR_ENTRIES; i++) {
837 new_pt[i] = prototype | gpa;
838 gpa += 1ul << EPT_LEVEL_SHIFT(level - 1);
839 }
840
841 pte &= ~EPT_LARGE_PAGE;
842 pte &= ~EPT_ADDR_MASK;
843 pte |= virt_to_phys(new_pt);
844
845 *ptep = pte;
846 }
847
848 /* install_ept_entry : Install a page to a given level in EPT
849 @pml4 : addr of pml4 table
850 @pte_level : level of PTE to set
851 @guest_addr : physical address of guest
852 @pte : pte value to set
853 @pt_page : address of page table, NULL for a new page
854 */
install_ept_entry(unsigned long * pml4,int pte_level,unsigned long guest_addr,unsigned long pte,unsigned long * pt_page)855 void install_ept_entry(unsigned long *pml4,
856 int pte_level,
857 unsigned long guest_addr,
858 unsigned long pte,
859 unsigned long *pt_page)
860 {
861 int level;
862 unsigned long *pt = pml4;
863 unsigned offset;
864
865 /* EPT only uses 48 bits of GPA. */
866 assert(guest_addr < (1ul << 48));
867
868 for (level = EPT_PAGE_LEVEL; level > pte_level; --level) {
869 offset = (guest_addr >> EPT_LEVEL_SHIFT(level))
870 & EPT_PGDIR_MASK;
871 if (!(pt[offset] & (EPT_PRESENT))) {
872 unsigned long *new_pt = pt_page;
873 if (!new_pt)
874 new_pt = alloc_page();
875 else
876 pt_page = 0;
877 memset(new_pt, 0, PAGE_SIZE);
878 pt[offset] = virt_to_phys(new_pt)
879 | EPT_RA | EPT_WA | EPT_EA;
880 } else if (pt[offset] & EPT_LARGE_PAGE)
881 split_large_ept_entry(&pt[offset], level);
882 pt = phys_to_virt(pt[offset] & EPT_ADDR_MASK);
883 }
884 offset = (guest_addr >> EPT_LEVEL_SHIFT(level)) & EPT_PGDIR_MASK;
885 pt[offset] = pte;
886 }
887
888 /* Map a page, @perm is the permission of the page */
install_ept(unsigned long * pml4,unsigned long phys,unsigned long guest_addr,u64 perm)889 void install_ept(unsigned long *pml4,
890 unsigned long phys,
891 unsigned long guest_addr,
892 u64 perm)
893 {
894 install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0);
895 }
896
897 /* Map a 1G-size page */
install_1g_ept(unsigned long * pml4,unsigned long phys,unsigned long guest_addr,u64 perm)898 void install_1g_ept(unsigned long *pml4,
899 unsigned long phys,
900 unsigned long guest_addr,
901 u64 perm)
902 {
903 install_ept_entry(pml4, 3, guest_addr,
904 (phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
905 }
906
907 /* Map a 2M-size page */
install_2m_ept(unsigned long * pml4,unsigned long phys,unsigned long guest_addr,u64 perm)908 void install_2m_ept(unsigned long *pml4,
909 unsigned long phys,
910 unsigned long guest_addr,
911 u64 perm)
912 {
913 install_ept_entry(pml4, 2, guest_addr,
914 (phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
915 }
916
917 /* setup_ept_range : Setup a range of 1:1 mapped page to EPT paging structure.
918 @start : start address of guest page
919 @len : length of address to be mapped
920 @map_1g : whether 1G page map is used
921 @map_2m : whether 2M page map is used
922 @perm : permission for every page
923 */
setup_ept_range(unsigned long * pml4,unsigned long start,unsigned long len,int map_1g,int map_2m,u64 perm)924 void setup_ept_range(unsigned long *pml4, unsigned long start,
925 unsigned long len, int map_1g, int map_2m, u64 perm)
926 {
927 u64 phys = start;
928 u64 max = (u64)len + (u64)start;
929
930 if (map_1g) {
931 while (phys + PAGE_SIZE_1G <= max) {
932 install_1g_ept(pml4, phys, phys, perm);
933 phys += PAGE_SIZE_1G;
934 }
935 }
936 if (map_2m) {
937 while (phys + PAGE_SIZE_2M <= max) {
938 install_2m_ept(pml4, phys, phys, perm);
939 phys += PAGE_SIZE_2M;
940 }
941 }
942 while (phys + PAGE_SIZE <= max) {
943 install_ept(pml4, phys, phys, perm);
944 phys += PAGE_SIZE;
945 }
946 }
947
948 /* get_ept_pte : Get the PTE of a given level in EPT,
949 @level == 1 means get the latest level*/
get_ept_pte(unsigned long * pml4,unsigned long guest_addr,int level,unsigned long * pte)950 bool get_ept_pte(unsigned long *pml4, unsigned long guest_addr, int level,
951 unsigned long *pte)
952 {
953 int l;
954 unsigned long *pt = pml4, iter_pte;
955 unsigned offset;
956
957 assert(level >= 1 && level <= 4);
958
959 for (l = EPT_PAGE_LEVEL; ; --l) {
960 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
961 iter_pte = pt[offset];
962 if (l == level)
963 break;
964 if (l < 4 && (iter_pte & EPT_LARGE_PAGE))
965 return false;
966 if (!(iter_pte & (EPT_PRESENT)))
967 return false;
968 pt = (unsigned long *)(iter_pte & EPT_ADDR_MASK);
969 }
970 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
971 if (pte)
972 *pte = pt[offset];
973 return true;
974 }
975
clear_ept_ad_pte(unsigned long * pml4,unsigned long guest_addr)976 static void clear_ept_ad_pte(unsigned long *pml4, unsigned long guest_addr)
977 {
978 int l;
979 unsigned long *pt = pml4;
980 u64 pte;
981 unsigned offset;
982
983 for (l = EPT_PAGE_LEVEL; ; --l) {
984 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
985 pt[offset] &= ~(EPT_ACCESS_FLAG|EPT_DIRTY_FLAG);
986 pte = pt[offset];
987 if (l == 1 || (l < 4 && (pte & EPT_LARGE_PAGE)))
988 break;
989 pt = (unsigned long *)(pte & EPT_ADDR_MASK);
990 }
991 }
992
993 /* clear_ept_ad : Clear EPT A/D bits for the page table walk and the
994 final GPA of a guest address. */
clear_ept_ad(unsigned long * pml4,u64 guest_cr3,unsigned long guest_addr)995 void clear_ept_ad(unsigned long *pml4, u64 guest_cr3,
996 unsigned long guest_addr)
997 {
998 int l;
999 unsigned long *pt = (unsigned long *)guest_cr3, gpa;
1000 u64 pte, offset_in_page;
1001 unsigned offset;
1002
1003 for (l = EPT_PAGE_LEVEL; ; --l) {
1004 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
1005
1006 clear_ept_ad_pte(pml4, (u64) &pt[offset]);
1007 pte = pt[offset];
1008 if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK)))
1009 break;
1010 if (!(pte & PT_PRESENT_MASK))
1011 return;
1012 pt = (unsigned long *)(pte & PT_ADDR_MASK);
1013 }
1014
1015 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
1016 offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1);
1017 gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page);
1018 clear_ept_ad_pte(pml4, gpa);
1019 }
1020
1021 /* check_ept_ad : Check the content of EPT A/D bits for the page table
1022 walk and the final GPA of a guest address. */
check_ept_ad(unsigned long * pml4,u64 guest_cr3,unsigned long guest_addr,int expected_gpa_ad,int expected_pt_ad)1023 void check_ept_ad(unsigned long *pml4, u64 guest_cr3,
1024 unsigned long guest_addr, int expected_gpa_ad,
1025 int expected_pt_ad)
1026 {
1027 int l;
1028 unsigned long *pt = (unsigned long *)guest_cr3, gpa;
1029 u64 ept_pte, pte, offset_in_page;
1030 unsigned offset;
1031 bool bad_pt_ad = false;
1032
1033 for (l = EPT_PAGE_LEVEL; ; --l) {
1034 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
1035
1036 if (!get_ept_pte(pml4, (u64) &pt[offset], 1, &ept_pte)) {
1037 printf("EPT - guest level %d page table is not mapped.\n", l);
1038 return;
1039 }
1040
1041 if (!bad_pt_ad) {
1042 bad_pt_ad |= (ept_pte & (EPT_ACCESS_FLAG|EPT_DIRTY_FLAG)) != expected_pt_ad;
1043 if (bad_pt_ad)
1044 report_fail("EPT - guest level %d page table A=%d/D=%d",
1045 l,
1046 !!(expected_pt_ad & EPT_ACCESS_FLAG),
1047 !!(expected_pt_ad & EPT_DIRTY_FLAG));
1048 }
1049
1050 pte = pt[offset];
1051 if (l == 1 || (l < 4 && (pte & PT_PAGE_SIZE_MASK)))
1052 break;
1053 if (!(pte & PT_PRESENT_MASK))
1054 return;
1055 pt = (unsigned long *)(pte & PT_ADDR_MASK);
1056 }
1057
1058 if (!bad_pt_ad)
1059 report_pass("EPT - guest page table structures A=%d/D=%d",
1060 !!(expected_pt_ad & EPT_ACCESS_FLAG),
1061 !!(expected_pt_ad & EPT_DIRTY_FLAG));
1062
1063 offset = (guest_addr >> EPT_LEVEL_SHIFT(l)) & EPT_PGDIR_MASK;
1064 offset_in_page = guest_addr & ((1 << EPT_LEVEL_SHIFT(l)) - 1);
1065 gpa = (pt[offset] & PT_ADDR_MASK) | (guest_addr & offset_in_page);
1066
1067 if (!get_ept_pte(pml4, gpa, 1, &ept_pte)) {
1068 report_fail("EPT - guest physical address is not mapped");
1069 return;
1070 }
1071 report((ept_pte & (EPT_ACCESS_FLAG | EPT_DIRTY_FLAG)) == expected_gpa_ad,
1072 "EPT - guest physical address A=%d/D=%d",
1073 !!(expected_gpa_ad & EPT_ACCESS_FLAG),
1074 !!(expected_gpa_ad & EPT_DIRTY_FLAG));
1075 }
1076
/*
 * set_ept_pte : Overwrite the EPT entry at @level that covers @guest_addr
 * with @pte_val.
 *
 * @level must be in [1, 4].  Every entry above @level on the walk from @pml4
 * is asserted to have EPT_PRESENT set.
 */
void set_ept_pte(unsigned long *pml4, unsigned long guest_addr,
		 int level, u64 pte_val)
{
	unsigned long *table = pml4;
	unsigned idx;
	int cur;

	assert(level >= 1 && level <= 4);

	/* Descend through the intermediate tables until @level is reached. */
	for (cur = EPT_PAGE_LEVEL; cur != level; --cur) {
		idx = (guest_addr >> EPT_LEVEL_SHIFT(cur)) & EPT_PGDIR_MASK;
		assert(table[idx] & EPT_PRESENT);
		table = (unsigned long *)(table[idx] & EPT_ADDR_MASK);
	}

	idx = (guest_addr >> EPT_LEVEL_SHIFT(cur)) & EPT_PGDIR_MASK;
	table[idx] = pte_val;
}
1096
init_vmcs_ctrl(void)1097 static void init_vmcs_ctrl(void)
1098 {
1099 /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
1100 /* 26.2.1.1 */
1101 vmcs_write(PIN_CONTROLS, ctrl_pin);
1102 /* Disable VMEXIT of IO instruction */
1103 vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
1104 if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
1105 ctrl_cpu[1] = (ctrl_cpu[1] | ctrl_cpu_rev[1].set) &
1106 ctrl_cpu_rev[1].clr;
1107 vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
1108 }
1109 vmcs_write(CR3_TARGET_COUNT, 0);
1110 vmcs_write(VPID, ++vpid_cnt);
1111 }
1112
init_vmcs_host(void)1113 static void init_vmcs_host(void)
1114 {
1115 /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
1116 /* 26.2.1.2 */
1117 vmcs_write(HOST_EFER, rdmsr(MSR_EFER));
1118
1119 /* 26.2.1.3 */
1120 vmcs_write(ENT_CONTROLS, ctrl_enter);
1121 vmcs_write(EXI_CONTROLS, ctrl_exit);
1122
1123 /* 26.2.2 */
1124 vmcs_write(HOST_CR0, read_cr0());
1125 vmcs_write(HOST_CR3, read_cr3());
1126 vmcs_write(HOST_CR4, read_cr4());
1127 vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
1128 vmcs_write(HOST_SYSENTER_CS, KERNEL_CS);
1129 if (ctrl_exit_rev.clr & EXI_LOAD_PAT)
1130 vmcs_write(HOST_PAT, rdmsr(MSR_IA32_CR_PAT));
1131
1132 /* 26.2.3 */
1133 vmcs_write(HOST_SEL_CS, KERNEL_CS);
1134 vmcs_write(HOST_SEL_SS, KERNEL_DS);
1135 vmcs_write(HOST_SEL_DS, KERNEL_DS);
1136 vmcs_write(HOST_SEL_ES, KERNEL_DS);
1137 vmcs_write(HOST_SEL_FS, KERNEL_DS);
1138 vmcs_write(HOST_SEL_GS, KERNEL_DS);
1139 vmcs_write(HOST_SEL_TR, TSS_MAIN);
1140 vmcs_write(HOST_BASE_TR, get_gdt_entry_base(get_tss_descr()));
1141 vmcs_write(HOST_BASE_GDTR, gdt_descr.base);
1142 vmcs_write(HOST_BASE_IDTR, idt_descr.base);
1143 vmcs_write(HOST_BASE_FS, 0);
1144 vmcs_write(HOST_BASE_GS, rdmsr(MSR_GS_BASE));
1145
1146 /* Set other vmcs area */
1147 vmcs_write(PF_ERROR_MASK, 0);
1148 vmcs_write(PF_ERROR_MATCH, 0);
1149 vmcs_write(VMCS_LINK_PTR, ~0ul);
1150 vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
1151 vmcs_write(HOST_RIP, (u64)(&vmx_return));
1152 }
1153
init_vmcs_guest(void)1154 static void init_vmcs_guest(void)
1155 {
1156 gdt_entry_t *tss_descr = get_tss_descr();
1157
1158 /* 26.3 CHECKING AND LOADING GUEST STATE */
1159 ulong guest_cr0, guest_cr4, guest_cr3;
1160 /* 26.3.1.1 */
1161 guest_cr0 = read_cr0();
1162 guest_cr4 = read_cr4();
1163 guest_cr3 = read_cr3();
1164 if (ctrl_enter & ENT_GUEST_64) {
1165 guest_cr0 |= X86_CR0_PG;
1166 guest_cr4 |= X86_CR4_PAE;
1167 }
1168 if ((ctrl_enter & ENT_GUEST_64) == 0)
1169 guest_cr4 &= (~X86_CR4_PCIDE);
1170 if (guest_cr0 & X86_CR0_PG)
1171 guest_cr0 |= X86_CR0_PE;
1172 vmcs_write(GUEST_CR0, guest_cr0);
1173 vmcs_write(GUEST_CR3, guest_cr3);
1174 vmcs_write(GUEST_CR4, guest_cr4);
1175 vmcs_write(GUEST_SYSENTER_CS, KERNEL_CS);
1176 vmcs_write(GUEST_SYSENTER_ESP, guest_syscall_stack_top);
1177 vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
1178 vmcs_write(GUEST_DR7, 0);
1179 vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));
1180
1181 /* 26.3.1.2 */
1182 vmcs_write(GUEST_SEL_CS, KERNEL_CS);
1183 vmcs_write(GUEST_SEL_SS, KERNEL_DS);
1184 vmcs_write(GUEST_SEL_DS, KERNEL_DS);
1185 vmcs_write(GUEST_SEL_ES, KERNEL_DS);
1186 vmcs_write(GUEST_SEL_FS, KERNEL_DS);
1187 vmcs_write(GUEST_SEL_GS, KERNEL_DS);
1188 vmcs_write(GUEST_SEL_TR, TSS_MAIN);
1189 vmcs_write(GUEST_SEL_LDTR, 0);
1190
1191 vmcs_write(GUEST_BASE_CS, 0);
1192 vmcs_write(GUEST_BASE_ES, 0);
1193 vmcs_write(GUEST_BASE_SS, 0);
1194 vmcs_write(GUEST_BASE_DS, 0);
1195 vmcs_write(GUEST_BASE_FS, 0);
1196 vmcs_write(GUEST_BASE_GS, rdmsr(MSR_GS_BASE));
1197 vmcs_write(GUEST_BASE_TR, get_gdt_entry_base(tss_descr));
1198 vmcs_write(GUEST_BASE_LDTR, 0);
1199
1200 vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
1201 vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
1202 vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
1203 vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
1204 vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
1205 vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
1206 vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
1207 vmcs_write(GUEST_LIMIT_TR, get_gdt_entry_limit(tss_descr));
1208
1209 vmcs_write(GUEST_AR_CS, 0xa09b);
1210 vmcs_write(GUEST_AR_DS, 0xc093);
1211 vmcs_write(GUEST_AR_ES, 0xc093);
1212 vmcs_write(GUEST_AR_FS, 0xc093);
1213 vmcs_write(GUEST_AR_GS, 0xc093);
1214 vmcs_write(GUEST_AR_SS, 0xc093);
1215 vmcs_write(GUEST_AR_LDTR, 0x82);
1216 vmcs_write(GUEST_AR_TR, 0x8b);
1217
1218 /* 26.3.1.3 */
1219 vmcs_write(GUEST_BASE_GDTR, gdt_descr.base);
1220 vmcs_write(GUEST_BASE_IDTR, idt_descr.base);
1221 vmcs_write(GUEST_LIMIT_GDTR, gdt_descr.limit);
1222 vmcs_write(GUEST_LIMIT_IDTR, idt_descr.limit);
1223
1224 /* 26.3.1.4 */
1225 vmcs_write(GUEST_RIP, (u64)(&guest_entry));
1226 vmcs_write(GUEST_RSP, guest_stack_top);
1227 vmcs_write(GUEST_RFLAGS, X86_EFLAGS_FIXED);
1228
1229 /* 26.3.1.5 */
1230 vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE);
1231 vmcs_write(GUEST_INTR_STATE, 0);
1232 }
1233
init_vmcs(struct vmcs ** vmcs)1234 int init_vmcs(struct vmcs **vmcs)
1235 {
1236 *vmcs = alloc_page();
1237 (*vmcs)->hdr.revision_id = basic_msr.revision;
1238 /* vmclear first to init vmcs */
1239 if (vmcs_clear(*vmcs)) {
1240 printf("%s : vmcs_clear error\n", __func__);
1241 return 1;
1242 }
1243
1244 if (make_vmcs_current(*vmcs)) {
1245 printf("%s : make_vmcs_current error\n", __func__);
1246 return 1;
1247 }
1248
1249 /* All settings to pin/exit/enter/cpu
1250 control fields should be placed here */
1251 ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
1252 ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64 | EXI_LOAD_PAT;
1253 ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
1254 /* DIsable IO instruction VMEXIT now */
1255 ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
1256 ctrl_cpu[1] = 0;
1257
1258 ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
1259 ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
1260 ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
1261 ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;
1262
1263 init_vmcs_ctrl();
1264 init_vmcs_host();
1265 init_vmcs_guest();
1266 return 0;
1267 }
1268
enable_vmx(void)1269 void enable_vmx(void)
1270 {
1271 bool vmx_enabled =
1272 rdmsr(MSR_IA32_FEATURE_CONTROL) &
1273 FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
1274
1275 if (!vmx_enabled) {
1276 wrmsr(MSR_IA32_FEATURE_CONTROL,
1277 FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX |
1278 FEATURE_CONTROL_LOCKED);
1279 }
1280 }
1281
init_vmx_caps(void)1282 static void init_vmx_caps(void)
1283 {
1284 basic_msr.val = rdmsr(MSR_IA32_VMX_BASIC);
1285 ctrl_pin_rev.val = rdmsr(basic_msr.ctrl ? MSR_IA32_VMX_TRUE_PIN
1286 : MSR_IA32_VMX_PINBASED_CTLS);
1287 ctrl_exit_rev.val = rdmsr(basic_msr.ctrl ? MSR_IA32_VMX_TRUE_EXIT
1288 : MSR_IA32_VMX_EXIT_CTLS);
1289 ctrl_enter_rev.val = rdmsr(basic_msr.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
1290 : MSR_IA32_VMX_ENTRY_CTLS);
1291 ctrl_cpu_rev[0].val = rdmsr(basic_msr.ctrl ? MSR_IA32_VMX_TRUE_PROC
1292 : MSR_IA32_VMX_PROCBASED_CTLS);
1293 if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0)
1294 ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
1295 else
1296 ctrl_cpu_rev[1].val = 0;
1297 if ((ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID)) != 0)
1298 ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
1299 else
1300 ept_vpid.val = 0;
1301 }
1302
/*
 * Prepare the current CPU for VMXON: adjust CR0 and CR4 so they honour the
 * VMX fixed-bit MSRs, set CR4.VMXE, and stamp @vmxon_region with the VMCS
 * revision identifier from basic_msr.
 */
void init_vmx(u64 *vmxon_region)
{
	ulong cr0_must_be_1, cr0_may_be_1;
	ulong cr4_must_be_1, cr4_may_be_1;
	ulong cr0, cr4;

	cr0_must_be_1 = rdmsr(MSR_IA32_VMX_CR0_FIXED0);
	cr0_may_be_1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1);
	cr4_must_be_1 = rdmsr(MSR_IA32_VMX_CR4_FIXED0);
	cr4_may_be_1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1);

	/* Drop bits that FIXED1 leaves clear, then force the FIXED0 bits on. */
	cr0 = read_cr0() & cr0_may_be_1;
	write_cr0(cr0 | cr0_must_be_1);

	cr4 = read_cr4() & cr4_may_be_1;
	write_cr4(cr4 | cr4_must_be_1 | X86_CR4_VMXE);

	*vmxon_region = basic_msr.revision;
}
1318
alloc_bsp_vmx_pages(void)1319 static void alloc_bsp_vmx_pages(void)
1320 {
1321 bsp_vmxon_region = alloc_page();
1322 guest_stack_top = (uintptr_t)alloc_page() + PAGE_SIZE;
1323 guest_syscall_stack_top = (uintptr_t)alloc_page() + PAGE_SIZE;
1324 vmcs_root = alloc_page();
1325 }
1326
init_bsp_vmx(void)1327 static void init_bsp_vmx(void)
1328 {
1329 init_vmx_caps();
1330 alloc_bsp_vmx_pages();
1331 init_vmx(bsp_vmxon_region);
1332 }
1333
/*
 * Callback for test_for_exception(): turn VMX on and straight back off,
 * asserting that both operations return zero.  @data is unused.
 */
static void do_vmxon_off(void *data)
{
	TEST_ASSERT(!vmx_on());
	TEST_ASSERT(!vmx_off());
}
1339
do_write_feature_control(void * data)1340 static void do_write_feature_control(void *data)
1341 {
1342 wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
1343 }
1344
test_vmx_feature_control(void)1345 static int test_vmx_feature_control(void)
1346 {
1347 u64 ia32_feature_control;
1348 bool vmx_enabled;
1349 bool feature_control_locked;
1350
1351 ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
1352 vmx_enabled =
1353 ia32_feature_control & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
1354 feature_control_locked =
1355 ia32_feature_control & FEATURE_CONTROL_LOCKED;
1356
1357 if (vmx_enabled && feature_control_locked) {
1358 printf("VMX enabled and locked by BIOS\n");
1359 return 0;
1360 } else if (feature_control_locked) {
1361 printf("ERROR: VMX locked out by BIOS!?\n");
1362 return 1;
1363 }
1364
1365 wrmsr(MSR_IA32_FEATURE_CONTROL, 0);
1366 report(test_for_exception(GP_VECTOR, &do_vmxon_off, NULL),
1367 "test vmxon with FEATURE_CONTROL cleared");
1368
1369 wrmsr(MSR_IA32_FEATURE_CONTROL, FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX);
1370 report(test_for_exception(GP_VECTOR, &do_vmxon_off, NULL),
1371 "test vmxon without FEATURE_CONTROL lock");
1372
1373 wrmsr(MSR_IA32_FEATURE_CONTROL,
1374 FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX |
1375 FEATURE_CONTROL_LOCKED);
1376
1377 ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
1378 vmx_enabled =
1379 ia32_feature_control & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
1380 report(vmx_enabled, "test enable VMX in FEATURE_CONTROL");
1381
1382 report(test_for_exception(GP_VECTOR, &do_write_feature_control, NULL),
1383 "test FEATURE_CONTROL lock bit");
1384
1385 return !vmx_enabled;
1386 }
1387
1388
/* Write @val to CR0 when @cr_number is 0, and to CR4 for any other value. */
static void write_cr(int cr_number, unsigned long val)
{
	if (cr_number == 0) {
		write_cr0(val);
		return;
	}

	write_cr4(val);
}
1396
/*
 * Like write_cr(), but through the *_safe accessors: returns the result of
 * write_cr0_safe() when @cr_number is 0, of write_cr4_safe() otherwise.
 */
static int write_cr_safe(int cr_number, unsigned long val)
{
	return cr_number ? write_cr4_safe(val) : write_cr0_safe(val);
}
1404
/*
 * Check how VMXON reacts to invalid CR0 (@cr_number == 0) or CR4 (any other
 * value; callers in this file pass 4) settings.
 *
 * For every bit that the VMX fixed-bit MSRs mark as required-1 or
 * disallowed-1, the bit is flipped to its invalid value in @orig_cr and, if
 * that control register write does not fault, vmx_on() is expected to return
 * the matching exception vector.  Bits that are neither required nor
 * disallowed, and that can be toggled without the write faulting, are
 * collected in *@flexible_bits.
 *
 * The control register is restored to @orig_cr after each successful write.
 *
 * Returns 1 as soon as a vmx_on() call returns a value <= 0 (presumably
 * meaning VMXON did not raise an exception - confirm against vmx_on()),
 * otherwise 0.
 */
static int test_vmxon_bad_cr(int cr_number, unsigned long orig_cr,
			     unsigned long *flexible_bits)
{
	unsigned long required1, disallowed1, val, bit;
	int ret, i, expected;

	/* FIXED0 holds the bits that must be 1; ~FIXED1 the bits that must be 0. */
	if (!cr_number) {
		required1 = rdmsr(MSR_IA32_VMX_CR0_FIXED0);
		disallowed1 = ~rdmsr(MSR_IA32_VMX_CR0_FIXED1);
	} else {
		required1 = rdmsr(MSR_IA32_VMX_CR4_FIXED0);
		disallowed1 = ~rdmsr(MSR_IA32_VMX_CR4_FIXED1);
	}

	*flexible_bits = 0;

	for (i = 0; i < BITS_PER_LONG; i++) {
		bit = BIT(i);

		/*
		 * Don't touch bits that will affect the current paging mode,
		 * toggling them will send the test into the weeds before it
		 * gets to VMXON.  nVMX tests are 64-bit only, so CR4.PAE is
		 * guaranteed to be '1', i.e. PSE is fair game.  PKU/PKS are
		 * also fair game as KVM doesn't configure any keys.  SMAP and
		 * SMEP are off limits because the page tables have the USER
		 * bit set at all levels.
		 *
		 * NOTE(review): CR4.CET is skipped as well, but the reason is
		 * not recorded here - confirm and document.
		 */
		if ((cr_number == 0 && (bit == X86_CR0_PE || bit == X86_CR0_PG)) ||
		    (cr_number == 4 && (bit == X86_CR4_PAE || bit == X86_CR4_SMAP ||
					bit == X86_CR4_SMEP || bit == X86_CR4_CET)))
			continue;

		/*
		 * A bit that is neither required nor disallowed is "flexible"
		 * if toggling it does not fault; remember it and undo the
		 * change.
		 */
		if (!(bit & required1) && !(bit & disallowed1)) {
			if (!write_cr_safe(cr_number, orig_cr ^ bit)) {
				*flexible_bits |= bit;
				write_cr(cr_number, orig_cr);
			}
			continue;
		}

		assert(!(required1 & disallowed1));

		/* Build the invalid value: clear a required bit or set a disallowed one. */
		if (required1 & bit)
			val = orig_cr & ~bit;
		else
			val = orig_cr | bit;

		/* If the CR write itself faults there is nothing to test with VMXON. */
		if (write_cr_safe(cr_number, val))
			continue;

		/*
		 * CR0.PE==0 and CR4.VMXE==0 result in #UD, all other invalid
		 * CR0/CR4 bits result in #GP.  Include CR0.PE even though it's
		 * dead code (see above) for completeness.
		 */
		if ((cr_number == 0 && bit == X86_CR0_PE) ||
		    (cr_number == 4 && bit == X86_CR4_VMXE))
			expected = UD_VECTOR;
		else
			expected = GP_VECTOR;

		ret = vmx_on();
		report(ret == expected,
		       "VMXON with CR%d bit %d %s should %s, got '%d'",
		       cr_number, i, (required1 & bit) ? "cleared" : "set",
		       expected == UD_VECTOR ? "UD" : "#GP", ret);

		/* Restore the original value before moving on or bailing out. */
		write_cr(cr_number, orig_cr);

		if (ret <= 0)
			return 1;
	}
	return 0;
}
1480
test_vmxon(void)1481 static int test_vmxon(void)
1482 {
1483 unsigned long orig_cr0, flexible_cr0, orig_cr4, flexible_cr4;
1484 int width = cpuid_maxphyaddr();
1485 u64 *vmxon_region;
1486 int ret;
1487
1488 orig_cr0 = read_cr0();
1489 if (test_vmxon_bad_cr(0, orig_cr0, &flexible_cr0))
1490 return 1;
1491
1492 orig_cr4 = read_cr4();
1493 if (test_vmxon_bad_cr(4, orig_cr4, &flexible_cr4))
1494 return 1;
1495
1496 /* Unaligned page access */
1497 vmxon_region = (u64 *)((intptr_t)bsp_vmxon_region + 1);
1498 ret = __vmxon_safe(vmxon_region);
1499 report(ret < 0, "test vmxon with unaligned vmxon region");
1500 if (ret >= 0)
1501 return 1;
1502
1503 /* gpa bits beyond physical address width are set*/
1504 vmxon_region = (u64 *)((intptr_t)bsp_vmxon_region | ((u64)1 << (width+1)));
1505 ret = __vmxon_safe(vmxon_region);
1506 report(ret < 0, "test vmxon with bits set beyond physical address width");
1507 if (ret >= 0)
1508 return 1;
1509
1510 /* invalid revision identifier */
1511 *bsp_vmxon_region = 0xba9da9;
1512 ret = vmxon_safe();
1513 report(ret < 0, "test vmxon with invalid revision identifier");
1514 if (ret >= 0)
1515 return 1;
1516
1517 /* and finally a valid region, with valid-but-tweaked cr0/cr4 */
1518 write_cr0(orig_cr0 ^ flexible_cr0);
1519 write_cr4(orig_cr4 ^ flexible_cr4);
1520 *bsp_vmxon_region = basic_msr.revision;
1521 ret = vmxon_safe();
1522 report(!ret, "test vmxon with valid vmxon region");
1523 write_cr0(orig_cr0);
1524 write_cr4(orig_cr4);
1525 return ret;
1526 }
1527
test_vmptrld(void)1528 static void test_vmptrld(void)
1529 {
1530 struct vmcs *vmcs, *tmp_root;
1531 int width = cpuid_maxphyaddr();
1532
1533 vmcs = alloc_page();
1534 vmcs->hdr.revision_id = basic_msr.revision;
1535
1536 /* Unaligned page access */
1537 tmp_root = (struct vmcs *)((intptr_t)vmcs + 1);
1538 report(make_vmcs_current(tmp_root) == 1,
1539 "test vmptrld with unaligned vmcs");
1540
1541 /* gpa bits beyond physical address width are set*/
1542 tmp_root = (struct vmcs *)((intptr_t)vmcs |
1543 ((u64)1 << (width+1)));
1544 report(make_vmcs_current(tmp_root) == 1,
1545 "test vmptrld with vmcs address bits set beyond physical address width");
1546
1547 /* Pass VMXON region */
1548 assert(!vmcs_clear(vmcs));
1549 assert(!make_vmcs_current(vmcs));
1550 tmp_root = (struct vmcs *)bsp_vmxon_region;
1551 report(make_vmcs_current(tmp_root) == 1,
1552 "test vmptrld with vmxon region");
1553 report(vmcs_read(VMX_INST_ERROR) == VMXERR_VMPTRLD_VMXON_POINTER,
1554 "test vmptrld with vmxon region vm-instruction error");
1555
1556 report(make_vmcs_current(vmcs) == 0,
1557 "test vmptrld with valid vmcs region");
1558 }
1559
/*
 * Test VMPTRST: after init_vmcs() has made a fresh VMCS current,
 * vmcs_save() must succeed and hand back the very same pointer.
 *
 * init_vmcs() allocates the VMCS page itself and stores it through its
 * argument, so no page is allocated here; doing so would leak it.  A failure
 * of init_vmcs() is reported as a failure of this test instead of being
 * ignored.
 */
static void test_vmptrst(void)
{
	int ret;
	struct vmcs *vmcs1, *vmcs2;

	ret = init_vmcs(&vmcs1);
	if (!ret)
		ret = vmcs_save(&vmcs2);

	/* vmcs2 is only read when both calls above succeeded. */
	report((!ret) && (vmcs1 == vmcs2), "test vmptrst");
}
1570
1571 struct vmx_ctl_msr {
1572 const char *name;
1573 u32 index, true_index;
1574 u32 default1;
1575 } vmx_ctl_msr[] = {
1576 { "MSR_IA32_VMX_PINBASED_CTLS", MSR_IA32_VMX_PINBASED_CTLS,
1577 MSR_IA32_VMX_TRUE_PIN, 0x16 },
1578 { "MSR_IA32_VMX_PROCBASED_CTLS", MSR_IA32_VMX_PROCBASED_CTLS,
1579 MSR_IA32_VMX_TRUE_PROC, 0x401e172 },
1580 { "MSR_IA32_VMX_PROCBASED_CTLS2", MSR_IA32_VMX_PROCBASED_CTLS2,
1581 MSR_IA32_VMX_PROCBASED_CTLS2, 0 },
1582 { "MSR_IA32_VMX_EXIT_CTLS", MSR_IA32_VMX_EXIT_CTLS,
1583 MSR_IA32_VMX_TRUE_EXIT, 0x36dff },
1584 { "MSR_IA32_VMX_ENTRY_CTLS", MSR_IA32_VMX_ENTRY_CTLS,
1585 MSR_IA32_VMX_TRUE_ENTRY, 0x11ff },
1586 };
1587
test_vmx_caps(void)1588 static void test_vmx_caps(void)
1589 {
1590 u64 val, default1, fixed0, fixed1;
1591 union vmx_ctrl_msr ctrl, true_ctrl;
1592 unsigned int n;
1593 bool ok;
1594
1595 printf("\nTest suite: VMX capability reporting\n");
1596
1597 report((basic_msr.revision & (1ul << 31)) == 0 &&
1598 basic_msr.size > 0 && basic_msr.size <= 4096 &&
1599 (basic_msr.type == 0 || basic_msr.type == 6) &&
1600 basic_msr.reserved1 == 0 && basic_msr.reserved2 == 0,
1601 "MSR_IA32_VMX_BASIC");
1602
1603 val = rdmsr(MSR_IA32_VMX_MISC);
1604 report((!(ctrl_cpu_rev[1].clr & CPU_URG) || val & (1ul << 5)) &&
1605 ((val >> 16) & 0x1ff) <= 256 &&
1606 (val & 0x80007e00) == 0,
1607 "MSR_IA32_VMX_MISC");
1608
1609 for (n = 0; n < ARRAY_SIZE(vmx_ctl_msr); n++) {
1610 ctrl.val = rdmsr(vmx_ctl_msr[n].index);
1611 default1 = vmx_ctl_msr[n].default1;
1612 ok = (ctrl.set & default1) == default1;
1613 ok = ok && (ctrl.set & ~ctrl.clr) == 0;
1614 if (ok && basic_msr.ctrl) {
1615 true_ctrl.val = rdmsr(vmx_ctl_msr[n].true_index);
1616 ok = ctrl.clr == true_ctrl.clr;
1617 ok = ok && ctrl.set == (true_ctrl.set | default1);
1618 }
1619 report(ok, "%s", vmx_ctl_msr[n].name);
1620 }
1621
1622 fixed0 = rdmsr(MSR_IA32_VMX_CR0_FIXED0);
1623 fixed1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1);
1624 report(((fixed0 ^ fixed1) & ~fixed1) == 0,
1625 "MSR_IA32_VMX_IA32_VMX_CR0_FIXED0/1");
1626
1627 fixed0 = rdmsr(MSR_IA32_VMX_CR4_FIXED0);
1628 fixed1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
1629 report(((fixed0 ^ fixed1) & ~fixed1) == 0,
1630 "MSR_IA32_VMX_IA32_VMX_CR4_FIXED0/1");
1631
1632 val = rdmsr(MSR_IA32_VMX_VMCS_ENUM);
1633 report((val & VMCS_FIELD_INDEX_MASK) >= 0x2a &&
1634 (val & 0xfffffffffffffc01Ull) == 0,
1635 "MSR_IA32_VMX_VMCS_ENUM");
1636
1637 fixed0 = -1ull;
1638 fixed0 &= ~(EPT_CAP_EXEC_ONLY |
1639 EPT_CAP_PWL4 |
1640 EPT_CAP_PWL5 |
1641 EPT_CAP_UC |
1642 EPT_CAP_WB |
1643 EPT_CAP_2M_PAGE |
1644 EPT_CAP_1G_PAGE |
1645 EPT_CAP_INVEPT |
1646 EPT_CAP_AD_FLAG |
1647 EPT_CAP_ADV_EPT_INFO |
1648 EPT_CAP_INVEPT_SINGLE |
1649 EPT_CAP_INVEPT_ALL |
1650 VPID_CAP_INVVPID |
1651 VPID_CAP_INVVPID_ADDR |
1652 VPID_CAP_INVVPID_CXTGLB |
1653 VPID_CAP_INVVPID_ALL |
1654 VPID_CAP_INVVPID_CXTLOC);
1655
1656 val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
1657 report((val & fixed0) == 0,
1658 "MSR_IA32_VMX_EPT_VPID_CAP");
1659 }
1660
1661 /* This function can only be called in guest */
hypercall(u32 hypercall_no)1662 void __attribute__((__used__)) hypercall(u32 hypercall_no)
1663 {
1664 u64 val = 0;
1665 val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT;
1666 hypercall_field = val;
1667 asm volatile("vmcall\n\t");
1668 }
1669
is_hypercall(union exit_reason exit_reason)1670 static bool is_hypercall(union exit_reason exit_reason)
1671 {
1672 return exit_reason.basic == VMX_VMCALL &&
1673 (hypercall_field & HYPERCALL_BIT);
1674 }
1675
handle_hypercall(void)1676 static int handle_hypercall(void)
1677 {
1678 ulong hypercall_no;
1679
1680 hypercall_no = hypercall_field & HYPERCALL_MASK;
1681 hypercall_field = 0;
1682 switch (hypercall_no) {
1683 case HYPERCALL_VMEXIT:
1684 return VMX_TEST_VMEXIT;
1685 case HYPERCALL_VMABORT:
1686 return VMX_TEST_VMABORT;
1687 case HYPERCALL_VMSKIP:
1688 return VMX_TEST_VMSKIP;
1689 default:
1690 printf("ERROR : Invalid hypercall number : %ld\n", hypercall_no);
1691 }
1692 return VMX_TEST_EXIT;
1693 }
1694
continue_abort(void)1695 static void continue_abort(void)
1696 {
1697 assert(!in_guest);
1698 printf("Host was here when guest aborted:\n");
1699 dump_stack();
1700 longjmp(abort_target, 1);
1701 abort();
1702 }
1703
__abort_test(void)1704 void __abort_test(void)
1705 {
1706 if (in_guest)
1707 hypercall(HYPERCALL_VMABORT);
1708 else
1709 longjmp(abort_target, 1);
1710 abort();
1711 }
1712
continue_skip(void)1713 static void continue_skip(void)
1714 {
1715 assert(!in_guest);
1716 longjmp(abort_target, 1);
1717 abort();
1718 }
1719
test_skip(const char * msg)1720 void test_skip(const char *msg)
1721 {
1722 printf("%s skipping test: %s\n", in_guest ? "Guest" : "Host", msg);
1723 if (in_guest)
1724 hypercall(HYPERCALL_VMABORT);
1725 else
1726 longjmp(abort_target, 1);
1727 abort();
1728 }
1729
exit_handler(union exit_reason exit_reason)1730 static int exit_handler(union exit_reason exit_reason)
1731 {
1732 int ret;
1733
1734 current->exits++;
1735 regs.rflags = vmcs_read(GUEST_RFLAGS);
1736 if (is_hypercall(exit_reason))
1737 ret = handle_hypercall();
1738 else
1739 ret = current->exit_handler(exit_reason);
1740 vmcs_write(GUEST_RFLAGS, regs.rflags);
1741
1742 return ret;
1743 }
1744
/*
 * Tries to enter the guest, populates @result with VM-Fail, VM-Exit, entered,
 * etc...
 *
 * @result is zeroed first.  VMLAUNCH is used while 'launched' is zero and
 * VMRESUME afterwards.  On return:
 *  - result->vm_fail is 1 and result->flags holds RFLAGS if the instruction
 *    fell through instead of entering the guest;
 *  - result->exit_reason.full is read from EXI_REASON, or is 0xdead when
 *    vm_fail is set;
 *  - result->entered is set only when there was neither a VM-Fail nor a
 *    failed-VM-entry exit reason.
 *
 * 'in_guest' is set for the duration of the entry attempt.
 */
static noinline void vmx_enter_guest(struct vmentry_result *result)
{
	memset(result, 0, sizeof(*result));

	in_guest = 1;
	asm volatile (
		/* Write the current stack pointer into the HOST_RSP field. */
		"mov %[HOST_RSP], %%rdi\n\t"
		"vmwrite %%rsp, %%rdi\n\t"
		/* Presumably loads the guest GPRs from 'regs' - see LOAD_GPR_C. */
		LOAD_GPR_C
		"cmpb $0, %[launched]\n\t"
		"jne 1f\n\t"
		"vmlaunch\n\t"
		"jmp 2f\n\t"
		"1: "
		"vmresume\n\t"
		"2: "
		/* Reached only when VMLAUNCH/VMRESUME fell through (VM-Fail). */
		SAVE_GPR_C
		"pushf\n\t"
		"pop %%rdi\n\t"
		"mov %%rdi, %[vm_fail_flags]\n\t"
		"movl $1, %[vm_fail]\n\t"
		"jmp 3f\n\t"
		/* HOST_RIP is set to this label by init_vmcs_host(). */
		"vmx_return:\n\t"
		SAVE_GPR_C
		"3: \n\t"
		: [vm_fail]"+m"(result->vm_fail),
		  [vm_fail_flags]"=m"(result->flags)
		: [launched]"m"(launched), [HOST_RSP]"i"(HOST_RSP)
		: "rdi", "memory", "cc"
	);
	in_guest = 0;

	result->vmlaunch = !launched;
	result->instr = launched ? "vmresume" : "vmlaunch";
	/* EXI_REASON is only read when the entry did not VM-Fail. */
	result->exit_reason.full = result->vm_fail ? 0xdead :
						     vmcs_read(EXI_REASON);
	result->entered = !result->vm_fail &&
			  !result->exit_reason.failed_vmentry;
}
1788
vmx_run(void)1789 static int vmx_run(void)
1790 {
1791 struct vmentry_result result;
1792 u32 ret;
1793
1794 while (1) {
1795 vmx_enter_guest(&result);
1796 if (result.entered) {
1797 /*
1798 * VMCS isn't in "launched" state if there's been any
1799 * entry failure (early or otherwise).
1800 */
1801 launched = 1;
1802 ret = exit_handler(result.exit_reason);
1803 } else if (current->entry_failure_handler) {
1804 ret = current->entry_failure_handler(&result);
1805 } else {
1806 ret = VMX_TEST_EXIT;
1807 }
1808
1809 switch (ret) {
1810 case VMX_TEST_RESUME:
1811 continue;
1812 case VMX_TEST_VMEXIT:
1813 guest_finished = 1;
1814 return 0;
1815 case VMX_TEST_EXIT:
1816 break;
1817 default:
1818 printf("ERROR : Invalid %s_handler return val %d.\n",
1819 result.entered ? "exit" : "entry_failure",
1820 ret);
1821 break;
1822 }
1823
1824 if (result.entered)
1825 print_vmexit_info(result.exit_reason);
1826 else
1827 print_vmentry_failure_info(&result);
1828 abort();
1829 }
1830 }
1831
run_teardown_step(struct test_teardown_step * step)1832 static void run_teardown_step(struct test_teardown_step *step)
1833 {
1834 step->func(step->data);
1835 }
1836
test_run(struct vmx_test * test)1837 static int test_run(struct vmx_test *test)
1838 {
1839 int r;
1840
1841 /* Validate V2 interface. */
1842 if (test->v2) {
1843 int ret = 0;
1844 if (test->init || test->guest_main || test->exit_handler ||
1845 test->syscall_handler) {
1846 report_fail("V2 test cannot specify V1 callbacks.");
1847 ret = 1;
1848 }
1849 if (ret)
1850 return ret;
1851 }
1852
1853 if (test->name == NULL)
1854 test->name = "(no name)";
1855 if (vmx_on()) {
1856 printf("%s : vmxon failed.\n", __func__);
1857 return 1;
1858 }
1859
1860 init_vmcs(&(test->vmcs));
1861 /* Directly call test->init is ok here, init_vmcs has done
1862 vmcs init, vmclear and vmptrld*/
1863 if (test->init && test->init(test->vmcs) != VMX_TEST_START)
1864 goto out;
1865 teardown_count = 0;
1866 v2_guest_main = NULL;
1867 test->exits = 0;
1868 current = test;
1869 regs = test->guest_regs;
1870 vmcs_write(GUEST_RFLAGS, regs.rflags | X86_EFLAGS_FIXED);
1871 launched = 0;
1872 guest_finished = 0;
1873 printf("\nTest suite: %s\n", test->name);
1874
1875 r = setjmp(abort_target);
1876 if (r) {
1877 assert(!in_guest);
1878 goto out;
1879 }
1880
1881
1882 if (test->v2)
1883 test->v2();
1884 else
1885 vmx_run();
1886
1887 while (teardown_count > 0)
1888 run_teardown_step(&teardown_steps[--teardown_count]);
1889
1890 if (launched && !guest_finished)
1891 report_fail("Guest didn't run to completion.");
1892
1893 out:
1894 if (vmx_off()) {
1895 printf("%s : vmxoff failed.\n", __func__);
1896 return 1;
1897 }
1898 return 0;
1899 }
1900
1901 /*
1902 * Add a teardown step. Executed after the test's main function returns.
1903 * Teardown steps executed in reverse order.
1904 */
test_add_teardown(test_teardown_func func,void * data)1905 void test_add_teardown(test_teardown_func func, void *data)
1906 {
1907 struct test_teardown_step *step;
1908
1909 TEST_ASSERT_MSG(teardown_count < MAX_TEST_TEARDOWN_STEPS,
1910 "There are already %d teardown steps.",
1911 teardown_count);
1912 step = &teardown_steps[teardown_count++];
1913 step->func = func;
1914 step->data = data;
1915 }
1916
/* Record @func as the V2 guest entry point; valid only for V2 tests. */
static void __test_set_guest(test_guest_func func)
{
	assert(current->v2);
	v2_guest_main = func;
}
1922
1923 /*
1924 * Set the target of the first enter_guest call. Can only be called once per
1925 * test. Must be called before first enter_guest call.
1926 */
test_set_guest(test_guest_func func)1927 void test_set_guest(test_guest_func func)
1928 {
1929 TEST_ASSERT_MSG(!v2_guest_main, "Already set guest func.");
1930 __test_set_guest(func);
1931 }
1932
1933 /*
1934 * Set the target of the enter_guest call and reset the RIP so 'func' will
1935 * start from the beginning. This can be called multiple times per test.
1936 */
test_override_guest(test_guest_func func)1937 void test_override_guest(test_guest_func func)
1938 {
1939 __test_set_guest(func);
1940 init_vmcs_guest();
1941 }
1942
test_set_guest_finished(void)1943 void test_set_guest_finished(void)
1944 {
1945 guest_finished = 1;
1946 }
1947
check_for_guest_termination(union exit_reason exit_reason)1948 static void check_for_guest_termination(union exit_reason exit_reason)
1949 {
1950 if (is_hypercall(exit_reason)) {
1951 int ret;
1952
1953 ret = handle_hypercall();
1954 switch (ret) {
1955 case VMX_TEST_VMEXIT:
1956 guest_finished = 1;
1957 break;
1958 case VMX_TEST_VMABORT:
1959 continue_abort();
1960 break;
1961 case VMX_TEST_VMSKIP:
1962 continue_skip();
1963 break;
1964 default:
1965 printf("ERROR : Invalid handle_hypercall return %d.\n",
1966 ret);
1967 abort();
1968 }
1969 }
1970 }
1971
1972 /*
1973 * Enters the guest (or launches it for the first time). Error to call once the
1974 * guest has returned (i.e., run past the end of its guest() function).
1975 */
__enter_guest(u8 abort_flag,struct vmentry_result * result)1976 void __enter_guest(u8 abort_flag, struct vmentry_result *result)
1977 {
1978 TEST_ASSERT_MSG(v2_guest_main,
1979 "Never called test_set_guest_func!");
1980
1981 TEST_ASSERT_MSG(!guest_finished,
1982 "Called enter_guest() after guest returned.");
1983
1984 vmx_enter_guest(result);
1985
1986 if (result->vm_fail) {
1987 if (abort_flag & ABORT_ON_EARLY_VMENTRY_FAIL)
1988 goto do_abort;
1989 return;
1990 }
1991 if (result->exit_reason.failed_vmentry) {
1992 if ((abort_flag & ABORT_ON_INVALID_GUEST_STATE) ||
1993 result->exit_reason.basic != VMX_FAIL_STATE)
1994 goto do_abort;
1995 return;
1996 }
1997
1998 launched = 1;
1999 check_for_guest_termination(result->exit_reason);
2000 return;
2001
2002 do_abort:
2003 print_vmentry_failure_info(result);
2004 abort();
2005 }
2006
enter_guest_with_bad_controls(void)2007 void enter_guest_with_bad_controls(void)
2008 {
2009 struct vmentry_result result;
2010
2011 TEST_ASSERT_MSG(v2_guest_main,
2012 "Never called test_set_guest_func!");
2013
2014 TEST_ASSERT_MSG(!guest_finished,
2015 "Called enter_guest() after guest returned.");
2016
2017 __enter_guest(ABORT_ON_INVALID_GUEST_STATE, &result);
2018 report(result.vm_fail, "VM-Fail occurred as expected");
2019 report((result.flags & VMX_ENTRY_FLAGS) == X86_EFLAGS_ZF,
2020 "FLAGS set correctly on VM-Fail");
2021 report(vmcs_read(VMX_INST_ERROR) == VMXERR_ENTRY_INVALID_CONTROL_FIELD,
2022 "VM-Inst Error # is %d (VM entry with invalid control field(s))",
2023 VMXERR_ENTRY_INVALID_CONTROL_FIELD);
2024 }
2025
enter_guest(void)2026 void enter_guest(void)
2027 {
2028 struct vmentry_result result;
2029
2030 __enter_guest(ABORT_ON_EARLY_VMENTRY_FAIL |
2031 ABORT_ON_INVALID_GUEST_STATE, &result);
2032 }
2033
2034 extern struct vmx_test vmx_tests[];
2035
2036 static bool
test_wanted(const char * name,const char * filters[],int filter_count)2037 test_wanted(const char *name, const char *filters[], int filter_count)
2038 {
2039 int i;
2040 bool positive = false;
2041 bool match = false;
2042 char clean_name[strlen(name) + 1];
2043 char *c;
2044 const char *n;
2045
2046 printf("filter = %s, test = %s\n", filters[0], name);
2047
2048 /* Replace spaces with underscores. */
2049 n = name;
2050 c = &clean_name[0];
2051 do *c++ = (*n == ' ') ? '_' : *n;
2052 while (*n++);
2053
2054 for (i = 0; i < filter_count; i++) {
2055 const char *filter = filters[i];
2056
2057 if (filter[0] == '-') {
2058 if (simple_glob(clean_name, filter + 1))
2059 return false;
2060 } else {
2061 positive = true;
2062 match |= simple_glob(clean_name, filter);
2063 }
2064 }
2065
2066 if (!positive || match) {
2067 matched++;
2068 return true;
2069 } else {
2070 return false;
2071 }
2072 }
2073
/*
 * Test entry point.
 *
 * Every argument after the program name is a test filter (see
 * test_wanted()).  The built-in framework tests run first, in a fixed order,
 * with VMX left on; VMX is then turned off and each selected entry of
 * vmx_tests[] is run through test_run(), which turns VMX on and off itself.
 *
 * Returns the value of report_summary().
 */
int main(int argc, const char *argv[])
{
	int i = 0;

	setup_vm();
	hypercall_field = 0;

	/* We want xAPIC mode to test MMIO passthrough from L1 (us) to L2. */
	smp_reset_apic();

	/* Drop the program name; what remains are the filters. */
	argv++;
	argc--;

	if (!this_cpu_has(X86_FEATURE_VMX)) {
		printf("WARNING: vmx not supported, add '-cpu host'\n");
		goto exit;
	}
	init_bsp_vmx();
	if (test_wanted("test_vmx_feature_control", argv, argc)) {
		/* Sets MSR_IA32_FEATURE_CONTROL to 0x5 */
		if (test_vmx_feature_control() != 0)
			goto exit;
	} else {
		/* The test was filtered out, but VMX still has to be enabled. */
		enable_vmx();
	}

	if (test_wanted("test_vmxon", argv, argc)) {
		/* Enables VMX */
		if (test_vmxon() != 0)
			goto exit;
	} else {
		/* The tests below run with VMX on, so turn it on here instead. */
		if (vmx_on()) {
			report_fail("vmxon");
			goto exit;
		}
	}

	if (test_wanted("test_vmptrld", argv, argc))
		test_vmptrld();
	if (test_wanted("test_vmclear", argv, argc))
		test_vmclear();
	if (test_wanted("test_vmptrst", argv, argc))
		test_vmptrst();
	if (test_wanted("test_vmwrite_vmread", argv, argc))
		test_vmwrite_vmread();
	if (test_wanted("test_vmcs_high", argv, argc))
		test_vmcs_high();
	if (test_wanted("test_vmcs_lifecycle", argv, argc))
		test_vmcs_lifecycle();
	if (test_wanted("test_vmx_caps", argv, argc))
		test_vmx_caps();
	if (test_wanted("test_vmread_flags_touch", argv, argc))
		test_vmread_flags_touch();
	if (test_wanted("test_vmwrite_flags_touch", argv, argc))
		test_vmwrite_flags_touch();

	/* Balance vmxon from test_vmxon. */
	vmx_off();

	/* vmx_tests[] ends with an entry whose name is NULL. */
	for (; vmx_tests[i].name != NULL; i++) {
		if (!test_wanted(vmx_tests[i].name, argv, argc))
			continue;
		if (test_run(&vmx_tests[i]))
			goto exit;
	}

	/* 'matched' is zero here, so this records a failure. */
	if (!matched)
		report(matched, "command line didn't match any tests!");

exit:
	return report_summary();
}
2146