/* kvm-unit-tests: x86/svm_tests.c (revision f1dcfd54130ca2b1851d46dffd7ffadbe5eb4a3b) */
#include "svm.h"
#include "libcflat.h"
#include "processor.h"
#include "desc.h"
#include "msr.h"
#include "vm.h"
#include "smp.h"
#include "types.h"
#include "alloc_page.h"
#include "isr.h"
#include "apic.h"
#include "delay.h"

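/*
 * 0x20-0x2f are the DR0-DR15 read exit codes and 0x30-0x3f the DR0-DR15
 * write exit codes (APM Appendix C), so 0x3f is the last DR intercept.
 */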
#define SVM_EXIT_MAX_DR_INTERCEPT 0x3f

static void *scratch_page;

#define LATENCY_RUNS 1000000

extern u16 cpu_online_count;

u64 tsc_start;
u64 tsc_end;

u64 vmrun_sum, vmexit_sum;
u64 vmsave_sum, vmload_sum;
u64 stgi_sum, clgi_sum;
u64 latvmrun_max;
u64 latvmrun_min;
u64 latvmexit_max;
u64 latvmexit_min;
u64 latvmload_max;
u64 latvmload_min;
u64 latvmsave_max;
u64 latvmsave_min;
u64 latstgi_max;
u64 latstgi_min;
u64 latclgi_max;
u64 latclgi_min;
u64 runs;

static void null_test(struct svm_test *test)
{
}

static bool null_check(struct svm_test *test)
{
    return vmcb->control.exit_code == SVM_EXIT_VMMCALL;
}

static void prepare_no_vmrun_int(struct svm_test *test)
{
    vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMRUN);
}

static bool check_no_vmrun_int(struct svm_test *test)
{
    return vmcb->control.exit_code == SVM_EXIT_ERR;
}

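/*
 * The VMRUN intercept is architecturally required to be set (the test above
 * checks that clearing it makes VMRUN fail with SVM_EXIT_ERR), so a VMRUN
 * attempted from inside the guest exits with SVM_EXIT_VMRUN.
 */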
static void test_vmrun(struct svm_test *test)
{
    asm volatile ("vmrun %0" : : "a"(virt_to_phys(vmcb)));
}

static bool check_vmrun(struct svm_test *test)
{
    return vmcb->control.exit_code == SVM_EXIT_VMRUN;
}

static void prepare_rsm_intercept(struct svm_test *test)
{
    default_prepare(test);
    vmcb->control.intercept |= (1ULL << INTERCEPT_RSM);
    vmcb->control.intercept_exceptions |= (1ULL << UD_VECTOR);
}

static void test_rsm_intercept(struct svm_test *test)
{
    asm volatile ("rsm" : : : "memory");
}

static bool check_rsm_intercept(struct svm_test *test)
{
    return get_test_stage(test) == 2;
}

static bool finished_rsm_intercept(struct svm_test *test)
{
    switch (get_test_stage(test)) {
    case 0:
        if (vmcb->control.exit_code != SVM_EXIT_RSM) {
            report_fail("VMEXIT not due to rsm. Exit reason 0x%x",
                        vmcb->control.exit_code);
            return true;
        }
        vmcb->control.intercept &= ~(1ULL << INTERCEPT_RSM);
        inc_test_stage(test);
        break;

    case 1:
        if (vmcb->control.exit_code != SVM_EXIT_EXCP_BASE + UD_VECTOR) {
            report_fail("VMEXIT not due to #UD. Exit reason 0x%x",
                        vmcb->control.exit_code);
            return true;
        }
        vmcb->save.rip += 2;
        inc_test_stage(test);
        break;

    default:
        return true;
    }
    return get_test_stage(test) == 2;
}

static void prepare_cr3_intercept(struct svm_test *test)
{
    default_prepare(test);
    vmcb->control.intercept_cr_read |= 1 << 3;
}

static void test_cr3_intercept(struct svm_test *test)
{
    asm volatile ("mov %%cr3, %0" : "=r"(test->scratch) : : "memory");
}

static bool check_cr3_intercept(struct svm_test *test)
{
    return vmcb->control.exit_code == SVM_EXIT_READ_CR3;
}

static bool check_cr3_nointercept(struct svm_test *test)
{
    return null_check(test) && test->scratch == read_cr3();
}

static void corrupt_cr3_intercept_bypass(void *_test)
{
    struct svm_test *test = _test;
    extern volatile u32 mmio_insn;

    while (!__sync_bool_compare_and_swap(&test->scratch, 1, 2))
        pause();
    pause();
    pause();
    pause();
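    /*
     * Little-endian encoding: 0f 20 d8 = mov %cr3, %rax; 90 = nop.  This
     * overwrites the guest's 4-byte "mov %rax, (%rax); nop" at mmio_insn
     * below while the CR3 read intercept is armed.
     */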
    mmio_insn = 0x90d8200f;  // mov %cr3, %rax; nop
}

static void prepare_cr3_intercept_bypass(struct svm_test *test)
{
    default_prepare(test);
    vmcb->control.intercept_cr_read |= 1 << 3;
    on_cpu_async(1, corrupt_cr3_intercept_bypass, test);
}

static void test_cr3_intercept_bypass(struct svm_test *test)
{
    ulong a = 0xa0000;

    test->scratch = 1;
    while (test->scratch != 2)
        barrier();

    asm volatile ("mmio_insn: mov %0, (%0); nop"
                  : "+a"(a) : : "memory");
    test->scratch = a;
}

static void prepare_dr_intercept(struct svm_test *test)
{
    default_prepare(test);
    vmcb->control.intercept_dr_read = 0xff;
    vmcb->control.intercept_dr_write = 0xff;
}

static void test_dr_intercept(struct svm_test *test)
{
    unsigned int i, failcnt = 0;

    /* Loop testing debug register reads */
    for (i = 0; i < 8; i++) {

        switch (i) {
        case 0:
            asm volatile ("mov %%dr0, %0" : "=r"(test->scratch) : : "memory");
            break;
        case 1:
            asm volatile ("mov %%dr1, %0" : "=r"(test->scratch) : : "memory");
            break;
        case 2:
            asm volatile ("mov %%dr2, %0" : "=r"(test->scratch) : : "memory");
            break;
        case 3:
            asm volatile ("mov %%dr3, %0" : "=r"(test->scratch) : : "memory");
            break;
        case 4:
            asm volatile ("mov %%dr4, %0" : "=r"(test->scratch) : : "memory");
            break;
        case 5:
            asm volatile ("mov %%dr5, %0" : "=r"(test->scratch) : : "memory");
            break;
        case 6:
            asm volatile ("mov %%dr6, %0" : "=r"(test->scratch) : : "memory");
            break;
        case 7:
            asm volatile ("mov %%dr7, %0" : "=r"(test->scratch) : : "memory");
            break;
        }

        if (test->scratch != i) {
            report_fail("dr%u read intercept", i);
            failcnt++;
        }
    }

    /* Loop testing debug register writes */
    for (i = 0; i < 8; i++) {

        switch (i) {
        case 0:
            asm volatile ("mov %0, %%dr0" : : "r"(test->scratch) : "memory");
            break;
        case 1:
            asm volatile ("mov %0, %%dr1" : : "r"(test->scratch) : "memory");
            break;
        case 2:
            asm volatile ("mov %0, %%dr2" : : "r"(test->scratch) : "memory");
            break;
        case 3:
            asm volatile ("mov %0, %%dr3" : : "r"(test->scratch) : "memory");
            break;
        case 4:
            asm volatile ("mov %0, %%dr4" : : "r"(test->scratch) : "memory");
            break;
        case 5:
            asm volatile ("mov %0, %%dr5" : : "r"(test->scratch) : "memory");
            break;
        case 6:
            asm volatile ("mov %0, %%dr6" : : "r"(test->scratch) : "memory");
            break;
        case 7:
            asm volatile ("mov %0, %%dr7" : : "r"(test->scratch) : "memory");
            break;
        }

        if (test->scratch != i) {
            report_fail("dr%u write intercept", i);
            failcnt++;
        }
    }

    test->scratch = failcnt;
}

static bool dr_intercept_finished(struct svm_test *test)
{
    ulong n = (vmcb->control.exit_code - SVM_EXIT_READ_DR0);

    /* Only expect DR intercepts */
    if (n > (SVM_EXIT_MAX_DR_INTERCEPT - SVM_EXIT_READ_DR0))
        return true;

    /*
     * Compute debug register number.
     * Per Appendix C "SVM Intercept Exit Codes" of AMD64 Architecture
     * Programmer's Manual Volume 2 - System Programming:
     * http://support.amd.com/TechDocs/24593.pdf
     * there are 16 VMEXIT codes each for DR read and write.
     */
    test->scratch = (n % 16);

    /* Jump over MOV instruction */
    vmcb->save.rip += 3;

    return false;
}

static bool check_dr_intercept(struct svm_test *test)
{
    return !test->scratch;
}

static bool next_rip_supported(void)
{
    return this_cpu_has(X86_FEATURE_NRIPS);
}

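/*
 * With NextRIP (X86_FEATURE_NRIPS), the CPU stores the address of the
 * instruction following the intercepted one in vmcb->control.next_rip;
 * check_next_rip() compares it against a label placed directly after the
 * intercepted RDTSC.
 */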
static void prepare_next_rip(struct svm_test *test)
{
    vmcb->control.intercept |= (1ULL << INTERCEPT_RDTSC);
}

static void test_next_rip(struct svm_test *test)
{
    asm volatile ("rdtsc\n\t"
                  ".globl exp_next_rip\n\t"
                  "exp_next_rip:\n\t" ::: "eax", "edx");
}

static bool check_next_rip(struct svm_test *test)
{
    extern char exp_next_rip;
    unsigned long address = (unsigned long)&exp_next_rip;

    return address == vmcb->control.next_rip;
}

extern u8 *msr_bitmap;

static void prepare_msr_intercept(struct svm_test *test)
{
    default_prepare(test);
    vmcb->control.intercept |= (1ULL << INTERCEPT_MSR_PROT);
    vmcb->control.intercept_exceptions |= (1ULL << GP_VECTOR);
    memset(msr_bitmap, 0xff, MSR_BITMAP_SIZE);
}

static void test_msr_intercept(struct svm_test *test)
{
    unsigned long msr_value = 0xef8056791234abcd; /* Arbitrary value */
    unsigned long msr_index;

    for (msr_index = 0; msr_index <= 0xc0011fff; msr_index++) {
        if (msr_index == 0xC0010131 /* MSR_SEV_STATUS */) {
            /*
             * Per section 15.34.10 "SEV_STATUS MSR" of AMD64 Architecture
             * Programmer's Manual volume 2 - System Programming:
             * http://support.amd.com/TechDocs/24593.pdf
             * SEV_STATUS MSR (C001_0131) is a non-interceptable MSR.
             */
            continue;
        }

        /* Skip gaps between supported MSR ranges */
        if (msr_index == 0x2000)
            msr_index = 0xc0000000;
        else if (msr_index == 0xc0002000)
            msr_index = 0xc0010000;

        test->scratch = -1;

        rdmsr(msr_index);

        /* Check that a read intercept occurred for MSR at msr_index */
        if (test->scratch != msr_index)
            report_fail("MSR 0x%lx read intercept", msr_index);

        /*
         * Poor man's approach to generate a value that
         * seems arbitrary each time around the loop.
         */
        msr_value += (msr_value << 1);

        wrmsr(msr_index, msr_value);

        /* Check that a write intercept occurred for MSR with msr_value */
        if (test->scratch != msr_value)
            report_fail("MSR 0x%lx write intercept", msr_index);
    }

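    /* Sentinel: check_msr_intercept() verifies the guest loop ran to completion. */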
    test->scratch = -2;
}

static bool msr_intercept_finished(struct svm_test *test)
{
    u32 exit_code = vmcb->control.exit_code;
    u64 exit_info_1;
    u8 *opcode;

    if (exit_code == SVM_EXIT_MSR) {
        exit_info_1 = vmcb->control.exit_info_1;
    } else {
        /*
         * If a #GP exception occurs instead, check that it was
         * for RDMSR/WRMSR and set exit_info_1 accordingly.
         */

        if (exit_code != (SVM_EXIT_EXCP_BASE + GP_VECTOR))
            return true;

        opcode = (u8 *)vmcb->save.rip;
        if (opcode[0] != 0x0f)
            return true;

        switch (opcode[1]) {
        case 0x30: /* WRMSR */
            exit_info_1 = 1;
            break;
        case 0x32: /* RDMSR */
            exit_info_1 = 0;
            break;
        default:
            return true;
        }

        /*
         * Warn that a #GP exception occurred instead.
         * RCX holds the MSR index.
         */
        printf("%s 0x%lx #GP exception\n",
            exit_info_1 ? "WRMSR" : "RDMSR", get_regs().rcx);
    }

    /* Jump over RDMSR/WRMSR instruction */
    vmcb->save.rip += 2;

    /*
     * Test whether the intercept was for RDMSR/WRMSR.
     * For RDMSR, test->scratch is set to the MSR index;
     *      RCX holds the MSR index.
     * For WRMSR, test->scratch is set to the MSR value;
     *      RDX holds the upper 32 bits of the MSR value,
     *      while RAX holds its lower 32 bits.
     */
    if (exit_info_1)
        test->scratch =
            ((get_regs().rdx << 32) | (vmcb->save.rax & 0xffffffff));
    else
        test->scratch = get_regs().rcx;

    return false;
}

static bool check_msr_intercept(struct svm_test *test)
{
    memset(msr_bitmap, 0, MSR_BITMAP_SIZE);
    return (test->scratch == -2);
}

static void prepare_mode_switch(struct svm_test *test)
{
    vmcb->control.intercept_exceptions |= (1ULL << GP_VECTOR)
                                       |  (1ULL << UD_VECTOR)
                                       |  (1ULL << DF_VECTOR)
                                       |  (1ULL << PF_VECTOR);
    test->scratch = 0;
}

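/*
 * Walk the guest from 64-bit long mode down to real mode and back again:
 * clear PG, LME and PAE to reach 32-bit and then 16-bit protected mode,
 * clear PE for real mode, VMMCALL, then set the bits again in reverse
 * order and VMMCALL once more from long mode.
 */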
static void test_mode_switch(struct svm_test *test)
{
    asm volatile("	cli\n"
		 "	ljmp *1f\n" /* jump to 32-bit code segment */
		 "1:\n"
		 "	.long 2f\n"
		 "	.long " xstr(KERNEL_CS32) "\n"
		 ".code32\n"
		 "2:\n"
		 "	movl %%cr0, %%eax\n"
		 "	btcl  $31, %%eax\n" /* clear PG */
		 "	movl %%eax, %%cr0\n"
		 "	movl $0xc0000080, %%ecx\n" /* EFER */
		 "	rdmsr\n"
		 "	btcl $8, %%eax\n" /* clear LME */
		 "	wrmsr\n"
		 "	movl %%cr4, %%eax\n"
		 "	btcl $5, %%eax\n" /* clear PAE */
		 "	movl %%eax, %%cr4\n"
		 "	movw %[ds16], %%ax\n"
		 "	movw %%ax, %%ds\n"
		 "	ljmpl %[cs16], $3f\n" /* jump to 16 bit protected-mode */
		 ".code16\n"
		 "3:\n"
		 "	movl %%cr0, %%eax\n"
		 "	btcl $0, %%eax\n" /* clear PE  */
		 "	movl %%eax, %%cr0\n"
		 "	ljmpl $0, $4f\n"   /* jump to real-mode */
		 "4:\n"
		 "	vmmcall\n"
		 "	movl %%cr0, %%eax\n"
		 "	btsl $0, %%eax\n" /* set PE  */
		 "	movl %%eax, %%cr0\n"
		 "	ljmpl %[cs32], $5f\n" /* back to protected mode */
		 ".code32\n"
		 "5:\n"
		 "	movl %%cr4, %%eax\n"
		 "	btsl $5, %%eax\n" /* set PAE */
		 "	movl %%eax, %%cr4\n"
		 "	movl $0xc0000080, %%ecx\n" /* EFER */
		 "	rdmsr\n"
		 "	btsl $8, %%eax\n" /* set LME */
		 "	wrmsr\n"
		 "	movl %%cr0, %%eax\n"
		 "	btsl  $31, %%eax\n" /* set PG */
		 "	movl %%eax, %%cr0\n"
		 "	ljmpl %[cs64], $6f\n"    /* back to long mode */
		 ".code64\n\t"
		 "6:\n"
		 "	vmmcall\n"
		 :: [cs16] "i"(KERNEL_CS16), [ds16] "i"(KERNEL_DS16),
		    [cs32] "i"(KERNEL_CS32), [cs64] "i"(KERNEL_CS64)
		 : "rax", "rbx", "rcx", "rdx", "memory");
}

static bool mode_switch_finished(struct svm_test *test)
{
    u64 cr0, cr4, efer;

    cr0  = vmcb->save.cr0;
    cr4  = vmcb->save.cr4;
    efer = vmcb->save.efer;

    /* Only expect VMMCALL intercepts */
    if (vmcb->control.exit_code != SVM_EXIT_VMMCALL)
        return true;

    /* Jump over VMMCALL instruction */
    vmcb->save.rip += 3;

    /* Do sanity checks */
    switch (test->scratch) {
    case 0:
        /* Test should be in real mode now - check for this */
        if ((cr0  & 0x80000001) || /* CR0.PG, CR0.PE */
            (cr4  & 0x00000020) || /* CR4.PAE */
            (efer & 0x00000500))   /* EFER.LMA, EFER.LME */
            return true;
        break;
    case 2:
        /* Test should be back in long mode now - check for this */
        if (((cr0  & 0x80000001) != 0x80000001) || /* CR0.PG, CR0.PE */
            ((cr4  & 0x00000020) != 0x00000020) || /* CR4.PAE */
            ((efer & 0x00000500) != 0x00000500))   /* EFER.LMA, EFER.LME */
            return true;
        break;
    }

    /* one step forward */
    test->scratch += 1;

    return test->scratch == 2;
}

static bool check_mode_switch(struct svm_test *test)
{
    return test->scratch == 2;
}

extern u8 *io_bitmap;

static void prepare_ioio(struct svm_test *test)
{
    vmcb->control.intercept |= (1ULL << INTERCEPT_IOIO_PROT);
    test->scratch = 0;
    memset(io_bitmap, 0, 8192);
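    /*
     * Per the APM the IOPM is larger than the 8 KiB of port bits so that a
     * multi-byte access at port 0xFFFF can still be checked; setting the
     * byte just past the 8 KiB intercepts such wrapping accesses.
     */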
    io_bitmap[8192] = 0xFF;
}

static void test_ioio(struct svm_test *test)
{
    // stage 0, test IO pass
    inb(0x5000);
    outb(0x0, 0x5000);
    if (get_test_stage(test) != 0)
        goto fail;

    // test IO width, in/out
    io_bitmap[0] = 0xFF;
    inc_test_stage(test);
    inb(0x0);
    if (get_test_stage(test) != 2)
        goto fail;

    outw(0x0, 0x0);
    if (get_test_stage(test) != 3)
        goto fail;

    inl(0x0);
    if (get_test_stage(test) != 4)
        goto fail;

    // test low/high IO port
    io_bitmap[0x5000 / 8] = (1 << (0x5000 % 8));
    inb(0x5000);
    if (get_test_stage(test) != 5)
        goto fail;

    io_bitmap[0x9000 / 8] = (1 << (0x9000 % 8));
    inw(0x9000);
    if (get_test_stage(test) != 6)
        goto fail;

    // test partial pass
    io_bitmap[0x5000 / 8] = (1 << (0x5000 % 8));
    inl(0x4FFF);
    if (get_test_stage(test) != 7)
        goto fail;

    // test across pages
    inc_test_stage(test);
    inl(0x7FFF);
    if (get_test_stage(test) != 8)
        goto fail;

    inc_test_stage(test);
    io_bitmap[0x8000 / 8] = 1 << (0x8000 % 8);
    inl(0x7FFF);
    if (get_test_stage(test) != 10)
        goto fail;

    io_bitmap[0] = 0;
    inl(0xFFFF);
    if (get_test_stage(test) != 11)
        goto fail;

    io_bitmap[0] = 0xFF;
    io_bitmap[8192] = 0;
    inl(0xFFFF);
    inc_test_stage(test);
    if (get_test_stage(test) != 12)
        goto fail;

    return;

fail:
    report_fail("stage %d", get_test_stage(test));
    test->scratch = -1;
}

static bool ioio_finished(struct svm_test *test)
{
    unsigned port, size;

    /* Only expect IOIO intercepts */
    if (vmcb->control.exit_code == SVM_EXIT_VMMCALL)
        return true;

    if (vmcb->control.exit_code != SVM_EXIT_IOIO)
        return true;

    /* one step forward */
    test->scratch += 1;

    port = vmcb->control.exit_info_1 >> 16;
    size = (vmcb->control.exit_info_1 >> SVM_IOIO_SIZE_SHIFT) & 7;

    while (size--) {
        io_bitmap[port / 8] &= ~(1 << (port & 7));
        port++;
    }

    return false;
}

static bool check_ioio(struct svm_test *test)
{
    memset(io_bitmap, 0, 8193);
    return test->scratch != -1;
}

static void prepare_asid_zero(struct svm_test *test)
{
    vmcb->control.asid = 0;
}

static void test_asid_zero(struct svm_test *test)
{
    asm volatile ("vmmcall\n\t");
}

static bool check_asid_zero(struct svm_test *test)
{
    return vmcb->control.exit_code == SVM_EXIT_ERR;
}

static void sel_cr0_bug_prepare(struct svm_test *test)
{
    vmcb->control.intercept |= (1ULL << INTERCEPT_SELECTIVE_CR0);
}

static bool sel_cr0_bug_finished(struct svm_test *test)
{
    return true;
}

static void sel_cr0_bug_test(struct svm_test *test)
{
    unsigned long cr0;

    /* read cr0, set CD, and write back */
    cr0  = read_cr0();
    cr0 |= (1UL << 30);
    write_cr0(cr0);

    /*
     * If we are here the test failed, and we are not in guest mode
     * anymore, so we can't trigger an intercept. Report the failure
     * and exit for now.
     */
    report_fail("sel_cr0 test. Cannot recover from this - exiting");
    exit(report_summary());
}

static bool sel_cr0_bug_check(struct svm_test *test)
{
    return vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE;
}

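/*
 * The NPT checks below match exit_info_1 against the #NPF error code:
 * bit 0 = present, bit 1 = write, bit 2 = user, bit 4 = instruction fetch,
 * bit 32 = fault on the final guest physical address and bit 33 = fault
 * while walking the guest page tables.  For example, 0x100000015ULL is a
 * user-mode instruction fetch from a present final GPA.
 */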
static void npt_nx_prepare(struct svm_test *test)
{
    u64 *pte;

    test->scratch = rdmsr(MSR_EFER);
    wrmsr(MSR_EFER, test->scratch | EFER_NX);

    /* Clear the guest's EFER.NX, it should not affect NPT behavior. */
    vmcb->save.efer &= ~EFER_NX;

    pte = npt_get_pte((u64)null_test);

    *pte |= PT64_NX_MASK;
}

static bool npt_nx_check(struct svm_test *test)
{
    u64 *pte = npt_get_pte((u64)null_test);

    wrmsr(MSR_EFER, test->scratch);

    *pte &= ~PT64_NX_MASK;

    return (vmcb->control.exit_code == SVM_EXIT_NPF)
           && (vmcb->control.exit_info_1 == 0x100000015ULL);
}

static void npt_np_prepare(struct svm_test *test)
{
    u64 *pte;

    scratch_page = alloc_page();
    pte = npt_get_pte((u64)scratch_page);

    *pte &= ~1ULL;
}

static void npt_np_test(struct svm_test *test)
{
    (void) *(volatile u64 *)scratch_page;
}

static bool npt_np_check(struct svm_test *test)
{
    u64 *pte = npt_get_pte((u64)scratch_page);

    *pte |= 1ULL;

    return (vmcb->control.exit_code == SVM_EXIT_NPF)
           && (vmcb->control.exit_info_1 == 0x100000004ULL);
}

static void npt_us_prepare(struct svm_test *test)
{
    u64 *pte;

    scratch_page = alloc_page();
    pte = npt_get_pte((u64)scratch_page);

    *pte &= ~(1ULL << 2);
}

static void npt_us_test(struct svm_test *test)
{
    (void) *(volatile u64 *)scratch_page;
}

static bool npt_us_check(struct svm_test *test)
{
    u64 *pte = npt_get_pte((u64)scratch_page);

    *pte |= (1ULL << 2);

    return (vmcb->control.exit_code == SVM_EXIT_NPF)
           && (vmcb->control.exit_info_1 == 0x100000005ULL);
}

static void npt_rw_prepare(struct svm_test *test)
{
    u64 *pte;

    pte = npt_get_pte(0x80000);

    *pte &= ~(1ULL << 1);
}

static void npt_rw_test(struct svm_test *test)
{
    u64 *data = (void*)(0x80000);

    *data = 0;
}

static bool npt_rw_check(struct svm_test *test)
{
    u64 *pte = npt_get_pte(0x80000);

    *pte |= (1ULL << 1);

    return (vmcb->control.exit_code == SVM_EXIT_NPF)
           && (vmcb->control.exit_info_1 == 0x100000007ULL);
}

static void npt_rw_pfwalk_prepare(struct svm_test *test)
{
    u64 *pte;

    pte = npt_get_pte(read_cr3());

    *pte &= ~(1ULL << 1);
}

static bool npt_rw_pfwalk_check(struct svm_test *test)
{
    u64 *pte = npt_get_pte(read_cr3());

    *pte |= (1ULL << 1);

    return (vmcb->control.exit_code == SVM_EXIT_NPF)
           && (vmcb->control.exit_info_1 == 0x200000007ULL)
           && (vmcb->control.exit_info_2 == read_cr3());
}

static void npt_l1mmio_prepare(struct svm_test *test)
{
}

u32 nested_apic_version1;
u32 nested_apic_version2;

static void npt_l1mmio_test(struct svm_test *test)
{
    volatile u32 *data = (volatile void*)(0xfee00030UL);

    nested_apic_version1 = *data;
    nested_apic_version2 = *data;
}

static bool npt_l1mmio_check(struct svm_test *test)
{
    volatile u32 *data = (volatile void*)(0xfee00030);
    u32 lvr = *data;

    return nested_apic_version1 == lvr && nested_apic_version2 == lvr;
}

static void npt_rw_l1mmio_prepare(struct svm_test *test)
{
    u64 *pte;

    pte = npt_get_pte(0xfee00080);

    *pte &= ~(1ULL << 1);
}

static void npt_rw_l1mmio_test(struct svm_test *test)
{
    volatile u32 *data = (volatile void*)(0xfee00080);

    *data = *data;
}

static bool npt_rw_l1mmio_check(struct svm_test *test)
{
    u64 *pte = npt_get_pte(0xfee00080);

    *pte |= (1ULL << 1);

    return (vmcb->control.exit_code == SVM_EXIT_NPF)
           && (vmcb->control.exit_info_1 == 0x100000007ULL);
}

#define TSC_ADJUST_VALUE    (1ll << 32)
#define TSC_OFFSET_VALUE    (~0ull << 48)
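/*
 * tsc_offset is added to the host TSC while the guest runs.  A guest write
 * to IA32_TSC is expected not to move the underlying TSC; the delta is
 * instead absorbed into IA32_TSC_ADJUST, which the checks below rely on.
 */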
static bool ok;

static bool tsc_adjust_supported(void)
{
    return this_cpu_has(X86_FEATURE_TSC_ADJUST);
}

static void tsc_adjust_prepare(struct svm_test *test)
{
    default_prepare(test);
    vmcb->control.tsc_offset = TSC_OFFSET_VALUE;

    wrmsr(MSR_IA32_TSC_ADJUST, -TSC_ADJUST_VALUE);
    int64_t adjust = rdmsr(MSR_IA32_TSC_ADJUST);
    ok = adjust == -TSC_ADJUST_VALUE;
}

static void tsc_adjust_test(struct svm_test *test)
{
    int64_t adjust = rdmsr(MSR_IA32_TSC_ADJUST);
    ok &= adjust == -TSC_ADJUST_VALUE;

    uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE;
    wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE);

    adjust = rdmsr(MSR_IA32_TSC_ADJUST);
    ok &= adjust <= -2 * TSC_ADJUST_VALUE;

    uint64_t l1_tsc_end = rdtsc() - TSC_OFFSET_VALUE;
    ok &= (l1_tsc_end + TSC_ADJUST_VALUE - l1_tsc) < TSC_ADJUST_VALUE;

    uint64_t l1_tsc_msr = rdmsr(MSR_IA32_TSC) - TSC_OFFSET_VALUE;
    ok &= (l1_tsc_msr + TSC_ADJUST_VALUE - l1_tsc) < TSC_ADJUST_VALUE;
}

static bool tsc_adjust_check(struct svm_test *test)
{
    int64_t adjust = rdmsr(MSR_IA32_TSC_ADJUST);

    wrmsr(MSR_IA32_TSC_ADJUST, 0);
    return ok && adjust <= -2 * TSC_ADJUST_VALUE;
}

static void latency_prepare(struct svm_test *test)
{
    default_prepare(test);
    runs = LATENCY_RUNS;
    latvmrun_min = latvmexit_min = -1ULL;
    latvmrun_max = latvmexit_max = 0;
    vmrun_sum = vmexit_sum = 0;
    tsc_start = rdtsc();
}

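/*
 * The guest half of the loop measures VMRUN latency (tsc_start is armed on
 * the host side just before re-entry) and the host half, latency_finished(),
 * measures VMEXIT latency from the tsc_start armed in the guest.
 */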
static void latency_test(struct svm_test *test)
{
    u64 cycles;

start:
    tsc_end = rdtsc();

    cycles = tsc_end - tsc_start;

    if (cycles > latvmrun_max)
        latvmrun_max = cycles;

    if (cycles < latvmrun_min)
        latvmrun_min = cycles;

    vmrun_sum += cycles;

    tsc_start = rdtsc();

    asm volatile ("vmmcall" : : : "memory");
    goto start;
}

static bool latency_finished(struct svm_test *test)
{
    u64 cycles;

    tsc_end = rdtsc();

    cycles = tsc_end - tsc_start;

    if (cycles > latvmexit_max)
        latvmexit_max = cycles;

    if (cycles < latvmexit_min)
        latvmexit_min = cycles;

    vmexit_sum += cycles;

    vmcb->save.rip += 3;

    runs -= 1;

    tsc_start = rdtsc();

    return runs == 0;
}

static bool latency_finished_clean(struct svm_test *test)
{
    vmcb->control.clean = VMCB_CLEAN_ALL;
    return latency_finished(test);
}

static bool latency_check(struct svm_test *test)
{
    printf("    Latency VMRUN : max: %ld min: %ld avg: %ld\n", latvmrun_max,
            latvmrun_min, vmrun_sum / LATENCY_RUNS);
    printf("    Latency VMEXIT: max: %ld min: %ld avg: %ld\n", latvmexit_max,
            latvmexit_min, vmexit_sum / LATENCY_RUNS);
    return true;
}

static void lat_svm_insn_prepare(struct svm_test *test)
{
    default_prepare(test);
    runs = LATENCY_RUNS;
    latvmload_min = latvmsave_min = latstgi_min = latclgi_min = -1ULL;
    latvmload_max = latvmsave_max = latstgi_max = latclgi_max = 0;
    vmload_sum = vmsave_sum = stgi_sum = clgi_sum = 0;
}

static bool lat_svm_insn_finished(struct svm_test *test)
{
    u64 vmcb_phys = virt_to_phys(vmcb);
    u64 cycles;

    for ( ; runs != 0; runs--) {
        tsc_start = rdtsc();
        asm volatile("vmload %0\n\t" : : "a"(vmcb_phys) : "memory");
        cycles = rdtsc() - tsc_start;
        if (cycles > latvmload_max)
            latvmload_max = cycles;
        if (cycles < latvmload_min)
            latvmload_min = cycles;
        vmload_sum += cycles;

        tsc_start = rdtsc();
        asm volatile("vmsave %0\n\t" : : "a"(vmcb_phys) : "memory");
        cycles = rdtsc() - tsc_start;
        if (cycles > latvmsave_max)
            latvmsave_max = cycles;
        if (cycles < latvmsave_min)
            latvmsave_min = cycles;
        vmsave_sum += cycles;

        tsc_start = rdtsc();
        asm volatile("stgi\n\t");
        cycles = rdtsc() - tsc_start;
        if (cycles > latstgi_max)
            latstgi_max = cycles;
        if (cycles < latstgi_min)
            latstgi_min = cycles;
        stgi_sum += cycles;

        tsc_start = rdtsc();
        asm volatile("clgi\n\t");
        cycles = rdtsc() - tsc_start;
        if (cycles > latclgi_max)
            latclgi_max = cycles;
        if (cycles < latclgi_min)
            latclgi_min = cycles;
        clgi_sum += cycles;
    }

    tsc_end = rdtsc();

    return true;
}

static bool lat_svm_insn_check(struct svm_test *test)
{
    printf("    Latency VMLOAD: max: %ld min: %ld avg: %ld\n", latvmload_max,
            latvmload_min, vmload_sum / LATENCY_RUNS);
    printf("    Latency VMSAVE: max: %ld min: %ld avg: %ld\n", latvmsave_max,
            latvmsave_min, vmsave_sum / LATENCY_RUNS);
    printf("    Latency STGI:   max: %ld min: %ld avg: %ld\n", latstgi_max,
            latstgi_min, stgi_sum / LATENCY_RUNS);
    printf("    Latency CLGI:   max: %ld min: %ld avg: %ld\n", latclgi_max,
            latclgi_min, clgi_sum / LATENCY_RUNS);
    return true;
}

bool pending_event_ipi_fired;
bool pending_event_guest_run;

static void pending_event_ipi_isr(isr_regs_t *regs)
{
    pending_event_ipi_fired = true;
    eoi();
}

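/*
 * The self-IPI sent from prepare() stays pending because the host runs
 * with interrupts disabled; with INTERCEPT_INTR and V_INTR_MASKING set,
 * the following VMRUN is expected to exit with SVM_EXIT_INTR before the
 * guest body executes.
 */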
static void pending_event_prepare(struct svm_test *test)
{
    int ipi_vector = 0xf1;

    default_prepare(test);

    pending_event_ipi_fired = false;

    handle_irq(ipi_vector, pending_event_ipi_isr);

    pending_event_guest_run = false;

    vmcb->control.intercept |= (1ULL << INTERCEPT_INTR);
    vmcb->control.int_ctl |= V_INTR_MASKING_MASK;

    apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL |
                   APIC_DM_FIXED | ipi_vector, 0);

    set_test_stage(test, 0);
}

static void pending_event_test(struct svm_test *test)
{
    pending_event_guest_run = true;
}

static bool pending_event_finished(struct svm_test *test)
{
    switch (get_test_stage(test)) {
    case 0:
        if (vmcb->control.exit_code != SVM_EXIT_INTR) {
            report_fail("VMEXIT not due to pending interrupt. Exit reason 0x%x",
                        vmcb->control.exit_code);
            return true;
        }

        vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR);
        vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;

        if (pending_event_guest_run) {
            report_fail("Guest ran before host received IPI");
            return true;
        }

        irq_enable();
        asm volatile ("nop");
        irq_disable();

        if (!pending_event_ipi_fired) {
            report_fail("Pending interrupt not dispatched after IRQ enabled");
            return true;
        }
        break;

    case 1:
        if (!pending_event_guest_run) {
            report_fail("Guest did not resume when no interrupt was pending");
            return true;
        }
        break;
    }

    inc_test_stage(test);

    return get_test_stage(test) == 2;
}

static bool pending_event_check(struct svm_test *test)
{
    return get_test_stage(test) == 2;
}

static void pending_event_cli_prepare(struct svm_test *test)
{
    default_prepare(test);

    pending_event_ipi_fired = false;

    handle_irq(0xf1, pending_event_ipi_isr);

    apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL |
                   APIC_DM_FIXED | 0xf1, 0);

    set_test_stage(test, 0);
}

static void pending_event_cli_prepare_gif_clear(struct svm_test *test)
{
    asm("cli");
}

static void pending_event_cli_test(struct svm_test *test)
{
    if (pending_event_ipi_fired == true) {
        set_test_stage(test, -1);
        report_fail("Interrupt preceded guest");
        vmmcall();
    }

    /* VINTR_MASKING is zero.  This should cause the IPI to fire.  */
    irq_enable();
    asm volatile ("nop");
    irq_disable();

    if (pending_event_ipi_fired != true) {
        set_test_stage(test, -1);
        report_fail("Interrupt not triggered by guest");
    }

    vmmcall();

    /*
     * Now VINTR_MASKING=1, but no interrupt is pending so
     * the VINTR interception should be clear in VMCB02.  Check
     * that L0 did not leave a stale VINTR in the VMCB.
     */
    irq_enable();
    asm volatile ("nop");
    irq_disable();
}

static bool pending_event_cli_finished(struct svm_test *test)
{
    if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
        report_fail("VMEXIT not due to vmmcall. Exit reason 0x%x",
                    vmcb->control.exit_code);
        return true;
    }

    switch (get_test_stage(test)) {
    case 0:
        vmcb->save.rip += 3;

        pending_event_ipi_fired = false;

        vmcb->control.int_ctl |= V_INTR_MASKING_MASK;

        /* Now entering again with VINTR_MASKING=1. */
        apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL |
                       APIC_DM_FIXED | 0xf1, 0);

        break;

    case 1:
        if (pending_event_ipi_fired == true) {
            report_fail("Interrupt triggered by guest");
            return true;
        }

        irq_enable();
        asm volatile ("nop");
        irq_disable();

        if (pending_event_ipi_fired != true) {
            report_fail("Interrupt not triggered by host");
            return true;
        }

        break;

    default:
        return true;
    }

    inc_test_stage(test);

    return get_test_stage(test) == 2;
}

static bool pending_event_cli_check(struct svm_test *test)
{
    return get_test_stage(test) == 2;
}

#define TIMER_VECTOR    222

static volatile bool timer_fired;

static void timer_isr(isr_regs_t *regs)
{
    timer_fired = true;
    apic_write(APIC_EOI, 0);
}

static void interrupt_prepare(struct svm_test *test)
{
    default_prepare(test);
    handle_irq(TIMER_VECTOR, timer_isr);
    timer_fired = false;
    set_test_stage(test, 0);
}

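/*
 * Four stages: a direct interrupt while the guest runs, an intercepted one
 * (INTERCEPT_INTR and V_INTR_MASKING are toggled by interrupt_finished()
 * between the vmmcalls), then the same two cases again with the guest
 * halted when the timer fires.
 */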
static void interrupt_test(struct svm_test *test)
{
    long long start, loops;

    apic_write(APIC_LVTT, TIMER_VECTOR);
    irq_enable();
    apic_write(APIC_TMICT, 1); // Timer Initial Count register (0x380), one-shot
    for (loops = 0; loops < 10000000 && !timer_fired; loops++)
        asm volatile ("nop");

    report(timer_fired, "direct interrupt while running guest");

    if (!timer_fired) {
        set_test_stage(test, -1);
        vmmcall();
    }

    apic_write(APIC_TMICT, 0);
    irq_disable();
    vmmcall();

    timer_fired = false;
    apic_write(APIC_TMICT, 1);
    for (loops = 0; loops < 10000000 && !timer_fired; loops++)
        asm volatile ("nop");

    report(timer_fired, "intercepted interrupt while running guest");

    if (!timer_fired) {
        set_test_stage(test, -1);
        vmmcall();
    }

    irq_enable();
    apic_write(APIC_TMICT, 0);
    irq_disable();

    timer_fired = false;
    start = rdtsc();
    apic_write(APIC_TMICT, 1000000);
    safe_halt();

    report(rdtsc() - start > 10000 && timer_fired,
           "direct interrupt + hlt");

    if (!timer_fired) {
        set_test_stage(test, -1);
        vmmcall();
    }

    apic_write(APIC_TMICT, 0);
    irq_disable();
    vmmcall();

    timer_fired = false;
    start = rdtsc();
    apic_write(APIC_TMICT, 1000000);
    asm volatile ("hlt");

    report(rdtsc() - start > 10000 && timer_fired,
           "intercepted interrupt + hlt");

    if (!timer_fired) {
        set_test_stage(test, -1);
        vmmcall();
    }

    apic_write(APIC_TMICT, 0);
    irq_disable();
}

static bool interrupt_finished(struct svm_test *test)
{
    switch (get_test_stage(test)) {
    case 0:
    case 2:
        if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
            report_fail("VMEXIT not due to vmmcall. Exit reason 0x%x",
                        vmcb->control.exit_code);
            return true;
        }
        vmcb->save.rip += 3;

        vmcb->control.intercept |= (1ULL << INTERCEPT_INTR);
        vmcb->control.int_ctl |= V_INTR_MASKING_MASK;
        break;

    case 1:
    case 3:
        if (vmcb->control.exit_code != SVM_EXIT_INTR) {
            report_fail("VMEXIT not due to intr intercept. Exit reason 0x%x",
                        vmcb->control.exit_code);
            return true;
        }

        irq_enable();
        asm volatile ("nop");
        irq_disable();

        vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR);
        vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
        break;

    case 4:
        break;

    default:
        return true;
    }

    inc_test_stage(test);

    return get_test_stage(test) == 5;
}

static bool interrupt_check(struct svm_test *test)
{
    return get_test_stage(test) == 5;
}

static volatile bool nmi_fired;

static void nmi_handler(isr_regs_t *regs)
{
    nmi_fired = true;
    apic_write(APIC_EOI, 0);
}

static void nmi_prepare(struct svm_test *test)
{
    default_prepare(test);
    nmi_fired = false;
    handle_irq(NMI_VECTOR, nmi_handler);
    set_test_stage(test, 0);
}

static void nmi_test(struct svm_test *test)
{
    apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, 0);

    report(nmi_fired, "direct NMI while running guest");

    if (!nmi_fired)
        set_test_stage(test, -1);

    vmmcall();

    nmi_fired = false;

    apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, 0);

    if (!nmi_fired) {
        report_fail("intercepted pending NMI not dispatched");
        set_test_stage(test, -1);
    }
}

static bool nmi_finished(struct svm_test *test)
{
    switch (get_test_stage(test)) {
    case 0:
        if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
            report_fail("VMEXIT not due to vmmcall. Exit reason 0x%x",
                        vmcb->control.exit_code);
            return true;
        }
        vmcb->save.rip += 3;

        vmcb->control.intercept |= (1ULL << INTERCEPT_NMI);
        break;

    case 1:
        if (vmcb->control.exit_code != SVM_EXIT_NMI) {
            report_fail("VMEXIT not due to NMI intercept. Exit reason 0x%x",
                        vmcb->control.exit_code);
            return true;
        }

        report_pass("NMI intercept while running guest");
        break;

    case 2:
        break;

    default:
        return true;
    }

    inc_test_stage(test);

    return get_test_stage(test) == 3;
}

static bool nmi_check(struct svm_test *test)
{
    return get_test_stage(test) == 3;
}

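/* TSC cycles to busy-wait in delay(); roughly 0.1 s on a 1 GHz TSC. */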
#define NMI_DELAY 100000000ULL

static void nmi_message_thread(void *_test)
{
    struct svm_test *test = _test;

    while (get_test_stage(test) != 1)
        pause();

    delay(NMI_DELAY);

    apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, id_map[0]);

    while (get_test_stage(test) != 2)
        pause();

    delay(NMI_DELAY);

    apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, id_map[0]);
}

static void nmi_hlt_test(struct svm_test *test)
{
    long long start;

    on_cpu_async(1, nmi_message_thread, test);

    start = rdtsc();

    set_test_stage(test, 1);

    asm volatile ("hlt");

    report((rdtsc() - start > NMI_DELAY) && nmi_fired,
           "direct NMI + hlt");

    if (!nmi_fired)
        set_test_stage(test, -1);

    nmi_fired = false;

    vmmcall();

    start = rdtsc();

    set_test_stage(test, 2);

    asm volatile ("hlt");

    report((rdtsc() - start > NMI_DELAY) && nmi_fired,
           "intercepted NMI + hlt");

    if (!nmi_fired) {
        report_fail("intercepted pending NMI not dispatched");
        set_test_stage(test, -1);
        vmmcall();
    }

    set_test_stage(test, 3);
}

static bool nmi_hlt_finished(struct svm_test *test)
{
    switch (get_test_stage(test)) {
    case 1:
        if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
            report_fail("VMEXIT not due to vmmcall. Exit reason 0x%x",
                        vmcb->control.exit_code);
            return true;
        }
        vmcb->save.rip += 3;

        vmcb->control.intercept |= (1ULL << INTERCEPT_NMI);
        break;

    case 2:
        if (vmcb->control.exit_code != SVM_EXIT_NMI) {
            report_fail("VMEXIT not due to NMI intercept. Exit reason 0x%x",
                        vmcb->control.exit_code);
            return true;
        }

        report_pass("NMI intercept while running guest");
        break;

    case 3:
        break;

    default:
        return true;
    }

    return get_test_stage(test) == 3;
}

static bool nmi_hlt_check(struct svm_test *test)
{
    return get_test_stage(test) == 3;
}

static volatile int count_exc = 0;

static void my_isr(struct ex_regs *r)
{
    count_exc++;
}

static void exc_inject_prepare(struct svm_test *test)
{
    default_prepare(test);
    handle_exception(DE_VECTOR, my_isr);
    handle_exception(NMI_VECTOR, my_isr);
}

static void exc_inject_test(struct svm_test *test)
{
    asm volatile ("vmmcall\n\tvmmcall\n\t");
}

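/*
 * Per the APM, injecting an event of type "exception" with vector 2 (NMI)
 * is illegal, so the first injection below is expected to make VMRUN fail
 * with SVM_EXIT_ERR rather than deliver anything to the guest.
 */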
static bool exc_inject_finished(struct svm_test *test)
{
    switch (get_test_stage(test)) {
    case 0:
        if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
            report_fail("VMEXIT not due to vmmcall. Exit reason 0x%x",
                        vmcb->control.exit_code);
            return true;
        }
        vmcb->save.rip += 3;
        vmcb->control.event_inj = NMI_VECTOR | SVM_EVTINJ_TYPE_EXEPT | SVM_EVTINJ_VALID;
        break;

    case 1:
        if (vmcb->control.exit_code != SVM_EXIT_ERR) {
            report_fail("VMEXIT not due to error. Exit reason 0x%x",
                        vmcb->control.exit_code);
            return true;
        }
        report(count_exc == 0, "exception with vector 2 not injected");
        vmcb->control.event_inj = DE_VECTOR | SVM_EVTINJ_TYPE_EXEPT | SVM_EVTINJ_VALID;
        break;

    case 2:
        if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
            report_fail("VMEXIT not due to vmmcall. Exit reason 0x%x",
                        vmcb->control.exit_code);
            return true;
        }
        vmcb->save.rip += 3;
        report(count_exc == 1, "divide overflow exception injected");
        report(!(vmcb->control.event_inj & SVM_EVTINJ_VALID), "eventinj.VALID cleared");
        break;

    default:
        return true;
    }

    inc_test_stage(test);

    return get_test_stage(test) == 3;
}

static bool exc_inject_check(struct svm_test *test)
{
    return count_exc == 1 && get_test_stage(test) == 3;
}

static volatile bool virq_fired;

static void virq_isr(isr_regs_t *regs)
{
    virq_fired = true;
}

static void virq_inject_prepare(struct svm_test *test)
{
    handle_irq(0xf1, virq_isr);
    default_prepare(test);
    vmcb->control.int_ctl = V_INTR_MASKING_MASK | V_IRQ_MASK |
                            (0x0f << V_INTR_PRIO_SHIFT); // Set to the highest priority
    vmcb->control.int_vector = 0xf1;
    virq_fired = false;
    set_test_stage(test, 0);
}

static void virq_inject_test(struct svm_test *test)
{
    if (virq_fired) {
        report_fail("virtual interrupt fired before L2 sti");
        set_test_stage(test, -1);
        vmmcall();
    }

    irq_enable();
    asm volatile ("nop");
    irq_disable();

    if (!virq_fired) {
        report_fail("virtual interrupt not fired after L2 sti");
        set_test_stage(test, -1);
    }

    vmmcall();

    if (virq_fired) {
        report_fail("virtual interrupt fired before L2 sti after VINTR intercept");
        set_test_stage(test, -1);
        vmmcall();
    }

    irq_enable();
    asm volatile ("nop");
    irq_disable();

    if (!virq_fired) {
        report_fail("virtual interrupt not fired after return from VINTR intercept");
        set_test_stage(test, -1);
    }

    vmmcall();

    irq_enable();
    asm volatile ("nop");
    irq_disable();

    if (virq_fired) {
        report_fail("virtual interrupt fired when V_IRQ_PRIO less than V_TPR");
        set_test_stage(test, -1);
    }

    vmmcall();
    vmmcall();
}

static bool virq_inject_finished(struct svm_test *test)
{
    vmcb->save.rip += 3;

    switch (get_test_stage(test)) {
    case 0:
        if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
            report_fail("VMEXIT not due to vmmcall. Exit reason 0x%x",
                        vmcb->control.exit_code);
            return true;
        }
        if (vmcb->control.int_ctl & V_IRQ_MASK) {
            report_fail("V_IRQ not cleared on VMEXIT after firing");
            return true;
        }
        virq_fired = false;
        vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
        vmcb->control.int_ctl = V_INTR_MASKING_MASK | V_IRQ_MASK |
                            (0x0f << V_INTR_PRIO_SHIFT);
        break;

    case 1:
        if (vmcb->control.exit_code != SVM_EXIT_VINTR) {
            report_fail("VMEXIT not due to vintr. Exit reason 0x%x",
                        vmcb->control.exit_code);
            return true;
        }
        if (virq_fired) {
            report_fail("V_IRQ fired before SVM_EXIT_VINTR");
            return true;
        }
        vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
        break;

    case 2:
        if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
            report_fail("VMEXIT not due to vmmcall. Exit reason 0x%x",
                        vmcb->control.exit_code);
            return true;
        }
        virq_fired = false;
        // Set irq to lower priority
        vmcb->control.int_ctl = V_INTR_MASKING_MASK | V_IRQ_MASK |
                            (0x08 << V_INTR_PRIO_SHIFT);
        // Raise guest TPR
        vmcb->control.int_ctl |= 0x0a & V_TPR_MASK;
        break;

    case 3:
        if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
            report_fail("VMEXIT not due to vmmcall. Exit reason 0x%x",
                        vmcb->control.exit_code);
            return true;
        }
        vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
        break;

    case 4:
        // INTERCEPT_VINTR should be ignored because V_INTR_PRIO < V_TPR
        if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
            report_fail("VMEXIT not due to vmmcall. Exit reason 0x%x",
                        vmcb->control.exit_code);
            return true;
        }
        break;

    default:
        return true;
    }

    inc_test_stage(test);

    return get_test_stage(test) == 5;
}

static bool virq_inject_check(struct svm_test *test)
{
    return get_test_stage(test) == 5;
}

/*
 * Detect nested guest RIP corruption as explained in kernel commit
 * b6162e82aef19fee9c32cb3fe9ac30d9116a8c73
 *
 * In the assembly loop below 'ins' is executed while IO instructions
 * are not intercepted; the instruction is emulated by L0.
 *
 * At the same time we are getting interrupts from the local APIC timer,
 * and we do intercept them in L1.
 *
 * If the interrupt happens on the insb instruction, L0 will VMexit, emulate
 * the insb instruction and then it will inject the interrupt to L1 through
 * a nested VMexit.  Due to a bug, it would leave pre-emulation values of RIP,
 * RAX and RSP in the VMCB.
 *
 * In our intercept handler we detect the bug by checking that RIP is that of
 * the insb instruction, but its memory operand has already been written.
 * This means that insb was already executed.
 */

static volatile int isr_cnt = 0;
static volatile uint8_t io_port_var = 0xAA;
extern const char insb_instruction_label[];

static void reg_corruption_isr(isr_regs_t *regs)
{
    isr_cnt++;
    apic_write(APIC_EOI, 0);
}

static void reg_corruption_prepare(struct svm_test *test)
{
    default_prepare(test);
    set_test_stage(test, 0);

    vmcb->control.int_ctl = V_INTR_MASKING_MASK;
    vmcb->control.intercept |= (1ULL << INTERCEPT_INTR);

    handle_irq(TIMER_VECTOR, reg_corruption_isr);

    /* set local APIC to inject external interrupts */
    apic_write(APIC_TMICT, 0);
    apic_write(APIC_TDCR, 0);
    apic_write(APIC_LVTT, TIMER_VECTOR | APIC_LVT_TIMER_PERIODIC);
    apic_write(APIC_TMICT, 1000);
}

static void reg_corruption_test(struct svm_test *test)
{
    /* endless loop, interrupted by the timer interrupt */
    asm volatile (
            "1:\n\t"
            "movw $0x4d0, %%dx\n\t" // IO port
            "lea %[io_port_var], %%rdi\n\t"
            "movb $0xAA, %[io_port_var]\n\t"
            "insb_instruction_label:\n\t"
            "insb\n\t"
            "jmp 1b\n\t"

            : [io_port_var] "=m" (io_port_var)
            : /* no inputs */
            : "rdx", "rdi"
    );
}

static bool reg_corruption_finished(struct svm_test *test)
{
    if (isr_cnt >= 10000) {
        report_pass("No RIP corruption detected after %d timer interrupts",
                    isr_cnt);
        set_test_stage(test, 1);
        return true;
    }

    if (vmcb->control.exit_code == SVM_EXIT_INTR) {

        void* guest_rip = (void*)vmcb->save.rip;

        irq_enable();
        asm volatile ("nop");
        irq_disable();

        if (guest_rip == insb_instruction_label && io_port_var != 0xAA) {
            report_fail("RIP corruption detected after %d timer interrupts",
                        isr_cnt);
            return true;
        }

    }
    return false;
}

static bool reg_corruption_check(struct svm_test *test)
{
    return get_test_stage(test) == 1;
}

static void get_tss_entry(void *data)
{
    *((gdt_entry_t **)data) = get_tss_descr();
}

static int orig_cpu_count;

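/*
 * Send INIT to CPU 1, wait for it to go down, clear the busy bit of its
 * TSS descriptor (the restarted CPU reloads TR, and loading a busy TSS
 * would fault), then bring it back up with SIPI and wait until it is
 * online again.
 */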
1883 static void init_startup_prepare(struct svm_test *test)
1884 {
1885     gdt_entry_t *tss_entry;
1886     int i;
1887 
1888     on_cpu(1, get_tss_entry, &tss_entry);
1889 
1890     orig_cpu_count = cpu_online_count;
1891 
1892     apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_INIT | APIC_INT_ASSERT,
1893                    id_map[1]);
1894 
1895     delay(100000000ULL);
1896 
1897     --cpu_online_count;
1898 
1899     tss_entry->type &= ~DESC_BUSY;
1900 
1901     apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_STARTUP, id_map[1]);
1902 
1903     for (i = 0; i < 5 && cpu_online_count < orig_cpu_count; i++)
1904        delay(100000000ULL);
1905 }
1906 
1907 static bool init_startup_finished(struct svm_test *test)
1908 {
1909     return true;
1910 }
1911 
1912 static bool init_startup_check(struct svm_test *test)
1913 {
1914     return cpu_online_count == orig_cpu_count;
1915 }
1916 
1917 static volatile bool init_intercept;
1918 
1919 static void init_intercept_prepare(struct svm_test *test)
1920 {
1921     init_intercept = false;
1922     vmcb->control.intercept |= (1ULL << INTERCEPT_INIT);
1923 }
1924 
1925 static void init_intercept_test(struct svm_test *test)
1926 {
1927     apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_INIT | APIC_INT_ASSERT, 0);
1928 }
1929 
1930 static bool init_intercept_finished(struct svm_test *test)
1931 {
1932     vmcb->save.rip += 3;
1933 
1934     if (vmcb->control.exit_code != SVM_EXIT_INIT) {
1935         report_fail("VMEXIT not due to init intercept. Exit reason 0x%x",
1936                     vmcb->control.exit_code);
1937 
1938         return true;
1939         }
1940 
1941     init_intercept = true;
1942 
1943     report_pass("INIT to vcpu intercepted");
1944 
1945     return true;
1946 }
1947 
1948 static bool init_intercept_check(struct svm_test *test)
1949 {
1950     return init_intercept;
1951 }
1952 
1953 /*
1954  * Setting host EFLAGS.TF causes a #DB trap after the VMRUN completes on the
1955  * host side (i.e., after the #VMEXIT from the guest).
1956  *
1957  * Setting host EFLAGS.RF suppresses any potential instruction breakpoint
1958  * match on the VMRUN and completion of the VMRUN instruction clears the
1959  * host EFLAGS.RF bit.
1960  *
1961  * [AMD APM]
1962  */
1963 static volatile u8 host_rflags_guest_main_flag = 0;
1964 static volatile u8 host_rflags_db_handler_flag = 0;
1965 static volatile bool host_rflags_ss_on_vmrun = false;
1966 static volatile bool host_rflags_vmrun_reached = false;
1967 static volatile bool host_rflags_set_tf = false;
1968 static volatile bool host_rflags_set_rf = false;
1969 static u64 rip_detected;
1970 
1971 extern u64 *vmrun_rip;
1972 
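/*
 * #DB handler state machine: when single-stepping reaches vmrun_rip, mark it
 * and, for the RF case, arm a DR0 instruction breakpoint on VMRUN and stop
 * single-stepping; past VMRUN, either record the trap RIP and clear TF
 * (TF-only case) or force EFLAGS.RF and count invocations (RF case).
 */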
1973 static void host_rflags_db_handler(struct ex_regs *r)
1974 {
1975 	if (host_rflags_ss_on_vmrun) {
1976 		if (host_rflags_vmrun_reached) {
1977 			if (!host_rflags_set_rf) {
1978 				r->rflags &= ~X86_EFLAGS_TF;
1979 				rip_detected = r->rip;
1980 			} else {
1981 				r->rflags |= X86_EFLAGS_RF;
1982 				++host_rflags_db_handler_flag;
1983 			}
1984 		} else {
1985 			if (r->rip == (u64)&vmrun_rip) {
1986 				host_rflags_vmrun_reached = true;
1987 
1988 				if (host_rflags_set_rf) {
1989 					host_rflags_guest_main_flag = 0;
1990 					rip_detected = r->rip;
1991 					r->rflags &= ~X86_EFLAGS_TF;
1992 
1993 					/* Trigger #DB via debug registers */
1994 					write_dr0((void *)&vmrun_rip);
1995 					write_dr7(0x403);
1996 				}
1997 			}
1998 		}
1999 	} else {
2000 		r->rflags &= ~X86_EFLAGS_TF;
2001 	}
2002 }
2003 
2004 static void host_rflags_prepare(struct svm_test *test)
2005 {
2006 	default_prepare(test);
2007 	handle_exception(DB_VECTOR, host_rflags_db_handler);
2008 	set_test_stage(test, 0);
2009 }
2010 
2011 static void host_rflags_prepare_gif_clear(struct svm_test *test)
2012 {
2013 	if (host_rflags_set_tf)
2014 		write_rflags(read_rflags() | X86_EFLAGS_TF);
2015 }
2016 
2017 static void host_rflags_test(struct svm_test *test)
2018 {
2019 	while (1) {
2020 		if (get_test_stage(test) > 0) {
2021 			if ((host_rflags_set_tf && !host_rflags_ss_on_vmrun && !host_rflags_db_handler_flag) ||
2022 			    (host_rflags_set_rf && host_rflags_db_handler_flag == 1))
2023 				host_rflags_guest_main_flag = 1;
2024 		}
2025 
2026 		if (get_test_stage(test) == 4)
2027 			break;
2028 		vmmcall();
2029 	}
2030 }
2031 
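/*
 * Stage progression: 0) plain VMMCALL, then arm TF via the GIF-clear hook;
 * 1) verify the #DB fired before guest main ran; 2) single-step over VMRUN
 * itself and verify the trap RIP is the instruction after VMRUN; 3) repeat
 * with EFLAGS.RF forced, verifying the DR0 breakpoint on VMRUN is suppressed
 * and that VMRUN completion clears RF.
 */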
2032 static bool host_rflags_finished(struct svm_test *test)
2033 {
2034 	switch (get_test_stage(test)) {
2035 	case 0:
2036 		if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
2037 			report_fail("Unexpected VMEXIT. Exit reason 0x%x",
2038 				    vmcb->control.exit_code);
2039 			return true;
2040 		}
2041 		vmcb->save.rip += 3;
2042 		/*
2043 		 * Setting host EFLAGS.TF not immediately before VMRUN causes a
2044 		 * #DB trap before the first guest instruction is executed.
2045 		 */
2046 		host_rflags_set_tf = true;
2047 		break;
2048 	case 1:
2049 		if (vmcb->control.exit_code != SVM_EXIT_VMMCALL ||
2050 		    host_rflags_guest_main_flag != 1) {
2051 			report_fail("Unexpected VMEXIT or #DB handler"
2052 				    " invoked before guest main. Exit reason 0x%x",
2053 				    vmcb->control.exit_code);
2054 			return true;
2055 		}
2056 		vmcb->save.rip += 3;
2057 		/*
2058 		 * Setting host EFLAGS.TF immediately before VMRUN causes a #DB
2059 		 * trap after VMRUN completes on the host side (i.e., after
2060 		 * VMEXIT from guest).
2061 		 */
2062 		host_rflags_ss_on_vmrun = true;
2063 		break;
2064 	case 2:
2065 		if (vmcb->control.exit_code != SVM_EXIT_VMMCALL ||
2066 		    rip_detected != (u64)&vmrun_rip + 3) {
2067 			report_fail("Unexpected VMEXIT or RIP mismatch."
2068 				    " Exit reason 0x%x, RIP actual: %lx, RIP expected: "
2069 				    "%lx", vmcb->control.exit_code,
2070 				    rip_detected, (u64)&vmrun_rip + 3);
2071 			return true;
2072 		}
2073 		host_rflags_set_rf = true;
2074 		host_rflags_guest_main_flag = 0;
2075 		host_rflags_vmrun_reached = false;
2076 		vmcb->save.rip += 3;
2077 		break;
2078 	case 3:
2079 		if (vmcb->control.exit_code != SVM_EXIT_VMMCALL ||
2080 		    rip_detected != (u64)&vmrun_rip ||
2081 		    host_rflags_guest_main_flag != 1 ||
2082 		    host_rflags_db_handler_flag > 1 ||
2083 		    read_rflags() & X86_EFLAGS_RF) {
2084 			report_fail("Unexpected VMEXIT or RIP mismatch or "
2085 				    "EFLAGS.RF not cleared."
2086 				    " Exit reason 0x%x, RIP actual: %lx, RIP expected: "
2087 				    "%lx", vmcb->control.exit_code,
2088 				    rip_detected, (u64)&vmrun_rip);
2089 			return true;
2090 		}
2091 		host_rflags_set_tf = false;
2092 		host_rflags_set_rf = false;
2093 		vmcb->save.rip += 3;
2094 		break;
2095 	default:
2096 		return true;
2097 	}
2098 	inc_test_stage(test);
2099 	return get_test_stage(test) == 5;
2100 }
2101 
2102 static bool host_rflags_check(struct svm_test *test)
2103 {
2104 	return get_test_stage(test) == 4;
2105 }
2106 
2107 #define TEST(name) { #name, .v2 = name }
2108 
2109 /*
2110  * v2 tests
2111  */
2112 
2113 /*
2114  * Ensure that kvm recalculates the L1 guest's CPUID.01H:ECX.OSXSAVE
2115  * after VM-exit from an L2 guest that sets CR4.OSXSAVE to a different
2116  * value than in L1.
2117  */
2118 
2119 static void svm_cr4_osxsave_test_guest(struct svm_test *test)
2120 {
2121 	write_cr4(read_cr4() & ~X86_CR4_OSXSAVE);
2122 }
2123 
2124 static void svm_cr4_osxsave_test(void)
2125 {
2126 	if (!this_cpu_has(X86_FEATURE_XSAVE)) {
2127 		report_skip("XSAVE not detected");
2128 		return;
2129 	}
2130 
2131 	if (!(read_cr4() & X86_CR4_OSXSAVE)) {
2132 		unsigned long cr4 = read_cr4() | X86_CR4_OSXSAVE;
2133 
2134 		write_cr4(cr4);
2135 		vmcb->save.cr4 = cr4;
2136 	}
2137 
2138 	report(cpuid_osxsave(), "CPUID.01H:ECX.OSXSAVE set before VMRUN");
2139 
2140 	test_set_guest(svm_cr4_osxsave_test_guest);
2141 	report(svm_vmrun() == SVM_EXIT_VMMCALL,
2142 	       "svm_cr4_osxsave_test_guest finished with VMMCALL");
2143 
2144 	report(cpuid_osxsave(), "CPUID.01H:ECX.OSXSAVE set after VMRUN");
2145 }
2146 
2147 static void basic_guest_main(struct svm_test *test)
2148 {
2149 }
2150 
2151 
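/*
 * Walk bits [start, end] in steps of inc; for each bit present in resv_mask,
 * set that single reserved bit in the given VMCB field on top of val and
 * expect VMRUN to fail its consistency checks with SVM_EXIT_ERR, e.g.
 * SVM_TEST_REG_RESERVED_BITS(8, 9, 1, "EFER", vmcb->save.efer, efer_saved,
 * SVM_EFER_RESERVED_MASK) probes the EFER MBZ bit 9.
 */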
2152 #define SVM_TEST_REG_RESERVED_BITS(start, end, inc, str_name, reg, val,	\
2153 				   resv_mask)				\
2154 {									\
2155 	u64 tmp, mask;							\
2156 	int i;								\
2157 									\
2158 	for (i = start; i <= end; i += inc) {				\
2159 		mask = 1ull << i;					\
2160 		if (!(mask & resv_mask))				\
2161 			continue;					\
2162 		tmp = val | mask;					\
2163 		reg = tmp;						\
2164 		report(svm_vmrun() == SVM_EXIT_ERR, "Test %s %d:%d: %lx",\
2165 		    str_name, end, start, tmp);				\
2166 	}								\
2167 }
2168 
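/*
 * Same walk as above, but for CR0/CR3/CR4 and with a caller-supplied exit
 * code: depending on the mode, reserved CR bits are rejected by VMRUN itself
 * (SVM_EXIT_ERR), only surface as #NPF on the first guest access, or are
 * ignored entirely (a clean SVM_EXIT_VMMCALL run).
 */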
2169 #define SVM_TEST_CR_RESERVED_BITS(start, end, inc, cr, val, resv_mask,	\
2170 				  exit_code, test_name)			\
2171 {									\
2172 	u64 tmp, mask;							\
2173 	u32 r;								\
2174 	int i;								\
2175 									\
2176 	for (i = start; i <= end; i += inc) {				\
2177 		mask = 1ull << i;					\
2178 		if (!(mask & resv_mask))				\
2179 			continue;					\
2180 		tmp = val | mask;					\
2181 		switch (cr) {						\
2182 		case 0:							\
2183 			vmcb->save.cr0 = tmp;				\
2184 			break;						\
2185 		case 3:							\
2186 			vmcb->save.cr3 = tmp;				\
2187 			break;						\
2188 		case 4:							\
2189 			vmcb->save.cr4 = tmp;				\
2190 		}							\
2191 		r = svm_vmrun();					\
2192 		report(r == exit_code, "Test CR%d %s%d:%d: %lx, wanted exit 0x%x, got 0x%x",\
2193 		       cr, test_name, end, start, tmp, exit_code, r);	\
2194 	}								\
2195 }
2196 
2197 static void test_efer(void)
2198 {
2199 	/*
2200 	 * Un-setting EFER.SVME is illegal
2201 	 */
2202 	u64 efer_saved = vmcb->save.efer;
2203 	u64 efer = efer_saved;
2204 
2205 	report (svm_vmrun() == SVM_EXIT_VMMCALL, "EFER.SVME: %lx", efer);
2206 	efer &= ~EFER_SVME;
2207 	vmcb->save.efer = efer;
2208 	report (svm_vmrun() == SVM_EXIT_ERR, "EFER.SVME: %lx", efer);
2209 	vmcb->save.efer = efer_saved;
2210 
2211 	/*
2212 	 * EFER MBZ bits: 63:16, 9
2213 	 */
2214 	efer_saved = vmcb->save.efer;
2215 
2216 	SVM_TEST_REG_RESERVED_BITS(8, 9, 1, "EFER", vmcb->save.efer,
2217 	    efer_saved, SVM_EFER_RESERVED_MASK);
2218 	SVM_TEST_REG_RESERVED_BITS(16, 63, 4, "EFER", vmcb->save.efer,
2219 	    efer_saved, SVM_EFER_RESERVED_MASK);
2220 
2221 	/*
2222 	 * EFER.LME and CR0.PG are both set and CR4.PAE is zero.
2223 	 */
2224 	u64 cr0_saved = vmcb->save.cr0;
2225 	u64 cr0;
2226 	u64 cr4_saved = vmcb->save.cr4;
2227 	u64 cr4;
2228 
2229 	efer = efer_saved | EFER_LME;
2230 	vmcb->save.efer = efer;
2231 	cr0 = cr0_saved | X86_CR0_PG | X86_CR0_PE;
2232 	vmcb->save.cr0 = cr0;
2233 	cr4 = cr4_saved & ~X86_CR4_PAE;
2234 	vmcb->save.cr4 = cr4;
2235 	report(svm_vmrun() == SVM_EXIT_ERR, "EFER.LME=1 (%lx), "
2236 	    "CR0.PG=1 (%lx) and CR4.PAE=0 (%lx)", efer, cr0, cr4);
2237 
2238 	/*
2239 	 * EFER.LME and CR0.PG are both set and CR0.PE is zero.
2240 	 * CR4.PAE needs to be set as we otherwise cannot
2241 	 * determine if CR4.PAE=0 or CR0.PE=0 triggered the
2242 	 * SVM_EXIT_ERR.
2243 	 */
2244 	cr4 = cr4_saved | X86_CR4_PAE;
2245 	vmcb->save.cr4 = cr4;
2246 	cr0 &= ~X86_CR0_PE;
2247 	vmcb->save.cr0 = cr0;
2248 	report(svm_vmrun() == SVM_EXIT_ERR, "EFER.LME=1 (%lx), "
2249 	    "CR0.PG=1 and CR0.PE=0 (%lx)", efer, cr0);
2250 
2251 	/*
2252 	 * EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
2253 	 */
2254 	u32 cs_attrib_saved = vmcb->save.cs.attrib;
2255 	u32 cs_attrib;
2256 
2257 	cr0 |= X86_CR0_PE;
2258 	vmcb->save.cr0 = cr0;
2259 	cs_attrib = cs_attrib_saved | SVM_SELECTOR_L_MASK |
2260 	    SVM_SELECTOR_DB_MASK;
2261 	vmcb->save.cs.attrib = cs_attrib;
2262 	report(svm_vmrun() == SVM_EXIT_ERR, "EFER.LME=1 (%lx), "
2263 	    "CR0.PG=1 (%lx), CR4.PAE=1 (%lx), CS.L=1 and CS.D=1 (%x)",
2264 	    efer, cr0, cr4, cs_attrib);
2265 
2266 	vmcb->save.cr0 = cr0_saved;
2267 	vmcb->save.cr4 = cr4_saved;
2268 	vmcb->save.efer = efer_saved;
2269 	vmcb->save.cs.attrib = cs_attrib_saved;
2270 }
2271 
2272 static void test_cr0(void)
2273 {
2274 	/*
2275 	 * Un-setting CR0.CD while setting CR0.NW is an illegal combination
2276 	 */
2277 	u64 cr0_saved = vmcb->save.cr0;
2278 	u64 cr0 = cr0_saved;
2279 
2280 	cr0 |= X86_CR0_CD;
2281 	cr0 &= ~X86_CR0_NW;
2282 	vmcb->save.cr0 = cr0;
2283 	report (svm_vmrun() == SVM_EXIT_VMMCALL, "Test CR0 CD=1,NW=0: %lx",
2284 	    cr0);
2285 	cr0 |= X86_CR0_NW;
2286 	vmcb->save.cr0 = cr0;
2287 	report (svm_vmrun() == SVM_EXIT_VMMCALL, "Test CR0 CD=1,NW=1: %lx",
2288 	    cr0);
2289 	cr0 &= ~X86_CR0_NW;
2290 	cr0 &= ~X86_CR0_CD;
2291 	vmcb->save.cr0 = cr0;
2292 	report (svm_vmrun() == SVM_EXIT_VMMCALL, "Test CR0 CD=0,NW=0: %lx",
2293 	    cr0);
2294 	cr0 |= X86_CR0_NW;
2295 	vmcb->save.cr0 = cr0;
2296 	report (svm_vmrun() == SVM_EXIT_ERR, "Test CR0 CD=0,NW=1: %lx",
2297 	    cr0);
2298 	vmcb->save.cr0 = cr0_saved;
2299 
2300 	/*
2301 	 * CR0[63:32] are MBZ (must be zero)
2302 	 */
2304 
2305 	SVM_TEST_REG_RESERVED_BITS(32, 63, 4, "CR0", vmcb->save.cr0, cr0_saved,
2306 	    SVM_CR0_RESERVED_MASK);
2307 	vmcb->save.cr0 = cr0_saved;
2308 }
2309 
2310 static void test_cr3(void)
2311 {
2312 	/*
2313 	 * CR3 MBZ bits based on different modes:
2314 	 *   [63:52] - long mode
2315 	 */
2316 	u64 cr3_saved = vmcb->save.cr3;
2317 
2318 	SVM_TEST_CR_RESERVED_BITS(0, 63, 1, 3, cr3_saved,
2319 	    SVM_CR3_LONG_MBZ_MASK, SVM_EXIT_ERR, "");
2320 
2321 	vmcb->save.cr3 = cr3_saved & ~SVM_CR3_LONG_MBZ_MASK;
2322 	report(svm_vmrun() == SVM_EXIT_VMMCALL, "Test CR3 63:0: %lx",
2323 	    vmcb->save.cr3);
2324 
2325 	/*
2326 	 * CR3 non-MBZ reserved bits based on different modes:
2327 	 *   [11:5] [2:0] - long mode (PCIDE=0)
2328 	 *          [2:0] - PAE legacy mode
2329 	 */
2330 	u64 cr4_saved = vmcb->save.cr4;
2331 	u64 *pml4e = npt_get_pml4e();
2332 
2333 	/*
2334 	 * Long mode
2335 	 */
2336 	if (this_cpu_has(X86_FEATURE_PCID)) {
2337 		vmcb->save.cr4 = cr4_saved | X86_CR4_PCIDE;
2338 		SVM_TEST_CR_RESERVED_BITS(0, 11, 1, 3, cr3_saved,
2339 		    SVM_CR3_LONG_RESERVED_MASK, SVM_EXIT_VMMCALL, "(PCIDE=1) ");
2340 
2341 		vmcb->save.cr3 = cr3_saved & ~SVM_CR3_LONG_RESERVED_MASK;
2342 		report(svm_vmrun() == SVM_EXIT_VMMCALL, "Test CR3 63:0: %lx",
2343 		    vmcb->save.cr3);
2344 	}
2345 
2346 	vmcb->save.cr4 = cr4_saved & ~X86_CR4_PCIDE;
2347 
2348 	if (!npt_supported())
2349 		goto skip_npt_only;
2350 
2351 	/* Clear P (Present) bit in NPT in order to trigger #NPF */
2352 	pml4e[0] &= ~1ULL;
2353 
2354 	SVM_TEST_CR_RESERVED_BITS(0, 11, 1, 3, cr3_saved,
2355 	    SVM_CR3_LONG_RESERVED_MASK, SVM_EXIT_NPF, "(PCIDE=0) ");
2356 
2357 	pml4e[0] |= 1ULL;
2358 	vmcb->save.cr3 = cr3_saved;
2359 
2360 	/*
2361 	 * PAE legacy
2362 	 */
2363 	pml4e[0] &= ~1ULL;
2364 	vmcb->save.cr4 = cr4_saved | X86_CR4_PAE;
2365 	SVM_TEST_CR_RESERVED_BITS(0, 2, 1, 3, cr3_saved,
2366 	    SVM_CR3_PAE_LEGACY_RESERVED_MASK, SVM_EXIT_NPF, "(PAE) ");
2367 
2368 	pml4e[0] |= 1ULL;
2369 
2370 skip_npt_only:
2371 	vmcb->save.cr3 = cr3_saved;
2372 	vmcb->save.cr4 = cr4_saved;
2373 }
2374 
2375 /* Test CR4 MBZ bits based on legacy or long modes */
2376 static void test_cr4(void)
2377 {
2378 	u64 cr4_saved = vmcb->save.cr4;
2379 	u64 efer_saved = vmcb->save.efer;
2380 	u64 efer = efer_saved;
2381 
2382 	efer &= ~EFER_LME;
2383 	vmcb->save.efer = efer;
2384 	SVM_TEST_CR_RESERVED_BITS(12, 31, 1, 4, cr4_saved,
2385 	    SVM_CR4_LEGACY_RESERVED_MASK, SVM_EXIT_ERR, "");
2386 
2387 	efer |= EFER_LME;
2388 	vmcb->save.efer = efer;
2389 	SVM_TEST_CR_RESERVED_BITS(12, 31, 1, 4, cr4_saved,
2390 	    SVM_CR4_RESERVED_MASK, SVM_EXIT_ERR, "");
2391 	SVM_TEST_CR_RESERVED_BITS(32, 63, 4, 4, cr4_saved,
2392 	    SVM_CR4_RESERVED_MASK, SVM_EXIT_ERR, "");
2393 
2394 	vmcb->save.cr4 = cr4_saved;
2395 	vmcb->save.efer = efer_saved;
2396 }
2397 
2398 static void test_dr(void)
2399 {
2400 	/*
2401 	 * DR6[63:32] and DR7[63:32] are MBZ
2402 	 */
2403 	u64 dr_saved = vmcb->save.dr6;
2404 
2405 	SVM_TEST_REG_RESERVED_BITS(32, 63, 4, "DR6", vmcb->save.dr6, dr_saved,
2406 	    SVM_DR6_RESERVED_MASK);
2407 	vmcb->save.dr6 = dr_saved;
2408 
2409 	dr_saved = vmcb->save.dr7;
2410 	SVM_TEST_REG_RESERVED_BITS(32, 63, 4, "DR7", vmcb->save.dr7, dr_saved,
2411 	    SVM_DR7_RESERVED_MASK);
2412 
2413 	vmcb->save.dr7 = dr_saved;
2414 }
2415 
2416 /* TODO: verify if high 32-bits are sign- or zero-extended on bare metal */
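/*
 * Program either the MSRPM or the IOPM base address in the VMCB, enable the
 * matching intercept, and run the guest expecting the given exit code.
 */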
2417 #define	TEST_BITMAP_ADDR(saved_intercept, type, addr, exit_code,	\
2418 			 msg) {						\
2419 	vmcb->control.intercept = saved_intercept | 1ULL << type;	\
2420 	if (type == INTERCEPT_MSR_PROT)					\
2421 		vmcb->control.msrpm_base_pa = addr;			\
2422 	else								\
2423 		vmcb->control.iopm_base_pa = addr;			\
2424 	report(svm_vmrun() == exit_code,				\
2425 	    "Test %s address: %lx", msg, addr);                         \
2426 }
2427 
2428 /*
2429  * If the MSR or IOIO intercept table extends to a physical address that
2430  * is greater than or equal to the maximum supported physical address, the
2431  * guest state is illegal.
2432  *
2433  * The VMRUN instruction ignores the lower 12 bits of the address specified
2434  * in the VMCB.
2435  *
2436  * MSRPM spans 2 contiguous 4KB pages while IOPM spans 2 contiguous 4KB
2437  * pages + 1 byte.
2438  *
2439  * [APM vol 2]
2440  *
2441  * Note: Unallocated MSRPM addresses that pass the consistency checks
2442  * generate #NPF.
2443  */
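/*
 * Boundary arithmetic: the MSRPM is 2 pages, so a base of (maxphyaddr -
 * 2 pages) already reaches the limit and must fail, while the IOPM's extra
 * trailing byte makes (maxphyaddr - 3 pages) its last base expected to pass.
 */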
2444 static void test_msrpm_iopm_bitmap_addrs(void)
2445 {
2446 	u64 saved_intercept = vmcb->control.intercept;
2447 	u64 addr_beyond_limit = 1ull << cpuid_maxphyaddr();
2448 	u64 addr = virt_to_phys(msr_bitmap) & (~((1ull << 12) - 1));
2449 
2450 	TEST_BITMAP_ADDR(saved_intercept, INTERCEPT_MSR_PROT,
2451 			addr_beyond_limit - 2 * PAGE_SIZE, SVM_EXIT_ERR,
2452 			"MSRPM");
2453 	TEST_BITMAP_ADDR(saved_intercept, INTERCEPT_MSR_PROT,
2454 			addr_beyond_limit - 2 * PAGE_SIZE + 1, SVM_EXIT_ERR,
2455 			"MSRPM");
2456 	TEST_BITMAP_ADDR(saved_intercept, INTERCEPT_MSR_PROT,
2457 			addr_beyond_limit - PAGE_SIZE, SVM_EXIT_ERR,
2458 			"MSRPM");
2459 	TEST_BITMAP_ADDR(saved_intercept, INTERCEPT_MSR_PROT, addr,
2460 			SVM_EXIT_VMMCALL, "MSRPM");
2461 	addr |= (1ull << 12) - 1;
2462 	TEST_BITMAP_ADDR(saved_intercept, INTERCEPT_MSR_PROT, addr,
2463 			SVM_EXIT_VMMCALL, "MSRPM");
2464 
2465 	TEST_BITMAP_ADDR(saved_intercept, INTERCEPT_IOIO_PROT,
2466 			addr_beyond_limit - 4 * PAGE_SIZE, SVM_EXIT_VMMCALL,
2467 			"IOPM");
2468 	TEST_BITMAP_ADDR(saved_intercept, INTERCEPT_IOIO_PROT,
2469 			addr_beyond_limit - 3 * PAGE_SIZE, SVM_EXIT_VMMCALL,
2470 			"IOPM");
2471 	TEST_BITMAP_ADDR(saved_intercept, INTERCEPT_IOIO_PROT,
2472 			addr_beyond_limit - 2 * PAGE_SIZE - 2, SVM_EXIT_VMMCALL,
2473 			"IOPM");
2474 	TEST_BITMAP_ADDR(saved_intercept, INTERCEPT_IOIO_PROT,
2475 			addr_beyond_limit - 2 * PAGE_SIZE, SVM_EXIT_ERR,
2476 			"IOPM");
2477 	TEST_BITMAP_ADDR(saved_intercept, INTERCEPT_IOIO_PROT,
2478 			addr_beyond_limit - PAGE_SIZE, SVM_EXIT_ERR,
2479 			"IOPM");
2480 	addr = virt_to_phys(io_bitmap) & (~((1ull << 11) - 1));
2481 	TEST_BITMAP_ADDR(saved_intercept, INTERCEPT_IOIO_PROT, addr,
2482 			SVM_EXIT_VMMCALL, "IOPM");
2483 	addr |= (1ull << 12) - 1;
2484 	TEST_BITMAP_ADDR(saved_intercept, INTERCEPT_IOIO_PROT, addr,
2485 			SVM_EXIT_VMMCALL, "IOPM");
2486 
2487 	vmcb->control.intercept = saved_intercept;
2488 }
2489 
2490 /*
2491  * Unlike VMSAVE, VMRUN seems not to update the value of noncanonical
2492  * segment bases in the VMCB.  However, the VMRUN itself succeeds as documented.
2493  */
2494 #define TEST_CANONICAL_VMRUN(seg_base, msg)					\
2495 	saved_addr = seg_base;					\
2496 	seg_base = (seg_base & ((1ul << addr_limit) - 1)) | noncanonical_mask; \
2497 	return_value = svm_vmrun(); \
2498 	report(return_value == SVM_EXIT_VMMCALL, \
2499 			"Successful VMRUN with noncanonical %s.base", msg); \
2500 	seg_base = saved_addr;
2501 
2502 
2503 #define TEST_CANONICAL_VMLOAD(seg_base, msg)					\
2504 	saved_addr = seg_base;					\
2505 	seg_base = (seg_base & ((1ul << addr_limit) - 1)) | noncanonical_mask; \
2506 	asm volatile ("vmload %0" : : "a"(vmcb_phys) : "memory"); \
2507 	asm volatile ("vmsave %0" : : "a"(vmcb_phys) : "memory"); \
2508 	report(is_canonical(seg_base), \
2509 			"Test %s.base for canonical form: %lx", msg, seg_base); \
2510 	seg_base = saved_addr;
2511 
2512 static void test_canonicalization(void)
2513 {
2514 	u64 saved_addr;
2515 	u64 return_value;
2516 	u64 addr_limit;
2517 	u64 vmcb_phys = virt_to_phys(vmcb);
2518 
2519 	addr_limit = (this_cpu_has(X86_FEATURE_LA57)) ? 57 : 48;
2520 	u64 noncanonical_mask = NONCANONICAL & ~((1ul << addr_limit) - 1);
2521 
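	/*
	 * Each macro keeps the legal low bits of the base and overwrites the
	 * bits above the canonical boundary with the noncanonical pattern.
	 */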
2522 	TEST_CANONICAL_VMLOAD(vmcb->save.fs.base, "FS");
2523 	TEST_CANONICAL_VMLOAD(vmcb->save.gs.base, "GS");
2524 	TEST_CANONICAL_VMLOAD(vmcb->save.ldtr.base, "LDTR");
2525 	TEST_CANONICAL_VMLOAD(vmcb->save.tr.base, "TR");
2526 	TEST_CANONICAL_VMLOAD(vmcb->save.kernel_gs_base, "KERNEL GS");
2527 	TEST_CANONICAL_VMRUN(vmcb->save.es.base, "ES");
2528 	TEST_CANONICAL_VMRUN(vmcb->save.cs.base, "CS");
2529 	TEST_CANONICAL_VMRUN(vmcb->save.ss.base, "SS");
2530 	TEST_CANONICAL_VMRUN(vmcb->save.ds.base, "DS");
2531 	TEST_CANONICAL_VMRUN(vmcb->save.gdtr.base, "GDTR");
2532 	TEST_CANONICAL_VMRUN(vmcb->save.idtr.base, "IDTR");
2533 }
2534 
2535 /*
2536  * When VMRUN loads a guest value of 1 in EFLAGS.TF, that value does not
2537  * cause a trace trap between the VMRUN and the first guest instruction, but
2538  * rather after completion of the first guest instruction.
2539  *
2540  * [APM vol 2]
2541  */
2542 u64 guest_rflags_test_trap_rip;
2543 
2544 static void guest_rflags_test_db_handler(struct ex_regs *r)
2545 {
2546 	guest_rflags_test_trap_rip = r->rip;
2547 	r->rflags &= ~X86_EFLAGS_TF;
2548 }
2549 
2550 static void svm_guest_state_test(void)
2551 {
2552 	test_set_guest(basic_guest_main);
2553 	test_efer();
2554 	test_cr0();
2555 	test_cr3();
2556 	test_cr4();
2557 	test_dr();
2558 	test_msrpm_iopm_bitmap_addrs();
2559 	test_canonicalization();
2560 }
2561 
2562 extern void guest_rflags_test_guest(struct svm_test *test);
2563 extern u64 *insn2;
2564 extern u64 *guest_end;
2565 
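/*
 * insn2 labels the guest's second instruction: with EFLAGS.TF set by VMRUN,
 * the single-step trap is expected only after the first instruction
 * (push %rbp) completes, i.e. with the trap RIP equal to insn2.
 */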
2566 asm("guest_rflags_test_guest:\n\t"
2567     "push %rbp\n\t"
2568     ".global insn2\n\t"
2569     "insn2:\n\t"
2570     "mov %rsp,%rbp\n\t"
2571     "vmmcall\n\t"
2572     "vmmcall\n\t"
2573     ".global guest_end\n\t"
2574     "guest_end:\n\t"
2575     "vmmcall\n\t"
2576     "pop %rbp\n\t"
2577     "ret");
2578 
2579 static void svm_test_singlestep(void)
2580 {
2581 	handle_exception(DB_VECTOR, guest_rflags_test_db_handler);
2582 
2583 	/*
2584 	 * Trap expected after completion of first guest instruction
2585 	 */
2586 	vmcb->save.rflags |= X86_EFLAGS_TF;
2587 	report (__svm_vmrun((u64)guest_rflags_test_guest) == SVM_EXIT_VMMCALL &&
2588 		guest_rflags_test_trap_rip == (u64)&insn2,
2589                "Test EFLAGS.TF on VMRUN: trap expected after completion of first guest instruction");
2590 	/*
2591 	 * No trap expected
2592 	 */
2593 	guest_rflags_test_trap_rip = 0;
2594 	vmcb->save.rip += 3;
2595 	vmcb->save.rflags |= X86_EFLAGS_TF;
2596 	report (__svm_vmrun(vmcb->save.rip) == SVM_EXIT_VMMCALL &&
2597 		guest_rflags_test_trap_rip == 0, "Test EFLAGS.TF on VMRUN: trap not expected");
2598 
2599 	/*
2600 	 * Let guest finish execution
2601 	 */
2602 	vmcb->save.rip += 3;
2603 	report (__svm_vmrun(vmcb->save.rip) == SVM_EXIT_VMMCALL &&
2604 		vmcb->save.rip == (u64)&guest_end, "Test EFLAGS.TF on VMRUN: guest execution completion");
2605 }
2606 
2607 static void __svm_npt_rsvd_bits_test(u64 *pxe, u64 rsvd_bits, u64 efer,
2608 				     ulong cr4, u64 guest_efer, ulong guest_cr4)
2609 {
2610 	u64 pxe_orig = *pxe;
2611 	int exit_reason;
2612 	u64 pfec;
2613 
2614 	wrmsr(MSR_EFER, efer);
2615 	write_cr4(cr4);
2616 
2617 	vmcb->save.efer = guest_efer;
2618 	vmcb->save.cr4  = guest_cr4;
2619 
2620 	*pxe |= rsvd_bits;
2621 
2622 	exit_reason = svm_vmrun();
2623 
2624 	report(exit_reason == SVM_EXIT_NPF,
2625 	       "Wanted #NPF on rsvd bits = 0x%lx, got exit = 0x%x", rsvd_bits, exit_reason);
2626 
2627 	if (pxe == npt_get_pdpe() || pxe == npt_get_pml4e()) {
2628 		/*
2629 		 * The guest's page tables will blow up on a bad PDPE/PML4E,
2630 		 * before starting the final walk of the guest page.
2631 		 */
2632 		pfec = 0x20000000fULL;
2633 	} else {
2634 		/* RSVD #NPF on final walk of guest page. */
2635 		pfec = 0x10000000dULL;
2636 
2637 		/* PFEC.FETCH=1 if NX=1 *or* SMEP=1. */
2638 		if ((cr4 & X86_CR4_SMEP) || (efer & EFER_NX))
2639 			pfec |= 0x10;
2640 
2641 	}
2642 
2643 	report(vmcb->control.exit_info_1 == pfec,
2644 	       "Wanted PFEC = 0x%lx, got PFEC = %lx, PxE = 0x%lx.  "
2645 	       "host.NX = %u, host.SMEP = %u, guest.NX = %u, guest.SMEP = %u",
2646 	       pfec, vmcb->control.exit_info_1, *pxe,
2647 	       !!(efer & EFER_NX), !!(cr4 & X86_CR4_SMEP),
2648 	       !!(guest_efer & EFER_NX), !!(guest_cr4 & X86_CR4_SMEP));
2649 
2650 	*pxe = pxe_orig;
2651 }
2652 
2653 static void _svm_npt_rsvd_bits_test(u64 *pxe, u64 pxe_rsvd_bits, u64 efer,
2654 				    ulong cr4, u64 guest_efer, ulong guest_cr4)
2655 {
2656 	u64 rsvd_bits;
2657 	int i;
2658 
2659 	/*
2660 	 * RDTSC or RDRAND can sometimes fail to generate valid reserved bits.
2661 	 */
2662 	if (!pxe_rsvd_bits) {
2663 		report_skip("svm_npt_rsvd_bits_test: Reserved bits are not valid");
2664 		return;
2665 	}
2666 
2667 	/*
2668 	 * Test all combinations of guest/host EFER.NX and CR4.SMEP.  If host
2669 	 * EFER.NX=0, use NX as the reserved bit, otherwise use the passed in
2670 	 * @pxe_rsvd_bits.
2671 	 */
2672 	for (i = 0; i < 16; i++) {
2673 		if (i & 1) {
2674 			rsvd_bits = pxe_rsvd_bits;
2675 			efer |= EFER_NX;
2676 		} else {
2677 			rsvd_bits = PT64_NX_MASK;
2678 			efer &= ~EFER_NX;
2679 		}
2680 		if (i & 2)
2681 			cr4 |= X86_CR4_SMEP;
2682 		else
2683 			cr4 &= ~X86_CR4_SMEP;
2684 		if (i & 4)
2685 			guest_efer |= EFER_NX;
2686 		else
2687 			guest_efer &= ~EFER_NX;
2688 		if (i & 8)
2689 			guest_cr4 |= X86_CR4_SMEP;
2690 		else
2691 			guest_cr4 &= ~X86_CR4_SMEP;
2692 
2693 		__svm_npt_rsvd_bits_test(pxe, rsvd_bits, efer, cr4,
2694 					 guest_efer, guest_cr4);
2695 	}
2696 }
2697 
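/*
 * Return a nonzero value confined to bits [hi:low], preferring RDRAND with a
 * bounded retry and falling back to RDTSC; callers must tolerate 0 if both
 * sources keep producing zero in the window.
 */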
2698 static u64 get_random_bits(u64 hi, u64 low)
2699 {
2700 	unsigned retry = 5;
2701 	u64 rsvd_bits = 0;
2702 
2703 	if (this_cpu_has(X86_FEATURE_RDRAND)) {
2704 		do {
2705 			rsvd_bits = (rdrand() << low) & GENMASK_ULL(hi, low);
2706 			retry--;
2707 		} while (!rsvd_bits && retry);
2708 	}
2709 
2710 	if (!rsvd_bits) {
2711 		retry = 5;
2712 		do {
2713 			rsvd_bits = (rdtsc() << low) & GENMASK_ULL(hi, low);
2714 			retry--;
2715 		} while (!rsvd_bits && retry);
2716 	}
2717 
2718 	return rsvd_bits;
2719 }
2720 
2722 static void svm_npt_rsvd_bits_test(void)
2723 {
2724 	u64   saved_efer, host_efer, sg_efer, guest_efer;
2725 	ulong saved_cr4,  host_cr4,  sg_cr4,  guest_cr4;
2726 
2727 	if (!npt_supported()) {
2728 		report_skip("NPT not supported");
2729 		return;
2730 	}
2731 
2732 	saved_efer = host_efer  = rdmsr(MSR_EFER);
2733 	saved_cr4  = host_cr4   = read_cr4();
2734 	sg_efer    = guest_efer = vmcb->save.efer;
2735 	sg_cr4     = guest_cr4  = vmcb->save.cr4;
2736 
2737 	test_set_guest(basic_guest_main);
2738 
2739 	/*
2740 	 * 4k PTEs don't have reserved bits if MAXPHYADDR >= 52, just skip the
2741 	 * sub-test.  The NX test is still valid, but the extra bit of coverage
2742 	 * isn't worth the extra complexity.
2743 	 */
2744 	if (cpuid_maxphyaddr() >= 52)
2745 		goto skip_pte_test;
2746 
2747 	_svm_npt_rsvd_bits_test(npt_get_pte((u64)basic_guest_main),
2748 				get_random_bits(51, cpuid_maxphyaddr()),
2749 				host_efer, host_cr4, guest_efer, guest_cr4);
2750 
2751 skip_pte_test:
2752 	_svm_npt_rsvd_bits_test(npt_get_pde((u64)basic_guest_main),
2753 				get_random_bits(20, 13) | PT_PAGE_SIZE_MASK,
2754 				host_efer, host_cr4, guest_efer, guest_cr4);
2755 
2756 	_svm_npt_rsvd_bits_test(npt_get_pdpe(),
2757 				PT_PAGE_SIZE_MASK |
2758 					(this_cpu_has(X86_FEATURE_GBPAGES) ? get_random_bits(29, 13) : 0),
2759 				host_efer, host_cr4, guest_efer, guest_cr4);
2760 
2761 	_svm_npt_rsvd_bits_test(npt_get_pml4e(), BIT_ULL(8),
2762 				host_efer, host_cr4, guest_efer, guest_cr4);
2763 
2764 	wrmsr(MSR_EFER, saved_efer);
2765 	write_cr4(saved_cr4);
2766 	vmcb->save.efer = sg_efer;
2767 	vmcb->save.cr4  = sg_cr4;
2768 }
2769 
2770 static bool volatile svm_errata_reproduced = false;
2771 static unsigned long volatile physical = 0;
2772 
2774 /*
2775  * Test the following errata:
2776  *
2777  * If VMRUN/VMSAVE/VMLOAD is attempted by the nested guest, the CPU
2778  * first checks EAX against the host's reserved memory regions (so far
2779  * only SMM_ADDR/SMM_MASK are known to cause it), and only then signals
2780  * #VMEXIT.
2781  *
2782  * Try to reproduce this by trying vmsave on each possible 4K aligned memory
2783  * address in the low 4G where the SMM area has to reside.
2784  */
2785 
2786 static void gp_isr(struct ex_regs *r)
2787 {
2788     svm_errata_reproduced = true;
2789     /* skip over the vmsave instruction */
2790     r->rip += 3;
2791 }
2792 
2793 static void svm_vmrun_errata_test(void)
2794 {
2795     unsigned long *last_page = NULL;
2796 
2797     handle_exception(GP_VECTOR, gp_isr);
2798 
2799     while (!svm_errata_reproduced) {
2800 
2801         unsigned long *page = alloc_pages(1);
2802 
2803         if (!page) {
2804             report_pass("All guest memory tested, no bug found");
2805             break;
2806         }
2807 
2808         physical = virt_to_phys(page);
2809 
2810         asm volatile (
2811             "mov %[_physical], %%rax\n\t"
2812             "vmsave %%rax\n\t"
2813 
2814             : /* no outputs */
2815             : [_physical] "m" (physical)
2816             : "rax" /* clobbers */
2817         );
2818 
2819         if (svm_errata_reproduced) {
2820             report_fail("Got #GP exception - svm errata reproduced at 0x%lx",
2821                         physical);
2822             break;
2823         }
2824 
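        /* Chain the tested pages into a singly linked list so they can all
         * be freed once the scan stops. */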
2825         *page = (unsigned long)last_page;
2826         last_page = page;
2827     }
2828 
2829     while (last_page) {
2830         unsigned long *page = last_page;
2831         last_page = (unsigned long *)*last_page;
2832         free_pages_by_order(page, 1);
2833     }
2834 }
2835 
2836 static void vmload_vmsave_guest_main(struct svm_test *test)
2837 {
2838 	u64 vmcb_phys = virt_to_phys(vmcb);
2839 
2840 	asm volatile ("vmload %0" : : "a"(vmcb_phys));
2841 	asm volatile ("vmsave %0" : : "a"(vmcb_phys));
2842 }
2843 
2844 static void svm_vmload_vmsave(void)
2845 {
2846 	u64 intercept_saved = vmcb->control.intercept;
2847 
2848 	test_set_guest(vmload_vmsave_guest_main);
2849 
2850 	/*
2851 	 * Disabling the VMLOAD and VMSAVE intercepts doesn't cause the
2852 	 * respective #VMEXITs to the host.
2853 	 */
2854 	vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMLOAD);
2855 	vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMSAVE);
2856 	svm_vmrun();
2857 	report(vmcb->control.exit_code == SVM_EXIT_VMMCALL, "Test "
2858 	    "VMLOAD/VMSAVE intercept: Expected VMMCALL #VMEXIT");
2859 
2860 	/*
2861 	 * Enabling the VMLOAD and VMSAVE intercepts causes the respective
2862 	 * #VMEXITs to the host.
2863 	 */
2864 	vmcb->control.intercept |= (1ULL << INTERCEPT_VMLOAD);
2865 	svm_vmrun();
2866 	report(vmcb->control.exit_code == SVM_EXIT_VMLOAD, "Test "
2867 	    "VMLOAD/VMSAVE intercept: Expected VMLOAD #VMEXIT");
2868 	vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMLOAD);
2869 	vmcb->control.intercept |= (1ULL << INTERCEPT_VMSAVE);
2870 	svm_vmrun();
2871 	report(vmcb->control.exit_code == SVM_EXIT_VMSAVE, "Test "
2872 	    "VMLOAD/VMSAVE intercept: Expected VMSAVE #VMEXIT");
2873 	vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMSAVE);
2874 	svm_vmrun();
2875 	report(vmcb->control.exit_code == SVM_EXIT_VMMCALL, "Test "
2876 	    "VMLOAD/VMSAVE intercept: Expected VMMCALL #VMEXIT");
2877 
2878 	vmcb->control.intercept |= (1ULL << INTERCEPT_VMLOAD);
2879 	svm_vmrun();
2880 	report(vmcb->control.exit_code == SVM_EXIT_VMLOAD, "Test "
2881 	    "VMLOAD/VMSAVE intercept: Expected VMLOAD #VMEXIT");
2882 	vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMLOAD);
2883 	svm_vmrun();
2884 	report(vmcb->control.exit_code == SVM_EXIT_VMMCALL, "Test "
2885 	    "VMLOAD/VMSAVE intercept: Expected VMMCALL #VMEXIT");
2886 
2887 	vmcb->control.intercept |= (1ULL << INTERCEPT_VMSAVE);
2888 	svm_vmrun();
2889 	report(vmcb->control.exit_code == SVM_EXIT_VMSAVE, "Test "
2890 	    "VMLOAD/VMSAVE intercept: Expected VMSAVE #VMEXIT");
2891 	vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMSAVE);
2892 	svm_vmrun();
2893 	report(vmcb->control.exit_code == SVM_EXIT_VMMCALL, "Test "
2894 	    "VMLOAD/VMSAVE intercept: Expected VMMCALL #VMEXIT");
2895 
2896 	vmcb->control.intercept = intercept_saved;
2897 }
2898 
2899 static void prepare_vgif_enabled(struct svm_test *test)
2900 {
2901     default_prepare(test);
2902 }
2903 
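/*
 * With V_GIF_ENABLED set in int_ctl, guest STGI/CLGI are not intercepted:
 * they only toggle the virtual GIF (V_GIF_MASK in int_ctl), which the host
 * checks after each VMMCALL exit below.
 */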
2904 static void test_vgif(struct svm_test *test)
2905 {
2906     asm volatile ("vmmcall\n\tstgi\n\tvmmcall\n\tclgi\n\tvmmcall\n\t");
2908 }
2909 
2910 static bool vgif_finished(struct svm_test *test)
2911 {
2912     switch (get_test_stage(test)) {
2914     case 0:
2915         if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
2916             report_fail("VMEXIT not due to vmmcall.");
2917             return true;
2918         }
2919         vmcb->control.int_ctl |= V_GIF_ENABLED_MASK;
2920         vmcb->save.rip += 3;
2921         inc_test_stage(test);
2922         break;
2923     case 1:
2924         if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
2925             report_fail("VMEXIT not due to vmmcall.");
2926             return true;
2927         }
2928         if (!(vmcb->control.int_ctl & V_GIF_MASK)) {
2929             report_fail("Failed to set VGIF when executing STGI.");
2930             vmcb->control.int_ctl &= ~V_GIF_ENABLED_MASK;
2931             return true;
2932         }
2933         report_pass("STGI set VGIF bit.");
2934         vmcb->save.rip += 3;
2935         inc_test_stage(test);
2936         break;
2937     case 2:
2938         if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
2939             report_fail("VMEXIT not due to vmmcall.");
2940             return true;
2941         }
2942         if (vmcb->control.int_ctl & V_GIF_MASK) {
2943             report_fail("Failed to clear VGIF when executing CLGI.");
2944             vmcb->control.int_ctl &= ~V_GIF_ENABLED_MASK;
2945             return true;
2946         }
2947         report_pass("CLGI cleared VGIF bit.");
2948         vmcb->save.rip += 3;
2949         inc_test_stage(test);
2950         vmcb->control.int_ctl &= ~V_GIF_ENABLED_MASK;
2951         break;
2952     default:
2953         return true;
2955     }
2956 
2957     return get_test_stage(test) == 3;
2958 }
2959 
2960 static bool vgif_check(struct svm_test *test)
2961 {
2962     return get_test_stage(test) == 3;
2963 }
2964 
2965 static int of_test_counter;
2966 
2967 static void guest_test_of_handler(struct ex_regs *r)
2968 {
2969     of_test_counter++;
2970 }
2971 
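/*
 * INTO is invalid in 64-bit mode, so far-call into a 32-bit code segment,
 * overflow EAX (0x7fffffff + 0x7fffffff sets OF), and execute INTO there;
 * the resulting #OF must reach the registered L2 handler.
 */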
2972 static void svm_of_test_guest(struct svm_test *test)
2973 {
2974     struct far_pointer32 fp = {
2975         .offset = (uintptr_t)&&into,
2976         .selector = KERNEL_CS32,
2977     };
2978     uintptr_t rsp;
2979 
2980     asm volatile ("mov %%rsp, %0" : "=r"(rsp));
2981 
2982     if (fp.offset != (uintptr_t)&&into) {
2983         printf("Code address too high.\n");
2984         return;
2985     }
2986 
2987     if ((u32)rsp != rsp) {
2988         printf("Stack address too high.\n");
        return;
2989     }
2990 
2991     asm goto("lcall *%0" : : "m" (fp) : "rax" : into);
2992     return;
2993 into:
2994 
2995     asm volatile (".code32;"
2996             "movl $0x7fffffff, %eax;"
2997             "addl %eax, %eax;"
2998             "into;"
2999             "lret;"
3000             ".code64");
3001     __builtin_unreachable();
3002 }
3003 
3004 static void svm_into_test(void)
3005 {
3006     handle_exception(OF_VECTOR, guest_test_of_handler);
3007     test_set_guest(svm_of_test_guest);
3008     report(svm_vmrun() == SVM_EXIT_VMMCALL && of_test_counter == 1,
3009         "#OF is generated in L2 exception handler");
3010 }
3011 
3012 static int bp_test_counter;
3013 
3014 static void guest_test_bp_handler(struct ex_regs *r)
3015 {
3016     bp_test_counter++;
3017 }
3018 
3019 static void svm_bp_test_guest(struct svm_test *test)
3020 {
3021     asm volatile("int3");
3022 }
3023 
3024 static void svm_int3_test(void)
3025 {
3026     handle_exception(BP_VECTOR, guest_test_bp_handler);
3027     test_set_guest(svm_bp_test_guest);
3028     report(svm_vmrun() == SVM_EXIT_VMMCALL && bp_test_counter == 1,
3029         "#BP is handled in L2 exception handler");
3030 }
3031 
3032 static int nm_test_counter;
3033 
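/* Clear CR0.TS and CR0.EM in the #NM handler so the faulting fnop can
 * resume and the guest makes forward progress. */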
3034 static void guest_test_nm_handler(struct ex_regs *r)
3035 {
3036     nm_test_counter++;
3037     write_cr0(read_cr0() & ~X86_CR0_TS);
3038     write_cr0(read_cr0() & ~X86_CR0_EM);
3039 }
3040 
3041 static void svm_nm_test_guest(struct svm_test *test)
3042 {
3043     asm volatile("fnop");
3044 }
3045 
3046 /* This test checks that:
3047  *
3048  * (a) If CR0.TS is set in L2, #NM is handled by L2 when
3049  *     just an L2 handler is registered.
3050  *
3051  * (b) If CR0.TS is cleared and CR0.EM is set, #NM is handled
3052  *     by L2 when just an L2 handler is registered.
3053  *
3054  * (c) If CR0.TS and CR0.EM are cleared in L2, no exception
3055  *     is generated.
3056  */
3057 
3058 static void svm_nm_test(void)
3059 {
3060     handle_exception(NM_VECTOR, guest_test_nm_handler);
3061     write_cr0(read_cr0() & ~X86_CR0_TS);
3062     test_set_guest(svm_nm_test_guest);
3063 
3064     vmcb->save.cr0 = vmcb->save.cr0 | X86_CR0_TS;
3065     report(svm_vmrun() == SVM_EXIT_VMMCALL && nm_test_counter == 1,
3066         "fnop with CR0.TS set in L2, #NM is triggered");
3067 
3068     vmcb->save.cr0 = (vmcb->save.cr0 & ~X86_CR0_TS) | X86_CR0_EM;
3069     report(svm_vmrun() == SVM_EXIT_VMMCALL && nm_test_counter == 2,
3070         "fnop with CR0.EM set in L2, #NM is triggered");
3071 
3072     vmcb->save.cr0 = vmcb->save.cr0 & ~(X86_CR0_TS | X86_CR0_EM);
3073     report(svm_vmrun() == SVM_EXIT_VMMCALL && nm_test_counter == 2,
3074         "fnop with CR0.TS and CR0.EM unset, no #NM exception");
3075 }
3076 
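/*
 * Master test table: classic tests provide the full set of callbacks
 * (prepare, guest body, finished, check), while standalone tests are
 * wrapped via TEST() and supply only a .v2 entry point.
 */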
3077 struct svm_test svm_tests[] = {
3078     { "null", default_supported, default_prepare,
3079       default_prepare_gif_clear, null_test,
3080       default_finished, null_check },
3081     { "vmrun", default_supported, default_prepare,
3082       default_prepare_gif_clear, test_vmrun,
3083        default_finished, check_vmrun },
3084     { "ioio", default_supported, prepare_ioio,
3085        default_prepare_gif_clear, test_ioio,
3086        ioio_finished, check_ioio },
3087     { "vmrun intercept check", default_supported, prepare_no_vmrun_int,
3088       default_prepare_gif_clear, null_test, default_finished,
3089       check_no_vmrun_int },
3090     { "rsm", default_supported,
3091       prepare_rsm_intercept, default_prepare_gif_clear,
3092       test_rsm_intercept, finished_rsm_intercept, check_rsm_intercept },
3093     { "cr3 read intercept", default_supported,
3094       prepare_cr3_intercept, default_prepare_gif_clear,
3095       test_cr3_intercept, default_finished, check_cr3_intercept },
3096     { "cr3 read nointercept", default_supported, default_prepare,
3097       default_prepare_gif_clear, test_cr3_intercept, default_finished,
3098       check_cr3_nointercept },
3099     { "cr3 read intercept emulate", smp_supported,
3100       prepare_cr3_intercept_bypass, default_prepare_gif_clear,
3101       test_cr3_intercept_bypass, default_finished, check_cr3_intercept },
3102     { "dr intercept check", default_supported, prepare_dr_intercept,
3103       default_prepare_gif_clear, test_dr_intercept, dr_intercept_finished,
3104       check_dr_intercept },
3105     { "next_rip", next_rip_supported, prepare_next_rip,
3106       default_prepare_gif_clear, test_next_rip,
3107       default_finished, check_next_rip },
3108     { "msr intercept check", default_supported, prepare_msr_intercept,
3109       default_prepare_gif_clear, test_msr_intercept,
3110       msr_intercept_finished, check_msr_intercept },
3111     { "mode_switch", default_supported, prepare_mode_switch,
3112       default_prepare_gif_clear, test_mode_switch,
3113        mode_switch_finished, check_mode_switch },
3114     { "asid_zero", default_supported, prepare_asid_zero,
3115       default_prepare_gif_clear, test_asid_zero,
3116        default_finished, check_asid_zero },
3117     { "sel_cr0_bug", default_supported, sel_cr0_bug_prepare,
3118       default_prepare_gif_clear, sel_cr0_bug_test,
3119        sel_cr0_bug_finished, sel_cr0_bug_check },
3120     { "npt_nx", npt_supported, npt_nx_prepare,
3121       default_prepare_gif_clear, null_test,
3122       default_finished, npt_nx_check },
3123     { "npt_np", npt_supported, npt_np_prepare,
3124       default_prepare_gif_clear, npt_np_test,
3125       default_finished, npt_np_check },
3126     { "npt_us", npt_supported, npt_us_prepare,
3127       default_prepare_gif_clear, npt_us_test,
3128       default_finished, npt_us_check },
3129     { "npt_rw", npt_supported, npt_rw_prepare,
3130       default_prepare_gif_clear, npt_rw_test,
3131       default_finished, npt_rw_check },
3132     { "npt_rw_pfwalk", npt_supported, npt_rw_pfwalk_prepare,
3133       default_prepare_gif_clear, null_test,
3134       default_finished, npt_rw_pfwalk_check },
3135     { "npt_l1mmio", npt_supported, npt_l1mmio_prepare,
3136       default_prepare_gif_clear, npt_l1mmio_test,
3137       default_finished, npt_l1mmio_check },
3138     { "npt_rw_l1mmio", npt_supported, npt_rw_l1mmio_prepare,
3139       default_prepare_gif_clear, npt_rw_l1mmio_test,
3140       default_finished, npt_rw_l1mmio_check },
3141     { "tsc_adjust", tsc_adjust_supported, tsc_adjust_prepare,
3142       default_prepare_gif_clear, tsc_adjust_test,
3143       default_finished, tsc_adjust_check },
3144     { "latency_run_exit", default_supported, latency_prepare,
3145       default_prepare_gif_clear, latency_test,
3146       latency_finished, latency_check },
3147     { "latency_run_exit_clean", default_supported, latency_prepare,
3148       default_prepare_gif_clear, latency_test,
3149       latency_finished_clean, latency_check },
3150     { "latency_svm_insn", default_supported, lat_svm_insn_prepare,
3151       default_prepare_gif_clear, null_test,
3152       lat_svm_insn_finished, lat_svm_insn_check },
3153     { "exc_inject", default_supported, exc_inject_prepare,
3154       default_prepare_gif_clear, exc_inject_test,
3155       exc_inject_finished, exc_inject_check },
3156     { "pending_event", default_supported, pending_event_prepare,
3157       default_prepare_gif_clear,
3158       pending_event_test, pending_event_finished, pending_event_check },
3159     { "pending_event_cli", default_supported, pending_event_cli_prepare,
3160       pending_event_cli_prepare_gif_clear,
3161       pending_event_cli_test, pending_event_cli_finished,
3162       pending_event_cli_check },
3163     { "interrupt", default_supported, interrupt_prepare,
3164       default_prepare_gif_clear, interrupt_test,
3165       interrupt_finished, interrupt_check },
3166     { "nmi", default_supported, nmi_prepare,
3167       default_prepare_gif_clear, nmi_test,
3168       nmi_finished, nmi_check },
3169     { "nmi_hlt", smp_supported, nmi_prepare,
3170       default_prepare_gif_clear, nmi_hlt_test,
3171       nmi_hlt_finished, nmi_hlt_check },
3172     { "virq_inject", default_supported, virq_inject_prepare,
3173       default_prepare_gif_clear, virq_inject_test,
3174       virq_inject_finished, virq_inject_check },
3175     { "reg_corruption", default_supported, reg_corruption_prepare,
3176       default_prepare_gif_clear, reg_corruption_test,
3177       reg_corruption_finished, reg_corruption_check },
3178     { "svm_init_startup_test", smp_supported, init_startup_prepare,
3179       default_prepare_gif_clear, null_test,
3180       init_startup_finished, init_startup_check },
3181     { "svm_init_intercept_test", smp_supported, init_intercept_prepare,
3182       default_prepare_gif_clear, init_intercept_test,
3183       init_intercept_finished, init_intercept_check, .on_vcpu = 2 },
3184     { "host_rflags", default_supported, host_rflags_prepare,
3185       host_rflags_prepare_gif_clear, host_rflags_test,
3186       host_rflags_finished, host_rflags_check },
3187     { "vgif", vgif_supported, prepare_vgif_enabled,
3188       default_prepare_gif_clear, test_vgif, vgif_finished,
3189       vgif_check },
3190     TEST(svm_cr4_osxsave_test),
3191     TEST(svm_guest_state_test),
3192     TEST(svm_npt_rsvd_bits_test),
3193     TEST(svm_vmrun_errata_test),
3194     TEST(svm_vmload_vmsave),
3195     TEST(svm_test_singlestep),
3196     TEST(svm_nm_test),
3197     TEST(svm_int3_test),
3198     TEST(svm_into_test),
3199     { NULL, NULL, NULL, NULL, NULL, NULL, NULL }
3200 };
3201