xref: /kvm-unit-tests/x86/vmexit.c (revision 95a9408860fc8dacb73e9b302fb96536f91d5ccf)
1 #include "libcflat.h"
2 #include "acpi.h"
3 #include "smp.h"
4 #include "vmalloc.h"
5 #include "pci.h"
6 #include "x86/vm.h"
7 #include "x86/desc.h"
8 #include "x86/apic.h"
9 #include "x86/isr.h"
10 
11 #define IPI_TEST_VECTOR	0xb0
12 
/* Describes one benchmark entry of the tests[] table. */
struct test {
	/* Operation to time; do_test() skips the entry when this is NULL. */
	void (*func)(void);
	/* Label printed with the result and matched against the command line. */
	const char *name;
	/* Optional predicate; a zero return makes do_test() skip the entry. */
	int (*valid)(void);
	/* Non-zero: run through on_cpus(); zero: loop on the calling CPU only. */
	int parallel;
	/*
	 * Optional hook called by do_test() before each run; returning false
	 * ends the entry.  While it is set, a finished run asks to be repeated.
	 */
	bool (*next)(struct test *);
};
20 
21 #define GOAL (1ull << 30)
22 
23 static int nr_cpus;
24 static u64 cr4_shadow;
25 
cpuid_test(void)26 static void cpuid_test(void)
27 {
28 	asm volatile ("push %%"R "bx; cpuid; pop %%"R "bx"
29 		      : : : "eax", "ecx", "edx");
30 }
31 
vmcall(void)32 static void vmcall(void)
33 {
34 	unsigned long a = 0, b, c, d;
35 
36 	asm volatile ("vmcall" : "+a"(a), "=b"(b), "=c"(c), "=d"(d));
37 }
38 
39 #define MSR_EFER 0xc0000080
40 #define EFER_NX_MASK            (1ull << 11)
41 
42 #ifdef __x86_64__
mov_from_cr8(void)43 static void mov_from_cr8(void)
44 {
45 	unsigned long cr8;
46 
47 	asm volatile ("mov %%cr8, %0" : "=r"(cr8));
48 }
49 
mov_to_cr8(void)50 static void mov_to_cr8(void)
51 {
52 	unsigned long cr8 = 0;
53 
54 	asm volatile ("mov %0, %%cr8" : : "r"(cr8));
55 }
56 #endif
57 
/* Validity predicate: non-zero when cpu_count() reports more than one CPU. */
static int is_smp(void)
{
	int cpus = cpu_count();

	return cpus > 1;
}
62 
/*
 * Empty callback handed to on_cpu() by the IPI tests; @junk is ignored and
 * never dereferenced.
 */
static void nop(void *junk)
{
	/* Intentionally empty. */
}
66 
/* Interrupt counter: incremented by self_ipi_isr(), reset/checked by tests. */
volatile int x = 0;
/* TSC cycles accumulated around the eoi() call in self_ipi_isr(). */
volatile uint64_t tsc_eoi = 0;
/* TSC cycles accumulated around the IPI send operations. */
volatile uint64_t tsc_ipi = 0;
70 
/*
 * Interrupt handler for the test vector: counts the interrupt in x and adds
 * the TSC cycles spent in eoi() to tsc_eoi.  @regs is unused.
 */
static void self_ipi_isr(isr_regs_t *regs)
{
	uint64_t before;

	x++;
	before = rdtsc();
	eoi();
	tsc_eoi += rdtsc() - before;
}
78 
x2apic_self_ipi(int vec)79 static void x2apic_self_ipi(int vec)
80 {
81 	uint64_t start = rdtsc();
82 	wrmsr(0x83f, vec);
83 	tsc_ipi += rdtsc() - start;
84 }
85 
apic_self_ipi(int vec)86 static void apic_self_ipi(int vec)
87 {
88 	uint64_t start = rdtsc();
89         apic_icr_write(APIC_INT_ASSERT | APIC_DEST_SELF | APIC_DEST_PHYSICAL |
90 		       APIC_DM_FIXED | IPI_TEST_VECTOR, vec);
91 	tsc_ipi += rdtsc() - start;
92 }
93 
self_ipi_sti_nop(void)94 static void self_ipi_sti_nop(void)
95 {
96 	x = 0;
97 	cli();
98 	apic_self_ipi(IPI_TEST_VECTOR);
99 	asm volatile("sti; nop");
100 	if (x != 1) printf("%d", x);
101 }
102 
self_ipi_sti_hlt(void)103 static void self_ipi_sti_hlt(void)
104 {
105 	x = 0;
106 	cli();
107 	apic_self_ipi(IPI_TEST_VECTOR);
108 	safe_halt();
109 	if (x != 1) printf("%d", x);
110 }
111 
self_ipi_tpr(void)112 static void self_ipi_tpr(void)
113 {
114 	x = 0;
115 	apic_set_tpr(0x0f);
116 	apic_self_ipi(IPI_TEST_VECTOR);
117 	apic_set_tpr(0x00);
118 	asm volatile("nop");
119 	if (x != 1) printf("%d", x);
120 }
121 
self_ipi_tpr_sti_nop(void)122 static void self_ipi_tpr_sti_nop(void)
123 {
124 	x = 0;
125 	cli();
126 	apic_set_tpr(0x0f);
127 	apic_self_ipi(IPI_TEST_VECTOR);
128 	apic_set_tpr(0x00);
129 	asm volatile("sti; nop");
130 	if (x != 1) printf("%d", x);
131 }
132 
self_ipi_tpr_sti_hlt(void)133 static void self_ipi_tpr_sti_hlt(void)
134 {
135 	x = 0;
136 	cli();
137 	apic_set_tpr(0x0f);
138 	apic_self_ipi(IPI_TEST_VECTOR);
139 	apic_set_tpr(0x00);
140 	safe_halt();
141 	if (x != 1) printf("%d", x);
142 }
143 
is_x2apic(void)144 static int is_x2apic(void)
145 {
146     return rdmsr(MSR_IA32_APICBASE) & APIC_EXTD;
147 }
148 
x2apic_self_ipi_sti_nop(void)149 static void x2apic_self_ipi_sti_nop(void)
150 {
151 	cli();
152 	x2apic_self_ipi(IPI_TEST_VECTOR);
153 	asm volatile("sti; nop");
154 }
155 
x2apic_self_ipi_sti_hlt(void)156 static void x2apic_self_ipi_sti_hlt(void)
157 {
158 	cli();
159 	x2apic_self_ipi(IPI_TEST_VECTOR);
160 	safe_halt();
161 }
162 
x2apic_self_ipi_tpr(void)163 static void x2apic_self_ipi_tpr(void)
164 {
165 	apic_set_tpr(0x0f);
166 	x2apic_self_ipi(IPI_TEST_VECTOR);
167 	apic_set_tpr(0x00);
168 	asm volatile("nop");
169 }
170 
x2apic_self_ipi_tpr_sti_nop(void)171 static void x2apic_self_ipi_tpr_sti_nop(void)
172 {
173 	cli();
174 	apic_set_tpr(0x0f);
175 	x2apic_self_ipi(IPI_TEST_VECTOR);
176 	apic_set_tpr(0x00);
177 	asm volatile("sti; nop");
178 }
179 
x2apic_self_ipi_tpr_sti_hlt(void)180 static void x2apic_self_ipi_tpr_sti_hlt(void)
181 {
182 	cli();
183 	apic_set_tpr(0x0f);
184 	x2apic_self_ipi(IPI_TEST_VECTOR);
185 	apic_set_tpr(0x00);
186 	safe_halt();
187 }
188 
ipi(void)189 static void ipi(void)
190 {
191 	uint64_t start = rdtsc();
192 	on_cpu(1, nop, 0);
193 	tsc_ipi += rdtsc() - start;
194 }
195 
ipi_halt(void)196 static void ipi_halt(void)
197 {
198 	unsigned long long t;
199 
200 	on_cpu(1, nop, 0);
201 	t = rdtsc() + 2000;
202 	while (rdtsc() < t)
203 		;
204 }
205 
206 int pm_tmr_blk;
inl_pmtimer(void)207 static void inl_pmtimer(void)
208 {
209     if (!pm_tmr_blk) {
210 	struct acpi_table_fadt *fadt;
211 
212 	fadt = find_acpi_table_addr(FACP_SIGNATURE);
213 	pm_tmr_blk = fadt->pm_tmr_blk;
214 	printf("PM timer port is %x\n", pm_tmr_blk);
215     }
216     inl(pm_tmr_blk);
217 }
218 
/*
 * 32-bit read from I/O port 0x1234; the result is discarded.
 * NOTE(review): going by the name, this port is presumably serviced by QEMU
 * userspace -- confirm.
 */
static void inl_nop_qemu(void)
{
	(void)inl(0x1234);
}
223 
/*
 * Byte read from I/O port 0x4d0; the result is discarded.  Note that the
 * access is byte-wide (inb) despite the "inl" in the name.
 */
static void inl_nop_kernel(void)
{
	(void)inb(0x4d0);
}
228 
/*
 * Byte write of 0 to I/O port 0x4d0.  Note that the access is byte-wide
 * (outb) despite the "outl" in the name.
 */
static void outl_elcr_kernel(void)
{
	outb(0, 0x4d0);
}
233 
mov_dr(void)234 static void mov_dr(void)
235 {
236     asm volatile("mov %0, %%dr7" : : "r" (0x400L));
237 }
238 
ple_round_robin(void)239 static void ple_round_robin(void)
240 {
241 	struct counter {
242 		volatile int n1;
243 		int n2;
244 	} __attribute__((aligned(64)));
245 	static struct counter counters[64] = { { -1, 0 } };
246 	int me = smp_id();
247 	int you;
248 	volatile struct counter *p = &counters[me];
249 
250 	while (p->n1 == p->n2)
251 		asm volatile ("pause");
252 
253 	p->n2 = p->n1;
254 	you = me + 1;
255 	if (you == nr_cpus)
256 		you = 0;
257 	++counters[you].n1;
258 }
259 
rd_tsc_adjust_msr(void)260 static void rd_tsc_adjust_msr(void)
261 {
262 	rdmsr(MSR_IA32_TSC_ADJUST);
263 }
264 
wr_tsc_adjust_msr(void)265 static void wr_tsc_adjust_msr(void)
266 {
267 	wrmsr(MSR_IA32_TSC_ADJUST, 0x0);
268 }
269 
wr_kernel_gs_base(void)270 static void wr_kernel_gs_base(void)
271 {
272 	wrmsr(MSR_KERNEL_GS_BASE, 0x0);
273 }
274 
/* State shared by the pci-mem / pci-io benchmarks. */
static struct pci_test {
	/* I/O BAR base of the test device, set in main(). */
	unsigned iobar;
	/* iobar + offset: port written by the pci_io_test*() functions. */
	unsigned ioport;
	/* ioremap()ed memory BAR, set in main(); NULL when no device was found. */
	volatile void *memaddr;
	/* memaddr + offset: location written by the pci_mem_test*() functions. */
	volatile void *mem;
	/* Index of the current device sub-test; -1 before the first one. */
	int test_idx;
	/* Value to write, read from the device header by pci_next(). */
	uint32_t data;
	/* Offset of the write target, read from the device header by pci_next(). */
	uint32_t offset;
} pci_test = {
	.test_idx = -1,
};
286 
pci_mem_testb(void)287 static void pci_mem_testb(void)
288 {
289 	*(volatile uint8_t *)pci_test.mem = pci_test.data;
290 }
291 
pci_mem_testw(void)292 static void pci_mem_testw(void)
293 {
294 	*(volatile uint16_t *)pci_test.mem = pci_test.data;
295 }
296 
pci_mem_testl(void)297 static void pci_mem_testl(void)
298 {
299 	*(volatile uint32_t *)pci_test.mem = pci_test.data;
300 }
301 
pci_io_testb(void)302 static void pci_io_testb(void)
303 {
304 	outb(pci_test.data, pci_test.ioport);
305 }
306 
pci_io_testw(void)307 static void pci_io_testw(void)
308 {
309 	outw(pci_test.data, pci_test.ioport);
310 }
311 
pci_io_testl(void)312 static void pci_io_testl(void)
313 {
314 	outl(pci_test.data, pci_test.ioport);
315 }
316 
/*
 * Read one byte from @addr: as an I/O port when @io is true, otherwise as a
 * memory address.
 */
static uint8_t ioreadb(unsigned long addr, bool io)
{
	if (!io)
		return *(volatile uint8_t *)addr;

	return inb(addr);
}
325 
/*
 * Read a 32-bit value from @addr: as an I/O port when @io is true, otherwise
 * as a memory address.  Assumes little endian.
 */
static uint32_t ioreadl(unsigned long addr, bool io)
{
	if (!io)
		return *(volatile uint32_t *)addr;

	return inl(addr);
}
335 
/*
 * Write the byte @data to @addr: as an I/O port when @io is true, otherwise
 * as a memory address.
 */
static void iowriteb(unsigned long addr, uint8_t data, bool io)
{
	if (!io) {
		*(volatile uint8_t *)addr = data;
		return;
	}

	outb(data, addr);
}
344 
pci_next(struct test * test,unsigned long addr,bool io)345 static bool pci_next(struct test *test, unsigned long addr, bool io)
346 {
347 	int i;
348 	uint8_t width;
349 
350 	if (!pci_test.memaddr) {
351 		test->func = NULL;
352 		return true;
353 	}
354 	pci_test.test_idx++;
355 	iowriteb(addr + offsetof(struct pci_test_dev_hdr, test),
356 		 pci_test.test_idx, io);
357 	width = ioreadb(addr + offsetof(struct pci_test_dev_hdr, width),
358 			io);
359 	switch (width) {
360 		case 1:
361 			test->func = io ? pci_io_testb : pci_mem_testb;
362 			break;
363 		case 2:
364 			test->func = io ? pci_io_testw : pci_mem_testw;
365 			break;
366 		case 4:
367 			test->func = io ? pci_io_testl : pci_mem_testl;
368 			break;
369 		default:
370 			/* Reset index for purposes of the next test */
371 			pci_test.test_idx = -1;
372 			test->func = NULL;
373 			return false;
374 	}
375 	pci_test.data = ioreadl(addr + offsetof(struct pci_test_dev_hdr, data),
376 				io);
377 	pci_test.offset = ioreadl(addr + offsetof(struct pci_test_dev_hdr,
378 						  offset), io);
379 	for (i = 0; i < pci_test.offset; ++i) {
380 		char c = ioreadb(addr + offsetof(struct pci_test_dev_hdr,
381 						 name) + i, io);
382 		if (!c) {
383 			break;
384 		}
385 		printf("%c",c);
386 	}
387 	printf(":");
388 	return true;
389 }
390 
pci_mem_next(struct test * test)391 static bool pci_mem_next(struct test *test)
392 {
393 	bool ret;
394 	ret = pci_next(test, ((unsigned long)pci_test.memaddr), false);
395 	if (ret) {
396 		pci_test.mem = pci_test.memaddr + pci_test.offset;
397 	}
398 	return ret;
399 }
400 
pci_io_next(struct test * test)401 static bool pci_io_next(struct test *test)
402 {
403 	bool ret;
404 	ret = pci_next(test, ((unsigned long)pci_test.iobar), true);
405 	if (ret) {
406 		pci_test.ioport = pci_test.iobar + pci_test.offset;
407 	}
408 	return ret;
409 }
410 
has_tscdeadline(void)411 static int has_tscdeadline(void)
412 {
413     uint32_t lvtt;
414 
415     if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
416         lvtt = APIC_LVT_TIMER_TSCDEADLINE | IPI_TEST_VECTOR;
417         apic_write(APIC_LVTT, lvtt);
418         return 1;
419     } else {
420         return 0;
421     }
422 }
423 
tscdeadline_immed(void)424 static void tscdeadline_immed(void)
425 {
426 	wrmsr(MSR_IA32_TSCDEADLINE, rdtsc());
427 	asm volatile("nop");
428 }
429 
tscdeadline(void)430 static void tscdeadline(void)
431 {
432 	x = 0;
433 	wrmsr(MSR_IA32_TSCDEADLINE, rdtsc()+3000);
434 	while (x == 0) barrier();
435 }
436 
wr_tsx_ctrl_msr(void)437 static void wr_tsx_ctrl_msr(void)
438 {
439 	wrmsr(MSR_IA32_TSX_CTRL, 0);
440 }
441 
has_tsx_ctrl(void)442 static int has_tsx_ctrl(void)
443 {
444 	return this_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) &&
445 	       (rdmsr(MSR_IA32_ARCH_CAPABILITIES) & ARCH_CAP_TSX_CTRL_MSR);
446 }
447 
wr_ibrs_msr(void)448 static void wr_ibrs_msr(void)
449 {
450 	wrmsr(MSR_IA32_SPEC_CTRL, 1);
451 	wrmsr(MSR_IA32_SPEC_CTRL, 0);
452 }
453 
has_ibpb(void)454 static int has_ibpb(void)
455 {
456 	return this_cpu_has(X86_FEATURE_SPEC_CTRL) ||
457 	       this_cpu_has(X86_FEATURE_AMD_IBPB);
458 }
459 
has_spec_ctrl(void)460 static int has_spec_ctrl(void)
461 {
462 	return this_cpu_has(X86_FEATURE_SPEC_CTRL);
463 }
464 
wr_ibpb_msr(void)465 static void wr_ibpb_msr(void)
466 {
467 	wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
468 }
469 
toggle_cr0_wp(void)470 static void toggle_cr0_wp(void)
471 {
472 	write_cr0(X86_CR0_PE|X86_CR0_PG);
473 	write_cr0(X86_CR0_PE|X86_CR0_WP|X86_CR0_PG);
474 }
475 
toggle_cr4_pge(void)476 static void toggle_cr4_pge(void)
477 {
478 	write_cr4(cr4_shadow ^ X86_CR4_PGE);
479 	write_cr4(cr4_shadow);
480 }
481 
482 static struct test tests[] = {
483 	{ cpuid_test, "cpuid", .parallel = 1,  },
484 	{ vmcall, "vmcall", .parallel = 1, },
485 #ifdef __x86_64__
486 	{ mov_from_cr8, "mov_from_cr8", .parallel = 1, },
487 	{ mov_to_cr8, "mov_to_cr8" , .parallel = 1, },
488 #endif
489 	{ inl_pmtimer, "inl_from_pmtimer", .parallel = 1, },
490 	{ inl_nop_qemu, "inl_from_qemu", .parallel = 1 },
491 	{ inl_nop_kernel, "inl_from_kernel", .parallel = 1 },
492 	{ outl_elcr_kernel, "outl_to_kernel", .parallel = 1 },
493 	{ mov_dr, "mov_dr", .parallel = 1 },
494 	{ tscdeadline_immed, "tscdeadline_immed", has_tscdeadline, .parallel = 1, },
495 	{ tscdeadline, "tscdeadline", has_tscdeadline, .parallel = 1, },
496 	{ self_ipi_sti_nop, "self_ipi_sti_nop", .parallel = 0, },
497 	{ self_ipi_sti_hlt, "self_ipi_sti_hlt", .parallel = 0, },
498 	{ self_ipi_tpr, "self_ipi_tpr", .parallel = 0, },
499 	{ self_ipi_tpr_sti_nop, "self_ipi_tpr_sti_nop", .parallel = 0, },
500 	{ self_ipi_tpr_sti_hlt, "self_ipi_tpr_sti_hlt", .parallel = 0, },
501 	{ x2apic_self_ipi_sti_nop, "x2apic_self_ipi_sti_nop", is_x2apic, .parallel = 0, },
502 	{ x2apic_self_ipi_sti_hlt, "x2apic_self_ipi_sti_hlt", is_x2apic, .parallel = 0, },
503 	{ x2apic_self_ipi_tpr, "x2apic_self_ipi_tpr", is_x2apic, .parallel = 0, },
504 	{ x2apic_self_ipi_tpr_sti_nop, "x2apic_self_ipi_tpr_sti_nop", is_x2apic, .parallel = 0, },
505 	{ x2apic_self_ipi_tpr_sti_hlt, "x2apic_self_ipi_tpr_sti_hlt", is_x2apic, .parallel = 0, },
506 	{ ipi, "ipi", is_smp, .parallel = 0, },
507 	{ ipi_halt, "ipi_halt", is_smp, .parallel = 0, },
508 	{ ple_round_robin, "ple_round_robin", .parallel = 1 },
509 	{ wr_kernel_gs_base, "wr_kernel_gs_base", .parallel = 1 },
510 	{ wr_tsx_ctrl_msr, "wr_tsx_ctrl_msr", has_tsx_ctrl, .parallel = 1, },
511 	{ wr_ibrs_msr, "wr_ibrs_msr", has_spec_ctrl, .parallel = 1 },
512 	{ wr_ibpb_msr, "wr_ibpb_msr", has_ibpb, .parallel = 1 },
513 	{ wr_tsc_adjust_msr, "wr_tsc_adjust_msr", .parallel = 1 },
514 	{ rd_tsc_adjust_msr, "rd_tsc_adjust_msr", .parallel = 1 },
515 	{ toggle_cr0_wp, "toggle_cr0_wp" , .parallel = 1, },
516 	{ toggle_cr4_pge, "toggle_cr4_pge" , .parallel = 1, },
517 	{ NULL, "pci-mem", .parallel = 0, .next = pci_mem_next },
518 	{ NULL, "pci-io", .parallel = 0, .next = pci_io_next },
519 };
520 
/* Calls per measurement round; set by do_test(), read by run_test(). */
unsigned iterations;

/*
 * on_cpus() callback: call the benchmark function passed as @_func
 * `iterations` times.
 */
static void run_test(void *_func)
{
    void (*body)(void) = _func;
    int count;

    for (count = 0; count < iterations; count++) {
        body();
    }
}
531 
do_test(struct test * test)532 static bool do_test(struct test *test)
533 {
534 	int i;
535 	unsigned long long t1, t2;
536         void (*func)(void);
537 
538         iterations = 32;
539 
540         if (test->valid && !test->valid()) {
541 		printf("%s (skipped)\n", test->name);
542 		return false;
543 	}
544 
545 	if (test->next && !test->next(test)) {
546 		return false;
547 	}
548 
549 	func = test->func;
550         if (!func) {
551 		printf("%s (skipped)\n", test->name);
552 		return false;
553 	}
554 
555 	do {
556 		tsc_eoi = tsc_ipi = 0;
557 		iterations *= 2;
558 		t1 = rdtsc();
559 
560 		if (!test->parallel) {
561 			for (i = 0; i < iterations; ++i)
562 				func();
563 		} else {
564 			on_cpus(run_test, func);
565 		}
566 		t2 = rdtsc();
567 	} while ((t2 - t1) < GOAL);
568 	printf("%s %d\n", test->name, (int)((t2 - t1) / iterations));
569 	if (tsc_ipi)
570 		printf("  ipi %s %d\n", test->name, (int)(tsc_ipi / iterations));
571 	if (tsc_eoi)
572 		printf("  eoi %s %d\n", test->name, (int)(tsc_eoi / iterations));
573 
574 	return test->next;
575 }
576 
enable_nx(void * junk)577 static void enable_nx(void *junk)
578 {
579 	if (this_cpu_has(X86_FEATURE_NX))
580 		wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NX_MASK);
581 }
582 
test_wanted(struct test * test,char * wanted[],int nwanted)583 static bool test_wanted(struct test *test, char *wanted[], int nwanted)
584 {
585 	int i;
586 
587 	if (!nwanted)
588 		return true;
589 
590 	for (i = 0; i < nwanted; ++i)
591 		if (strcmp(wanted[i], test->name) == 0)
592 			return true;
593 
594 	return false;
595 }
596 
main(int ac,char ** av)597 int main(int ac, char **av)
598 {
599 	int i;
600 	unsigned long membar = 0;
601 	struct pci_dev pcidev;
602 	int ret;
603 
604 	setup_vm();
605 	cr4_shadow = read_cr4();
606 	handle_irq(IPI_TEST_VECTOR, self_ipi_isr);
607 	nr_cpus = cpu_count();
608 
609 	sti();
610 	on_cpus(enable_nx, NULL);
611 
612 	ret = pci_find_dev(PCI_VENDOR_ID_REDHAT, PCI_DEVICE_ID_REDHAT_TEST);
613 	if (ret != PCIDEVADDR_INVALID) {
614 		pci_dev_init(&pcidev, ret);
615 		assert(pci_bar_is_memory(&pcidev, PCI_TESTDEV_BAR_MEM));
616 		assert(!pci_bar_is_memory(&pcidev, PCI_TESTDEV_BAR_IO));
617 		membar = pcidev.resource[PCI_TESTDEV_BAR_MEM];
618 		pci_test.memaddr = ioremap(membar, PAGE_SIZE);
619 		pci_test.iobar = pcidev.resource[PCI_TESTDEV_BAR_IO];
620 		printf("pci-testdev at %#x membar %lx iobar %x\n",
621 		       pcidev.bdf, membar, pci_test.iobar);
622 	}
623 
624 	for (i = 0; i < ARRAY_SIZE(tests); ++i)
625 		if (test_wanted(&tests[i], av + 1, ac - 1))
626 			while (do_test(&tests[i])) {}
627 
628 	return 0;
629 }
630