xref: /kvm-unit-tests/x86/vmexit.c (revision c604fa931a1cb70c3649ac1b7223178fc79eab6a)
1 #include "libcflat.h"
2 #include "smp.h"
3 #include "pci.h"
4 #include "x86/vm.h"
5 #include "x86/desc.h"
6 #include "x86/acpi.h"
7 #include "x86/apic.h"
8 #include "x86/isr.h"
9 
/* Interrupt vector used by the self-IPI and TSC-deadline timer tests. */
#define IPI_TEST_VECTOR		0xb0
11 
/*
 * One benchmark entry.
 *
 * The first three members are initialised positionally by the tests[]
 * table, so their order must not change.
 */
struct test {
	/* Operation being timed; NULL makes do_test() report "(skipped)". */
	void (*func)(void);
	/* Name printed in results and matched against command-line args. */
	const char *name;
	/* Optional gate: the test is skipped when this returns 0. */
	int (*valid)(void);
	/* Non-zero: run via on_cpus(); zero: run on the calling CPU only. */
	int parallel;
	/* Optional hook called before each run; may set func. */
	bool (*next)(struct test *);
};
19 
20 #define GOAL (1ull << 30)
21 
22 static int nr_cpus;
23 static u64 cr4_shadow;
24 
25 static void cpuid_test(void)
26 {
27 	asm volatile ("push %%"R "bx; cpuid; pop %%"R "bx"
28 		      : : : "eax", "ecx", "edx");
29 }
30 
31 static void vmcall(void)
32 {
33 	unsigned long a = 0, b, c, d;
34 
35 	asm volatile ("vmcall" : "+a"(a), "=b"(b), "=c"(c), "=d"(d));
36 }
37 
/* EFER MSR index and the bit that enable_nx() sets in it. */
#define MSR_EFER		0xc0000080
#define EFER_NX_MASK		(1ull << 11)
40 
41 #ifdef __x86_64__
42 static void mov_from_cr8(void)
43 {
44 	unsigned long cr8;
45 
46 	asm volatile ("mov %%cr8, %0" : "=r"(cr8));
47 }
48 
49 static void mov_to_cr8(void)
50 {
51 	unsigned long cr8 = 0;
52 
53 	asm volatile ("mov %0, %%cr8" : : "r"(cr8));
54 }
55 #endif
56 
/* valid() hook: non-zero when more than one CPU is available. */
static int is_smp(void)
{
	if (cpu_count() > 1)
		return 1;

	return 0;
}
61 
/* Empty callback handed to on_cpu() by the IPI tests; does nothing. */
static void nop(void *junk)
{
	(void)junk;
}
65 
/* Incremented by self_ipi_isr(); tests reset it and check it afterwards. */
volatile int x = 0;
/* TSC ticks accumulated around eoi() in self_ipi_isr(). */
volatile uint64_t tsc_eoi = 0;
/* TSC ticks accumulated around the IPI send in the *ipi* helpers. */
volatile uint64_t tsc_ipi = 0;
69 
70 static void self_ipi_isr(isr_regs_t *regs)
71 {
72 	x++;
73 	uint64_t start = rdtsc();
74 	eoi();
75 	tsc_eoi += rdtsc() - start;
76 }
77 
78 static void x2apic_self_ipi(int vec)
79 {
80 	uint64_t start = rdtsc();
81 	wrmsr(0x83f, vec);
82 	tsc_ipi += rdtsc() - start;
83 }
84 
85 static void apic_self_ipi(int vec)
86 {
87 	uint64_t start = rdtsc();
88         apic_icr_write(APIC_INT_ASSERT | APIC_DEST_SELF | APIC_DEST_PHYSICAL |
89 		       APIC_DM_FIXED | IPI_TEST_VECTOR, vec);
90 	tsc_ipi += rdtsc() - start;
91 }
92 
93 static void self_ipi_sti_nop(void)
94 {
95 	x = 0;
96 	irq_disable();
97 	apic_self_ipi(IPI_TEST_VECTOR);
98 	asm volatile("sti; nop");
99 	if (x != 1) printf("%d", x);
100 }
101 
102 static void self_ipi_sti_hlt(void)
103 {
104 	x = 0;
105 	irq_disable();
106 	apic_self_ipi(IPI_TEST_VECTOR);
107 	safe_halt();
108 	if (x != 1) printf("%d", x);
109 }
110 
111 static void self_ipi_tpr(void)
112 {
113 	x = 0;
114 	apic_set_tpr(0x0f);
115 	apic_self_ipi(IPI_TEST_VECTOR);
116 	apic_set_tpr(0x00);
117 	asm volatile("nop");
118 	if (x != 1) printf("%d", x);
119 }
120 
121 static void self_ipi_tpr_sti_nop(void)
122 {
123 	x = 0;
124 	irq_disable();
125 	apic_set_tpr(0x0f);
126 	apic_self_ipi(IPI_TEST_VECTOR);
127 	apic_set_tpr(0x00);
128 	asm volatile("sti; nop");
129 	if (x != 1) printf("%d", x);
130 }
131 
132 static void self_ipi_tpr_sti_hlt(void)
133 {
134 	x = 0;
135 	irq_disable();
136 	apic_set_tpr(0x0f);
137 	apic_self_ipi(IPI_TEST_VECTOR);
138 	apic_set_tpr(0x00);
139 	safe_halt();
140 	if (x != 1) printf("%d", x);
141 }
142 
143 static int is_x2apic(void)
144 {
145     return rdmsr(MSR_IA32_APICBASE) & APIC_EXTD;
146 }
147 
148 static void x2apic_self_ipi_sti_nop(void)
149 {
150 	irq_disable();
151 	x2apic_self_ipi(IPI_TEST_VECTOR);
152 	asm volatile("sti; nop");
153 }
154 
155 static void x2apic_self_ipi_sti_hlt(void)
156 {
157 	irq_disable();
158 	x2apic_self_ipi(IPI_TEST_VECTOR);
159 	safe_halt();
160 }
161 
162 static void x2apic_self_ipi_tpr(void)
163 {
164 	apic_set_tpr(0x0f);
165 	x2apic_self_ipi(IPI_TEST_VECTOR);
166 	apic_set_tpr(0x00);
167 	asm volatile("nop");
168 }
169 
170 static void x2apic_self_ipi_tpr_sti_nop(void)
171 {
172 	irq_disable();
173 	apic_set_tpr(0x0f);
174 	x2apic_self_ipi(IPI_TEST_VECTOR);
175 	apic_set_tpr(0x00);
176 	asm volatile("sti; nop");
177 }
178 
179 static void x2apic_self_ipi_tpr_sti_hlt(void)
180 {
181 	irq_disable();
182 	apic_set_tpr(0x0f);
183 	x2apic_self_ipi(IPI_TEST_VECTOR);
184 	apic_set_tpr(0x00);
185 	safe_halt();
186 }
187 
188 static void ipi(void)
189 {
190 	uint64_t start = rdtsc();
191 	on_cpu(1, nop, 0);
192 	tsc_ipi += rdtsc() - start;
193 }
194 
195 static void ipi_halt(void)
196 {
197 	unsigned long long t;
198 
199 	on_cpu(1, nop, 0);
200 	t = rdtsc() + 2000;
201 	while (rdtsc() < t)
202 		;
203 }
204 
205 int pm_tmr_blk;
206 static void inl_pmtimer(void)
207 {
208     if (!pm_tmr_blk) {
209 	struct fadt_descriptor_rev1 *fadt;
210 
211 	fadt = find_acpi_table_addr(FACP_SIGNATURE);
212 	pm_tmr_blk = fadt->pm_tmr_blk;
213 	printf("PM timer port is %x\n", pm_tmr_blk);
214     }
215     inl(pm_tmr_blk);
216 }
217 
/*
 * Do one 32-bit read from I/O port 0x1234.  Going by the test name, this
 * port is presumably handled in QEMU userspace - confirm.
 */
static void inl_nop_qemu(void)
{
	inl(0x1234);
}
222 
/*
 * Do one 8-bit read (inb, despite the "inl" in the name) from I/O port
 * 0x4d0.  Going by the test name, this port is presumably handled inside
 * the host kernel - confirm.
 */
static void inl_nop_kernel(void)
{
	inb(0x4d0);
}
227 
/*
 * Write the byte 0 (outb, despite the "outl" in the name) to I/O port
 * 0x4d0.
 */
static void outl_elcr_kernel(void)
{
	outb(0, 0x4d0);
}
232 
233 static void mov_dr(void)
234 {
235     asm volatile("mov %0, %%dr7" : : "r" (0x400L));
236 }
237 
238 static void ple_round_robin(void)
239 {
240 	struct counter {
241 		volatile int n1;
242 		int n2;
243 	} __attribute__((aligned(64)));
244 	static struct counter counters[64] = { { -1, 0 } };
245 	int me = smp_id();
246 	int you;
247 	volatile struct counter *p = &counters[me];
248 
249 	while (p->n1 == p->n2)
250 		asm volatile ("pause");
251 
252 	p->n2 = p->n1;
253 	you = me + 1;
254 	if (you == nr_cpus)
255 		you = 0;
256 	++counters[you].n1;
257 }
258 
259 static void rd_tsc_adjust_msr(void)
260 {
261 	rdmsr(MSR_IA32_TSC_ADJUST);
262 }
263 
264 static void wr_tsc_adjust_msr(void)
265 {
266 	wrmsr(MSR_IA32_TSC_ADJUST, 0x0);
267 }
268 
269 static void wr_kernel_gs_base(void)
270 {
271 	wrmsr(MSR_KERNEL_GS_BASE, 0x0);
272 }
273 
/* State shared by the pci-mem and pci-io tests. */
static struct pci_test {
	/* Base of the device's I/O BAR, set by main(). */
	unsigned int iobar;
	/* I/O port the current pci-io test writes to (iobar + offset). */
	unsigned int ioport;
	/* Mapping of the device's memory BAR, set by main(). */
	volatile void *memaddr;
	/* Address the current pci-mem test writes to (memaddr + offset). */
	volatile void *mem;
	/* Index of the current device test; -1 before the first one. */
	int test_idx;
	/* Value to write, read from the device header by pci_next(). */
	uint32_t data;
	/* Target offset, read from the device header by pci_next(). */
	uint32_t offset;
} pci_test = {
	.test_idx = -1,
};
285 
286 static void pci_mem_testb(void)
287 {
288 	*(volatile uint8_t *)pci_test.mem = pci_test.data;
289 }
290 
291 static void pci_mem_testw(void)
292 {
293 	*(volatile uint16_t *)pci_test.mem = pci_test.data;
294 }
295 
296 static void pci_mem_testl(void)
297 {
298 	*(volatile uint32_t *)pci_test.mem = pci_test.data;
299 }
300 
301 static void pci_io_testb(void)
302 {
303 	outb(pci_test.data, pci_test.ioport);
304 }
305 
306 static void pci_io_testw(void)
307 {
308 	outw(pci_test.data, pci_test.ioport);
309 }
310 
311 static void pci_io_testl(void)
312 {
313 	outl(pci_test.data, pci_test.ioport);
314 }
315 
/*
 * Read one byte from @addr: a port read when @io is true, otherwise a
 * volatile memory load.
 */
static uint8_t ioreadb(unsigned long addr, bool io)
{
	if (!io)
		return *(volatile uint8_t *)addr;

	return inb(addr);
}
324 
/*
 * Read a 32-bit value from @addr: a port read when @io is true, otherwise
 * a volatile memory load.
 *
 * Note: assumes little endian.
 */
static uint32_t ioreadl(unsigned long addr, bool io)
{
	if (!io)
		return *(volatile uint32_t *)addr;

	return inl(addr);
}
334 
/*
 * Write the byte @data to @addr: a port write when @io is true, otherwise
 * a volatile memory store.
 */
static void iowriteb(unsigned long addr, uint8_t data, bool io)
{
	if (!io) {
		*(volatile uint8_t *)addr = data;
		return;
	}

	outb(data, addr);
}
343 
344 static bool pci_next(struct test *test, unsigned long addr, bool io)
345 {
346 	int i;
347 	uint8_t width;
348 
349 	if (!pci_test.memaddr) {
350 		test->func = NULL;
351 		return true;
352 	}
353 	pci_test.test_idx++;
354 	iowriteb(addr + offsetof(struct pci_test_dev_hdr, test),
355 		 pci_test.test_idx, io);
356 	width = ioreadb(addr + offsetof(struct pci_test_dev_hdr, width),
357 			io);
358 	switch (width) {
359 		case 1:
360 			test->func = io ? pci_io_testb : pci_mem_testb;
361 			break;
362 		case 2:
363 			test->func = io ? pci_io_testw : pci_mem_testw;
364 			break;
365 		case 4:
366 			test->func = io ? pci_io_testl : pci_mem_testl;
367 			break;
368 		default:
369 			/* Reset index for purposes of the next test */
370 			pci_test.test_idx = -1;
371 			test->func = NULL;
372 			return false;
373 	}
374 	pci_test.data = ioreadl(addr + offsetof(struct pci_test_dev_hdr, data),
375 				io);
376 	pci_test.offset = ioreadl(addr + offsetof(struct pci_test_dev_hdr,
377 						  offset), io);
378 	for (i = 0; i < pci_test.offset; ++i) {
379 		char c = ioreadb(addr + offsetof(struct pci_test_dev_hdr,
380 						 name) + i, io);
381 		if (!c) {
382 			break;
383 		}
384 		printf("%c",c);
385 	}
386 	printf(":");
387 	return true;
388 }
389 
390 static bool pci_mem_next(struct test *test)
391 {
392 	bool ret;
393 	ret = pci_next(test, ((unsigned long)pci_test.memaddr), false);
394 	if (ret) {
395 		pci_test.mem = pci_test.memaddr + pci_test.offset;
396 	}
397 	return ret;
398 }
399 
400 static bool pci_io_next(struct test *test)
401 {
402 	bool ret;
403 	ret = pci_next(test, ((unsigned long)pci_test.iobar), true);
404 	if (ret) {
405 		pci_test.ioport = pci_test.iobar + pci_test.offset;
406 	}
407 	return ret;
408 }
409 
410 static int has_tscdeadline(void)
411 {
412     uint32_t lvtt;
413 
414     if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
415         lvtt = APIC_LVT_TIMER_TSCDEADLINE | IPI_TEST_VECTOR;
416         apic_write(APIC_LVTT, lvtt);
417         return 1;
418     } else {
419         return 0;
420     }
421 }
422 
423 static void tscdeadline_immed(void)
424 {
425 	wrmsr(MSR_IA32_TSCDEADLINE, rdtsc());
426 	asm volatile("nop");
427 }
428 
429 static void tscdeadline(void)
430 {
431 	x = 0;
432 	wrmsr(MSR_IA32_TSCDEADLINE, rdtsc()+3000);
433 	while (x == 0) barrier();
434 }
435 
436 static void wr_tsx_ctrl_msr(void)
437 {
438 	wrmsr(MSR_IA32_TSX_CTRL, 0);
439 }
440 
441 static int has_tsx_ctrl(void)
442 {
443     return this_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)
444 	    && (rdmsr(MSR_IA32_ARCH_CAPABILITIES) & ARCH_CAP_TSX_CTRL_MSR);
445 }
446 
447 static void wr_ibrs_msr(void)
448 {
449 	wrmsr(MSR_IA32_SPEC_CTRL, 1);
450 	wrmsr(MSR_IA32_SPEC_CTRL, 0);
451 }
452 
453 static int has_ibpb(void)
454 {
455     return has_spec_ctrl() || !!(this_cpu_has(X86_FEATURE_AMD_IBPB));
456 }
457 
458 static void wr_ibpb_msr(void)
459 {
460 	wrmsr(MSR_IA32_PRED_CMD, 1);
461 }
462 
463 static void toggle_cr0_wp(void)
464 {
465 	write_cr0(X86_CR0_PE|X86_CR0_PG);
466 	write_cr0(X86_CR0_PE|X86_CR0_WP|X86_CR0_PG);
467 }
468 
469 static void toggle_cr4_pge(void)
470 {
471 	write_cr4(cr4_shadow ^ X86_CR4_PGE);
472 	write_cr4(cr4_shadow);
473 }
474 
475 static struct test tests[] = {
476 	{ cpuid_test, "cpuid", .parallel = 1,  },
477 	{ vmcall, "vmcall", .parallel = 1, },
478 #ifdef __x86_64__
479 	{ mov_from_cr8, "mov_from_cr8", .parallel = 1, },
480 	{ mov_to_cr8, "mov_to_cr8" , .parallel = 1, },
481 #endif
482 	{ inl_pmtimer, "inl_from_pmtimer", .parallel = 1, },
483 	{ inl_nop_qemu, "inl_from_qemu", .parallel = 1 },
484 	{ inl_nop_kernel, "inl_from_kernel", .parallel = 1 },
485 	{ outl_elcr_kernel, "outl_to_kernel", .parallel = 1 },
486 	{ mov_dr, "mov_dr", .parallel = 1 },
487 	{ tscdeadline_immed, "tscdeadline_immed", has_tscdeadline, .parallel = 1, },
488 	{ tscdeadline, "tscdeadline", has_tscdeadline, .parallel = 1, },
489 	{ self_ipi_sti_nop, "self_ipi_sti_nop", .parallel = 0, },
490 	{ self_ipi_sti_hlt, "self_ipi_sti_hlt", .parallel = 0, },
491 	{ self_ipi_tpr, "self_ipi_tpr", .parallel = 0, },
492 	{ self_ipi_tpr_sti_nop, "self_ipi_tpr_sti_nop", .parallel = 0, },
493 	{ self_ipi_tpr_sti_hlt, "self_ipi_tpr_sti_hlt", .parallel = 0, },
494 	{ x2apic_self_ipi_sti_nop, "x2apic_self_ipi_sti_nop", is_x2apic, .parallel = 0, },
495 	{ x2apic_self_ipi_sti_hlt, "x2apic_self_ipi_sti_hlt", is_x2apic, .parallel = 0, },
496 	{ x2apic_self_ipi_tpr, "x2apic_self_ipi_tpr", is_x2apic, .parallel = 0, },
497 	{ x2apic_self_ipi_tpr_sti_nop, "x2apic_self_ipi_tpr_sti_nop", is_x2apic, .parallel = 0, },
498 	{ x2apic_self_ipi_tpr_sti_hlt, "x2apic_self_ipi_tpr_sti_hlt", is_x2apic, .parallel = 0, },
499 	{ ipi, "ipi", is_smp, .parallel = 0, },
500 	{ ipi_halt, "ipi_halt", is_smp, .parallel = 0, },
501 	{ ple_round_robin, "ple_round_robin", .parallel = 1 },
502 	{ wr_kernel_gs_base, "wr_kernel_gs_base", .parallel = 1 },
503 	{ wr_tsx_ctrl_msr, "wr_tsx_ctrl_msr", has_tsx_ctrl, .parallel = 1, },
504 	{ wr_ibrs_msr, "wr_ibrs_msr", has_spec_ctrl, .parallel = 1 },
505 	{ wr_ibpb_msr, "wr_ibpb_msr", has_ibpb, .parallel = 1 },
506 	{ wr_tsc_adjust_msr, "wr_tsc_adjust_msr", .parallel = 1 },
507 	{ rd_tsc_adjust_msr, "rd_tsc_adjust_msr", .parallel = 1 },
508 	{ toggle_cr0_wp, "toggle_cr0_wp" , .parallel = 1, },
509 	{ toggle_cr4_pge, "toggle_cr4_pge" , .parallel = 1, },
510 	{ NULL, "pci-mem", .parallel = 0, .next = pci_mem_next },
511 	{ NULL, "pci-io", .parallel = 0, .next = pci_io_next },
512 };
513 
/* Calls per measurement pass; set and doubled by do_test(). */
unsigned int iterations;

/*
 * on_cpus() callback: call the test function passed in @_func
 * 'iterations' times.
 */
static void run_test(void *_func)
{
	void (*body)(void) = _func;
	int count = 0;

	while (count < iterations) {
		body();
		++count;
	}
}
524 
525 static bool do_test(struct test *test)
526 {
527 	int i;
528 	unsigned long long t1, t2;
529         void (*func)(void);
530 
531         iterations = 32;
532 
533         if (test->valid && !test->valid()) {
534 		printf("%s (skipped)\n", test->name);
535 		return false;
536 	}
537 
538 	if (test->next && !test->next(test)) {
539 		return false;
540 	}
541 
542 	func = test->func;
543         if (!func) {
544 		printf("%s (skipped)\n", test->name);
545 		return false;
546 	}
547 
548 	do {
549 		tsc_eoi = tsc_ipi = 0;
550 		iterations *= 2;
551 		t1 = rdtsc();
552 
553 		if (!test->parallel) {
554 			for (i = 0; i < iterations; ++i)
555 				func();
556 		} else {
557 			on_cpus(run_test, func);
558 		}
559 		t2 = rdtsc();
560 	} while ((t2 - t1) < GOAL);
561 	printf("%s %d\n", test->name, (int)((t2 - t1) / iterations));
562 	if (tsc_ipi)
563 		printf("  ipi %s %d\n", test->name, (int)(tsc_ipi / iterations));
564 	if (tsc_eoi)
565 		printf("  eoi %s %d\n", test->name, (int)(tsc_eoi / iterations));
566 
567 	return test->next;
568 }
569 
570 static void enable_nx(void *junk)
571 {
572 	if (this_cpu_has(X86_FEATURE_NX))
573 		wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NX_MASK);
574 }
575 
576 static bool test_wanted(struct test *test, char *wanted[], int nwanted)
577 {
578 	int i;
579 
580 	if (!nwanted)
581 		return true;
582 
583 	for (i = 0; i < nwanted; ++i)
584 		if (strcmp(wanted[i], test->name) == 0)
585 			return true;
586 
587 	return false;
588 }
589 
590 int main(int ac, char **av)
591 {
592 	int i;
593 	unsigned long membar = 0;
594 	struct pci_dev pcidev;
595 	int ret;
596 
597 	setup_vm();
598 	cr4_shadow = read_cr4();
599 	handle_irq(IPI_TEST_VECTOR, self_ipi_isr);
600 	nr_cpus = cpu_count();
601 
602 	irq_enable();
603 	on_cpus(enable_nx, NULL);
604 
605 	ret = pci_find_dev(PCI_VENDOR_ID_REDHAT, PCI_DEVICE_ID_REDHAT_TEST);
606 	if (ret != PCIDEVADDR_INVALID) {
607 		pci_dev_init(&pcidev, ret);
608 		assert(pci_bar_is_memory(&pcidev, PCI_TESTDEV_BAR_MEM));
609 		assert(!pci_bar_is_memory(&pcidev, PCI_TESTDEV_BAR_IO));
610 		membar = pcidev.resource[PCI_TESTDEV_BAR_MEM];
611 		pci_test.memaddr = ioremap(membar, PAGE_SIZE);
612 		pci_test.iobar = pcidev.resource[PCI_TESTDEV_BAR_IO];
613 		printf("pci-testdev at %#x membar %lx iobar %x\n",
614 		       pcidev.bdf, membar, pci_test.iobar);
615 	}
616 
617 	for (i = 0; i < ARRAY_SIZE(tests); ++i)
618 		if (test_wanted(&tests[i], av + 1, ac - 1))
619 			while (do_test(&tests[i])) {}
620 
621 	return 0;
622 }
623