xref: /kvm-unit-tests/x86/vmexit.c (revision 2c96b77ec9d3b1fcec7525174e23a6240ee05949)
1 #include "libcflat.h"
2 #include "smp.h"
3 #include "pci.h"
4 #include "x86/vm.h"
5 #include "x86/desc.h"
6 #include "x86/acpi.h"
7 #include "x86/apic.h"
8 #include "x86/isr.h"
9 
/*
 * Interrupt vector used by this file: main() installs self_ipi_isr() on it,
 * the self-IPI tests send it, and has_tscdeadline() programs it into LVTT.
 */
#define IPI_TEST_VECTOR (0xb0)
11 
/*
 * One benchmark entry of the tests[] table.
 *
 * Field order matters: tests[] initialises func, name and valid positionally.
 */
struct test {
	/* Operation being timed; may be NULL until ->next() selects one. */
	void (*func)(void);
	/* Label printed in the results and matched against command-line names. */
	const char *name;
	/* Optional precondition; a zero return makes do_test() skip the entry. */
	int (*valid)(void);
	/* Non-zero: run on all CPUs via on_cpus(); zero: loop on the caller. */
	int parallel;
	/* Optional iterator picking the next sub-test; false ends the series. */
	bool (*next)(struct test *);
};
19 
/*
 * Minimum rdtsc() delta (2^30) a measurement run must span before do_test()
 * accepts it; shorter runs are repeated with twice the iteration count.
 */
#define GOAL (1ULL << 30)

/* CPU count cached by main(); ple_round_robin() uses it to wrap around. */
static int nr_cpus;
23 
24 static void cpuid_test(void)
25 {
26 	asm volatile ("push %%"R "bx; cpuid; pop %%"R "bx"
27 		      : : : "eax", "ecx", "edx");
28 }
29 
30 static void vmcall(void)
31 {
32 	unsigned long a = 0, b, c, d;
33 
34 	asm volatile ("vmcall" : "+a"(a), "=b"(b), "=c"(c), "=d"(d));
35 }
36 
/* MSR index and bit (bit 11) that enable_nx() sets on every CPU. */
#define MSR_EFER	0xc0000080
#define EFER_NX_MASK	(1ULL << 11)
39 
40 #ifdef __x86_64__
41 static void mov_from_cr8(void)
42 {
43 	unsigned long cr8;
44 
45 	asm volatile ("mov %%cr8, %0" : "=r"(cr8));
46 }
47 
48 static void mov_to_cr8(void)
49 {
50 	unsigned long cr8 = 0;
51 
52 	asm volatile ("mov %0, %%cr8" : : "r"(cr8));
53 }
54 #endif
55 
/* ->valid() hook: non-zero when cpu_count() reports more than one CPU. */
static int is_smp(void)
{
	int online = cpu_count();

	return online > 1;
}
60 
/*
 * Empty callback handed to on_cpu() by the IPI tests; the argument is
 * ignored.
 */
static void nop(void *junk)
{
	/* intentionally empty */
}
64 
/* Incremented by self_ipi_isr(); tests reset it and check or poll it. */
volatile int x = 0;
/* Accumulated rdtsc() ticks spent inside eoi() in self_ipi_isr(). */
volatile uint64_t tsc_eoi = 0;
/* Accumulated rdtsc() ticks spent sending IPIs in the *ipi* helpers. */
volatile uint64_t tsc_ipi = 0;
68 
69 static void self_ipi_isr(isr_regs_t *regs)
70 {
71 	x++;
72 	uint64_t start = rdtsc();
73 	eoi();
74 	tsc_eoi += rdtsc() - start;
75 }
76 
77 static void x2apic_self_ipi(int vec)
78 {
79 	uint64_t start = rdtsc();
80 	wrmsr(0x83f, vec);
81 	tsc_ipi += rdtsc() - start;
82 }
83 
84 static void apic_self_ipi(int vec)
85 {
86 	uint64_t start = rdtsc();
87         apic_icr_write(APIC_INT_ASSERT | APIC_DEST_SELF | APIC_DEST_PHYSICAL |
88 		       APIC_DM_FIXED | IPI_TEST_VECTOR, vec);
89 	tsc_ipi += rdtsc() - start;
90 }
91 
92 static void self_ipi_sti_nop(void)
93 {
94 	x = 0;
95 	irq_disable();
96 	apic_self_ipi(IPI_TEST_VECTOR);
97 	asm volatile("sti; nop");
98 	if (x != 1) printf("%d", x);
99 }
100 
101 static void self_ipi_sti_hlt(void)
102 {
103 	x = 0;
104 	irq_disable();
105 	apic_self_ipi(IPI_TEST_VECTOR);
106 	safe_halt();
107 	if (x != 1) printf("%d", x);
108 }
109 
110 static void self_ipi_tpr(void)
111 {
112 	x = 0;
113 	apic_set_tpr(0x0f);
114 	apic_self_ipi(IPI_TEST_VECTOR);
115 	apic_set_tpr(0x00);
116 	asm volatile("nop");
117 	if (x != 1) printf("%d", x);
118 }
119 
120 static void self_ipi_tpr_sti_nop(void)
121 {
122 	x = 0;
123 	irq_disable();
124 	apic_set_tpr(0x0f);
125 	apic_self_ipi(IPI_TEST_VECTOR);
126 	apic_set_tpr(0x00);
127 	asm volatile("sti; nop");
128 	if (x != 1) printf("%d", x);
129 }
130 
131 static void self_ipi_tpr_sti_hlt(void)
132 {
133 	x = 0;
134 	irq_disable();
135 	apic_set_tpr(0x0f);
136 	apic_self_ipi(IPI_TEST_VECTOR);
137 	apic_set_tpr(0x00);
138 	safe_halt();
139 	if (x != 1) printf("%d", x);
140 }
141 
142 static int is_x2apic(void)
143 {
144     return rdmsr(MSR_IA32_APICBASE) & APIC_EXTD;
145 }
146 
147 static void x2apic_self_ipi_sti_nop(void)
148 {
149 	irq_disable();
150 	x2apic_self_ipi(IPI_TEST_VECTOR);
151 	asm volatile("sti; nop");
152 }
153 
154 static void x2apic_self_ipi_sti_hlt(void)
155 {
156 	irq_disable();
157 	x2apic_self_ipi(IPI_TEST_VECTOR);
158 	safe_halt();
159 }
160 
161 static void x2apic_self_ipi_tpr(void)
162 {
163 	apic_set_tpr(0x0f);
164 	x2apic_self_ipi(IPI_TEST_VECTOR);
165 	apic_set_tpr(0x00);
166 	asm volatile("nop");
167 }
168 
169 static void x2apic_self_ipi_tpr_sti_nop(void)
170 {
171 	irq_disable();
172 	apic_set_tpr(0x0f);
173 	x2apic_self_ipi(IPI_TEST_VECTOR);
174 	apic_set_tpr(0x00);
175 	asm volatile("sti; nop");
176 }
177 
178 static void x2apic_self_ipi_tpr_sti_hlt(void)
179 {
180 	irq_disable();
181 	apic_set_tpr(0x0f);
182 	x2apic_self_ipi(IPI_TEST_VECTOR);
183 	apic_set_tpr(0x00);
184 	safe_halt();
185 }
186 
187 static void ipi(void)
188 {
189 	uint64_t start = rdtsc();
190 	on_cpu(1, nop, 0);
191 	tsc_ipi += rdtsc() - start;
192 }
193 
194 static void ipi_halt(void)
195 {
196 	unsigned long long t;
197 
198 	on_cpu(1, nop, 0);
199 	t = rdtsc() + 2000;
200 	while (rdtsc() < t)
201 		;
202 }
203 
204 int pm_tmr_blk;
205 static void inl_pmtimer(void)
206 {
207     if (!pm_tmr_blk) {
208 	struct fadt_descriptor_rev1 *fadt;
209 
210 	fadt = find_acpi_table_addr(FACP_SIGNATURE);
211 	pm_tmr_blk = fadt->pm_tmr_blk;
212 	printf("PM timer port is %x\n", pm_tmr_blk);
213     }
214     inl(pm_tmr_blk);
215 }
216 
/*
 * Benchmark body: one 32-bit read from I/O port 0x1234; the value is
 * discarded.  Listed in tests[] as "inl_from_qemu".
 */
static void inl_nop_qemu(void)
{
	enum { PORT = 0x1234 };

	inl(PORT);
}
221 
/*
 * Benchmark body: one 8-bit read (inb, despite the "inl" in the name) from
 * I/O port 0x4d0; the value is discarded.  Listed in tests[] as
 * "inl_from_kernel".
 */
static void inl_nop_kernel(void)
{
	enum { PORT = 0x4d0 };

	inb(PORT);
}
226 
/*
 * Benchmark body: one 8-bit write (outb, despite the "outl" in the name)
 * of the value 0 to I/O port 0x4d0.  Listed in tests[] as "outl_to_kernel".
 */
static void outl_elcr_kernel(void)
{
	enum { PORT = 0x4d0 };

	outb(0, PORT);
}
231 
232 static void mov_dr(void)
233 {
234     asm volatile("mov %0, %%dr7" : : "r" (0x400L));
235 }
236 
237 static void ple_round_robin(void)
238 {
239 	struct counter {
240 		volatile int n1;
241 		int n2;
242 	} __attribute__((aligned(64)));
243 	static struct counter counters[64] = { { -1, 0 } };
244 	int me = smp_id();
245 	int you;
246 	volatile struct counter *p = &counters[me];
247 
248 	while (p->n1 == p->n2)
249 		asm volatile ("pause");
250 
251 	p->n2 = p->n1;
252 	you = me + 1;
253 	if (you == nr_cpus)
254 		you = 0;
255 	++counters[you].n1;
256 }
257 
258 static void rd_tsc_adjust_msr(void)
259 {
260 	rdmsr(MSR_IA32_TSC_ADJUST);
261 }
262 
263 static void wr_tsc_adjust_msr(void)
264 {
265 	wrmsr(MSR_IA32_TSC_ADJUST, 0x0);
266 }
267 
268 static void wr_kernel_gs_base(void)
269 {
270 	wrmsr(MSR_KERNEL_GS_BASE, 0x0);
271 }
272 
/*
 * State shared by the pci-mem / pci-io test series.
 *
 * main() fills in iobar and memaddr when the PCI test device is found;
 * pci_next() and its two wrappers update the remaining fields for each
 * sub-test.
 */
static struct pci_test {
	/* Base of the device's I/O BAR. */
	unsigned iobar;
	/* iobar + offset: port written by the pci_io_test*() bodies. */
	unsigned ioport;
	/* Mapping of the device's memory BAR; NULL when no device was found. */
	volatile void *memaddr;
	/* memaddr + offset: address written by the pci_mem_test*() bodies. */
	volatile void *mem;
	/* Index of the current sub-test; -1 before the first one. */
	int test_idx;
	/* Value the test bodies write, read from the device header. */
	uint32_t data;
	/* Offset of the test register, read from the device header. */
	uint32_t offset;
} pci_test = {
	.test_idx = -1,
};
284 
285 static void pci_mem_testb(void)
286 {
287 	*(volatile uint8_t *)pci_test.mem = pci_test.data;
288 }
289 
290 static void pci_mem_testw(void)
291 {
292 	*(volatile uint16_t *)pci_test.mem = pci_test.data;
293 }
294 
295 static void pci_mem_testl(void)
296 {
297 	*(volatile uint32_t *)pci_test.mem = pci_test.data;
298 }
299 
300 static void pci_io_testb(void)
301 {
302 	outb(pci_test.data, pci_test.ioport);
303 }
304 
305 static void pci_io_testw(void)
306 {
307 	outw(pci_test.data, pci_test.ioport);
308 }
309 
310 static void pci_io_testl(void)
311 {
312 	outl(pci_test.data, pci_test.ioport);
313 }
314 
/*
 * Read one byte from @addr.
 *
 * @addr: I/O port number when @io is true, otherwise a memory address.
 * @io:   selects port I/O (inb) over a volatile memory load.
 */
static uint8_t ioreadb(unsigned long addr, bool io)
{
	if (!io)
		return *(volatile uint8_t *)addr;

	return inb(addr);
}
323 
/*
 * Read a 32-bit value from @addr.
 *
 * @addr: I/O port number when @io is true, otherwise a memory address.
 * @io:   selects port I/O (inl) over a volatile memory load.
 *
 * Note: assumes little endian.
 */
static uint32_t ioreadl(unsigned long addr, bool io)
{
	if (!io)
		return *(volatile uint32_t *)addr;

	return inl(addr);
}
333 
/*
 * Write the byte @data to @addr.
 *
 * @addr: I/O port number when @io is true, otherwise a memory address.
 * @data: value to store.
 * @io:   selects port I/O (outb) over a volatile memory store.
 */
static void iowriteb(unsigned long addr, uint8_t data, bool io)
{
	if (!io) {
		*(volatile uint8_t *)addr = data;
		return;
	}

	outb(data, addr);
}
342 
343 static bool pci_next(struct test *test, unsigned long addr, bool io)
344 {
345 	int i;
346 	uint8_t width;
347 
348 	if (!pci_test.memaddr) {
349 		test->func = NULL;
350 		return true;
351 	}
352 	pci_test.test_idx++;
353 	iowriteb(addr + offsetof(struct pci_test_dev_hdr, test),
354 		 pci_test.test_idx, io);
355 	width = ioreadb(addr + offsetof(struct pci_test_dev_hdr, width),
356 			io);
357 	switch (width) {
358 		case 1:
359 			test->func = io ? pci_io_testb : pci_mem_testb;
360 			break;
361 		case 2:
362 			test->func = io ? pci_io_testw : pci_mem_testw;
363 			break;
364 		case 4:
365 			test->func = io ? pci_io_testl : pci_mem_testl;
366 			break;
367 		default:
368 			/* Reset index for purposes of the next test */
369 			pci_test.test_idx = -1;
370 			test->func = NULL;
371 			return false;
372 	}
373 	pci_test.data = ioreadl(addr + offsetof(struct pci_test_dev_hdr, data),
374 				io);
375 	pci_test.offset = ioreadl(addr + offsetof(struct pci_test_dev_hdr,
376 						  offset), io);
377 	for (i = 0; i < pci_test.offset; ++i) {
378 		char c = ioreadb(addr + offsetof(struct pci_test_dev_hdr,
379 						 name) + i, io);
380 		if (!c) {
381 			break;
382 		}
383 		printf("%c",c);
384 	}
385 	printf(":");
386 	return true;
387 }
388 
389 static bool pci_mem_next(struct test *test)
390 {
391 	bool ret;
392 	ret = pci_next(test, ((unsigned long)pci_test.memaddr), false);
393 	if (ret) {
394 		pci_test.mem = pci_test.memaddr + pci_test.offset;
395 	}
396 	return ret;
397 }
398 
399 static bool pci_io_next(struct test *test)
400 {
401 	bool ret;
402 	ret = pci_next(test, ((unsigned long)pci_test.iobar), true);
403 	if (ret) {
404 		pci_test.ioport = pci_test.iobar + pci_test.offset;
405 	}
406 	return ret;
407 }
408 
409 static int has_tscdeadline(void)
410 {
411     uint32_t lvtt;
412 
413     if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
414         lvtt = APIC_LVT_TIMER_TSCDEADLINE | IPI_TEST_VECTOR;
415         apic_write(APIC_LVTT, lvtt);
416         return 1;
417     } else {
418         return 0;
419     }
420 }
421 
422 static void tscdeadline_immed(void)
423 {
424 	wrmsr(MSR_IA32_TSCDEADLINE, rdtsc());
425 	asm volatile("nop");
426 }
427 
428 static void tscdeadline(void)
429 {
430 	x = 0;
431 	wrmsr(MSR_IA32_TSCDEADLINE, rdtsc()+3000);
432 	while (x == 0) barrier();
433 }
434 
435 static void wr_tsx_ctrl_msr(void)
436 {
437 	wrmsr(MSR_IA32_TSX_CTRL, 0);
438 }
439 
440 static int has_tsx_ctrl(void)
441 {
442     return this_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)
443 	    && (rdmsr(MSR_IA32_ARCH_CAPABILITIES) & ARCH_CAP_TSX_CTRL_MSR);
444 }
445 
446 static void wr_ibrs_msr(void)
447 {
448 	wrmsr(MSR_IA32_SPEC_CTRL, 1);
449 	wrmsr(MSR_IA32_SPEC_CTRL, 0);
450 }
451 
452 static int has_ibpb(void)
453 {
454     return has_spec_ctrl() || !!(this_cpu_has(X86_FEATURE_AMD_IBPB));
455 }
456 
457 static void wr_ibpb_msr(void)
458 {
459 	wrmsr(MSR_IA32_PRED_CMD, 1);
460 }
461 
462 static struct test tests[] = {
463 	{ cpuid_test, "cpuid", .parallel = 1,  },
464 	{ vmcall, "vmcall", .parallel = 1, },
465 #ifdef __x86_64__
466 	{ mov_from_cr8, "mov_from_cr8", .parallel = 1, },
467 	{ mov_to_cr8, "mov_to_cr8" , .parallel = 1, },
468 #endif
469 	{ inl_pmtimer, "inl_from_pmtimer", .parallel = 1, },
470 	{ inl_nop_qemu, "inl_from_qemu", .parallel = 1 },
471 	{ inl_nop_kernel, "inl_from_kernel", .parallel = 1 },
472 	{ outl_elcr_kernel, "outl_to_kernel", .parallel = 1 },
473 	{ mov_dr, "mov_dr", .parallel = 1 },
474 	{ tscdeadline_immed, "tscdeadline_immed", has_tscdeadline, .parallel = 1, },
475 	{ tscdeadline, "tscdeadline", has_tscdeadline, .parallel = 1, },
476 	{ self_ipi_sti_nop, "self_ipi_sti_nop", .parallel = 0, },
477 	{ self_ipi_sti_hlt, "self_ipi_sti_hlt", .parallel = 0, },
478 	{ self_ipi_tpr, "self_ipi_tpr", .parallel = 0, },
479 	{ self_ipi_tpr_sti_nop, "self_ipi_tpr_sti_nop", .parallel = 0, },
480 	{ self_ipi_tpr_sti_hlt, "self_ipi_tpr_sti_hlt", .parallel = 0, },
481 	{ x2apic_self_ipi_sti_nop, "x2apic_self_ipi_sti_nop", is_x2apic, .parallel = 0, },
482 	{ x2apic_self_ipi_sti_hlt, "x2apic_self_ipi_sti_hlt", is_x2apic, .parallel = 0, },
483 	{ x2apic_self_ipi_tpr, "x2apic_self_ipi_tpr", is_x2apic, .parallel = 0, },
484 	{ x2apic_self_ipi_tpr_sti_nop, "x2apic_self_ipi_tpr_sti_nop", is_x2apic, .parallel = 0, },
485 	{ x2apic_self_ipi_tpr_sti_hlt, "x2apic_self_ipi_tpr_sti_hlt", is_x2apic, .parallel = 0, },
486 	{ ipi, "ipi", is_smp, .parallel = 0, },
487 	{ ipi_halt, "ipi_halt", is_smp, .parallel = 0, },
488 	{ ple_round_robin, "ple_round_robin", .parallel = 1 },
489 	{ wr_kernel_gs_base, "wr_kernel_gs_base", .parallel = 1 },
490 	{ wr_tsx_ctrl_msr, "wr_tsx_ctrl_msr", has_tsx_ctrl, .parallel = 1, },
491 	{ wr_ibrs_msr, "wr_ibrs_msr", has_spec_ctrl, .parallel = 1 },
492 	{ wr_ibpb_msr, "wr_ibpb_msr", has_ibpb, .parallel = 1 },
493 	{ wr_tsc_adjust_msr, "wr_tsc_adjust_msr", .parallel = 1 },
494 	{ rd_tsc_adjust_msr, "rd_tsc_adjust_msr", .parallel = 1 },
495 	{ NULL, "pci-mem", .parallel = 0, .next = pci_mem_next },
496 	{ NULL, "pci-io", .parallel = 0, .next = pci_io_next },
497 };
498 
/*
 * Number of calls per measurement run; set by do_test() and read by
 * run_test() on every CPU taking part in a parallel run.
 */
unsigned int iterations;
500 
501 static void run_test(void *_func)
502 {
503     int i;
504     void (*func)(void) = _func;
505 
506     for (i = 0; i < iterations; ++i)
507         func();
508 }
509 
510 static bool do_test(struct test *test)
511 {
512 	int i;
513 	unsigned long long t1, t2;
514         void (*func)(void);
515 
516         iterations = 32;
517 
518         if (test->valid && !test->valid()) {
519 		printf("%s (skipped)\n", test->name);
520 		return false;
521 	}
522 
523 	if (test->next && !test->next(test)) {
524 		return false;
525 	}
526 
527 	func = test->func;
528         if (!func) {
529 		printf("%s (skipped)\n", test->name);
530 		return false;
531 	}
532 
533 	do {
534 		tsc_eoi = tsc_ipi = 0;
535 		iterations *= 2;
536 		t1 = rdtsc();
537 
538 		if (!test->parallel) {
539 			for (i = 0; i < iterations; ++i)
540 				func();
541 		} else {
542 			on_cpus(run_test, func);
543 		}
544 		t2 = rdtsc();
545 	} while ((t2 - t1) < GOAL);
546 	printf("%s %d\n", test->name, (int)((t2 - t1) / iterations));
547 	if (tsc_ipi)
548 		printf("  ipi %s %d\n", test->name, (int)(tsc_ipi / iterations));
549 	if (tsc_eoi)
550 		printf("  eoi %s %d\n", test->name, (int)(tsc_eoi / iterations));
551 
552 	return test->next;
553 }
554 
555 static void enable_nx(void *junk)
556 {
557 	if (this_cpu_has(X86_FEATURE_NX))
558 		wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NX_MASK);
559 }
560 
561 static bool test_wanted(struct test *test, char *wanted[], int nwanted)
562 {
563 	int i;
564 
565 	if (!nwanted)
566 		return true;
567 
568 	for (i = 0; i < nwanted; ++i)
569 		if (strcmp(wanted[i], test->name) == 0)
570 			return true;
571 
572 	return false;
573 }
574 
575 int main(int ac, char **av)
576 {
577 	int i;
578 	unsigned long membar = 0;
579 	struct pci_dev pcidev;
580 	int ret;
581 
582 	setup_vm();
583 	handle_irq(IPI_TEST_VECTOR, self_ipi_isr);
584 	nr_cpus = cpu_count();
585 
586 	irq_enable();
587 	on_cpus(enable_nx, NULL);
588 
589 	ret = pci_find_dev(PCI_VENDOR_ID_REDHAT, PCI_DEVICE_ID_REDHAT_TEST);
590 	if (ret != PCIDEVADDR_INVALID) {
591 		pci_dev_init(&pcidev, ret);
592 		assert(pci_bar_is_memory(&pcidev, PCI_TESTDEV_BAR_MEM));
593 		assert(!pci_bar_is_memory(&pcidev, PCI_TESTDEV_BAR_IO));
594 		membar = pcidev.resource[PCI_TESTDEV_BAR_MEM];
595 		pci_test.memaddr = ioremap(membar, PAGE_SIZE);
596 		pci_test.iobar = pcidev.resource[PCI_TESTDEV_BAR_IO];
597 		printf("pci-testdev at %#x membar %lx iobar %x\n",
598 		       pcidev.bdf, membar, pci_test.iobar);
599 	}
600 
601 	for (i = 0; i < ARRAY_SIZE(tests); ++i)
602 		if (test_wanted(&tests[i], av + 1, ac - 1))
603 			while (do_test(&tests[i])) {}
604 
605 	return 0;
606 }
607