xref: /kvm-unit-tests/arm/micro-bench.c (revision c604fa931a1cb70c3649ac1b7223178fc79eab6a)
1 /*
2  * Measure the cost of micro level operations.
3  *
4  * This test provides support for quantifying the cost of micro level
5  * operations. To improve precision in the measurements, one should
6  * consider pinning each VCPU to a specific physical CPU (PCPU) and to
7  * ensure no other task could run on that PCPU to skew the results.
 * This can be achieved by enabling the QMP server in the QEMU command in
 * unittests.cfg for micro-bench, allowing a client program to get the
10  * thread_id for each VCPU thread from the QMP server. Based on that
11  * information, the client program can then pin the corresponding VCPUs to
12  * dedicated PCPUs and isolate interrupts and tasks from those PCPUs.
13  *
14  * Copyright Columbia University
15  * Author: Shih-Wei Li <shihwei@cs.columbia.edu>
16  * Author: Christoffer Dall <cdall@cs.columbia.edu>
17  * Author: Andrew Jones <drjones@redhat.com>
18  *
19  * This work is licensed under the terms of the GNU LGPL, version 2.
20  */
21 #include <libcflat.h>
22 #include <util.h>
23 #include <asm/gic.h>
24 #include <asm/gic-v3-its.h>
25 #include <asm/timer.h>
26 
27 #define NS_5_SECONDS		(5 * 1000 * 1000 * 1000UL)
28 #define QEMU_MMIO_ADDR		0x0a000008
29 
30 static u32 cntfrq;
31 
32 static volatile bool irq_ready, irq_received;
33 static int nr_ipi_received;
34 static unsigned long mmio_addr = QEMU_MMIO_ADDR;
35 
36 static void *vgic_dist_base;
37 static void (*write_eoir)(u32 irqstat);
38 
39 static void gic_irq_handler(struct pt_regs *regs)
40 {
41 	u32 irqstat = gic_read_iar();
42 	irq_ready = false;
43 	irq_received = true;
44 	gic_write_eoir(irqstat);
45 
46 	if (irqstat == PPI(TIMER_VTIMER_IRQ)) {
47 		write_sysreg((ARCH_TIMER_CTL_IMASK | ARCH_TIMER_CTL_ENABLE),
48 			     cntv_ctl_el0);
49 		isb();
50 	}
51 	irq_ready = true;
52 }
53 
54 static void gic_secondary_entry(void *data)
55 {
56 	install_irq_handler(EL1H_IRQ, gic_irq_handler);
57 	gic_enable_defaults();
58 	local_irq_enable();
59 	irq_ready = true;
60 	while (true)
61 		cpu_relax();
62 }
63 
64 static bool test_init(void)
65 {
66 	int v = gic_init();
67 
68 	if (!v) {
69 		printf("No supported gic present, skipping tests...\n");
70 		return false;
71 	}
72 
73 	if (nr_cpus < 2) {
74 		printf("At least two cpus required, skipping tests...\n");
75 		return false;
76 	}
77 
78 	switch (v) {
79 	case 2:
80 		vgic_dist_base = gicv2_dist_base();
81 		write_eoir = gicv2_write_eoir;
82 		break;
83 	case 3:
84 		vgic_dist_base = gicv3_dist_base();
85 		write_eoir = gicv3_write_eoir;
86 		break;
87 	}
88 
89 	irq_ready = false;
90 	gic_enable_defaults();
91 	on_cpu_async(1, gic_secondary_entry, NULL);
92 
93 	cntfrq = get_cntfrq();
94 	printf("Timer Frequency %d Hz (Output in microseconds)\n", cntfrq);
95 
96 	return true;
97 }
98 
99 static void gic_prep_common(void)
100 {
101 	unsigned tries = 1 << 28;
102 
103 	while (!irq_ready && tries--)
104 		cpu_relax();
105 	assert(irq_ready);
106 }
107 
108 static bool ipi_prep(void)
109 {
110 	u32 val;
111 
112 	val = readl(vgic_dist_base + GICD_CTLR);
113 	if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) {
114 		/* nASSGIreq can be changed only when GICD is disabled */
115 		val &= ~GICD_CTLR_ENABLE_G1A;
116 		val &= ~GICD_CTLR_nASSGIreq;
117 		writel(val, vgic_dist_base + GICD_CTLR);
118 		gicv3_dist_wait_for_rwp();
119 
120 		val |= GICD_CTLR_ENABLE_G1A;
121 		writel(val, vgic_dist_base + GICD_CTLR);
122 		gicv3_dist_wait_for_rwp();
123 	}
124 
125 	nr_ipi_received = 0;
126 	gic_prep_common();
127 	return true;
128 }
129 
130 static bool ipi_hw_prep(void)
131 {
132 	u32 val;
133 
134 	val = readl(vgic_dist_base + GICD_CTLR);
135 	if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) {
136 		/* nASSGIreq can be changed only when GICD is disabled */
137 		val &= ~GICD_CTLR_ENABLE_G1A;
138 		val |= GICD_CTLR_nASSGIreq;
139 		writel(val, vgic_dist_base + GICD_CTLR);
140 		gicv3_dist_wait_for_rwp();
141 
142 		val |= GICD_CTLR_ENABLE_G1A;
143 		writel(val, vgic_dist_base + GICD_CTLR);
144 		gicv3_dist_wait_for_rwp();
145 	} else {
146 		return false;
147 	}
148 
149 	nr_ipi_received = 0;
150 	gic_prep_common();
151 	return true;
152 }
153 
154 static void ipi_exec(void)
155 {
156 	unsigned tries = 1 << 28;
157 
158 	irq_received = false;
159 
160 	gic_ipi_send_single(1, 1);
161 
162 	while (!irq_received && tries--)
163 		cpu_relax();
164 
165 	if (irq_received)
166 		++nr_ipi_received;
167 
168 	assert_msg(irq_received, "failed to receive IPI in time, but received %d successfully\n", nr_ipi_received);
169 }
170 
171 static bool lpi_prep(void)
172 {
173 	struct its_collection *col1;
174 	struct its_device *dev2;
175 
176 	if (!gicv3_its_base())
177 		return false;
178 
179 	its_enable_defaults();
180 	dev2 = its_create_device(2 /* dev id */, 8 /* nb_ites */);
181 	col1 = its_create_collection(1 /* col id */, 1 /* target PE */);
182 	gicv3_lpi_set_config(8199, LPI_PROP_DEFAULT);
183 
184 	its_send_mapd_nv(dev2, true);
185 	its_send_mapc_nv(col1, true);
186 	its_send_invall_nv(col1);
187 	its_send_mapti_nv(dev2, 8199 /* lpi id */, 20 /* event id */, col1);
188 
189 	gic_prep_common();
190 	return true;
191 }
192 
193 static void lpi_exec(void)
194 {
195 	struct its_device *dev2;
196 	unsigned tries = 1 << 28;
197 	static int received = 0;
198 
199 	irq_received = false;
200 
201 	dev2 = its_get_device(2);
202 	its_send_int_nv(dev2, 20);
203 
204 	while (!irq_received && tries--)
205 		cpu_relax();
206 
207 	if (irq_received)
208 		++received;
209 
210 	assert_msg(irq_received, "failed to receive LPI in time, but received %d successfully\n", received);
211 }
212 
213 static bool timer_prep(void)
214 {
215 	void *gic_isenabler;
216 
217 	gic_enable_defaults();
218 	install_irq_handler(EL1H_IRQ, gic_irq_handler);
219 	local_irq_enable();
220 
221 	switch (gic_version()) {
222 	case 2:
223 		gic_isenabler = gicv2_dist_base() + GICD_ISENABLER;
224 		break;
225 	case 3:
226 		gic_isenabler = gicv3_sgi_base() + GICR_ISENABLER0;
227 		break;
228 	default:
229 		assert_msg(0, "Unreachable");
230 	}
231 
232 	writel(1 << PPI(TIMER_VTIMER_IRQ), gic_isenabler);
233 	write_sysreg(ARCH_TIMER_CTL_IMASK | ARCH_TIMER_CTL_ENABLE, cntv_ctl_el0);
234 	isb();
235 
236 	gic_prep_common();
237 	return true;
238 }
239 
240 static void timer_exec(void)
241 {
242 	u64 before_timer;
243 	u64 timer_10ms;
244 	unsigned tries = 1 << 28;
245 	static int received = 0;
246 
247 	irq_received = false;
248 
249 	before_timer = read_sysreg(cntvct_el0);
250 	timer_10ms = cntfrq / 100;
251 	write_sysreg(before_timer + timer_10ms, cntv_cval_el0);
252 	write_sysreg(ARCH_TIMER_CTL_ENABLE, cntv_ctl_el0);
253 	isb();
254 
255 	while (!irq_received && tries--)
256 		cpu_relax();
257 
258 	if (irq_received)
259 		++received;
260 
261 	assert_msg(irq_received, "failed to receive PPI in time, but received %d successfully\n", received);
262 }
263 
264 static void timer_post(uint64_t ntimes, uint64_t *total_ticks)
265 {
266 	/*
267 	 * We use a 10msec timer to test the latency of PPI,
268 	 * so we subtract the ticks of 10msec to get the
269 	 * actual latency
270 	 */
271 	*total_ticks -= ntimes * (cntfrq / 100);
272 }
273 
274 static void hvc_exec(void)
275 {
276 	asm volatile("mov w0, #0x4b000000; hvc #0" ::: "w0");
277 }
278 
279 static void *userspace_emulated_addr;
280 
281 static bool mmio_read_user_prep(void)
282 {
283 	/*
284 	 * FIXME: We need an MMIO address that we can safely read to test
285 	 * exits to userspace. Ideally, the test-dev would provide us this
286 	 * address (and one we could write to too), but until it does we
287 	 * use a virtio-mmio transport address. FIXME2: We should be getting
288 	 * this address (and the future test-dev address) from the devicetree,
289 	 * but so far we lazily hardcode it.
290 	 */
291 	userspace_emulated_addr = (void *)ioremap(mmio_addr, sizeof(u32));
292 	return true;
293 }
294 
295 static void mmio_read_user_exec(void)
296 {
297 	readl(userspace_emulated_addr);
298 }
299 
300 static void mmio_read_vgic_exec(void)
301 {
302 	readl(vgic_dist_base + GICD_IIDR);
303 }
304 
/*
 * Measured body of the "eoi" test: one EOI write using the spurious
 * interrupt ID, for which writes to EOI are ignored.
 */
static void eoi_exec(void)
{
	/*
	 * Go through the version-specific writer selected in test_init()
	 * to avoid measuring assert(..) in gic_write_eoir.
	 */
	write_eoir(1023);
}
312 
313 struct exit_test {
314 	const char *name;
315 	bool (*prep)(void);
316 	void (*exec)(void);
317 	void (*post)(uint64_t ntimes, uint64_t *total_ticks);
318 	u32 times;
319 	bool run;
320 };
321 
322 static struct exit_test tests[] = {
323 	{"hvc",			NULL,			hvc_exec,		NULL,		65536,		true},
324 	{"mmio_read_user",	mmio_read_user_prep,	mmio_read_user_exec,	NULL,		65536,		true},
325 	{"mmio_read_vgic",	NULL,			mmio_read_vgic_exec,	NULL,		65536,		true},
326 	{"eoi",			NULL,			eoi_exec,		NULL,		65536,		true},
327 	{"ipi",			ipi_prep,		ipi_exec,		NULL,		65536,		true},
328 	{"ipi_hw",		ipi_hw_prep,		ipi_exec,		NULL,		65536,		true},
329 	{"lpi",			lpi_prep,		lpi_exec,		NULL,		65536,		true},
330 	{"timer_10ms",		timer_prep,		timer_exec,		timer_post,	256,		true},
331 };
332 
333 struct ns_time {
334 	uint64_t ns;
335 	uint64_t ns_frac;
336 };
337 
338 #define PS_PER_SEC (1000 * 1000 * 1000 * 1000UL)
339 static void ticks_to_ns_time(uint64_t ticks, struct ns_time *ns_time)
340 {
341 	uint64_t ps_per_tick = PS_PER_SEC / cntfrq + !!(PS_PER_SEC % cntfrq);
342 	uint64_t ps;
343 
344 	ps = ticks * ps_per_tick;
345 	ns_time->ns = ps / 1000;
346 	ns_time->ns_frac = (ps % 1000) / 100;
347 }
348 
349 static void loop_test(struct exit_test *test)
350 {
351 	uint64_t start, end, total_ticks, ntimes = 0;
352 	struct ns_time avg_ns, total_ns = {};
353 
354 	total_ticks = 0;
355 	if (test->prep) {
356 		if(!test->prep()) {
357 			printf("%s test skipped\n", test->name);
358 			return;
359 		}
360 	}
361 
362 	while (ntimes < test->times && total_ns.ns < NS_5_SECONDS) {
363 		isb();
364 		start = read_sysreg(cntpct_el0);
365 		test->exec();
366 		isb();
367 		end = read_sysreg(cntpct_el0);
368 
369 		ntimes++;
370 		total_ticks += (end - start);
371 		ticks_to_ns_time(total_ticks, &total_ns);
372 	}
373 
374 	if (test->post) {
375 		test->post(ntimes, &total_ticks);
376 		ticks_to_ns_time(total_ticks, &total_ns);
377 	}
378 
379 	avg_ns.ns = total_ns.ns / ntimes;
380 	avg_ns.ns_frac = total_ns.ns_frac / ntimes;
381 
382 	printf("%-30s%15" PRId64 ".%-15" PRId64 "%15" PRId64 ".%-15" PRId64 "\n",
383 		test->name, total_ns.ns, total_ns.ns_frac, avg_ns.ns, avg_ns.ns_frac);
384 }
385 
386 static void parse_args(int argc, char **argv)
387 {
388 	int i, len;
389 	long val;
390 
391 	for (i = 1; i < argc; ++i) {
392 		len = parse_keyval(argv[i], &val);
393 		if (len == -1)
394 			continue;
395 
396 		if (strncmp(argv[i], "mmio-addr", len) == 0) {
397 			mmio_addr = val;
398 			report_info("found mmio_addr=0x%lx", mmio_addr);
399 		}
400 	}
401 }
402 
403 int main(int argc, char **argv)
404 {
405 	int i;
406 
407 	parse_args(argc, argv);
408 
409 	if (!test_init())
410 		return 1;
411 
412 	printf("\n%-30s%18s%13s%18s%13s\n", "name", "total ns", "", "avg ns", "");
413 	for (i = 0 ; i < 92; ++i)
414 		printf("%c", '-');
415 	printf("\n");
416 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
417 		if (!tests[i].run)
418 			continue;
419 		assert(tests[i].name && tests[i].exec);
420 		loop_test(&tests[i]);
421 	}
422 
423 	return 0;
424 }
425