xref: /kvm-unit-tests/arm/micro-bench.c (revision d0891021d5ad244c99290b4515152a1f997a9404)
/*
 * Measure the cost of micro level operations.
 *
 * This test provides support for quantifying the cost of micro level
 * operations. To improve precision in the measurements, one should
 * consider pinning each VCPU to a specific physical CPU (PCPU) and
 * ensuring that no other task can run on that PCPU and skew the results.
 * This can be achieved by enabling the QMP server in the QEMU command in
 * unittest.cfg for micro-bench, allowing a client program to get the
 * thread_id for each VCPU thread from the QMP server. Based on that
 * information, the client program can then pin the corresponding VCPUs to
 * dedicated PCPUs and isolate interrupts and tasks from those PCPUs.
 *
 * Copyright Columbia University
 * Author: Shih-Wei Li <shihwei@cs.columbia.edu>
 * Author: Christoffer Dall <cdall@cs.columbia.edu>
 * Author: Andrew Jones <drjones@redhat.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2.
 */
21 #include <libcflat.h>
22 #include <util.h>
23 #include <asm/gic.h>
24 #include <asm/gic-v3-its.h>
25 #include <asm/timer.h>
26 
27 #define QEMU_MMIO_ADDR		0x0a000008
28 
29 static u32 cntfrq;
30 
31 static volatile bool irq_ready, irq_received;
32 static int nr_ipi_received;
33 static unsigned long mmio_addr = QEMU_MMIO_ADDR;
34 
35 static void *vgic_dist_base;
36 static void (*write_eoir)(u32 irqstat);
37 
gic_irq_handler(struct pt_regs * regs)38 static void gic_irq_handler(struct pt_regs *regs)
39 {
40 	u32 irqstat = gic_read_iar();
41 	irq_ready = false;
42 	irq_received = true;
43 	gic_write_eoir(irqstat);
44 
45 	if (irqstat == TIMER_VTIMER_IRQ) {
46 		write_sysreg((ARCH_TIMER_CTL_IMASK | ARCH_TIMER_CTL_ENABLE),
47 			     cntv_ctl_el0);
48 		isb();
49 	}
50 	irq_ready = true;
51 }
52 
gic_secondary_entry(void * data)53 static void gic_secondary_entry(void *data)
54 {
55 	install_irq_handler(EL1H_IRQ, gic_irq_handler);
56 	gic_enable_defaults();
57 	local_irq_enable();
58 	irq_ready = true;
59 	while (true)
60 		cpu_relax();
61 }
62 
test_init(void)63 static bool test_init(void)
64 {
65 	int v = gic_init();
66 
67 	if (!v) {
68 		printf("No supported gic present, skipping tests...\n");
69 		return false;
70 	}
71 
72 	if (nr_cpus < 2) {
73 		printf("At least two cpus required, skipping tests...\n");
74 		return false;
75 	}
76 
77 	switch (v) {
78 	case 2:
79 		vgic_dist_base = gicv2_dist_base();
80 		write_eoir = gicv2_write_eoir;
81 		break;
82 	case 3:
83 		vgic_dist_base = gicv3_dist_base();
84 		write_eoir = gicv3_write_eoir;
85 		break;
86 	}
87 
88 	irq_ready = false;
89 	gic_enable_defaults();
90 	on_cpu_async(1, gic_secondary_entry, NULL);
91 
92 	cntfrq = get_cntfrq();
93 	printf("Timer Frequency %d Hz (Output in microseconds)\n", cntfrq);
94 
95 	return true;
96 }
97 
gic_prep_common(void)98 static void gic_prep_common(void)
99 {
100 	unsigned tries = 1 << 28;
101 
102 	while (!irq_ready && tries--)
103 		cpu_relax();
104 	assert(irq_ready);
105 }
106 
ipi_prep(void)107 static bool ipi_prep(void)
108 {
109 	u32 val;
110 
111 	val = readl(vgic_dist_base + GICD_CTLR);
112 	if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) {
113 		/* nASSGIreq can be changed only when GICD is disabled */
114 		val &= ~GICD_CTLR_ENABLE_G1A;
115 		val &= ~GICD_CTLR_nASSGIreq;
116 		writel(val, vgic_dist_base + GICD_CTLR);
117 		gicv3_dist_wait_for_rwp();
118 
119 		val |= GICD_CTLR_ENABLE_G1A;
120 		writel(val, vgic_dist_base + GICD_CTLR);
121 		gicv3_dist_wait_for_rwp();
122 	}
123 
124 	nr_ipi_received = 0;
125 	gic_prep_common();
126 	return true;
127 }
128 
ipi_hw_prep(void)129 static bool ipi_hw_prep(void)
130 {
131 	u32 val;
132 
133 	val = readl(vgic_dist_base + GICD_CTLR);
134 	if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) {
135 		/* nASSGIreq can be changed only when GICD is disabled */
136 		val &= ~GICD_CTLR_ENABLE_G1A;
137 		val |= GICD_CTLR_nASSGIreq;
138 		writel(val, vgic_dist_base + GICD_CTLR);
139 		gicv3_dist_wait_for_rwp();
140 
141 		val |= GICD_CTLR_ENABLE_G1A;
142 		writel(val, vgic_dist_base + GICD_CTLR);
143 		gicv3_dist_wait_for_rwp();
144 	} else {
145 		return false;
146 	}
147 
148 	nr_ipi_received = 0;
149 	gic_prep_common();
150 	return true;
151 }
152 
ipi_exec(void)153 static void ipi_exec(void)
154 {
155 	unsigned tries = 1 << 28;
156 
157 	irq_received = false;
158 
159 	gic_ipi_send_single(1, 1);
160 
161 	while (!irq_received && tries--)
162 		cpu_relax();
163 
164 	if (irq_received)
165 		++nr_ipi_received;
166 
167 	assert_msg(irq_received, "failed to receive IPI in time, but received %d successfully\n", nr_ipi_received);
168 }
169 
lpi_prep(void)170 static bool lpi_prep(void)
171 {
172 	struct its_collection *col1;
173 	struct its_device *dev2;
174 
175 	if (!gicv3_its_base())
176 		return false;
177 
178 	its_enable_defaults();
179 	dev2 = its_create_device(2 /* dev id */, 8 /* nb_ites */);
180 	col1 = its_create_collection(1 /* col id */, 1 /* target PE */);
181 	gicv3_lpi_set_config(8199, LPI_PROP_DEFAULT);
182 
183 	its_send_mapd_nv(dev2, true);
184 	its_send_mapc_nv(col1, true);
185 	its_send_invall_nv(col1);
186 	its_send_mapti_nv(dev2, 8199 /* lpi id */, 20 /* event id */, col1);
187 
188 	gic_prep_common();
189 	return true;
190 }
191 
lpi_exec(void)192 static void lpi_exec(void)
193 {
194 	struct its_device *dev2;
195 	unsigned tries = 1 << 28;
196 	static int received = 0;
197 
198 	irq_received = false;
199 
200 	dev2 = its_get_device(2);
201 	its_send_int_nv(dev2, 20);
202 
203 	while (!irq_received && tries--)
204 		cpu_relax();
205 
206 	if (irq_received)
207 		++received;
208 
209 	assert_msg(irq_received, "failed to receive LPI in time, but received %d successfully\n", received);
210 }
211 
timer_prep(void)212 static bool timer_prep(void)
213 {
214 	gic_enable_defaults();
215 	install_irq_handler(EL1H_IRQ, gic_irq_handler);
216 	local_irq_enable();
217 
218 	gic_enable_irq(TIMER_VTIMER_IRQ);
219 	write_sysreg(ARCH_TIMER_CTL_IMASK | ARCH_TIMER_CTL_ENABLE, cntv_ctl_el0);
220 	isb();
221 
222 	gic_prep_common();
223 	return true;
224 }
225 
timer_exec(void)226 static void timer_exec(void)
227 {
228 	u64 before_timer;
229 	u64 timer_10ms;
230 	unsigned tries = 1 << 28;
231 	static int received = 0;
232 
233 	irq_received = false;
234 
235 	before_timer = read_sysreg(cntvct_el0);
236 	timer_10ms = cntfrq / 100;
237 	write_sysreg(before_timer + timer_10ms, cntv_cval_el0);
238 	write_sysreg(ARCH_TIMER_CTL_ENABLE, cntv_ctl_el0);
239 	isb();
240 
241 	while (!irq_received && tries--)
242 		cpu_relax();
243 
244 	if (irq_received)
245 		++received;
246 
247 	assert_msg(irq_received, "failed to receive PPI in time, but received %d successfully\n", received);
248 }
249 
timer_post(uint64_t ntimes,uint64_t * total_ticks)250 static void timer_post(uint64_t ntimes, uint64_t *total_ticks)
251 {
252 	/*
253 	 * We use a 10msec timer to test the latency of PPI,
254 	 * so we subtract the ticks of 10msec to get the
255 	 * actual latency
256 	 */
257 	*total_ticks -= ntimes * (cntfrq / 100);
258 }
259 
hvc_exec(void)260 static void hvc_exec(void)
261 {
262 	asm volatile("mov w0, #0x4b000000; hvc #0" ::: "w0");
263 }
264 
265 static void *userspace_emulated_addr;
266 
mmio_read_user_prep(void)267 static bool mmio_read_user_prep(void)
268 {
269 	/*
270 	 * FIXME: We need an MMIO address that we can safely read to test
271 	 * exits to userspace. Ideally, the test-dev would provide us this
272 	 * address (and one we could write to too), but until it does we
273 	 * use a virtio-mmio transport address. FIXME2: We should be getting
274 	 * this address (and the future test-dev address) from the devicetree,
275 	 * but so far we lazily hardcode it.
276 	 */
277 	userspace_emulated_addr = (void *)ioremap(mmio_addr, sizeof(u32));
278 	return true;
279 }
280 
mmio_read_user_exec(void)281 static void mmio_read_user_exec(void)
282 {
283 	readl(userspace_emulated_addr);
284 }
285 
mmio_read_vgic_exec(void)286 static void mmio_read_vgic_exec(void)
287 {
288 	readl(vgic_dist_base + GICD_IIDR);
289 }
290 
eoi_exec(void)291 static void eoi_exec(void)
292 {
293 	int spurious_id = 1023; /* writes to EOI are ignored */
294 
295 	/* Avoid measuring assert(..) in gic_write_eoir */
296 	write_eoir(spurious_id);
297 }
298 
299 struct exit_test {
300 	const char *name;
301 	bool (*prep)(void);
302 	void (*exec)(void);
303 	void (*post)(uint64_t ntimes, uint64_t *total_ticks);
304 	u32 times;
305 	bool run;
306 };
307 
308 static struct exit_test tests[] = {
309 	{"hvc",			NULL,			hvc_exec,		NULL,		65536,		true},
310 	{"mmio_read_user",	mmio_read_user_prep,	mmio_read_user_exec,	NULL,		65536,		true},
311 	{"mmio_read_vgic",	NULL,			mmio_read_vgic_exec,	NULL,		65536,		true},
312 	{"eoi",			NULL,			eoi_exec,		NULL,		65536,		true},
313 	{"ipi",			ipi_prep,		ipi_exec,		NULL,		65536,		true},
314 	{"ipi_hw",		ipi_hw_prep,		ipi_exec,		NULL,		65536,		true},
315 	{"lpi",			lpi_prep,		lpi_exec,		NULL,		65536,		true},
316 	{"timer_10ms",		timer_prep,		timer_exec,		timer_post,	256,		true},
317 };
318 
/* A duration split into whole nanoseconds and one fractional digit. */
struct ns_time {
	/* Whole nanoseconds. */
	uint64_t ns;
	/* Tenths of a nanosecond (0..9) as filled in by ticks_to_ns_time(). */
	uint64_t ns_frac;
};
323 
324 #define PS_PER_SEC (1000 * 1000 * 1000 * 1000UL)
ticks_to_ns_time(uint64_t ticks,struct ns_time * ns_time)325 static void ticks_to_ns_time(uint64_t ticks, struct ns_time *ns_time)
326 {
327 	uint64_t ps_per_tick = PS_PER_SEC / cntfrq + !!(PS_PER_SEC % cntfrq);
328 	uint64_t ps;
329 
330 	ps = ticks * ps_per_tick;
331 	ns_time->ns = ps / 1000;
332 	ns_time->ns_frac = (ps % 1000) / 100;
333 }
334 
loop_test(struct exit_test * test)335 static void loop_test(struct exit_test *test)
336 {
337 	uint64_t start, end, total_ticks, ntimes = 0;
338 	struct ns_time avg_ns, total_ns = {};
339 
340 	total_ticks = 0;
341 	if (test->prep) {
342 		if(!test->prep()) {
343 			printf("%s test skipped\n", test->name);
344 			return;
345 		}
346 	}
347 
348 	dsb(ish);
349 	isb();
350 	start = read_sysreg(cntvct_el0);
351 	isb();
352 	while (ntimes < test->times) {
353 		test->exec();
354 
355 		ntimes++;
356 	}
357 	dsb(ish);
358 	isb();
359 	end = read_sysreg(cntvct_el0);
360 
361 	total_ticks = end - start;
362 	ticks_to_ns_time(total_ticks, &total_ns);
363 
364 	if (test->post) {
365 		test->post(ntimes, &total_ticks);
366 		ticks_to_ns_time(total_ticks, &total_ns);
367 	}
368 
369 	avg_ns.ns = total_ns.ns / ntimes;
370 	avg_ns.ns_frac = total_ns.ns_frac / ntimes;
371 
372 	printf("%-30s%15" PRId64 ".%-15" PRId64 "%15" PRId64 ".%-15" PRId64 "\n",
373 		test->name, total_ns.ns, total_ns.ns_frac, avg_ns.ns, avg_ns.ns_frac);
374 }
375 
parse_args(int argc,char ** argv)376 static void parse_args(int argc, char **argv)
377 {
378 	int i, len;
379 	long val;
380 
381 	for (i = 1; i < argc; ++i) {
382 		len = parse_keyval(argv[i], &val);
383 		if (len == -1)
384 			continue;
385 
386 		if (strncmp(argv[i], "mmio-addr", len) == 0) {
387 			mmio_addr = val;
388 			report_info("found mmio_addr=0x%lx", mmio_addr);
389 		}
390 	}
391 }
392 
main(int argc,char ** argv)393 int main(int argc, char **argv)
394 {
395 	int i;
396 
397 	parse_args(argc, argv);
398 
399 	if (!test_init())
400 		return 1;
401 
402 	printf("\n%-30s%18s%13s%18s%13s\n", "name", "total ns", "", "avg ns", "");
403 	for (i = 0 ; i < 92; ++i)
404 		printf("%c", '-');
405 	printf("\n");
406 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
407 		if (!tests[i].run)
408 			continue;
409 		assert(tests[i].name && tests[i].exec);
410 		loop_test(&tests[i]);
411 	}
412 
413 	return 0;
414 }
415