xref: /kvm-unit-tests/arm/micro-bench.c (revision 1d0f08f40d53daa39566842ec46a112db5f7e524)
1 /*
2  * Measure the cost of micro level operations.
3  *
4  * This test provides support for quantifying the cost of micro level
5  * operations. To improve precision in the measurements, one should
6  * consider pinning each VCPU to a specific physical CPU (PCPU) and to
7  * ensure no other task could run on that PCPU to skew the results.
8  * This can be achieved by enabling QMP server in the QEMU command in
9  * unittest.cfg for micro-bench, allowing a client program to get the
10  * thread_id for each VCPU thread from the QMP server. Based on that
11  * information, the client program can then pin the corresponding VCPUs to
12  * dedicated PCPUs and isolate interrupts and tasks from those PCPUs.
13  *
14  * Copyright Columbia University
15  * Author: Shih-Wei Li <shihwei@cs.columbia.edu>
16  * Author: Christoffer Dall <cdall@cs.columbia.edu>
17  * Author: Andrew Jones <drjones@redhat.com>
18  *
19  * This work is licensed under the terms of the GNU LGPL, version 2.
20  */
21 #include <libcflat.h>
22 #include <util.h>
23 #include <asm/gic.h>
24 #include <asm/gic-v3-its.h>
25 #include <asm/timer.h>
26 
27 #define NS_5_SECONDS		(5 * 1000 * 1000 * 1000UL)
28 #define QEMU_MMIO_ADDR		0x0a000008
29 
30 static u32 cntfrq;
31 
32 static volatile bool irq_ready, irq_received;
33 static int nr_ipi_received;
34 static unsigned long mmio_addr = QEMU_MMIO_ADDR;
35 
36 static void *vgic_dist_base;
37 static void (*write_eoir)(u32 irqstat);
38 
39 static void gic_irq_handler(struct pt_regs *regs)
40 {
41 	u32 irqstat = gic_read_iar();
42 	irq_ready = false;
43 	irq_received = true;
44 	gic_write_eoir(irqstat);
45 
46 	if (irqstat == TIMER_VTIMER_IRQ) {
47 		write_sysreg((ARCH_TIMER_CTL_IMASK | ARCH_TIMER_CTL_ENABLE),
48 			     cntv_ctl_el0);
49 		isb();
50 	}
51 	irq_ready = true;
52 }
53 
54 static void gic_secondary_entry(void *data)
55 {
56 	install_irq_handler(EL1H_IRQ, gic_irq_handler);
57 	gic_enable_defaults();
58 	local_irq_enable();
59 	irq_ready = true;
60 	while (true)
61 		cpu_relax();
62 }
63 
64 static bool test_init(void)
65 {
66 	int v = gic_init();
67 
68 	if (!v) {
69 		printf("No supported gic present, skipping tests...\n");
70 		return false;
71 	}
72 
73 	if (nr_cpus < 2) {
74 		printf("At least two cpus required, skipping tests...\n");
75 		return false;
76 	}
77 
78 	switch (v) {
79 	case 2:
80 		vgic_dist_base = gicv2_dist_base();
81 		write_eoir = gicv2_write_eoir;
82 		break;
83 	case 3:
84 		vgic_dist_base = gicv3_dist_base();
85 		write_eoir = gicv3_write_eoir;
86 		break;
87 	}
88 
89 	irq_ready = false;
90 	gic_enable_defaults();
91 	on_cpu_async(1, gic_secondary_entry, NULL);
92 
93 	cntfrq = get_cntfrq();
94 	printf("Timer Frequency %d Hz (Output in microseconds)\n", cntfrq);
95 
96 	return true;
97 }
98 
99 static void gic_prep_common(void)
100 {
101 	unsigned tries = 1 << 28;
102 
103 	while (!irq_ready && tries--)
104 		cpu_relax();
105 	assert(irq_ready);
106 }
107 
108 static bool ipi_prep(void)
109 {
110 	u32 val;
111 
112 	val = readl(vgic_dist_base + GICD_CTLR);
113 	if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) {
114 		/* nASSGIreq can be changed only when GICD is disabled */
115 		val &= ~GICD_CTLR_ENABLE_G1A;
116 		val &= ~GICD_CTLR_nASSGIreq;
117 		writel(val, vgic_dist_base + GICD_CTLR);
118 		gicv3_dist_wait_for_rwp();
119 
120 		val |= GICD_CTLR_ENABLE_G1A;
121 		writel(val, vgic_dist_base + GICD_CTLR);
122 		gicv3_dist_wait_for_rwp();
123 	}
124 
125 	nr_ipi_received = 0;
126 	gic_prep_common();
127 	return true;
128 }
129 
130 static bool ipi_hw_prep(void)
131 {
132 	u32 val;
133 
134 	val = readl(vgic_dist_base + GICD_CTLR);
135 	if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) {
136 		/* nASSGIreq can be changed only when GICD is disabled */
137 		val &= ~GICD_CTLR_ENABLE_G1A;
138 		val |= GICD_CTLR_nASSGIreq;
139 		writel(val, vgic_dist_base + GICD_CTLR);
140 		gicv3_dist_wait_for_rwp();
141 
142 		val |= GICD_CTLR_ENABLE_G1A;
143 		writel(val, vgic_dist_base + GICD_CTLR);
144 		gicv3_dist_wait_for_rwp();
145 	} else {
146 		return false;
147 	}
148 
149 	nr_ipi_received = 0;
150 	gic_prep_common();
151 	return true;
152 }
153 
154 static void ipi_exec(void)
155 {
156 	unsigned tries = 1 << 28;
157 
158 	irq_received = false;
159 
160 	gic_ipi_send_single(1, 1);
161 
162 	while (!irq_received && tries--)
163 		cpu_relax();
164 
165 	if (irq_received)
166 		++nr_ipi_received;
167 
168 	assert_msg(irq_received, "failed to receive IPI in time, but received %d successfully\n", nr_ipi_received);
169 }
170 
171 static bool lpi_prep(void)
172 {
173 	struct its_collection *col1;
174 	struct its_device *dev2;
175 
176 	if (!gicv3_its_base())
177 		return false;
178 
179 	its_enable_defaults();
180 	dev2 = its_create_device(2 /* dev id */, 8 /* nb_ites */);
181 	col1 = its_create_collection(1 /* col id */, 1 /* target PE */);
182 	gicv3_lpi_set_config(8199, LPI_PROP_DEFAULT);
183 
184 	its_send_mapd_nv(dev2, true);
185 	its_send_mapc_nv(col1, true);
186 	its_send_invall_nv(col1);
187 	its_send_mapti_nv(dev2, 8199 /* lpi id */, 20 /* event id */, col1);
188 
189 	gic_prep_common();
190 	return true;
191 }
192 
193 static void lpi_exec(void)
194 {
195 	struct its_device *dev2;
196 	unsigned tries = 1 << 28;
197 	static int received = 0;
198 
199 	irq_received = false;
200 
201 	dev2 = its_get_device(2);
202 	its_send_int_nv(dev2, 20);
203 
204 	while (!irq_received && tries--)
205 		cpu_relax();
206 
207 	if (irq_received)
208 		++received;
209 
210 	assert_msg(irq_received, "failed to receive LPI in time, but received %d successfully\n", received);
211 }
212 
213 static bool timer_prep(void)
214 {
215 	gic_enable_defaults();
216 	install_irq_handler(EL1H_IRQ, gic_irq_handler);
217 	local_irq_enable();
218 
219 	gic_enable_irq(TIMER_VTIMER_IRQ);
220 	write_sysreg(ARCH_TIMER_CTL_IMASK | ARCH_TIMER_CTL_ENABLE, cntv_ctl_el0);
221 	isb();
222 
223 	gic_prep_common();
224 	return true;
225 }
226 
227 static void timer_exec(void)
228 {
229 	u64 before_timer;
230 	u64 timer_10ms;
231 	unsigned tries = 1 << 28;
232 	static int received = 0;
233 
234 	irq_received = false;
235 
236 	before_timer = read_sysreg(cntvct_el0);
237 	timer_10ms = cntfrq / 100;
238 	write_sysreg(before_timer + timer_10ms, cntv_cval_el0);
239 	write_sysreg(ARCH_TIMER_CTL_ENABLE, cntv_ctl_el0);
240 	isb();
241 
242 	while (!irq_received && tries--)
243 		cpu_relax();
244 
245 	if (irq_received)
246 		++received;
247 
248 	assert_msg(irq_received, "failed to receive PPI in time, but received %d successfully\n", received);
249 }
250 
251 static void timer_post(uint64_t ntimes, uint64_t *total_ticks)
252 {
253 	/*
254 	 * We use a 10msec timer to test the latency of PPI,
255 	 * so we subtract the ticks of 10msec to get the
256 	 * actual latency
257 	 */
258 	*total_ticks -= ntimes * (cntfrq / 100);
259 }
260 
261 static void hvc_exec(void)
262 {
263 	asm volatile("mov w0, #0x4b000000; hvc #0" ::: "w0");
264 }
265 
266 static void *userspace_emulated_addr;
267 
268 static bool mmio_read_user_prep(void)
269 {
270 	/*
271 	 * FIXME: We need an MMIO address that we can safely read to test
272 	 * exits to userspace. Ideally, the test-dev would provide us this
273 	 * address (and one we could write to too), but until it does we
274 	 * use a virtio-mmio transport address. FIXME2: We should be getting
275 	 * this address (and the future test-dev address) from the devicetree,
276 	 * but so far we lazily hardcode it.
277 	 */
278 	userspace_emulated_addr = (void *)ioremap(mmio_addr, sizeof(u32));
279 	return true;
280 }
281 
282 static void mmio_read_user_exec(void)
283 {
284 	readl(userspace_emulated_addr);
285 }
286 
287 static void mmio_read_vgic_exec(void)
288 {
289 	readl(vgic_dist_base + GICD_IIDR);
290 }
291 
/*
 * One iteration of the "eoi" test: write interrupt ID 1023 to EOI using
 * the version-specific writer selected in test_init().
 */
static void eoi_exec(void)
{
	/* writes to EOI are ignored */
	const int spurious_intid = 1023;

	/*
	 * Go through write_eoir directly so the assert(..) in
	 * gic_write_eoir is not part of the measurement.
	 */
	write_eoir(spurious_intid);
}
299 
300 struct exit_test {
301 	const char *name;
302 	bool (*prep)(void);
303 	void (*exec)(void);
304 	void (*post)(uint64_t ntimes, uint64_t *total_ticks);
305 	u32 times;
306 	bool run;
307 };
308 
309 static struct exit_test tests[] = {
310 	{"hvc",			NULL,			hvc_exec,		NULL,		65536,		true},
311 	{"mmio_read_user",	mmio_read_user_prep,	mmio_read_user_exec,	NULL,		65536,		true},
312 	{"mmio_read_vgic",	NULL,			mmio_read_vgic_exec,	NULL,		65536,		true},
313 	{"eoi",			NULL,			eoi_exec,		NULL,		65536,		true},
314 	{"ipi",			ipi_prep,		ipi_exec,		NULL,		65536,		true},
315 	{"ipi_hw",		ipi_hw_prep,		ipi_exec,		NULL,		65536,		true},
316 	{"lpi",			lpi_prep,		lpi_exec,		NULL,		65536,		true},
317 	{"timer_10ms",		timer_prep,		timer_exec,		timer_post,	256,		true},
318 };
319 
320 struct ns_time {
321 	uint64_t ns;
322 	uint64_t ns_frac;
323 };
324 
325 #define PS_PER_SEC (1000 * 1000 * 1000 * 1000UL)
326 static void ticks_to_ns_time(uint64_t ticks, struct ns_time *ns_time)
327 {
328 	uint64_t ps_per_tick = PS_PER_SEC / cntfrq + !!(PS_PER_SEC % cntfrq);
329 	uint64_t ps;
330 
331 	ps = ticks * ps_per_tick;
332 	ns_time->ns = ps / 1000;
333 	ns_time->ns_frac = (ps % 1000) / 100;
334 }
335 
336 static void loop_test(struct exit_test *test)
337 {
338 	uint64_t start, end, total_ticks, ntimes = 0;
339 	struct ns_time avg_ns, total_ns = {};
340 
341 	total_ticks = 0;
342 	if (test->prep) {
343 		if(!test->prep()) {
344 			printf("%s test skipped\n", test->name);
345 			return;
346 		}
347 	}
348 
349 	while (ntimes < test->times && total_ns.ns < NS_5_SECONDS) {
350 		isb();
351 		start = read_sysreg(cntpct_el0);
352 		test->exec();
353 		isb();
354 		end = read_sysreg(cntpct_el0);
355 
356 		ntimes++;
357 		total_ticks += (end - start);
358 		ticks_to_ns_time(total_ticks, &total_ns);
359 	}
360 
361 	if (test->post) {
362 		test->post(ntimes, &total_ticks);
363 		ticks_to_ns_time(total_ticks, &total_ns);
364 	}
365 
366 	avg_ns.ns = total_ns.ns / ntimes;
367 	avg_ns.ns_frac = total_ns.ns_frac / ntimes;
368 
369 	printf("%-30s%15" PRId64 ".%-15" PRId64 "%15" PRId64 ".%-15" PRId64 "\n",
370 		test->name, total_ns.ns, total_ns.ns_frac, avg_ns.ns, avg_ns.ns_frac);
371 }
372 
373 static void parse_args(int argc, char **argv)
374 {
375 	int i, len;
376 	long val;
377 
378 	for (i = 1; i < argc; ++i) {
379 		len = parse_keyval(argv[i], &val);
380 		if (len == -1)
381 			continue;
382 
383 		if (strncmp(argv[i], "mmio-addr", len) == 0) {
384 			mmio_addr = val;
385 			report_info("found mmio_addr=0x%lx", mmio_addr);
386 		}
387 	}
388 }
389 
390 int main(int argc, char **argv)
391 {
392 	int i;
393 
394 	parse_args(argc, argv);
395 
396 	if (!test_init())
397 		return 1;
398 
399 	printf("\n%-30s%18s%13s%18s%13s\n", "name", "total ns", "", "avg ns", "");
400 	for (i = 0 ; i < 92; ++i)
401 		printf("%c", '-');
402 	printf("\n");
403 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
404 		if (!tests[i].run)
405 			continue;
406 		assert(tests[i].name && tests[i].exec);
407 		loop_test(&tests[i]);
408 	}
409 
410 	return 0;
411 }
412