xref: /kvm-unit-tests/arm/micro-bench.c (revision be704aff683c54fc108deaafacc7cb89ad0648d9)
1 /*
2  * Measure the cost of micro level operations.
3  *
4  * This test provides support for quantifying the cost of micro level
5  * operations. To improve precision in the measurements, one should
6  * consider pinning each VCPU to a specific physical CPU (PCPU) and to
7  * ensure no other task could run on that PCPU to skew the results.
8  * This can be achieved by enabling QMP server in the QEMU command in
9  * unittest.cfg for micro-bench, allowing a client program to get the
10  * thread_id for each VCPU thread from the QMP server. Based on that
11  * information, the client program can then pin the corresponding VCPUs to
12  * dedicated PCPUs and isolate interrupts and tasks from those PCPUs.
13  *
14  * Copyright Columbia University
15  * Author: Shih-Wei Li <shihwei@cs.columbia.edu>
16  * Author: Christoffer Dall <cdall@cs.columbia.edu>
17  * Author: Andrew Jones <drjones@redhat.com>
18  *
19  * This work is licensed under the terms of the GNU LGPL, version 2.
20  */
21 #include <libcflat.h>
22 #include <asm/gic.h>
23 #include <asm/gic-v3-its.h>
24 #include <asm/timer.h>
25 
26 #define NS_5_SECONDS (5 * 1000 * 1000 * 1000UL)
27 
28 static u32 cntfrq;
29 
30 static volatile bool irq_ready, irq_received;
31 static int nr_ipi_received;
32 
33 static void *vgic_dist_base;
34 static void (*write_eoir)(u32 irqstat);
35 
36 static void gic_irq_handler(struct pt_regs *regs)
37 {
38 	u32 irqstat = gic_read_iar();
39 	irq_ready = false;
40 	irq_received = true;
41 	gic_write_eoir(irqstat);
42 
43 	if (irqstat == PPI(TIMER_VTIMER_IRQ)) {
44 		write_sysreg((ARCH_TIMER_CTL_IMASK | ARCH_TIMER_CTL_ENABLE),
45 			     cntv_ctl_el0);
46 		isb();
47 	}
48 	irq_ready = true;
49 }
50 
51 static void gic_secondary_entry(void *data)
52 {
53 	install_irq_handler(EL1H_IRQ, gic_irq_handler);
54 	gic_enable_defaults();
55 	local_irq_enable();
56 	irq_ready = true;
57 	while (true)
58 		cpu_relax();
59 }
60 
61 static bool test_init(void)
62 {
63 	int v = gic_init();
64 
65 	if (!v) {
66 		printf("No supported gic present, skipping tests...\n");
67 		return false;
68 	}
69 
70 	if (nr_cpus < 2) {
71 		printf("At least two cpus required, skipping tests...\n");
72 		return false;
73 	}
74 
75 	switch (v) {
76 	case 2:
77 		vgic_dist_base = gicv2_dist_base();
78 		write_eoir = gicv2_write_eoir;
79 		break;
80 	case 3:
81 		vgic_dist_base = gicv3_dist_base();
82 		write_eoir = gicv3_write_eoir;
83 		break;
84 	}
85 
86 	irq_ready = false;
87 	gic_enable_defaults();
88 	on_cpu_async(1, gic_secondary_entry, NULL);
89 
90 	cntfrq = get_cntfrq();
91 	printf("Timer Frequency %d Hz (Output in microseconds)\n", cntfrq);
92 
93 	return true;
94 }
95 
96 static void gic_prep_common(void)
97 {
98 	unsigned tries = 1 << 28;
99 
100 	while (!irq_ready && tries--)
101 		cpu_relax();
102 	assert(irq_ready);
103 }
104 
105 static bool ipi_prep(void)
106 {
107 	u32 val;
108 
109 	val = readl(vgic_dist_base + GICD_CTLR);
110 	if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) {
111 		/* nASSGIreq can be changed only when GICD is disabled */
112 		val &= ~GICD_CTLR_ENABLE_G1A;
113 		val &= ~GICD_CTLR_nASSGIreq;
114 		writel(val, vgic_dist_base + GICD_CTLR);
115 		gicv3_dist_wait_for_rwp();
116 
117 		val |= GICD_CTLR_ENABLE_G1A;
118 		writel(val, vgic_dist_base + GICD_CTLR);
119 		gicv3_dist_wait_for_rwp();
120 	}
121 
122 	nr_ipi_received = 0;
123 	gic_prep_common();
124 	return true;
125 }
126 
127 static bool ipi_hw_prep(void)
128 {
129 	u32 val;
130 
131 	val = readl(vgic_dist_base + GICD_CTLR);
132 	if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) {
133 		/* nASSGIreq can be changed only when GICD is disabled */
134 		val &= ~GICD_CTLR_ENABLE_G1A;
135 		val |= GICD_CTLR_nASSGIreq;
136 		writel(val, vgic_dist_base + GICD_CTLR);
137 		gicv3_dist_wait_for_rwp();
138 
139 		val |= GICD_CTLR_ENABLE_G1A;
140 		writel(val, vgic_dist_base + GICD_CTLR);
141 		gicv3_dist_wait_for_rwp();
142 	} else {
143 		return false;
144 	}
145 
146 	nr_ipi_received = 0;
147 	gic_prep_common();
148 	return true;
149 }
150 
151 static void ipi_exec(void)
152 {
153 	unsigned tries = 1 << 28;
154 
155 	irq_received = false;
156 
157 	gic_ipi_send_single(1, 1);
158 
159 	while (!irq_received && tries--)
160 		cpu_relax();
161 
162 	if (irq_received)
163 		++nr_ipi_received;
164 
165 	assert_msg(irq_received, "failed to receive IPI in time, but received %d successfully\n", nr_ipi_received);
166 }
167 
168 static bool lpi_prep(void)
169 {
170 	struct its_collection *col1;
171 	struct its_device *dev2;
172 
173 	if (!gicv3_its_base())
174 		return false;
175 
176 	its_enable_defaults();
177 	dev2 = its_create_device(2 /* dev id */, 8 /* nb_ites */);
178 	col1 = its_create_collection(1 /* col id */, 1 /* target PE */);
179 	gicv3_lpi_set_config(8199, LPI_PROP_DEFAULT);
180 
181 	its_send_mapd_nv(dev2, true);
182 	its_send_mapc_nv(col1, true);
183 	its_send_invall_nv(col1);
184 	its_send_mapti_nv(dev2, 8199 /* lpi id */, 20 /* event id */, col1);
185 
186 	gic_prep_common();
187 	return true;
188 }
189 
190 static void lpi_exec(void)
191 {
192 	struct its_device *dev2;
193 	unsigned tries = 1 << 28;
194 	static int received = 0;
195 
196 	irq_received = false;
197 
198 	dev2 = its_get_device(2);
199 	its_send_int_nv(dev2, 20);
200 
201 	while (!irq_received && tries--)
202 		cpu_relax();
203 
204 	if (irq_received)
205 		++received;
206 
207 	assert_msg(irq_received, "failed to receive LPI in time, but received %d successfully\n", received);
208 }
209 
210 static bool timer_prep(void)
211 {
212 	void *gic_isenabler;
213 
214 	gic_enable_defaults();
215 	install_irq_handler(EL1H_IRQ, gic_irq_handler);
216 	local_irq_enable();
217 
218 	switch (gic_version()) {
219 	case 2:
220 		gic_isenabler = gicv2_dist_base() + GICD_ISENABLER;
221 		break;
222 	case 3:
223 		gic_isenabler = gicv3_sgi_base() + GICR_ISENABLER0;
224 		break;
225 	default:
226 		assert_msg(0, "Unreachable");
227 	}
228 
229 	writel(1 << PPI(TIMER_VTIMER_IRQ), gic_isenabler);
230 	write_sysreg(ARCH_TIMER_CTL_ENABLE, cntv_ctl_el0);
231 	isb();
232 
233 	gic_prep_common();
234 	return true;
235 }
236 
237 static void timer_exec(void)
238 {
239 	u64 before_timer;
240 	u64 timer_10ms;
241 	unsigned tries = 1 << 28;
242 	static int received = 0;
243 
244 	irq_received = false;
245 
246 	before_timer = read_sysreg(cntvct_el0);
247 	timer_10ms = cntfrq / 100;
248 	write_sysreg(before_timer + timer_10ms, cntv_cval_el0);
249 	write_sysreg(ARCH_TIMER_CTL_ENABLE, cntv_ctl_el0);
250 	isb();
251 
252 	while (!irq_received && tries--)
253 		cpu_relax();
254 
255 	if (irq_received)
256 		++received;
257 
258 	assert_msg(irq_received, "failed to receive PPI in time, but received %d successfully\n", received);
259 }
260 
261 static void timer_post(uint64_t ntimes, uint64_t *total_ticks)
262 {
263 	/*
264 	 * We use a 10msec timer to test the latency of PPI,
265 	 * so we substract the ticks of 10msec to get the
266 	 * actual latency
267 	 */
268 	*total_ticks -= ntimes * (cntfrq / 100);
269 }
270 
271 static void hvc_exec(void)
272 {
273 	asm volatile("mov w0, #0x4b000000; hvc #0" ::: "w0");
274 }
275 
/*
 * Measured body of the "mmio_read_user" test: one 32-bit MMIO read that is
 * expected to be emulated in userspace.
 */
static void mmio_read_user_exec(void)
{
	/*
	 * FIXME: The address below is the device-id register of virtio
	 * mmio; reading it forces an exit to userspace. It must be updated
	 * if QEMU's test-dev layout ever changes in a relevant way.
	 */
	void *const virtio_mmio_device_id = (void *)0x0a000008;

	readl(virtio_mmio_device_id);
}
288 
289 static void mmio_read_vgic_exec(void)
290 {
291 	readl(vgic_dist_base + GICD_IIDR);
292 }
293 
/*
 * Measured body of the "eoi" test: a single end-of-interrupt write using
 * the spurious interrupt ID.
 */
static void eoi_exec(void)
{
	/* EOI writes for the spurious ID are ignored */
	const int spurious_id = 1023;

	/*
	 * Go through the raw write_eoir pointer so the assert(..) inside
	 * gic_write_eoir is not part of the measurement.
	 */
	write_eoir(spurious_id);
}
301 
302 struct exit_test {
303 	const char *name;
304 	bool (*prep)(void);
305 	void (*exec)(void);
306 	void (*post)(uint64_t ntimes, uint64_t *total_ticks);
307 	u32 times;
308 	bool run;
309 };
310 
311 static struct exit_test tests[] = {
312 	{"hvc",			NULL,		hvc_exec,		NULL,		65536,		true},
313 	{"mmio_read_user",	NULL,		mmio_read_user_exec,	NULL,		65536,		true},
314 	{"mmio_read_vgic",	NULL,		mmio_read_vgic_exec,	NULL,		65536,		true},
315 	{"eoi",			NULL,		eoi_exec,		NULL,		65536,		true},
316 	{"ipi",			ipi_prep,	ipi_exec,		NULL,		65536,		true},
317 	{"ipi_hw",		ipi_hw_prep,	ipi_exec,		NULL,		65536,		true},
318 	{"lpi",			lpi_prep,	lpi_exec,		NULL,		65536,		true},
319 	{"timer_10ms",		timer_prep,	timer_exec,		timer_post,	256,		true},
320 };
321 
322 struct ns_time {
323 	uint64_t ns;
324 	uint64_t ns_frac;
325 };
326 
327 #define PS_PER_SEC (1000 * 1000 * 1000 * 1000UL)
328 static void ticks_to_ns_time(uint64_t ticks, struct ns_time *ns_time)
329 {
330 	uint64_t ps_per_tick = PS_PER_SEC / cntfrq + !!(PS_PER_SEC % cntfrq);
331 	uint64_t ps;
332 
333 	ps = ticks * ps_per_tick;
334 	ns_time->ns = ps / 1000;
335 	ns_time->ns_frac = (ps % 1000) / 100;
336 }
337 
338 static void loop_test(struct exit_test *test)
339 {
340 	uint64_t start, end, total_ticks, ntimes = 0;
341 	struct ns_time avg_ns, total_ns = {};
342 
343 	total_ticks = 0;
344 	if (test->prep) {
345 		if(!test->prep()) {
346 			printf("%s test skipped\n", test->name);
347 			return;
348 		}
349 	}
350 
351 	while (ntimes < test->times && total_ns.ns < NS_5_SECONDS) {
352 		isb();
353 		start = read_sysreg(cntpct_el0);
354 		test->exec();
355 		isb();
356 		end = read_sysreg(cntpct_el0);
357 
358 		ntimes++;
359 		total_ticks += (end - start);
360 		ticks_to_ns_time(total_ticks, &total_ns);
361 	}
362 
363 	if (test->post) {
364 		test->post(ntimes, &total_ticks);
365 		ticks_to_ns_time(total_ticks, &total_ns);
366 	}
367 
368 	avg_ns.ns = total_ns.ns / ntimes;
369 	avg_ns.ns_frac = total_ns.ns_frac / ntimes;
370 
371 	printf("%-30s%15" PRId64 ".%-15" PRId64 "%15" PRId64 ".%-15" PRId64 "\n",
372 		test->name, total_ns.ns, total_ns.ns_frac, avg_ns.ns, avg_ns.ns_frac);
373 }
374 
375 int main(int argc, char **argv)
376 {
377 	int i;
378 
379 	if (!test_init())
380 		return 1;
381 
382 	printf("\n%-30s%18s%13s%18s%13s\n", "name", "total ns", "", "avg ns", "");
383 	for (i = 0 ; i < 92; ++i)
384 		printf("%c", '-');
385 	printf("\n");
386 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
387 		if (!tests[i].run)
388 			continue;
389 		assert(tests[i].name && tests[i].exec);
390 		loop_test(&tests[i]);
391 	}
392 
393 	return 0;
394 }
395