xref: /kvm-unit-tests/arm/micro-bench.c (revision b8d5a5b0ace6ef70d8f27b02ddab30f4e837902c)
1 /*
2  * Measure the cost of micro level operations.
3  *
4  * This test provides support for quantifying the cost of micro level
5  * operations. To improve precision in the measurements, one should
6  * consider pinning each VCPU to a specific physical CPU (PCPU) and to
7  * ensure no other task could run on that PCPU to skew the results.
 * This can be achieved by enabling the QMP server in the QEMU command in
 * unittests.cfg for micro-bench, allowing a client program to get the
 * thread_id for each VCPU thread from the QMP server. Based on that
11  * information, the client program can then pin the corresponding VCPUs to
12  * dedicated PCPUs and isolate interrupts and tasks from those PCPUs.
13  *
14  * Copyright Columbia University
15  * Author: Shih-Wei Li <shihwei@cs.columbia.edu>
16  * Author: Christoffer Dall <cdall@cs.columbia.edu>
17  * Author: Andrew Jones <drjones@redhat.com>
18  *
19  * This work is licensed under the terms of the GNU LGPL, version 2.
20  */
21 #include <libcflat.h>
22 #include <asm/gic.h>
23 #include <asm/gic-v3-its.h>
24 
/* Timer frequency in Hz, read with get_cntfrq() in test_init(). */
static u32 cntfrq;

/*
 * Flags written by gic_irq_handler() and gic_secondary_entry():
 * irq_ready is polled by gic_prep_common() before a test runs,
 * irq_received is polled by ipi_exec()/lpi_exec() after raising an IRQ.
 */
static volatile bool irq_ready, irq_received;
/* IPIs successfully received by ipi_exec(); reset by the ipi prep hooks. */
static int nr_ipi_received;

/*
 * Distributor base address and EOI write routine matching the GIC
 * version reported by gic_init(); both are set in test_init().
 */
static void *vgic_dist_base;
static void (*write_eoir)(u32 irqstat);
32 
33 static void gic_irq_handler(struct pt_regs *regs)
34 {
35 	irq_ready = false;
36 	irq_received = true;
37 	gic_write_eoir(gic_read_iar());
38 	irq_ready = true;
39 }
40 
41 static void gic_secondary_entry(void *data)
42 {
43 	install_irq_handler(EL1H_IRQ, gic_irq_handler);
44 	gic_enable_defaults();
45 	local_irq_enable();
46 	irq_ready = true;
47 	while (true)
48 		cpu_relax();
49 }
50 
51 static bool test_init(void)
52 {
53 	int v = gic_init();
54 
55 	if (!v) {
56 		printf("No supported gic present, skipping tests...\n");
57 		return false;
58 	}
59 
60 	if (nr_cpus < 2) {
61 		printf("At least two cpus required, skipping tests...\n");
62 		return false;
63 	}
64 
65 	switch (v) {
66 	case 2:
67 		vgic_dist_base = gicv2_dist_base();
68 		write_eoir = gicv2_write_eoir;
69 		break;
70 	case 3:
71 		vgic_dist_base = gicv3_dist_base();
72 		write_eoir = gicv3_write_eoir;
73 		break;
74 	}
75 
76 	irq_ready = false;
77 	gic_enable_defaults();
78 	on_cpu_async(1, gic_secondary_entry, NULL);
79 
80 	cntfrq = get_cntfrq();
81 	printf("Timer Frequency %d Hz (Output in microseconds)\n", cntfrq);
82 
83 	return true;
84 }
85 
86 static void gic_prep_common(void)
87 {
88 	unsigned tries = 1 << 28;
89 
90 	while (!irq_ready && tries--)
91 		cpu_relax();
92 	assert(irq_ready);
93 }
94 
95 static bool ipi_prep(void)
96 {
97 	u32 val;
98 
99 	val = readl(vgic_dist_base + GICD_CTLR);
100 	if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) {
101 		/* nASSGIreq can be changed only when GICD is disabled */
102 		val &= ~GICD_CTLR_ENABLE_G1A;
103 		val &= ~GICD_CTLR_nASSGIreq;
104 		writel(val, vgic_dist_base + GICD_CTLR);
105 		gicv3_dist_wait_for_rwp();
106 
107 		val |= GICD_CTLR_ENABLE_G1A;
108 		writel(val, vgic_dist_base + GICD_CTLR);
109 		gicv3_dist_wait_for_rwp();
110 	}
111 
112 	nr_ipi_received = 0;
113 	gic_prep_common();
114 	return true;
115 }
116 
117 static bool ipi_hw_prep(void)
118 {
119 	u32 val;
120 
121 	val = readl(vgic_dist_base + GICD_CTLR);
122 	if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) {
123 		/* nASSGIreq can be changed only when GICD is disabled */
124 		val &= ~GICD_CTLR_ENABLE_G1A;
125 		val |= GICD_CTLR_nASSGIreq;
126 		writel(val, vgic_dist_base + GICD_CTLR);
127 		gicv3_dist_wait_for_rwp();
128 
129 		val |= GICD_CTLR_ENABLE_G1A;
130 		writel(val, vgic_dist_base + GICD_CTLR);
131 		gicv3_dist_wait_for_rwp();
132 	} else {
133 		return false;
134 	}
135 
136 	nr_ipi_received = 0;
137 	gic_prep_common();
138 	return true;
139 }
140 
141 static void ipi_exec(void)
142 {
143 	unsigned tries = 1 << 28;
144 
145 	irq_received = false;
146 
147 	gic_ipi_send_single(1, 1);
148 
149 	while (!irq_received && tries--)
150 		cpu_relax();
151 
152 	if (irq_received)
153 		++nr_ipi_received;
154 
155 	assert_msg(irq_received, "failed to receive IPI in time, but received %d successfully\n", nr_ipi_received);
156 }
157 
158 static bool lpi_prep(void)
159 {
160 	struct its_collection *col1;
161 	struct its_device *dev2;
162 
163 	if (!gicv3_its_base())
164 		return false;
165 
166 	its_enable_defaults();
167 	dev2 = its_create_device(2 /* dev id */, 8 /* nb_ites */);
168 	col1 = its_create_collection(1 /* col id */, 1 /* target PE */);
169 	gicv3_lpi_set_config(8199, LPI_PROP_DEFAULT);
170 
171 	its_send_mapd_nv(dev2, true);
172 	its_send_mapc_nv(col1, true);
173 	its_send_invall_nv(col1);
174 	its_send_mapti_nv(dev2, 8199 /* lpi id */, 20 /* event id */, col1);
175 
176 	gic_prep_common();
177 	return true;
178 }
179 
180 static void lpi_exec(void)
181 {
182 	struct its_device *dev2;
183 	unsigned tries = 1 << 28;
184 	static int received = 0;
185 
186 	irq_received = false;
187 
188 	dev2 = its_get_device(2);
189 	its_send_int_nv(dev2, 20);
190 
191 	while (!irq_received && tries--)
192 		cpu_relax();
193 
194 	if (irq_received)
195 		++received;
196 
197 	assert_msg(irq_received, "failed to receive LPI in time, but received %d successfully\n", received);
198 }
199 
200 static void hvc_exec(void)
201 {
202 	asm volatile("mov w0, #0x4b000000; hvc #0" ::: "w0");
203 }
204 
/*
 * One iteration of the "mmio_read_user" test: a 32-bit MMIO read that is
 * expected to be emulated in userspace.
 */
static void mmio_read_user_exec(void)
{
	/*
	 * FIXME: Read device-id in virtio mmio here in order to
	 * force an exit to userspace. This address needs to be
	 * updated in the future if any relevant changes in QEMU
	 * test-dev are made.
	 */
	readl((void *)0x0a000008);
}
217 
218 static void mmio_read_vgic_exec(void)
219 {
220 	readl(vgic_dist_base + GICD_IIDR);
221 }
222 
/*
 * One iteration of the "eoi" test: write the spurious interrupt ID to
 * the EOI register through the version-specific routine chosen in
 * test_init().
 */
static void eoi_exec(void)
{
	/*
	 * INTID 1023: writes to EOI are ignored. Going through write_eoir
	 * avoids measuring the assert(..) in gic_write_eoir.
	 */
	write_eoir(1023);
}
230 
/* Description of one micro-benchmark run by loop_test(). */
struct exit_test {
	/* Label printed in the first column of the result table */
	const char *name;
	/* Optional setup hook; the test is skipped when it returns false */
	bool (*prep)(void);
	/* Operation being measured; called 'times' times back to back */
	void (*exec)(void);
	/* Number of exec() calls per measurement */
	u32 times;
	/* main() skips entries whose run flag is false */
	bool run;
};
238 
239 static struct exit_test tests[] = {
240 	{"hvc",			NULL,		hvc_exec,		65536,		true},
241 	{"mmio_read_user",	NULL,		mmio_read_user_exec,	65536,		true},
242 	{"mmio_read_vgic",	NULL,		mmio_read_vgic_exec,	65536,		true},
243 	{"eoi",			NULL,		eoi_exec,		65536,		true},
244 	{"ipi",			ipi_prep,	ipi_exec,		65536,		true},
245 	{"ipi_hw",		ipi_hw_prep,	ipi_exec,		65536,		true},
246 	{"lpi",			lpi_prep,	lpi_exec,		65536,		true},
247 };
248 
/* A duration as filled in by ticks_to_ns_time(). */
struct ns_time {
	/* Whole nanoseconds */
	uint64_t ns;
	/* First decimal digit of the nanosecond fraction (0-9) */
	uint64_t ns_frac;
};
253 
254 #define PS_PER_SEC (1000 * 1000 * 1000 * 1000UL)
255 static void ticks_to_ns_time(uint64_t ticks, struct ns_time *ns_time)
256 {
257 	uint64_t ps_per_tick = PS_PER_SEC / cntfrq + !!(PS_PER_SEC % cntfrq);
258 	uint64_t ps;
259 
260 	ps = ticks * ps_per_tick;
261 	ns_time->ns = ps / 1000;
262 	ns_time->ns_frac = (ps % 1000) / 100;
263 }
264 
265 static void loop_test(struct exit_test *test)
266 {
267 	uint64_t start, end, total_ticks, ntimes = 0;
268 	struct ns_time total_ns, avg_ns;
269 
270 	if (test->prep) {
271 		if(!test->prep()) {
272 			printf("%s test skipped\n", test->name);
273 			return;
274 		}
275 	}
276 	isb();
277 	start = read_sysreg(cntpct_el0);
278 	while (ntimes < test->times) {
279 		test->exec();
280 		ntimes++;
281 	}
282 	isb();
283 	end = read_sysreg(cntpct_el0);
284 
285 	total_ticks = end - start;
286 	ticks_to_ns_time(total_ticks, &total_ns);
287 	avg_ns.ns = total_ns.ns / ntimes;
288 	avg_ns.ns_frac = total_ns.ns_frac / ntimes;
289 
290 	printf("%-30s%15" PRId64 ".%-15" PRId64 "%15" PRId64 ".%-15" PRId64 "\n",
291 		test->name, total_ns.ns, total_ns.ns_frac, avg_ns.ns, avg_ns.ns_frac);
292 }
293 
294 int main(int argc, char **argv)
295 {
296 	int i;
297 
298 	if (!test_init())
299 		return 1;
300 
301 	printf("\n%-30s%18s%13s%18s%13s\n", "name", "total ns", "", "avg ns", "");
302 	for (i = 0 ; i < 92; ++i)
303 		printf("%c", '-');
304 	printf("\n");
305 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
306 		if (!tests[i].run)
307 			continue;
308 		assert(tests[i].name && tests[i].exec);
309 		loop_test(&tests[i]);
310 	}
311 
312 	return 0;
313 }
314