1 /*
2 * Measure the cost of micro level operations.
3 *
4 * This test provides support for quantifying the cost of micro level
5 * operations. To improve precision in the measurements, one should
6 * consider pinning each VCPU to a specific physical CPU (PCPU) and to
7 * ensure no other task could run on that PCPU to skew the results.
8 * This can be achieved by enabling QMP server in the QEMU command in
9 * unittest.cfg for micro-bench, allowing a client program to get the
10 * thread_id for each VCPU thread from the QMP server. Based on that
11 * information, the client program can then pin the corresponding VCPUs to
12 * dedicated PCPUs and isolate interrupts and tasks from those PCPUs.
13 *
14 * Copyright Columbia University
15 * Author: Shih-Wei Li <shihwei@cs.columbia.edu>
16 * Author: Christoffer Dall <cdall@cs.columbia.edu>
17 * Author: Andrew Jones <drjones@redhat.com>
18 *
19 * This work is licensed under the terms of the GNU LGPL, version 2.
20 */
21 #include <libcflat.h>
22 #include <util.h>
23 #include <asm/gic.h>
24 #include <asm/gic-v3-its.h>
25 #include <asm/timer.h>
26
27 #define QEMU_MMIO_ADDR 0x0a000008
28
29 static u32 cntfrq;
30
31 static volatile bool irq_ready, irq_received;
32 static int nr_ipi_received;
33 static unsigned long mmio_addr = QEMU_MMIO_ADDR;
34
35 static void *vgic_dist_base;
36 static void (*write_eoir)(u32 irqstat);
37
gic_irq_handler(struct pt_regs * regs)38 static void gic_irq_handler(struct pt_regs *regs)
39 {
40 u32 irqstat = gic_read_iar();
41 irq_ready = false;
42 irq_received = true;
43 gic_write_eoir(irqstat);
44
45 if (irqstat == TIMER_VTIMER_IRQ) {
46 write_sysreg((ARCH_TIMER_CTL_IMASK | ARCH_TIMER_CTL_ENABLE),
47 cntv_ctl_el0);
48 isb();
49 }
50 irq_ready = true;
51 }
52
gic_secondary_entry(void * data)53 static void gic_secondary_entry(void *data)
54 {
55 install_irq_handler(EL1H_IRQ, gic_irq_handler);
56 gic_enable_defaults();
57 local_irq_enable();
58 irq_ready = true;
59 while (true)
60 cpu_relax();
61 }
62
test_init(void)63 static bool test_init(void)
64 {
65 int v = gic_init();
66
67 if (!v) {
68 printf("No supported gic present, skipping tests...\n");
69 return false;
70 }
71
72 if (nr_cpus < 2) {
73 printf("At least two cpus required, skipping tests...\n");
74 return false;
75 }
76
77 switch (v) {
78 case 2:
79 vgic_dist_base = gicv2_dist_base();
80 write_eoir = gicv2_write_eoir;
81 break;
82 case 3:
83 vgic_dist_base = gicv3_dist_base();
84 write_eoir = gicv3_write_eoir;
85 break;
86 }
87
88 irq_ready = false;
89 gic_enable_defaults();
90 on_cpu_async(1, gic_secondary_entry, NULL);
91
92 cntfrq = get_cntfrq();
93 printf("Timer Frequency %d Hz (Output in microseconds)\n", cntfrq);
94
95 return true;
96 }
97
gic_prep_common(void)98 static void gic_prep_common(void)
99 {
100 unsigned tries = 1 << 28;
101
102 while (!irq_ready && tries--)
103 cpu_relax();
104 assert(irq_ready);
105 }
106
ipi_prep(void)107 static bool ipi_prep(void)
108 {
109 u32 val;
110
111 val = readl(vgic_dist_base + GICD_CTLR);
112 if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) {
113 /* nASSGIreq can be changed only when GICD is disabled */
114 val &= ~GICD_CTLR_ENABLE_G1A;
115 val &= ~GICD_CTLR_nASSGIreq;
116 writel(val, vgic_dist_base + GICD_CTLR);
117 gicv3_dist_wait_for_rwp();
118
119 val |= GICD_CTLR_ENABLE_G1A;
120 writel(val, vgic_dist_base + GICD_CTLR);
121 gicv3_dist_wait_for_rwp();
122 }
123
124 nr_ipi_received = 0;
125 gic_prep_common();
126 return true;
127 }
128
ipi_hw_prep(void)129 static bool ipi_hw_prep(void)
130 {
131 u32 val;
132
133 val = readl(vgic_dist_base + GICD_CTLR);
134 if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) {
135 /* nASSGIreq can be changed only when GICD is disabled */
136 val &= ~GICD_CTLR_ENABLE_G1A;
137 val |= GICD_CTLR_nASSGIreq;
138 writel(val, vgic_dist_base + GICD_CTLR);
139 gicv3_dist_wait_for_rwp();
140
141 val |= GICD_CTLR_ENABLE_G1A;
142 writel(val, vgic_dist_base + GICD_CTLR);
143 gicv3_dist_wait_for_rwp();
144 } else {
145 return false;
146 }
147
148 nr_ipi_received = 0;
149 gic_prep_common();
150 return true;
151 }
152
ipi_exec(void)153 static void ipi_exec(void)
154 {
155 unsigned tries = 1 << 28;
156
157 irq_received = false;
158
159 gic_ipi_send_single(1, 1);
160
161 while (!irq_received && tries--)
162 cpu_relax();
163
164 if (irq_received)
165 ++nr_ipi_received;
166
167 assert_msg(irq_received, "failed to receive IPI in time, but received %d successfully\n", nr_ipi_received);
168 }
169
lpi_prep(void)170 static bool lpi_prep(void)
171 {
172 struct its_collection *col1;
173 struct its_device *dev2;
174
175 if (!gicv3_its_base())
176 return false;
177
178 its_enable_defaults();
179 dev2 = its_create_device(2 /* dev id */, 8 /* nb_ites */);
180 col1 = its_create_collection(1 /* col id */, 1 /* target PE */);
181 gicv3_lpi_set_config(8199, LPI_PROP_DEFAULT);
182
183 its_send_mapd_nv(dev2, true);
184 its_send_mapc_nv(col1, true);
185 its_send_invall_nv(col1);
186 its_send_mapti_nv(dev2, 8199 /* lpi id */, 20 /* event id */, col1);
187
188 gic_prep_common();
189 return true;
190 }
191
lpi_exec(void)192 static void lpi_exec(void)
193 {
194 struct its_device *dev2;
195 unsigned tries = 1 << 28;
196 static int received = 0;
197
198 irq_received = false;
199
200 dev2 = its_get_device(2);
201 its_send_int_nv(dev2, 20);
202
203 while (!irq_received && tries--)
204 cpu_relax();
205
206 if (irq_received)
207 ++received;
208
209 assert_msg(irq_received, "failed to receive LPI in time, but received %d successfully\n", received);
210 }
211
timer_prep(void)212 static bool timer_prep(void)
213 {
214 gic_enable_defaults();
215 install_irq_handler(EL1H_IRQ, gic_irq_handler);
216 local_irq_enable();
217
218 gic_enable_irq(TIMER_VTIMER_IRQ);
219 write_sysreg(ARCH_TIMER_CTL_IMASK | ARCH_TIMER_CTL_ENABLE, cntv_ctl_el0);
220 isb();
221
222 gic_prep_common();
223 return true;
224 }
225
timer_exec(void)226 static void timer_exec(void)
227 {
228 u64 before_timer;
229 u64 timer_10ms;
230 unsigned tries = 1 << 28;
231 static int received = 0;
232
233 irq_received = false;
234
235 before_timer = read_sysreg(cntvct_el0);
236 timer_10ms = cntfrq / 100;
237 write_sysreg(before_timer + timer_10ms, cntv_cval_el0);
238 write_sysreg(ARCH_TIMER_CTL_ENABLE, cntv_ctl_el0);
239 isb();
240
241 while (!irq_received && tries--)
242 cpu_relax();
243
244 if (irq_received)
245 ++received;
246
247 assert_msg(irq_received, "failed to receive PPI in time, but received %d successfully\n", received);
248 }
249
timer_post(uint64_t ntimes,uint64_t * total_ticks)250 static void timer_post(uint64_t ntimes, uint64_t *total_ticks)
251 {
252 /*
253 * We use a 10msec timer to test the latency of PPI,
254 * so we subtract the ticks of 10msec to get the
255 * actual latency
256 */
257 *total_ticks -= ntimes * (cntfrq / 100);
258 }
259
hvc_exec(void)260 static void hvc_exec(void)
261 {
262 asm volatile("mov w0, #0x4b000000; hvc #0" ::: "w0");
263 }
264
265 static void *userspace_emulated_addr;
266
mmio_read_user_prep(void)267 static bool mmio_read_user_prep(void)
268 {
269 /*
270 * FIXME: We need an MMIO address that we can safely read to test
271 * exits to userspace. Ideally, the test-dev would provide us this
272 * address (and one we could write to too), but until it does we
273 * use a virtio-mmio transport address. FIXME2: We should be getting
274 * this address (and the future test-dev address) from the devicetree,
275 * but so far we lazily hardcode it.
276 */
277 userspace_emulated_addr = (void *)ioremap(mmio_addr, sizeof(u32));
278 return true;
279 }
280
mmio_read_user_exec(void)281 static void mmio_read_user_exec(void)
282 {
283 readl(userspace_emulated_addr);
284 }
285
mmio_read_vgic_exec(void)286 static void mmio_read_vgic_exec(void)
287 {
288 readl(vgic_dist_base + GICD_IIDR);
289 }
290
/*
 * One iteration of the "eoi" test: write the spurious interrupt ID to the
 * end-of-interrupt register through the version-specific writer.
 */
static void eoi_exec(void)
{
	/* EOI writes of the spurious ID are ignored */
	const int spurious_id = 1023;

	/*
	 * Call the writer selected in test_init() directly so that the
	 * assert(..) inside gic_write_eoir is not part of the measurement.
	 */
	write_eoir(spurious_id);
}
298
299 struct exit_test {
300 const char *name;
301 bool (*prep)(void);
302 void (*exec)(void);
303 void (*post)(uint64_t ntimes, uint64_t *total_ticks);
304 u32 times;
305 bool run;
306 };
307
308 static struct exit_test tests[] = {
309 {"hvc", NULL, hvc_exec, NULL, 65536, true},
310 {"mmio_read_user", mmio_read_user_prep, mmio_read_user_exec, NULL, 65536, true},
311 {"mmio_read_vgic", NULL, mmio_read_vgic_exec, NULL, 65536, true},
312 {"eoi", NULL, eoi_exec, NULL, 65536, true},
313 {"ipi", ipi_prep, ipi_exec, NULL, 65536, true},
314 {"ipi_hw", ipi_hw_prep, ipi_exec, NULL, 65536, true},
315 {"lpi", lpi_prep, lpi_exec, NULL, 65536, true},
316 {"timer_10ms", timer_prep, timer_exec, timer_post, 256, true},
317 };
318
/* A duration as produced by ticks_to_ns_time(). */
struct ns_time {
	/* Whole nanoseconds. */
	uint64_t ns;
	/* First decimal digit of the nanosecond fraction (tenths of a ns). */
	uint64_t ns_frac;
};
323
324 #define PS_PER_SEC (1000 * 1000 * 1000 * 1000UL)
ticks_to_ns_time(uint64_t ticks,struct ns_time * ns_time)325 static void ticks_to_ns_time(uint64_t ticks, struct ns_time *ns_time)
326 {
327 uint64_t ps_per_tick = PS_PER_SEC / cntfrq + !!(PS_PER_SEC % cntfrq);
328 uint64_t ps;
329
330 ps = ticks * ps_per_tick;
331 ns_time->ns = ps / 1000;
332 ns_time->ns_frac = (ps % 1000) / 100;
333 }
334
loop_test(struct exit_test * test)335 static void loop_test(struct exit_test *test)
336 {
337 uint64_t start, end, total_ticks, ntimes = 0;
338 struct ns_time avg_ns, total_ns = {};
339
340 total_ticks = 0;
341 if (test->prep) {
342 if(!test->prep()) {
343 printf("%s test skipped\n", test->name);
344 return;
345 }
346 }
347
348 dsb(ish);
349 isb();
350 start = read_sysreg(cntvct_el0);
351 isb();
352 while (ntimes < test->times) {
353 test->exec();
354
355 ntimes++;
356 }
357 dsb(ish);
358 isb();
359 end = read_sysreg(cntvct_el0);
360
361 total_ticks = end - start;
362 ticks_to_ns_time(total_ticks, &total_ns);
363
364 if (test->post) {
365 test->post(ntimes, &total_ticks);
366 ticks_to_ns_time(total_ticks, &total_ns);
367 }
368
369 avg_ns.ns = total_ns.ns / ntimes;
370 avg_ns.ns_frac = total_ns.ns_frac / ntimes;
371
372 printf("%-30s%15" PRId64 ".%-15" PRId64 "%15" PRId64 ".%-15" PRId64 "\n",
373 test->name, total_ns.ns, total_ns.ns_frac, avg_ns.ns, avg_ns.ns_frac);
374 }
375
parse_args(int argc,char ** argv)376 static void parse_args(int argc, char **argv)
377 {
378 int i, len;
379 long val;
380
381 for (i = 1; i < argc; ++i) {
382 len = parse_keyval(argv[i], &val);
383 if (len == -1)
384 continue;
385
386 if (strncmp(argv[i], "mmio-addr", len) == 0) {
387 mmio_addr = val;
388 report_info("found mmio_addr=0x%lx", mmio_addr);
389 }
390 }
391 }
392
main(int argc,char ** argv)393 int main(int argc, char **argv)
394 {
395 int i;
396
397 parse_args(argc, argv);
398
399 if (!test_init())
400 return 1;
401
402 printf("\n%-30s%18s%13s%18s%13s\n", "name", "total ns", "", "avg ns", "");
403 for (i = 0 ; i < 92; ++i)
404 printf("%c", '-');
405 printf("\n");
406 for (i = 0; i < ARRAY_SIZE(tests); i++) {
407 if (!tests[i].run)
408 continue;
409 assert(tests[i].name && tests[i].exec);
410 loop_test(&tests[i]);
411 }
412
413 return 0;
414 }
415