1 /* 2 * Measure the cost of micro level operations. 3 * 4 * This test provides support for quantifying the cost of micro level 5 * operations. To improve precision in the measurements, one should 6 * consider pinning each VCPU to a specific physical CPU (PCPU) and to 7 * ensure no other task could run on that PCPU to skew the results. 8 * This can be achieved by enabling QMP server in the QEMU command in 9 * unittest.cfg for micro-bench, allowing a client program to get the 10 * thread_id for each VCPU thread from the QMP server. Based on that 11 * information, the client program can then pin the corresponding VCPUs to 12 * dedicated PCPUs and isolate interrupts and tasks from those PCPUs. 13 * 14 * Copyright Columbia University 15 * Author: Shih-Wei Li <shihwei@cs.columbia.edu> 16 * Author: Christoffer Dall <cdall@cs.columbia.edu> 17 * Author: Andrew Jones <drjones@redhat.com> 18 * 19 * This work is licensed under the terms of the GNU LGPL, version 2. 20 */ 21 #include <libcflat.h> 22 #include <util.h> 23 #include <asm/gic.h> 24 #include <asm/gic-v3-its.h> 25 #include <asm/timer.h> 26 27 #define QEMU_MMIO_ADDR 0x0a000008 28 29 static u32 cntfrq; 30 31 static volatile bool irq_ready, irq_received; 32 static int nr_ipi_received; 33 static unsigned long mmio_addr = QEMU_MMIO_ADDR; 34 35 static void *vgic_dist_base; 36 static void (*write_eoir)(u32 irqstat); 37 38 static void gic_irq_handler(struct pt_regs *regs) 39 { 40 u32 irqstat = gic_read_iar(); 41 irq_ready = false; 42 irq_received = true; 43 gic_write_eoir(irqstat); 44 45 if (irqstat == TIMER_VTIMER_IRQ) { 46 write_sysreg((ARCH_TIMER_CTL_IMASK | ARCH_TIMER_CTL_ENABLE), 47 cntv_ctl_el0); 48 isb(); 49 } 50 irq_ready = true; 51 } 52 53 static void gic_secondary_entry(void *data) 54 { 55 install_irq_handler(EL1H_IRQ, gic_irq_handler); 56 gic_enable_defaults(); 57 local_irq_enable(); 58 irq_ready = true; 59 while (true) 60 cpu_relax(); 61 } 62 63 static bool test_init(void) 64 { 65 int v = gic_init(); 66 67 if (!v) { 68 printf("No supported gic present, skipping tests...\n"); 69 return false; 70 } 71 72 if (nr_cpus < 2) { 73 printf("At least two cpus required, skipping tests...\n"); 74 return false; 75 } 76 77 switch (v) { 78 case 2: 79 vgic_dist_base = gicv2_dist_base(); 80 write_eoir = gicv2_write_eoir; 81 break; 82 case 3: 83 vgic_dist_base = gicv3_dist_base(); 84 write_eoir = gicv3_write_eoir; 85 break; 86 } 87 88 irq_ready = false; 89 gic_enable_defaults(); 90 on_cpu_async(1, gic_secondary_entry, NULL); 91 92 cntfrq = get_cntfrq(); 93 printf("Timer Frequency %d Hz (Output in microseconds)\n", cntfrq); 94 95 return true; 96 } 97 98 static void gic_prep_common(void) 99 { 100 unsigned tries = 1 << 28; 101 102 while (!irq_ready && tries--) 103 cpu_relax(); 104 assert(irq_ready); 105 } 106 107 static bool ipi_prep(void) 108 { 109 u32 val; 110 111 val = readl(vgic_dist_base + GICD_CTLR); 112 if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) { 113 /* nASSGIreq can be changed only when GICD is disabled */ 114 val &= ~GICD_CTLR_ENABLE_G1A; 115 val &= ~GICD_CTLR_nASSGIreq; 116 writel(val, vgic_dist_base + GICD_CTLR); 117 gicv3_dist_wait_for_rwp(); 118 119 val |= GICD_CTLR_ENABLE_G1A; 120 writel(val, vgic_dist_base + GICD_CTLR); 121 gicv3_dist_wait_for_rwp(); 122 } 123 124 nr_ipi_received = 0; 125 gic_prep_common(); 126 return true; 127 } 128 129 static bool ipi_hw_prep(void) 130 { 131 u32 val; 132 133 val = readl(vgic_dist_base + GICD_CTLR); 134 if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) { 135 /* nASSGIreq can be changed only when GICD is disabled */ 136 val &= ~GICD_CTLR_ENABLE_G1A; 137 val |= GICD_CTLR_nASSGIreq; 138 writel(val, vgic_dist_base + GICD_CTLR); 139 gicv3_dist_wait_for_rwp(); 140 141 val |= GICD_CTLR_ENABLE_G1A; 142 writel(val, vgic_dist_base + GICD_CTLR); 143 gicv3_dist_wait_for_rwp(); 144 } else { 145 return false; 146 } 147 148 nr_ipi_received = 0; 149 gic_prep_common(); 150 return true; 151 } 152 153 static void ipi_exec(void) 154 { 155 unsigned tries = 1 << 28; 156 157 irq_received = false; 158 159 gic_ipi_send_single(1, 1); 160 161 while (!irq_received && tries--) 162 cpu_relax(); 163 164 if (irq_received) 165 ++nr_ipi_received; 166 167 assert_msg(irq_received, "failed to receive IPI in time, but received %d successfully\n", nr_ipi_received); 168 } 169 170 static bool lpi_prep(void) 171 { 172 struct its_collection *col1; 173 struct its_device *dev2; 174 175 if (!gicv3_its_base()) 176 return false; 177 178 its_enable_defaults(); 179 dev2 = its_create_device(2 /* dev id */, 8 /* nb_ites */); 180 col1 = its_create_collection(1 /* col id */, 1 /* target PE */); 181 gicv3_lpi_set_config(8199, LPI_PROP_DEFAULT); 182 183 its_send_mapd_nv(dev2, true); 184 its_send_mapc_nv(col1, true); 185 its_send_invall_nv(col1); 186 its_send_mapti_nv(dev2, 8199 /* lpi id */, 20 /* event id */, col1); 187 188 gic_prep_common(); 189 return true; 190 } 191 192 static void lpi_exec(void) 193 { 194 struct its_device *dev2; 195 unsigned tries = 1 << 28; 196 static int received = 0; 197 198 irq_received = false; 199 200 dev2 = its_get_device(2); 201 its_send_int_nv(dev2, 20); 202 203 while (!irq_received && tries--) 204 cpu_relax(); 205 206 if (irq_received) 207 ++received; 208 209 assert_msg(irq_received, "failed to receive LPI in time, but received %d successfully\n", received); 210 } 211 212 static bool timer_prep(void) 213 { 214 gic_enable_defaults(); 215 install_irq_handler(EL1H_IRQ, gic_irq_handler); 216 local_irq_enable(); 217 218 gic_enable_irq(TIMER_VTIMER_IRQ); 219 write_sysreg(ARCH_TIMER_CTL_IMASK | ARCH_TIMER_CTL_ENABLE, cntv_ctl_el0); 220 isb(); 221 222 gic_prep_common(); 223 return true; 224 } 225 226 static void timer_exec(void) 227 { 228 u64 before_timer; 229 u64 timer_10ms; 230 unsigned tries = 1 << 28; 231 static int received = 0; 232 233 irq_received = false; 234 235 before_timer = read_sysreg(cntvct_el0); 236 timer_10ms = cntfrq / 100; 237 write_sysreg(before_timer + timer_10ms, cntv_cval_el0); 238 write_sysreg(ARCH_TIMER_CTL_ENABLE, cntv_ctl_el0); 239 isb(); 240 241 while (!irq_received && tries--) 242 cpu_relax(); 243 244 if (irq_received) 245 ++received; 246 247 assert_msg(irq_received, "failed to receive PPI in time, but received %d successfully\n", received); 248 } 249 250 static void timer_post(uint64_t ntimes, uint64_t *total_ticks) 251 { 252 /* 253 * We use a 10msec timer to test the latency of PPI, 254 * so we subtract the ticks of 10msec to get the 255 * actual latency 256 */ 257 *total_ticks -= ntimes * (cntfrq / 100); 258 } 259 260 static void hvc_exec(void) 261 { 262 asm volatile("mov w0, #0x4b000000; hvc #0" ::: "w0"); 263 } 264 265 static void *userspace_emulated_addr; 266 267 static bool mmio_read_user_prep(void) 268 { 269 /* 270 * FIXME: We need an MMIO address that we can safely read to test 271 * exits to userspace. Ideally, the test-dev would provide us this 272 * address (and one we could write to too), but until it does we 273 * use a virtio-mmio transport address. FIXME2: We should be getting 274 * this address (and the future test-dev address) from the devicetree, 275 * but so far we lazily hardcode it. 276 */ 277 userspace_emulated_addr = (void *)ioremap(mmio_addr, sizeof(u32)); 278 return true; 279 } 280 281 static void mmio_read_user_exec(void) 282 { 283 readl(userspace_emulated_addr); 284 } 285 286 static void mmio_read_vgic_exec(void) 287 { 288 readl(vgic_dist_base + GICD_IIDR); 289 } 290 291 static void eoi_exec(void) 292 { 293 int spurious_id = 1023; /* writes to EOI are ignored */ 294 295 /* Avoid measuring assert(..) in gic_write_eoir */ 296 write_eoir(spurious_id); 297 } 298 299 struct exit_test { 300 const char *name; 301 bool (*prep)(void); 302 void (*exec)(void); 303 void (*post)(uint64_t ntimes, uint64_t *total_ticks); 304 u32 times; 305 bool run; 306 }; 307 308 static struct exit_test tests[] = { 309 {"hvc", NULL, hvc_exec, NULL, 65536, true}, 310 {"mmio_read_user", mmio_read_user_prep, mmio_read_user_exec, NULL, 65536, true}, 311 {"mmio_read_vgic", NULL, mmio_read_vgic_exec, NULL, 65536, true}, 312 {"eoi", NULL, eoi_exec, NULL, 65536, true}, 313 {"ipi", ipi_prep, ipi_exec, NULL, 65536, true}, 314 {"ipi_hw", ipi_hw_prep, ipi_exec, NULL, 65536, true}, 315 {"lpi", lpi_prep, lpi_exec, NULL, 65536, true}, 316 {"timer_10ms", timer_prep, timer_exec, timer_post, 256, true}, 317 }; 318 319 struct ns_time { 320 uint64_t ns; 321 uint64_t ns_frac; 322 }; 323 324 #define PS_PER_SEC (1000 * 1000 * 1000 * 1000UL) 325 static void ticks_to_ns_time(uint64_t ticks, struct ns_time *ns_time) 326 { 327 uint64_t ps_per_tick = PS_PER_SEC / cntfrq + !!(PS_PER_SEC % cntfrq); 328 uint64_t ps; 329 330 ps = ticks * ps_per_tick; 331 ns_time->ns = ps / 1000; 332 ns_time->ns_frac = (ps % 1000) / 100; 333 } 334 335 static void loop_test(struct exit_test *test) 336 { 337 uint64_t start, end, total_ticks, ntimes = 0; 338 struct ns_time avg_ns, total_ns = {}; 339 340 total_ticks = 0; 341 if (test->prep) { 342 if(!test->prep()) { 343 printf("%s test skipped\n", test->name); 344 return; 345 } 346 } 347 348 dsb(ish); 349 isb(); 350 start = read_sysreg(cntvct_el0); 351 isb(); 352 while (ntimes < test->times) { 353 test->exec(); 354 355 ntimes++; 356 } 357 dsb(ish); 358 isb(); 359 end = read_sysreg(cntvct_el0); 360 361 total_ticks = end - start; 362 ticks_to_ns_time(total_ticks, &total_ns); 363 364 if (test->post) { 365 test->post(ntimes, &total_ticks); 366 ticks_to_ns_time(total_ticks, &total_ns); 367 } 368 369 avg_ns.ns = total_ns.ns / ntimes; 370 avg_ns.ns_frac = total_ns.ns_frac / ntimes; 371 372 printf("%-30s%15" PRId64 ".%-15" PRId64 "%15" PRId64 ".%-15" PRId64 "\n", 373 test->name, total_ns.ns, total_ns.ns_frac, avg_ns.ns, avg_ns.ns_frac); 374 } 375 376 static void parse_args(int argc, char **argv) 377 { 378 int i, len; 379 long val; 380 381 for (i = 1; i < argc; ++i) { 382 len = parse_keyval(argv[i], &val); 383 if (len == -1) 384 continue; 385 386 if (strncmp(argv[i], "mmio-addr", len) == 0) { 387 mmio_addr = val; 388 report_info("found mmio_addr=0x%lx", mmio_addr); 389 } 390 } 391 } 392 393 int main(int argc, char **argv) 394 { 395 int i; 396 397 parse_args(argc, argv); 398 399 if (!test_init()) 400 return 1; 401 402 printf("\n%-30s%18s%13s%18s%13s\n", "name", "total ns", "", "avg ns", ""); 403 for (i = 0 ; i < 92; ++i) 404 printf("%c", '-'); 405 printf("\n"); 406 for (i = 0; i < ARRAY_SIZE(tests); i++) { 407 if (!tests[i].run) 408 continue; 409 assert(tests[i].name && tests[i].exec); 410 loop_test(&tests[i]); 411 } 412 413 return 0; 414 } 415