1 /* 2 * Measure the cost of micro level operations. 3 * 4 * This test provides support for quantifying the cost of micro level 5 * operations. To improve precision in the measurements, one should 6 * consider pinning each VCPU to a specific physical CPU (PCPU) and to 7 * ensure no other task could run on that PCPU to skew the results. 8 * This can be achieved by enabling QMP server in the QEMU command in 9 * unittest.cfg for micro-bench, allowing a client program to get the 10 * thread_id for each VCPU thread from the QMP server. Based on that 11 * information, the client program can then pin the corresponding VCPUs to 12 * dedicated PCPUs and isolate interrupts and tasks from those PCPUs. 13 * 14 * Copyright Columbia University 15 * Author: Shih-Wei Li <shihwei@cs.columbia.edu> 16 * Author: Christoffer Dall <cdall@cs.columbia.edu> 17 * Author: Andrew Jones <drjones@redhat.com> 18 * 19 * This work is licensed under the terms of the GNU LGPL, version 2. 20 */ 21 #include <libcflat.h> 22 #include <util.h> 23 #include <asm/gic.h> 24 #include <asm/gic-v3-its.h> 25 #include <asm/timer.h> 26 27 #define NS_5_SECONDS (5 * 1000 * 1000 * 1000UL) 28 #define QEMU_MMIO_ADDR 0x0a000008 29 30 static u32 cntfrq; 31 32 static volatile bool irq_ready, irq_received; 33 static int nr_ipi_received; 34 static unsigned long mmio_addr = QEMU_MMIO_ADDR; 35 36 static void *vgic_dist_base; 37 static void (*write_eoir)(u32 irqstat); 38 39 static void gic_irq_handler(struct pt_regs *regs) 40 { 41 u32 irqstat = gic_read_iar(); 42 irq_ready = false; 43 irq_received = true; 44 gic_write_eoir(irqstat); 45 46 if (irqstat == PPI(TIMER_VTIMER_IRQ)) { 47 write_sysreg((ARCH_TIMER_CTL_IMASK | ARCH_TIMER_CTL_ENABLE), 48 cntv_ctl_el0); 49 isb(); 50 } 51 irq_ready = true; 52 } 53 54 static void gic_secondary_entry(void *data) 55 { 56 install_irq_handler(EL1H_IRQ, gic_irq_handler); 57 gic_enable_defaults(); 58 local_irq_enable(); 59 irq_ready = true; 60 while (true) 61 cpu_relax(); 62 } 63 64 static bool test_init(void) 65 { 66 int v = gic_init(); 67 68 if (!v) { 69 printf("No supported gic present, skipping tests...\n"); 70 return false; 71 } 72 73 if (nr_cpus < 2) { 74 printf("At least two cpus required, skipping tests...\n"); 75 return false; 76 } 77 78 switch (v) { 79 case 2: 80 vgic_dist_base = gicv2_dist_base(); 81 write_eoir = gicv2_write_eoir; 82 break; 83 case 3: 84 vgic_dist_base = gicv3_dist_base(); 85 write_eoir = gicv3_write_eoir; 86 break; 87 } 88 89 irq_ready = false; 90 gic_enable_defaults(); 91 on_cpu_async(1, gic_secondary_entry, NULL); 92 93 cntfrq = get_cntfrq(); 94 printf("Timer Frequency %d Hz (Output in microseconds)\n", cntfrq); 95 96 return true; 97 } 98 99 static void gic_prep_common(void) 100 { 101 unsigned tries = 1 << 28; 102 103 while (!irq_ready && tries--) 104 cpu_relax(); 105 assert(irq_ready); 106 } 107 108 static bool ipi_prep(void) 109 { 110 u32 val; 111 112 val = readl(vgic_dist_base + GICD_CTLR); 113 if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) { 114 /* nASSGIreq can be changed only when GICD is disabled */ 115 val &= ~GICD_CTLR_ENABLE_G1A; 116 val &= ~GICD_CTLR_nASSGIreq; 117 writel(val, vgic_dist_base + GICD_CTLR); 118 gicv3_dist_wait_for_rwp(); 119 120 val |= GICD_CTLR_ENABLE_G1A; 121 writel(val, vgic_dist_base + GICD_CTLR); 122 gicv3_dist_wait_for_rwp(); 123 } 124 125 nr_ipi_received = 0; 126 gic_prep_common(); 127 return true; 128 } 129 130 static bool ipi_hw_prep(void) 131 { 132 u32 val; 133 134 val = readl(vgic_dist_base + GICD_CTLR); 135 if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) { 136 /* nASSGIreq can be changed only when GICD is disabled */ 137 val &= ~GICD_CTLR_ENABLE_G1A; 138 val |= GICD_CTLR_nASSGIreq; 139 writel(val, vgic_dist_base + GICD_CTLR); 140 gicv3_dist_wait_for_rwp(); 141 142 val |= GICD_CTLR_ENABLE_G1A; 143 writel(val, vgic_dist_base + GICD_CTLR); 144 gicv3_dist_wait_for_rwp(); 145 } else { 146 return false; 147 } 148 149 nr_ipi_received = 0; 150 gic_prep_common(); 151 return true; 152 } 153 154 static void ipi_exec(void) 155 { 156 unsigned tries = 1 << 28; 157 158 irq_received = false; 159 160 gic_ipi_send_single(1, 1); 161 162 while (!irq_received && tries--) 163 cpu_relax(); 164 165 if (irq_received) 166 ++nr_ipi_received; 167 168 assert_msg(irq_received, "failed to receive IPI in time, but received %d successfully\n", nr_ipi_received); 169 } 170 171 static bool lpi_prep(void) 172 { 173 struct its_collection *col1; 174 struct its_device *dev2; 175 176 if (!gicv3_its_base()) 177 return false; 178 179 its_enable_defaults(); 180 dev2 = its_create_device(2 /* dev id */, 8 /* nb_ites */); 181 col1 = its_create_collection(1 /* col id */, 1 /* target PE */); 182 gicv3_lpi_set_config(8199, LPI_PROP_DEFAULT); 183 184 its_send_mapd_nv(dev2, true); 185 its_send_mapc_nv(col1, true); 186 its_send_invall_nv(col1); 187 its_send_mapti_nv(dev2, 8199 /* lpi id */, 20 /* event id */, col1); 188 189 gic_prep_common(); 190 return true; 191 } 192 193 static void lpi_exec(void) 194 { 195 struct its_device *dev2; 196 unsigned tries = 1 << 28; 197 static int received = 0; 198 199 irq_received = false; 200 201 dev2 = its_get_device(2); 202 its_send_int_nv(dev2, 20); 203 204 while (!irq_received && tries--) 205 cpu_relax(); 206 207 if (irq_received) 208 ++received; 209 210 assert_msg(irq_received, "failed to receive LPI in time, but received %d successfully\n", received); 211 } 212 213 static bool timer_prep(void) 214 { 215 void *gic_isenabler; 216 217 gic_enable_defaults(); 218 install_irq_handler(EL1H_IRQ, gic_irq_handler); 219 local_irq_enable(); 220 221 switch (gic_version()) { 222 case 2: 223 gic_isenabler = gicv2_dist_base() + GICD_ISENABLER; 224 break; 225 case 3: 226 gic_isenabler = gicv3_sgi_base() + GICR_ISENABLER0; 227 break; 228 default: 229 assert_msg(0, "Unreachable"); 230 } 231 232 writel(1 << PPI(TIMER_VTIMER_IRQ), gic_isenabler); 233 write_sysreg(ARCH_TIMER_CTL_IMASK | ARCH_TIMER_CTL_ENABLE, cntv_ctl_el0); 234 isb(); 235 236 gic_prep_common(); 237 return true; 238 } 239 240 static void timer_exec(void) 241 { 242 u64 before_timer; 243 u64 timer_10ms; 244 unsigned tries = 1 << 28; 245 static int received = 0; 246 247 irq_received = false; 248 249 before_timer = read_sysreg(cntvct_el0); 250 timer_10ms = cntfrq / 100; 251 write_sysreg(before_timer + timer_10ms, cntv_cval_el0); 252 write_sysreg(ARCH_TIMER_CTL_ENABLE, cntv_ctl_el0); 253 isb(); 254 255 while (!irq_received && tries--) 256 cpu_relax(); 257 258 if (irq_received) 259 ++received; 260 261 assert_msg(irq_received, "failed to receive PPI in time, but received %d successfully\n", received); 262 } 263 264 static void timer_post(uint64_t ntimes, uint64_t *total_ticks) 265 { 266 /* 267 * We use a 10msec timer to test the latency of PPI, 268 * so we subtract the ticks of 10msec to get the 269 * actual latency 270 */ 271 *total_ticks -= ntimes * (cntfrq / 100); 272 } 273 274 static void hvc_exec(void) 275 { 276 asm volatile("mov w0, #0x4b000000; hvc #0" ::: "w0"); 277 } 278 279 static void *userspace_emulated_addr; 280 281 static bool mmio_read_user_prep(void) 282 { 283 /* 284 * FIXME: We need an MMIO address that we can safely read to test 285 * exits to userspace. Ideally, the test-dev would provide us this 286 * address (and one we could write to too), but until it does we 287 * use a virtio-mmio transport address. FIXME2: We should be getting 288 * this address (and the future test-dev address) from the devicetree, 289 * but so far we lazily hardcode it. 290 */ 291 userspace_emulated_addr = (void *)ioremap(mmio_addr, sizeof(u32)); 292 return true; 293 } 294 295 static void mmio_read_user_exec(void) 296 { 297 readl(userspace_emulated_addr); 298 } 299 300 static void mmio_read_vgic_exec(void) 301 { 302 readl(vgic_dist_base + GICD_IIDR); 303 } 304 305 static void eoi_exec(void) 306 { 307 int spurious_id = 1023; /* writes to EOI are ignored */ 308 309 /* Avoid measuring assert(..) in gic_write_eoir */ 310 write_eoir(spurious_id); 311 } 312 313 struct exit_test { 314 const char *name; 315 bool (*prep)(void); 316 void (*exec)(void); 317 void (*post)(uint64_t ntimes, uint64_t *total_ticks); 318 u32 times; 319 bool run; 320 }; 321 322 static struct exit_test tests[] = { 323 {"hvc", NULL, hvc_exec, NULL, 65536, true}, 324 {"mmio_read_user", mmio_read_user_prep, mmio_read_user_exec, NULL, 65536, true}, 325 {"mmio_read_vgic", NULL, mmio_read_vgic_exec, NULL, 65536, true}, 326 {"eoi", NULL, eoi_exec, NULL, 65536, true}, 327 {"ipi", ipi_prep, ipi_exec, NULL, 65536, true}, 328 {"ipi_hw", ipi_hw_prep, ipi_exec, NULL, 65536, true}, 329 {"lpi", lpi_prep, lpi_exec, NULL, 65536, true}, 330 {"timer_10ms", timer_prep, timer_exec, timer_post, 256, true}, 331 }; 332 333 struct ns_time { 334 uint64_t ns; 335 uint64_t ns_frac; 336 }; 337 338 #define PS_PER_SEC (1000 * 1000 * 1000 * 1000UL) 339 static void ticks_to_ns_time(uint64_t ticks, struct ns_time *ns_time) 340 { 341 uint64_t ps_per_tick = PS_PER_SEC / cntfrq + !!(PS_PER_SEC % cntfrq); 342 uint64_t ps; 343 344 ps = ticks * ps_per_tick; 345 ns_time->ns = ps / 1000; 346 ns_time->ns_frac = (ps % 1000) / 100; 347 } 348 349 static void loop_test(struct exit_test *test) 350 { 351 uint64_t start, end, total_ticks, ntimes = 0; 352 struct ns_time avg_ns, total_ns = {}; 353 354 total_ticks = 0; 355 if (test->prep) { 356 if(!test->prep()) { 357 printf("%s test skipped\n", test->name); 358 return; 359 } 360 } 361 362 while (ntimes < test->times && total_ns.ns < NS_5_SECONDS) { 363 isb(); 364 start = read_sysreg(cntpct_el0); 365 test->exec(); 366 isb(); 367 end = read_sysreg(cntpct_el0); 368 369 ntimes++; 370 total_ticks += (end - start); 371 ticks_to_ns_time(total_ticks, &total_ns); 372 } 373 374 if (test->post) { 375 test->post(ntimes, &total_ticks); 376 ticks_to_ns_time(total_ticks, &total_ns); 377 } 378 379 avg_ns.ns = total_ns.ns / ntimes; 380 avg_ns.ns_frac = total_ns.ns_frac / ntimes; 381 382 printf("%-30s%15" PRId64 ".%-15" PRId64 "%15" PRId64 ".%-15" PRId64 "\n", 383 test->name, total_ns.ns, total_ns.ns_frac, avg_ns.ns, avg_ns.ns_frac); 384 } 385 386 static void parse_args(int argc, char **argv) 387 { 388 int i, len; 389 long val; 390 391 for (i = 1; i < argc; ++i) { 392 len = parse_keyval(argv[i], &val); 393 if (len == -1) 394 continue; 395 396 if (strncmp(argv[i], "mmio-addr", len) == 0) { 397 mmio_addr = val; 398 report_info("found mmio_addr=0x%lx", mmio_addr); 399 } 400 } 401 } 402 403 int main(int argc, char **argv) 404 { 405 int i; 406 407 parse_args(argc, argv); 408 409 if (!test_init()) 410 return 1; 411 412 printf("\n%-30s%18s%13s%18s%13s\n", "name", "total ns", "", "avg ns", ""); 413 for (i = 0 ; i < 92; ++i) 414 printf("%c", '-'); 415 printf("\n"); 416 for (i = 0; i < ARRAY_SIZE(tests); i++) { 417 if (!tests[i].run) 418 continue; 419 assert(tests[i].name && tests[i].exec); 420 loop_test(&tests[i]); 421 } 422 423 return 0; 424 } 425