1 /* 2 * Measure the cost of micro level operations. 3 * 4 * This test provides support for quantifying the cost of micro level 5 * operations. To improve precision in the measurements, one should 6 * consider pinning each VCPU to a specific physical CPU (PCPU) and to 7 * ensure no other task could run on that PCPU to skew the results. 8 * This can be achieved by enabling QMP server in the QEMU command in 9 * unittest.cfg for micro-bench, allowing a client program to get the 10 * thread_id for each VCPU thread from the QMP server. Based on that 11 * information, the client program can then pin the corresponding VCPUs to 12 * dedicated PCPUs and isolate interrupts and tasks from those PCPUs. 13 * 14 * Copyright Columbia University 15 * Author: Shih-Wei Li <shihwei@cs.columbia.edu> 16 * Author: Christoffer Dall <cdall@cs.columbia.edu> 17 * Author: Andrew Jones <drjones@redhat.com> 18 * 19 * This work is licensed under the terms of the GNU LGPL, version 2. 20 */ 21 #include <libcflat.h> 22 #include <util.h> 23 #include <asm/gic.h> 24 #include <asm/gic-v3-its.h> 25 #include <asm/timer.h> 26 27 #define NS_5_SECONDS (5 * 1000 * 1000 * 1000UL) 28 #define QEMU_MMIO_ADDR 0x0a000008 29 30 static u32 cntfrq; 31 32 static volatile bool irq_ready, irq_received; 33 static int nr_ipi_received; 34 static unsigned long mmio_addr = QEMU_MMIO_ADDR; 35 36 static void *vgic_dist_base; 37 static void (*write_eoir)(u32 irqstat); 38 39 static void gic_irq_handler(struct pt_regs *regs) 40 { 41 u32 irqstat = gic_read_iar(); 42 irq_ready = false; 43 irq_received = true; 44 gic_write_eoir(irqstat); 45 46 if (irqstat == TIMER_VTIMER_IRQ) { 47 write_sysreg((ARCH_TIMER_CTL_IMASK | ARCH_TIMER_CTL_ENABLE), 48 cntv_ctl_el0); 49 isb(); 50 } 51 irq_ready = true; 52 } 53 54 static void gic_secondary_entry(void *data) 55 { 56 install_irq_handler(EL1H_IRQ, gic_irq_handler); 57 gic_enable_defaults(); 58 local_irq_enable(); 59 irq_ready = true; 60 while (true) 61 cpu_relax(); 62 } 63 64 static bool test_init(void) 65 { 66 int v = gic_init(); 67 68 if (!v) { 69 printf("No supported gic present, skipping tests...\n"); 70 return false; 71 } 72 73 if (nr_cpus < 2) { 74 printf("At least two cpus required, skipping tests...\n"); 75 return false; 76 } 77 78 switch (v) { 79 case 2: 80 vgic_dist_base = gicv2_dist_base(); 81 write_eoir = gicv2_write_eoir; 82 break; 83 case 3: 84 vgic_dist_base = gicv3_dist_base(); 85 write_eoir = gicv3_write_eoir; 86 break; 87 } 88 89 irq_ready = false; 90 gic_enable_defaults(); 91 on_cpu_async(1, gic_secondary_entry, NULL); 92 93 cntfrq = get_cntfrq(); 94 printf("Timer Frequency %d Hz (Output in microseconds)\n", cntfrq); 95 96 return true; 97 } 98 99 static void gic_prep_common(void) 100 { 101 unsigned tries = 1 << 28; 102 103 while (!irq_ready && tries--) 104 cpu_relax(); 105 assert(irq_ready); 106 } 107 108 static bool ipi_prep(void) 109 { 110 u32 val; 111 112 val = readl(vgic_dist_base + GICD_CTLR); 113 if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) { 114 /* nASSGIreq can be changed only when GICD is disabled */ 115 val &= ~GICD_CTLR_ENABLE_G1A; 116 val &= ~GICD_CTLR_nASSGIreq; 117 writel(val, vgic_dist_base + GICD_CTLR); 118 gicv3_dist_wait_for_rwp(); 119 120 val |= GICD_CTLR_ENABLE_G1A; 121 writel(val, vgic_dist_base + GICD_CTLR); 122 gicv3_dist_wait_for_rwp(); 123 } 124 125 nr_ipi_received = 0; 126 gic_prep_common(); 127 return true; 128 } 129 130 static bool ipi_hw_prep(void) 131 { 132 u32 val; 133 134 val = readl(vgic_dist_base + GICD_CTLR); 135 if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) { 136 /* nASSGIreq can be changed only when GICD is disabled */ 137 val &= ~GICD_CTLR_ENABLE_G1A; 138 val |= GICD_CTLR_nASSGIreq; 139 writel(val, vgic_dist_base + GICD_CTLR); 140 gicv3_dist_wait_for_rwp(); 141 142 val |= GICD_CTLR_ENABLE_G1A; 143 writel(val, vgic_dist_base + GICD_CTLR); 144 gicv3_dist_wait_for_rwp(); 145 } else { 146 return false; 147 } 148 149 nr_ipi_received = 0; 150 gic_prep_common(); 151 return true; 152 } 153 154 static void ipi_exec(void) 155 { 156 unsigned tries = 1 << 28; 157 158 irq_received = false; 159 160 gic_ipi_send_single(1, 1); 161 162 while (!irq_received && tries--) 163 cpu_relax(); 164 165 if (irq_received) 166 ++nr_ipi_received; 167 168 assert_msg(irq_received, "failed to receive IPI in time, but received %d successfully\n", nr_ipi_received); 169 } 170 171 static bool lpi_prep(void) 172 { 173 struct its_collection *col1; 174 struct its_device *dev2; 175 176 if (!gicv3_its_base()) 177 return false; 178 179 its_enable_defaults(); 180 dev2 = its_create_device(2 /* dev id */, 8 /* nb_ites */); 181 col1 = its_create_collection(1 /* col id */, 1 /* target PE */); 182 gicv3_lpi_set_config(8199, LPI_PROP_DEFAULT); 183 184 its_send_mapd_nv(dev2, true); 185 its_send_mapc_nv(col1, true); 186 its_send_invall_nv(col1); 187 its_send_mapti_nv(dev2, 8199 /* lpi id */, 20 /* event id */, col1); 188 189 gic_prep_common(); 190 return true; 191 } 192 193 static void lpi_exec(void) 194 { 195 struct its_device *dev2; 196 unsigned tries = 1 << 28; 197 static int received = 0; 198 199 irq_received = false; 200 201 dev2 = its_get_device(2); 202 its_send_int_nv(dev2, 20); 203 204 while (!irq_received && tries--) 205 cpu_relax(); 206 207 if (irq_received) 208 ++received; 209 210 assert_msg(irq_received, "failed to receive LPI in time, but received %d successfully\n", received); 211 } 212 213 static bool timer_prep(void) 214 { 215 gic_enable_defaults(); 216 install_irq_handler(EL1H_IRQ, gic_irq_handler); 217 local_irq_enable(); 218 219 gic_enable_irq(TIMER_VTIMER_IRQ); 220 write_sysreg(ARCH_TIMER_CTL_IMASK | ARCH_TIMER_CTL_ENABLE, cntv_ctl_el0); 221 isb(); 222 223 gic_prep_common(); 224 return true; 225 } 226 227 static void timer_exec(void) 228 { 229 u64 before_timer; 230 u64 timer_10ms; 231 unsigned tries = 1 << 28; 232 static int received = 0; 233 234 irq_received = false; 235 236 before_timer = read_sysreg(cntvct_el0); 237 timer_10ms = cntfrq / 100; 238 write_sysreg(before_timer + timer_10ms, cntv_cval_el0); 239 write_sysreg(ARCH_TIMER_CTL_ENABLE, cntv_ctl_el0); 240 isb(); 241 242 while (!irq_received && tries--) 243 cpu_relax(); 244 245 if (irq_received) 246 ++received; 247 248 assert_msg(irq_received, "failed to receive PPI in time, but received %d successfully\n", received); 249 } 250 251 static void timer_post(uint64_t ntimes, uint64_t *total_ticks) 252 { 253 /* 254 * We use a 10msec timer to test the latency of PPI, 255 * so we subtract the ticks of 10msec to get the 256 * actual latency 257 */ 258 *total_ticks -= ntimes * (cntfrq / 100); 259 } 260 261 static void hvc_exec(void) 262 { 263 asm volatile("mov w0, #0x4b000000; hvc #0" ::: "w0"); 264 } 265 266 static void *userspace_emulated_addr; 267 268 static bool mmio_read_user_prep(void) 269 { 270 /* 271 * FIXME: We need an MMIO address that we can safely read to test 272 * exits to userspace. Ideally, the test-dev would provide us this 273 * address (and one we could write to too), but until it does we 274 * use a virtio-mmio transport address. FIXME2: We should be getting 275 * this address (and the future test-dev address) from the devicetree, 276 * but so far we lazily hardcode it. 277 */ 278 userspace_emulated_addr = (void *)ioremap(mmio_addr, sizeof(u32)); 279 return true; 280 } 281 282 static void mmio_read_user_exec(void) 283 { 284 readl(userspace_emulated_addr); 285 } 286 287 static void mmio_read_vgic_exec(void) 288 { 289 readl(vgic_dist_base + GICD_IIDR); 290 } 291 292 static void eoi_exec(void) 293 { 294 int spurious_id = 1023; /* writes to EOI are ignored */ 295 296 /* Avoid measuring assert(..) in gic_write_eoir */ 297 write_eoir(spurious_id); 298 } 299 300 struct exit_test { 301 const char *name; 302 bool (*prep)(void); 303 void (*exec)(void); 304 void (*post)(uint64_t ntimes, uint64_t *total_ticks); 305 u32 times; 306 bool run; 307 }; 308 309 static struct exit_test tests[] = { 310 {"hvc", NULL, hvc_exec, NULL, 65536, true}, 311 {"mmio_read_user", mmio_read_user_prep, mmio_read_user_exec, NULL, 65536, true}, 312 {"mmio_read_vgic", NULL, mmio_read_vgic_exec, NULL, 65536, true}, 313 {"eoi", NULL, eoi_exec, NULL, 65536, true}, 314 {"ipi", ipi_prep, ipi_exec, NULL, 65536, true}, 315 {"ipi_hw", ipi_hw_prep, ipi_exec, NULL, 65536, true}, 316 {"lpi", lpi_prep, lpi_exec, NULL, 65536, true}, 317 {"timer_10ms", timer_prep, timer_exec, timer_post, 256, true}, 318 }; 319 320 struct ns_time { 321 uint64_t ns; 322 uint64_t ns_frac; 323 }; 324 325 #define PS_PER_SEC (1000 * 1000 * 1000 * 1000UL) 326 static void ticks_to_ns_time(uint64_t ticks, struct ns_time *ns_time) 327 { 328 uint64_t ps_per_tick = PS_PER_SEC / cntfrq + !!(PS_PER_SEC % cntfrq); 329 uint64_t ps; 330 331 ps = ticks * ps_per_tick; 332 ns_time->ns = ps / 1000; 333 ns_time->ns_frac = (ps % 1000) / 100; 334 } 335 336 static void loop_test(struct exit_test *test) 337 { 338 uint64_t start, end, total_ticks, ntimes = 0; 339 struct ns_time avg_ns, total_ns = {}; 340 341 total_ticks = 0; 342 if (test->prep) { 343 if(!test->prep()) { 344 printf("%s test skipped\n", test->name); 345 return; 346 } 347 } 348 349 while (ntimes < test->times && total_ns.ns < NS_5_SECONDS) { 350 isb(); 351 start = read_sysreg(cntpct_el0); 352 test->exec(); 353 isb(); 354 end = read_sysreg(cntpct_el0); 355 356 ntimes++; 357 total_ticks += (end - start); 358 ticks_to_ns_time(total_ticks, &total_ns); 359 } 360 361 if (test->post) { 362 test->post(ntimes, &total_ticks); 363 ticks_to_ns_time(total_ticks, &total_ns); 364 } 365 366 avg_ns.ns = total_ns.ns / ntimes; 367 avg_ns.ns_frac = total_ns.ns_frac / ntimes; 368 369 printf("%-30s%15" PRId64 ".%-15" PRId64 "%15" PRId64 ".%-15" PRId64 "\n", 370 test->name, total_ns.ns, total_ns.ns_frac, avg_ns.ns, avg_ns.ns_frac); 371 } 372 373 static void parse_args(int argc, char **argv) 374 { 375 int i, len; 376 long val; 377 378 for (i = 1; i < argc; ++i) { 379 len = parse_keyval(argv[i], &val); 380 if (len == -1) 381 continue; 382 383 if (strncmp(argv[i], "mmio-addr", len) == 0) { 384 mmio_addr = val; 385 report_info("found mmio_addr=0x%lx", mmio_addr); 386 } 387 } 388 } 389 390 int main(int argc, char **argv) 391 { 392 int i; 393 394 parse_args(argc, argv); 395 396 if (!test_init()) 397 return 1; 398 399 printf("\n%-30s%18s%13s%18s%13s\n", "name", "total ns", "", "avg ns", ""); 400 for (i = 0 ; i < 92; ++i) 401 printf("%c", '-'); 402 printf("\n"); 403 for (i = 0; i < ARRAY_SIZE(tests); i++) { 404 if (!tests[i].run) 405 continue; 406 assert(tests[i].name && tests[i].exec); 407 loop_test(&tests[i]); 408 } 409 410 return 0; 411 } 412