1 /* 2 * Measure the cost of micro level operations. 3 * 4 * This test provides support for quantifying the cost of micro level 5 * operations. To improve precision in the measurements, one should 6 * consider pinning each VCPU to a specific physical CPU (PCPU) and to 7 * ensure no other task could run on that PCPU to skew the results. 8 * This can be achieved by enabling QMP server in the QEMU command in 9 * unittest.cfg for micro-bench, allowing a client program to get the 10 * thread_id for each VCPU thread from the QMP server. Based on that 11 * information, the client program can then pin the corresponding VCPUs to 12 * dedicated PCPUs and isolate interrupts and tasks from those PCPUs. 13 * 14 * Copyright Columbia University 15 * Author: Shih-Wei Li <shihwei@cs.columbia.edu> 16 * Author: Christoffer Dall <cdall@cs.columbia.edu> 17 * Author: Andrew Jones <drjones@redhat.com> 18 * 19 * This work is licensed under the terms of the GNU LGPL, version 2. 20 */ 21 #include <libcflat.h> 22 #include <asm/gic.h> 23 #include <asm/gic-v3-its.h> 24 #include <asm/timer.h> 25 26 #define NS_5_SECONDS (5 * 1000 * 1000 * 1000UL) 27 28 static u32 cntfrq; 29 30 static volatile bool irq_ready, irq_received; 31 static int nr_ipi_received; 32 33 static void *vgic_dist_base; 34 static void (*write_eoir)(u32 irqstat); 35 36 static void gic_irq_handler(struct pt_regs *regs) 37 { 38 u32 irqstat = gic_read_iar(); 39 irq_ready = false; 40 irq_received = true; 41 gic_write_eoir(irqstat); 42 43 if (irqstat == PPI(TIMER_VTIMER_IRQ)) { 44 write_sysreg((ARCH_TIMER_CTL_IMASK | ARCH_TIMER_CTL_ENABLE), 45 cntv_ctl_el0); 46 isb(); 47 } 48 irq_ready = true; 49 } 50 51 static void gic_secondary_entry(void *data) 52 { 53 install_irq_handler(EL1H_IRQ, gic_irq_handler); 54 gic_enable_defaults(); 55 local_irq_enable(); 56 irq_ready = true; 57 while (true) 58 cpu_relax(); 59 } 60 61 static bool test_init(void) 62 { 63 int v = gic_init(); 64 65 if (!v) { 66 printf("No supported gic present, skipping tests...\n"); 67 return false; 68 } 69 70 if (nr_cpus < 2) { 71 printf("At least two cpus required, skipping tests...\n"); 72 return false; 73 } 74 75 switch (v) { 76 case 2: 77 vgic_dist_base = gicv2_dist_base(); 78 write_eoir = gicv2_write_eoir; 79 break; 80 case 3: 81 vgic_dist_base = gicv3_dist_base(); 82 write_eoir = gicv3_write_eoir; 83 break; 84 } 85 86 irq_ready = false; 87 gic_enable_defaults(); 88 on_cpu_async(1, gic_secondary_entry, NULL); 89 90 cntfrq = get_cntfrq(); 91 printf("Timer Frequency %d Hz (Output in microseconds)\n", cntfrq); 92 93 return true; 94 } 95 96 static void gic_prep_common(void) 97 { 98 unsigned tries = 1 << 28; 99 100 while (!irq_ready && tries--) 101 cpu_relax(); 102 assert(irq_ready); 103 } 104 105 static bool ipi_prep(void) 106 { 107 u32 val; 108 109 val = readl(vgic_dist_base + GICD_CTLR); 110 if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) { 111 /* nASSGIreq can be changed only when GICD is disabled */ 112 val &= ~GICD_CTLR_ENABLE_G1A; 113 val &= ~GICD_CTLR_nASSGIreq; 114 writel(val, vgic_dist_base + GICD_CTLR); 115 gicv3_dist_wait_for_rwp(); 116 117 val |= GICD_CTLR_ENABLE_G1A; 118 writel(val, vgic_dist_base + GICD_CTLR); 119 gicv3_dist_wait_for_rwp(); 120 } 121 122 nr_ipi_received = 0; 123 gic_prep_common(); 124 return true; 125 } 126 127 static bool ipi_hw_prep(void) 128 { 129 u32 val; 130 131 val = readl(vgic_dist_base + GICD_CTLR); 132 if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) { 133 /* nASSGIreq can be changed only when GICD is disabled */ 134 val &= ~GICD_CTLR_ENABLE_G1A; 135 val |= GICD_CTLR_nASSGIreq; 136 writel(val, vgic_dist_base + GICD_CTLR); 137 gicv3_dist_wait_for_rwp(); 138 139 val |= GICD_CTLR_ENABLE_G1A; 140 writel(val, vgic_dist_base + GICD_CTLR); 141 gicv3_dist_wait_for_rwp(); 142 } else { 143 return false; 144 } 145 146 nr_ipi_received = 0; 147 gic_prep_common(); 148 return true; 149 } 150 151 static void ipi_exec(void) 152 { 153 unsigned tries = 1 << 28; 154 155 irq_received = false; 156 157 gic_ipi_send_single(1, 1); 158 159 while (!irq_received && tries--) 160 cpu_relax(); 161 162 if (irq_received) 163 ++nr_ipi_received; 164 165 assert_msg(irq_received, "failed to receive IPI in time, but received %d successfully\n", nr_ipi_received); 166 } 167 168 static bool lpi_prep(void) 169 { 170 struct its_collection *col1; 171 struct its_device *dev2; 172 173 if (!gicv3_its_base()) 174 return false; 175 176 its_enable_defaults(); 177 dev2 = its_create_device(2 /* dev id */, 8 /* nb_ites */); 178 col1 = its_create_collection(1 /* col id */, 1 /* target PE */); 179 gicv3_lpi_set_config(8199, LPI_PROP_DEFAULT); 180 181 its_send_mapd_nv(dev2, true); 182 its_send_mapc_nv(col1, true); 183 its_send_invall_nv(col1); 184 its_send_mapti_nv(dev2, 8199 /* lpi id */, 20 /* event id */, col1); 185 186 gic_prep_common(); 187 return true; 188 } 189 190 static void lpi_exec(void) 191 { 192 struct its_device *dev2; 193 unsigned tries = 1 << 28; 194 static int received = 0; 195 196 irq_received = false; 197 198 dev2 = its_get_device(2); 199 its_send_int_nv(dev2, 20); 200 201 while (!irq_received && tries--) 202 cpu_relax(); 203 204 if (irq_received) 205 ++received; 206 207 assert_msg(irq_received, "failed to receive LPI in time, but received %d successfully\n", received); 208 } 209 210 static bool timer_prep(void) 211 { 212 void *gic_isenabler; 213 214 gic_enable_defaults(); 215 install_irq_handler(EL1H_IRQ, gic_irq_handler); 216 local_irq_enable(); 217 218 switch (gic_version()) { 219 case 2: 220 gic_isenabler = gicv2_dist_base() + GICD_ISENABLER; 221 break; 222 case 3: 223 gic_isenabler = gicv3_sgi_base() + GICR_ISENABLER0; 224 break; 225 default: 226 assert_msg(0, "Unreachable"); 227 } 228 229 writel(1 << PPI(TIMER_VTIMER_IRQ), gic_isenabler); 230 write_sysreg(ARCH_TIMER_CTL_IMASK | ARCH_TIMER_CTL_ENABLE, cntv_ctl_el0); 231 isb(); 232 233 gic_prep_common(); 234 return true; 235 } 236 237 static void timer_exec(void) 238 { 239 u64 before_timer; 240 u64 timer_10ms; 241 unsigned tries = 1 << 28; 242 static int received = 0; 243 244 irq_received = false; 245 246 before_timer = read_sysreg(cntvct_el0); 247 timer_10ms = cntfrq / 100; 248 write_sysreg(before_timer + timer_10ms, cntv_cval_el0); 249 write_sysreg(ARCH_TIMER_CTL_ENABLE, cntv_ctl_el0); 250 isb(); 251 252 while (!irq_received && tries--) 253 cpu_relax(); 254 255 if (irq_received) 256 ++received; 257 258 assert_msg(irq_received, "failed to receive PPI in time, but received %d successfully\n", received); 259 } 260 261 static void timer_post(uint64_t ntimes, uint64_t *total_ticks) 262 { 263 /* 264 * We use a 10msec timer to test the latency of PPI, 265 * so we substract the ticks of 10msec to get the 266 * actual latency 267 */ 268 *total_ticks -= ntimes * (cntfrq / 100); 269 } 270 271 static void hvc_exec(void) 272 { 273 asm volatile("mov w0, #0x4b000000; hvc #0" ::: "w0"); 274 } 275 276 static void *userspace_emulated_addr; 277 278 static bool mmio_read_user_prep(void) 279 { 280 /* 281 * FIXME: Read device-id in virtio mmio here in order to 282 * force an exit to userspace. This address needs to be 283 * updated in the future if any relevant changes in QEMU 284 * test-dev are made. 285 */ 286 userspace_emulated_addr = (void*)ioremap(0x0a000008, sizeof(u32)); 287 return true; 288 } 289 290 static void mmio_read_user_exec(void) 291 { 292 readl(userspace_emulated_addr); 293 } 294 295 static void mmio_read_vgic_exec(void) 296 { 297 readl(vgic_dist_base + GICD_IIDR); 298 } 299 300 static void eoi_exec(void) 301 { 302 int spurious_id = 1023; /* writes to EOI are ignored */ 303 304 /* Avoid measuring assert(..) in gic_write_eoir */ 305 write_eoir(spurious_id); 306 } 307 308 struct exit_test { 309 const char *name; 310 bool (*prep)(void); 311 void (*exec)(void); 312 void (*post)(uint64_t ntimes, uint64_t *total_ticks); 313 u32 times; 314 bool run; 315 }; 316 317 static struct exit_test tests[] = { 318 {"hvc", NULL, hvc_exec, NULL, 65536, true}, 319 {"mmio_read_user", mmio_read_user_prep, mmio_read_user_exec, NULL, 65536, true}, 320 {"mmio_read_vgic", NULL, mmio_read_vgic_exec, NULL, 65536, true}, 321 {"eoi", NULL, eoi_exec, NULL, 65536, true}, 322 {"ipi", ipi_prep, ipi_exec, NULL, 65536, true}, 323 {"ipi_hw", ipi_hw_prep, ipi_exec, NULL, 65536, true}, 324 {"lpi", lpi_prep, lpi_exec, NULL, 65536, true}, 325 {"timer_10ms", timer_prep, timer_exec, timer_post, 256, true}, 326 }; 327 328 struct ns_time { 329 uint64_t ns; 330 uint64_t ns_frac; 331 }; 332 333 #define PS_PER_SEC (1000 * 1000 * 1000 * 1000UL) 334 static void ticks_to_ns_time(uint64_t ticks, struct ns_time *ns_time) 335 { 336 uint64_t ps_per_tick = PS_PER_SEC / cntfrq + !!(PS_PER_SEC % cntfrq); 337 uint64_t ps; 338 339 ps = ticks * ps_per_tick; 340 ns_time->ns = ps / 1000; 341 ns_time->ns_frac = (ps % 1000) / 100; 342 } 343 344 static void loop_test(struct exit_test *test) 345 { 346 uint64_t start, end, total_ticks, ntimes = 0; 347 struct ns_time avg_ns, total_ns = {}; 348 349 total_ticks = 0; 350 if (test->prep) { 351 if(!test->prep()) { 352 printf("%s test skipped\n", test->name); 353 return; 354 } 355 } 356 357 while (ntimes < test->times && total_ns.ns < NS_5_SECONDS) { 358 isb(); 359 start = read_sysreg(cntpct_el0); 360 test->exec(); 361 isb(); 362 end = read_sysreg(cntpct_el0); 363 364 ntimes++; 365 total_ticks += (end - start); 366 ticks_to_ns_time(total_ticks, &total_ns); 367 } 368 369 if (test->post) { 370 test->post(ntimes, &total_ticks); 371 ticks_to_ns_time(total_ticks, &total_ns); 372 } 373 374 avg_ns.ns = total_ns.ns / ntimes; 375 avg_ns.ns_frac = total_ns.ns_frac / ntimes; 376 377 printf("%-30s%15" PRId64 ".%-15" PRId64 "%15" PRId64 ".%-15" PRId64 "\n", 378 test->name, total_ns.ns, total_ns.ns_frac, avg_ns.ns, avg_ns.ns_frac); 379 } 380 381 int main(int argc, char **argv) 382 { 383 int i; 384 385 if (!test_init()) 386 return 1; 387 388 printf("\n%-30s%18s%13s%18s%13s\n", "name", "total ns", "", "avg ns", ""); 389 for (i = 0 ; i < 92; ++i) 390 printf("%c", '-'); 391 printf("\n"); 392 for (i = 0; i < ARRAY_SIZE(tests); i++) { 393 if (!tests[i].run) 394 continue; 395 assert(tests[i].name && tests[i].exec); 396 loop_test(&tests[i]); 397 } 398 399 return 0; 400 } 401