1 /* 2 * Measure the cost of micro level operations. 3 * 4 * This test provides support for quantifying the cost of micro level 5 * operations. To improve precision in the measurements, one should 6 * consider pinning each VCPU to a specific physical CPU (PCPU) and to 7 * ensure no other task could run on that PCPU to skew the results. 8 * This can be achieved by enabling QMP server in the QEMU command in 9 * unittest.cfg for micro-bench, allowing a client program to get the 10 * thread_id for each VCPU thread from the QMP server. Based on that 11 * information, the client program can then pin the corresponding VCPUs to 12 * dedicated PCPUs and isolate interrupts and tasks from those PCPUs. 13 * 14 * Copyright Columbia University 15 * Author: Shih-Wei Li <shihwei@cs.columbia.edu> 16 * Author: Christoffer Dall <cdall@cs.columbia.edu> 17 * Author: Andrew Jones <drjones@redhat.com> 18 * 19 * This work is licensed under the terms of the GNU LGPL, version 2. 20 */ 21 #include <libcflat.h> 22 #include <asm/gic.h> 23 #include <asm/gic-v3-its.h> 24 25 static u32 cntfrq; 26 27 static volatile bool irq_ready, irq_received; 28 static int nr_ipi_received; 29 30 static void *vgic_dist_base; 31 static void (*write_eoir)(u32 irqstat); 32 33 static void gic_irq_handler(struct pt_regs *regs) 34 { 35 irq_ready = false; 36 irq_received = true; 37 gic_write_eoir(gic_read_iar()); 38 irq_ready = true; 39 } 40 41 static void gic_secondary_entry(void *data) 42 { 43 install_irq_handler(EL1H_IRQ, gic_irq_handler); 44 gic_enable_defaults(); 45 local_irq_enable(); 46 irq_ready = true; 47 while (true) 48 cpu_relax(); 49 } 50 51 static bool test_init(void) 52 { 53 int v = gic_init(); 54 55 if (!v) { 56 printf("No supported gic present, skipping tests...\n"); 57 return false; 58 } 59 60 if (nr_cpus < 2) { 61 printf("At least two cpus required, skipping tests...\n"); 62 return false; 63 } 64 65 switch (v) { 66 case 2: 67 vgic_dist_base = gicv2_dist_base(); 68 write_eoir = gicv2_write_eoir; 69 break; 70 case 3: 71 vgic_dist_base = gicv3_dist_base(); 72 write_eoir = gicv3_write_eoir; 73 break; 74 } 75 76 irq_ready = false; 77 gic_enable_defaults(); 78 on_cpu_async(1, gic_secondary_entry, NULL); 79 80 cntfrq = get_cntfrq(); 81 printf("Timer Frequency %d Hz (Output in microseconds)\n", cntfrq); 82 83 return true; 84 } 85 86 static void gic_prep_common(void) 87 { 88 unsigned tries = 1 << 28; 89 90 while (!irq_ready && tries--) 91 cpu_relax(); 92 assert(irq_ready); 93 } 94 95 static bool ipi_prep(void) 96 { 97 u32 val; 98 99 val = readl(vgic_dist_base + GICD_CTLR); 100 if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) { 101 /* nASSGIreq can be changed only when GICD is disabled */ 102 val &= ~GICD_CTLR_ENABLE_G1A; 103 val &= ~GICD_CTLR_nASSGIreq; 104 writel(val, vgic_dist_base + GICD_CTLR); 105 gicv3_dist_wait_for_rwp(); 106 107 val |= GICD_CTLR_ENABLE_G1A; 108 writel(val, vgic_dist_base + GICD_CTLR); 109 gicv3_dist_wait_for_rwp(); 110 } 111 112 nr_ipi_received = 0; 113 gic_prep_common(); 114 return true; 115 } 116 117 static bool ipi_hw_prep(void) 118 { 119 u32 val; 120 121 val = readl(vgic_dist_base + GICD_CTLR); 122 if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) { 123 /* nASSGIreq can be changed only when GICD is disabled */ 124 val &= ~GICD_CTLR_ENABLE_G1A; 125 val |= GICD_CTLR_nASSGIreq; 126 writel(val, vgic_dist_base + GICD_CTLR); 127 gicv3_dist_wait_for_rwp(); 128 129 val |= GICD_CTLR_ENABLE_G1A; 130 writel(val, vgic_dist_base + GICD_CTLR); 131 gicv3_dist_wait_for_rwp(); 132 } else { 133 return false; 134 } 135 136 nr_ipi_received = 0; 137 gic_prep_common(); 138 return true; 139 } 140 141 static void ipi_exec(void) 142 { 143 unsigned tries = 1 << 28; 144 145 irq_received = false; 146 147 gic_ipi_send_single(1, 1); 148 149 while (!irq_received && tries--) 150 cpu_relax(); 151 152 if (irq_received) 153 ++nr_ipi_received; 154 155 assert_msg(irq_received, "failed to receive IPI in time, but received %d successfully\n", nr_ipi_received); 156 } 157 158 static bool lpi_prep(void) 159 { 160 struct its_collection *col1; 161 struct its_device *dev2; 162 163 if (!gicv3_its_base()) 164 return false; 165 166 its_enable_defaults(); 167 dev2 = its_create_device(2 /* dev id */, 8 /* nb_ites */); 168 col1 = its_create_collection(1 /* col id */, 1 /* target PE */); 169 gicv3_lpi_set_config(8199, LPI_PROP_DEFAULT); 170 171 its_send_mapd_nv(dev2, true); 172 its_send_mapc_nv(col1, true); 173 its_send_invall_nv(col1); 174 its_send_mapti_nv(dev2, 8199 /* lpi id */, 20 /* event id */, col1); 175 176 gic_prep_common(); 177 return true; 178 } 179 180 static void lpi_exec(void) 181 { 182 struct its_device *dev2; 183 unsigned tries = 1 << 28; 184 static int received = 0; 185 186 irq_received = false; 187 188 dev2 = its_get_device(2); 189 its_send_int_nv(dev2, 20); 190 191 while (!irq_received && tries--) 192 cpu_relax(); 193 194 if (irq_received) 195 ++received; 196 197 assert_msg(irq_received, "failed to receive LPI in time, but received %d successfully\n", received); 198 } 199 200 static void hvc_exec(void) 201 { 202 asm volatile("mov w0, #0x4b000000; hvc #0" ::: "w0"); 203 } 204 205 static void mmio_read_user_exec(void) 206 { 207 /* 208 * FIXME: Read device-id in virtio mmio here in order to 209 * force an exit to userspace. This address needs to be 210 * updated in the future if any relevant changes in QEMU 211 * test-dev are made. 212 */ 213 void *userspace_emulated_addr = (void*)0x0a000008; 214 215 readl(userspace_emulated_addr); 216 } 217 218 static void mmio_read_vgic_exec(void) 219 { 220 readl(vgic_dist_base + GICD_IIDR); 221 } 222 223 static void eoi_exec(void) 224 { 225 int spurious_id = 1023; /* writes to EOI are ignored */ 226 227 /* Avoid measuring assert(..) in gic_write_eoir */ 228 write_eoir(spurious_id); 229 } 230 231 struct exit_test { 232 const char *name; 233 bool (*prep)(void); 234 void (*exec)(void); 235 u32 times; 236 bool run; 237 }; 238 239 static struct exit_test tests[] = { 240 {"hvc", NULL, hvc_exec, 65536, true}, 241 {"mmio_read_user", NULL, mmio_read_user_exec, 65536, true}, 242 {"mmio_read_vgic", NULL, mmio_read_vgic_exec, 65536, true}, 243 {"eoi", NULL, eoi_exec, 65536, true}, 244 {"ipi", ipi_prep, ipi_exec, 65536, true}, 245 {"ipi_hw", ipi_hw_prep, ipi_exec, 65536, true}, 246 {"lpi", lpi_prep, lpi_exec, 65536, true}, 247 }; 248 249 struct ns_time { 250 uint64_t ns; 251 uint64_t ns_frac; 252 }; 253 254 #define PS_PER_SEC (1000 * 1000 * 1000 * 1000UL) 255 static void ticks_to_ns_time(uint64_t ticks, struct ns_time *ns_time) 256 { 257 uint64_t ps_per_tick = PS_PER_SEC / cntfrq + !!(PS_PER_SEC % cntfrq); 258 uint64_t ps; 259 260 ps = ticks * ps_per_tick; 261 ns_time->ns = ps / 1000; 262 ns_time->ns_frac = (ps % 1000) / 100; 263 } 264 265 static void loop_test(struct exit_test *test) 266 { 267 uint64_t start, end, total_ticks, ntimes = 0; 268 struct ns_time total_ns, avg_ns; 269 270 if (test->prep) { 271 if(!test->prep()) { 272 printf("%s test skipped\n", test->name); 273 return; 274 } 275 } 276 isb(); 277 start = read_sysreg(cntpct_el0); 278 while (ntimes < test->times) { 279 test->exec(); 280 ntimes++; 281 } 282 isb(); 283 end = read_sysreg(cntpct_el0); 284 285 total_ticks = end - start; 286 ticks_to_ns_time(total_ticks, &total_ns); 287 avg_ns.ns = total_ns.ns / ntimes; 288 avg_ns.ns_frac = total_ns.ns_frac / ntimes; 289 290 printf("%-30s%15" PRId64 ".%-15" PRId64 "%15" PRId64 ".%-15" PRId64 "\n", 291 test->name, total_ns.ns, total_ns.ns_frac, avg_ns.ns, avg_ns.ns_frac); 292 } 293 294 int main(int argc, char **argv) 295 { 296 int i; 297 298 if (!test_init()) 299 return 1; 300 301 printf("\n%-30s%18s%13s%18s%13s\n", "name", "total ns", "", "avg ns", ""); 302 for (i = 0 ; i < 92; ++i) 303 printf("%c", '-'); 304 printf("\n"); 305 for (i = 0; i < ARRAY_SIZE(tests); i++) { 306 if (!tests[i].run) 307 continue; 308 assert(tests[i].name && tests[i].exec); 309 loop_test(&tests[i]); 310 } 311 312 return 0; 313 } 314