1 /* 2 * Measure the cost of micro level operations. 3 * 4 * This test provides support for quantifying the cost of micro level 5 * operations. To improve precision in the measurements, one should 6 * consider pinning each VCPU to a specific physical CPU (PCPU) and to 7 * ensure no other task could run on that PCPU to skew the results. 8 * This can be achieved by enabling QMP server in the QEMU command in 9 * unittest.cfg for micro-bench, allowing a client program to get the 10 * thread_id for each VCPU thread from the QMP server. Based on that 11 * information, the client program can then pin the corresponding VCPUs to 12 * dedicated PCPUs and isolate interrupts and tasks from those PCPUs. 13 * 14 * Copyright Columbia University 15 * Author: Shih-Wei Li <shihwei@cs.columbia.edu> 16 * Author: Christoffer Dall <cdall@cs.columbia.edu> 17 * Author: Andrew Jones <drjones@redhat.com> 18 * 19 * This work is licensed under the terms of the GNU LGPL, version 2. 20 */ 21 #include <libcflat.h> 22 #include <asm/gic.h> 23 #include <asm/gic-v3-its.h> 24 25 #define NTIMES (1U << 16) 26 27 static u32 cntfrq; 28 29 static volatile bool irq_ready, irq_received; 30 static int nr_ipi_received; 31 32 static void *vgic_dist_base; 33 static void (*write_eoir)(u32 irqstat); 34 35 static void gic_irq_handler(struct pt_regs *regs) 36 { 37 irq_ready = false; 38 irq_received = true; 39 gic_write_eoir(gic_read_iar()); 40 irq_ready = true; 41 } 42 43 static void gic_secondary_entry(void *data) 44 { 45 install_irq_handler(EL1H_IRQ, gic_irq_handler); 46 gic_enable_defaults(); 47 local_irq_enable(); 48 irq_ready = true; 49 while (true) 50 cpu_relax(); 51 } 52 53 static bool test_init(void) 54 { 55 int v = gic_init(); 56 57 if (!v) { 58 printf("No supported gic present, skipping tests...\n"); 59 return false; 60 } 61 62 if (nr_cpus < 2) { 63 printf("At least two cpus required, skipping tests...\n"); 64 return false; 65 } 66 67 switch (v) { 68 case 2: 69 vgic_dist_base = gicv2_dist_base(); 70 write_eoir = gicv2_write_eoir; 71 break; 72 case 3: 73 vgic_dist_base = gicv3_dist_base(); 74 write_eoir = gicv3_write_eoir; 75 break; 76 } 77 78 irq_ready = false; 79 gic_enable_defaults(); 80 on_cpu_async(1, gic_secondary_entry, NULL); 81 82 cntfrq = get_cntfrq(); 83 printf("Timer Frequency %d Hz (Output in microseconds)\n", cntfrq); 84 85 return true; 86 } 87 88 static void gic_prep_common(void) 89 { 90 unsigned tries = 1 << 28; 91 92 while (!irq_ready && tries--) 93 cpu_relax(); 94 assert(irq_ready); 95 } 96 97 static bool ipi_prep(void) 98 { 99 u32 val; 100 101 val = readl(vgic_dist_base + GICD_CTLR); 102 if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) { 103 /* nASSGIreq can be changed only when GICD is disabled */ 104 val &= ~GICD_CTLR_ENABLE_G1A; 105 val &= ~GICD_CTLR_nASSGIreq; 106 writel(val, vgic_dist_base + GICD_CTLR); 107 gicv3_dist_wait_for_rwp(); 108 109 val |= GICD_CTLR_ENABLE_G1A; 110 writel(val, vgic_dist_base + GICD_CTLR); 111 gicv3_dist_wait_for_rwp(); 112 } 113 114 nr_ipi_received = 0; 115 gic_prep_common(); 116 return true; 117 } 118 119 static bool ipi_hw_prep(void) 120 { 121 u32 val; 122 123 val = readl(vgic_dist_base + GICD_CTLR); 124 if (readl(vgic_dist_base + GICD_TYPER2) & GICD_TYPER2_nASSGIcap) { 125 /* nASSGIreq can be changed only when GICD is disabled */ 126 val &= ~GICD_CTLR_ENABLE_G1A; 127 val |= GICD_CTLR_nASSGIreq; 128 writel(val, vgic_dist_base + GICD_CTLR); 129 gicv3_dist_wait_for_rwp(); 130 131 val |= GICD_CTLR_ENABLE_G1A; 132 writel(val, vgic_dist_base + GICD_CTLR); 133 gicv3_dist_wait_for_rwp(); 134 } else { 135 return false; 136 } 137 138 nr_ipi_received = 0; 139 gic_prep_common(); 140 return true; 141 } 142 143 static void ipi_exec(void) 144 { 145 unsigned tries = 1 << 28; 146 147 irq_received = false; 148 149 gic_ipi_send_single(1, 1); 150 151 while (!irq_received && tries--) 152 cpu_relax(); 153 154 if (irq_received) 155 ++nr_ipi_received; 156 157 assert_msg(irq_received, "failed to receive IPI in time, but received %d successfully\n", nr_ipi_received); 158 } 159 160 static bool lpi_prep(void) 161 { 162 struct its_collection *col1; 163 struct its_device *dev2; 164 165 if (!gicv3_its_base()) 166 return false; 167 168 its_enable_defaults(); 169 dev2 = its_create_device(2 /* dev id */, 8 /* nb_ites */); 170 col1 = its_create_collection(1 /* col id */, 1 /* target PE */); 171 gicv3_lpi_set_config(8199, LPI_PROP_DEFAULT); 172 173 its_send_mapd_nv(dev2, true); 174 its_send_mapc_nv(col1, true); 175 its_send_invall_nv(col1); 176 its_send_mapti_nv(dev2, 8199 /* lpi id */, 20 /* event id */, col1); 177 178 gic_prep_common(); 179 return true; 180 } 181 182 static void lpi_exec(void) 183 { 184 struct its_device *dev2; 185 unsigned tries = 1 << 28; 186 static int received = 0; 187 188 irq_received = false; 189 190 dev2 = its_get_device(2); 191 its_send_int_nv(dev2, 20); 192 193 while (!irq_received && tries--) 194 cpu_relax(); 195 196 if (irq_received) 197 ++received; 198 199 assert_msg(irq_received, "failed to receive LPI in time, but received %d successfully\n", received); 200 } 201 202 static void hvc_exec(void) 203 { 204 asm volatile("mov w0, #0x4b000000; hvc #0" ::: "w0"); 205 } 206 207 static void mmio_read_user_exec(void) 208 { 209 /* 210 * FIXME: Read device-id in virtio mmio here in order to 211 * force an exit to userspace. This address needs to be 212 * updated in the future if any relevant changes in QEMU 213 * test-dev are made. 214 */ 215 void *userspace_emulated_addr = (void*)0x0a000008; 216 217 readl(userspace_emulated_addr); 218 } 219 220 static void mmio_read_vgic_exec(void) 221 { 222 readl(vgic_dist_base + GICD_IIDR); 223 } 224 225 static void eoi_exec(void) 226 { 227 int spurious_id = 1023; /* writes to EOI are ignored */ 228 229 /* Avoid measuring assert(..) in gic_write_eoir */ 230 write_eoir(spurious_id); 231 } 232 233 struct exit_test { 234 const char *name; 235 bool (*prep)(void); 236 void (*exec)(void); 237 bool run; 238 }; 239 240 static struct exit_test tests[] = { 241 {"hvc", NULL, hvc_exec, true}, 242 {"mmio_read_user", NULL, mmio_read_user_exec, true}, 243 {"mmio_read_vgic", NULL, mmio_read_vgic_exec, true}, 244 {"eoi", NULL, eoi_exec, true}, 245 {"ipi", ipi_prep, ipi_exec, true}, 246 {"ipi_hw", ipi_hw_prep, ipi_exec, true}, 247 {"lpi", lpi_prep, lpi_exec, true}, 248 }; 249 250 struct ns_time { 251 uint64_t ns; 252 uint64_t ns_frac; 253 }; 254 255 #define PS_PER_SEC (1000 * 1000 * 1000 * 1000UL) 256 static void ticks_to_ns_time(uint64_t ticks, struct ns_time *ns_time) 257 { 258 uint64_t ps_per_tick = PS_PER_SEC / cntfrq + !!(PS_PER_SEC % cntfrq); 259 uint64_t ps; 260 261 ps = ticks * ps_per_tick; 262 ns_time->ns = ps / 1000; 263 ns_time->ns_frac = (ps % 1000) / 100; 264 } 265 266 static void loop_test(struct exit_test *test) 267 { 268 uint64_t start, end, total_ticks, ntimes = NTIMES; 269 struct ns_time total_ns, avg_ns; 270 271 if (test->prep) { 272 if(!test->prep()) { 273 printf("%s test skipped\n", test->name); 274 return; 275 } 276 } 277 isb(); 278 start = read_sysreg(cntpct_el0); 279 while (ntimes--) 280 test->exec(); 281 isb(); 282 end = read_sysreg(cntpct_el0); 283 284 total_ticks = end - start; 285 ticks_to_ns_time(total_ticks, &total_ns); 286 avg_ns.ns = total_ns.ns / NTIMES; 287 avg_ns.ns_frac = total_ns.ns_frac / NTIMES; 288 289 printf("%-30s%15" PRId64 ".%-15" PRId64 "%15" PRId64 ".%-15" PRId64 "\n", 290 test->name, total_ns.ns, total_ns.ns_frac, avg_ns.ns, avg_ns.ns_frac); 291 } 292 293 int main(int argc, char **argv) 294 { 295 int i; 296 297 if (!test_init()) 298 return 1; 299 300 printf("\n%-30s%18s%13s%18s%13s\n", "name", "total ns", "", "avg ns", ""); 301 for (i = 0 ; i < 92; ++i) 302 printf("%c", '-'); 303 printf("\n"); 304 for (i = 0; i < ARRAY_SIZE(tests); i++) { 305 if (!tests[i].run) 306 continue; 307 assert(tests[i].name && tests[i].exec); 308 loop_test(&tests[i]); 309 } 310 311 return 0; 312 } 313