1 2 #include "x86/msr.h" 3 #include "x86/processor.h" 4 #include "x86/apic-defs.h" 5 #include "x86/apic.h" 6 #include "x86/desc.h" 7 #include "x86/isr.h" 8 #include "alloc.h" 9 10 #include "libcflat.h" 11 #include <stdint.h> 12 13 #define FIXED_CNT_INDEX 32 14 #define PC_VECTOR 32 15 16 #define EVNSEL_EVENT_SHIFT 0 17 #define EVNTSEL_UMASK_SHIFT 8 18 #define EVNTSEL_USR_SHIFT 16 19 #define EVNTSEL_OS_SHIFT 17 20 #define EVNTSEL_EDGE_SHIFT 18 21 #define EVNTSEL_PC_SHIFT 19 22 #define EVNTSEL_INT_SHIFT 20 23 #define EVNTSEL_EN_SHIF 22 24 #define EVNTSEL_INV_SHIF 23 25 #define EVNTSEL_CMASK_SHIFT 24 26 27 #define EVNTSEL_EN (1 << EVNTSEL_EN_SHIF) 28 #define EVNTSEL_USR (1 << EVNTSEL_USR_SHIFT) 29 #define EVNTSEL_OS (1 << EVNTSEL_OS_SHIFT) 30 #define EVNTSEL_PC (1 << EVNTSEL_PC_SHIFT) 31 #define EVNTSEL_INT (1 << EVNTSEL_INT_SHIFT) 32 #define EVNTSEL_INV (1 << EVNTSEL_INV_SHIF) 33 34 #define N 1000000 35 36 typedef struct { 37 uint32_t ctr; 38 uint32_t config; 39 uint64_t count; 40 int idx; 41 } pmu_counter_t; 42 43 union cpuid10_eax { 44 struct { 45 unsigned int version_id:8; 46 unsigned int num_counters:8; 47 unsigned int bit_width:8; 48 unsigned int mask_length:8; 49 } split; 50 unsigned int full; 51 } eax; 52 53 union cpuid10_ebx { 54 struct { 55 unsigned int no_unhalted_core_cycles:1; 56 unsigned int no_instructions_retired:1; 57 unsigned int no_unhalted_reference_cycles:1; 58 unsigned int no_llc_reference:1; 59 unsigned int no_llc_misses:1; 60 unsigned int no_branch_instruction_retired:1; 61 unsigned int no_branch_misses_retired:1; 62 } split; 63 unsigned int full; 64 } ebx; 65 66 union cpuid10_edx { 67 struct { 68 unsigned int num_counters_fixed:5; 69 unsigned int bit_width_fixed:8; 70 unsigned int reserved:19; 71 } split; 72 unsigned int full; 73 } edx; 74 75 struct pmu_event { 76 const char *name; 77 uint32_t unit_sel; 78 int min; 79 int max; 80 } gp_events[] = { 81 {"core cycles", 0x003c, 1*N, 50*N}, 82 {"instructions", 0x00c0, 10*N, 10.2*N}, 83 {"ref cycles", 0x013c, 0.1*N, 30*N}, 84 {"llc refference", 0x4f2e, 1, 2*N}, 85 {"llc misses", 0x412e, 1, 1*N}, 86 {"branches", 0x00c4, 1*N, 1.1*N}, 87 {"branch misses", 0x00c5, 0, 0.1*N}, 88 }, fixed_events[] = { 89 {"fixed 1", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N}, 90 {"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N}, 91 {"fixed 3", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N} 92 }; 93 94 static int num_counters; 95 96 char *buf; 97 98 static inline void loop(void) 99 { 100 unsigned long tmp, tmp2, tmp3; 101 102 asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b" 103 : "=c"(tmp), "=r"(tmp2), "=r"(tmp3): "0"(N), "1"(buf)); 104 105 } 106 107 volatile uint64_t irq_received; 108 109 static void cnt_overflow(isr_regs_t *regs) 110 { 111 irq_received++; 112 apic_write(APIC_EOI, 0); 113 } 114 115 static bool check_irq(void) 116 { 117 int i; 118 irq_received = 0; 119 irq_enable(); 120 for (i = 0; i < 100000 && !irq_received; i++) 121 asm volatile("pause"); 122 irq_disable(); 123 return irq_received; 124 } 125 126 static bool is_gp(pmu_counter_t *evt) 127 { 128 return evt->ctr < MSR_CORE_PERF_FIXED_CTR0; 129 } 130 131 static int event_to_global_idx(pmu_counter_t *cnt) 132 { 133 return cnt->ctr - (is_gp(cnt) ? MSR_IA32_PERFCTR0 : 134 (MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX)); 135 } 136 137 static struct pmu_event* get_counter_event(pmu_counter_t *cnt) 138 { 139 if (is_gp(cnt)) { 140 int i; 141 142 for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++) 143 if (gp_events[i].unit_sel == (cnt->config & 0xffff)) 144 return &gp_events[i]; 145 } else 146 return &fixed_events[cnt->ctr - MSR_CORE_PERF_FIXED_CTR0]; 147 148 return (void*)0; 149 } 150 151 static void global_enable(pmu_counter_t *cnt) 152 { 153 cnt->idx = event_to_global_idx(cnt); 154 155 wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) | 156 (1ull << cnt->idx)); 157 } 158 159 static void global_disable(pmu_counter_t *cnt) 160 { 161 wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) & 162 ~(1ull << cnt->idx)); 163 } 164 165 166 static void start_event(pmu_counter_t *evt) 167 { 168 wrmsr(evt->ctr, evt->count); 169 if (is_gp(evt)) 170 wrmsr(MSR_P6_EVNTSEL0 + event_to_global_idx(evt), 171 evt->config | EVNTSEL_EN); 172 else { 173 uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL); 174 int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4; 175 uint32_t usrospmi = 0; 176 177 if (evt->config & EVNTSEL_OS) 178 usrospmi |= (1 << 0); 179 if (evt->config & EVNTSEL_USR) 180 usrospmi |= (1 << 1); 181 if (evt->config & EVNTSEL_INT) 182 usrospmi |= (1 << 3); // PMI on overflow 183 ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift); 184 wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl); 185 } 186 global_enable(evt); 187 apic_write(APIC_LVTPC, PC_VECTOR); 188 } 189 190 static void stop_event(pmu_counter_t *evt) 191 { 192 global_disable(evt); 193 if (is_gp(evt)) 194 wrmsr(MSR_P6_EVNTSEL0 + event_to_global_idx(evt), 195 evt->config & ~EVNTSEL_EN); 196 else { 197 uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL); 198 int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4; 199 wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift)); 200 } 201 evt->count = rdmsr(evt->ctr); 202 } 203 204 static void measure(pmu_counter_t *evt, int count) 205 { 206 int i; 207 for (i = 0; i < count; i++) 208 start_event(&evt[i]); 209 loop(); 210 for (i = 0; i < count; i++) 211 stop_event(&evt[i]); 212 } 213 214 static bool verify_event(uint64_t count, struct pmu_event *e) 215 { 216 // printf("%lld >= %lld <= %lld\n", e->min, count, e->max); 217 return count >= e->min && count <= e->max; 218 219 } 220 221 static bool verify_counter(pmu_counter_t *cnt) 222 { 223 return verify_event(cnt->count, get_counter_event(cnt)); 224 } 225 226 static void check_gp_counter(struct pmu_event *evt) 227 { 228 pmu_counter_t cnt = { 229 .ctr = MSR_IA32_PERFCTR0, 230 .config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel, 231 }; 232 int i; 233 234 for (i = 0; i < num_counters; i++, cnt.ctr++) { 235 cnt.count = 0; 236 measure(&cnt, 1); 237 report(verify_event(cnt.count, evt), "%s-%d", evt->name, i); 238 } 239 } 240 241 static void check_gp_counters(void) 242 { 243 int i; 244 245 for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++) 246 if (!(ebx.full & (1 << i))) 247 check_gp_counter(&gp_events[i]); 248 else 249 printf("GP event '%s' is disabled\n", 250 gp_events[i].name); 251 } 252 253 static void check_fixed_counters(void) 254 { 255 pmu_counter_t cnt = { 256 .config = EVNTSEL_OS | EVNTSEL_USR, 257 }; 258 int i; 259 260 for (i = 0; i < edx.split.num_counters_fixed; i++) { 261 cnt.count = 0; 262 cnt.ctr = fixed_events[i].unit_sel; 263 measure(&cnt, 1); 264 report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", 265 i); 266 } 267 } 268 269 static void check_counters_many(void) 270 { 271 pmu_counter_t cnt[10]; 272 int i, n; 273 274 for (i = 0, n = 0; n < num_counters; i++) { 275 if (ebx.full & (1 << i)) 276 continue; 277 278 cnt[n].count = 0; 279 cnt[n].ctr = MSR_IA32_PERFCTR0 + n; 280 cnt[n].config = EVNTSEL_OS | EVNTSEL_USR | 281 gp_events[i % ARRAY_SIZE(gp_events)].unit_sel; 282 n++; 283 } 284 for (i = 0; i < edx.split.num_counters_fixed; i++) { 285 cnt[n].count = 0; 286 cnt[n].ctr = fixed_events[i].unit_sel; 287 cnt[n].config = EVNTSEL_OS | EVNTSEL_USR; 288 n++; 289 } 290 291 measure(cnt, n); 292 293 for (i = 0; i < n; i++) 294 if (!verify_counter(&cnt[i])) 295 break; 296 297 report(i == n, "all counters"); 298 } 299 300 static void check_counter_overflow(void) 301 { 302 uint64_t count; 303 int i; 304 pmu_counter_t cnt = { 305 .ctr = MSR_IA32_PERFCTR0, 306 .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */, 307 .count = 0, 308 }; 309 measure(&cnt, 1); 310 count = cnt.count; 311 312 /* clear status before test */ 313 wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_STATUS)); 314 315 report_prefix_push("overflow"); 316 317 for (i = 0; i < num_counters + 1; i++, cnt.ctr++) { 318 uint64_t status; 319 int idx; 320 321 cnt.count = 1 - count; 322 323 if (i == num_counters) { 324 cnt.ctr = fixed_events[0].unit_sel; 325 cnt.count &= (1ul << edx.split.bit_width_fixed) - 1; 326 } 327 328 if (i % 2) 329 cnt.config |= EVNTSEL_INT; 330 else 331 cnt.config &= ~EVNTSEL_INT; 332 idx = event_to_global_idx(&cnt); 333 measure(&cnt, 1); 334 report(cnt.count == 1, "cntr-%d", i); 335 status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS); 336 report(status & (1ull << idx), "status-%d", i); 337 wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, status); 338 status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS); 339 report(!(status & (1ull << idx)), "status clear-%d", i); 340 report(check_irq() == (i % 2), "irq-%d", i); 341 } 342 343 report_prefix_pop(); 344 } 345 346 static void check_gp_counter_cmask(void) 347 { 348 pmu_counter_t cnt = { 349 .ctr = MSR_IA32_PERFCTR0, 350 .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */, 351 .count = 0, 352 }; 353 cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT); 354 measure(&cnt, 1); 355 report(cnt.count < gp_events[1].min, "cmask"); 356 } 357 358 static void do_rdpmc_fast(void *ptr) 359 { 360 pmu_counter_t *cnt = ptr; 361 uint32_t idx = (uint32_t)cnt->idx | (1u << 31); 362 363 if (!is_gp(cnt)) 364 idx |= 1 << 30; 365 366 cnt->count = rdpmc(idx); 367 } 368 369 370 static void check_rdpmc(void) 371 { 372 uint64_t val = 0x1f3456789ull; 373 bool exc; 374 int i; 375 376 report_prefix_push("rdpmc"); 377 378 for (i = 0; i < num_counters; i++) { 379 uint64_t x; 380 pmu_counter_t cnt = { 381 .ctr = MSR_IA32_PERFCTR0 + i, 382 .idx = i 383 }; 384 385 /* 386 * Only the low 32 bits are writable, and the value is 387 * sign-extended. 388 */ 389 x = (uint64_t)(int64_t)(int32_t)val; 390 391 /* Mask according to the number of supported bits */ 392 x &= (1ull << eax.split.bit_width) - 1; 393 394 wrmsr(MSR_IA32_PERFCTR0 + i, val); 395 report(rdpmc(i) == x, "cntr-%d", i); 396 397 exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt); 398 if (exc) 399 report_skip("fast-%d", i); 400 else 401 report(cnt.count == (u32)val, "fast-%d", i); 402 } 403 for (i = 0; i < edx.split.num_counters_fixed; i++) { 404 uint64_t x = val & ((1ull << edx.split.bit_width_fixed) - 1); 405 pmu_counter_t cnt = { 406 .ctr = MSR_CORE_PERF_FIXED_CTR0 + i, 407 .idx = i 408 }; 409 410 wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, x); 411 report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i); 412 413 exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt); 414 if (exc) 415 report_skip("fixed fast-%d", i); 416 else 417 report(cnt.count == (u32)x, "fixed fast-%d", i); 418 } 419 420 report_prefix_pop(); 421 } 422 423 static void check_running_counter_wrmsr(void) 424 { 425 uint64_t status; 426 pmu_counter_t evt = { 427 .ctr = MSR_IA32_PERFCTR0, 428 .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel, 429 .count = 0, 430 }; 431 432 report_prefix_push("running counter wrmsr"); 433 434 start_event(&evt); 435 loop(); 436 wrmsr(MSR_IA32_PERFCTR0, 0); 437 stop_event(&evt); 438 report(evt.count < gp_events[1].min, "cntr"); 439 440 /* clear status before overflow test */ 441 wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, 442 rdmsr(MSR_CORE_PERF_GLOBAL_STATUS)); 443 444 evt.count = 0; 445 start_event(&evt); 446 wrmsr(MSR_IA32_PERFCTR0, -1); 447 loop(); 448 stop_event(&evt); 449 status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS); 450 report(status & 1, "status"); 451 452 report_prefix_pop(); 453 } 454 455 int main(int ac, char **av) 456 { 457 struct cpuid id = cpuid(10); 458 459 setup_vm(); 460 setup_idt(); 461 handle_irq(PC_VECTOR, cnt_overflow); 462 buf = malloc(N*64); 463 464 eax.full = id.a; 465 ebx.full = id.b; 466 edx.full = id.d; 467 468 if (!eax.split.version_id) { 469 printf("No pmu is detected!\n"); 470 return report_summary(); 471 } 472 printf("PMU version: %d\n", eax.split.version_id); 473 printf("GP counters: %d\n", eax.split.num_counters); 474 printf("GP counter width: %d\n", eax.split.bit_width); 475 printf("Mask length: %d\n", eax.split.mask_length); 476 printf("Fixed counters: %d\n", edx.split.num_counters_fixed); 477 printf("Fixed counter width: %d\n", edx.split.bit_width_fixed); 478 479 num_counters = eax.split.num_counters; 480 481 apic_write(APIC_LVTPC, PC_VECTOR); 482 483 check_gp_counters(); 484 check_fixed_counters(); 485 check_rdpmc(); 486 check_counters_many(); 487 check_counter_overflow(); 488 check_gp_counter_cmask(); 489 check_running_counter_wrmsr(); 490 491 return report_summary(); 492 } 493