#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/pmu.h"
#include "x86/apic-defs.h"
#include "x86/apic.h"
#include "x86/desc.h"
#include "x86/isr.h"
#include "vmalloc.h"
#include "alloc.h"

#include "libcflat.h"
#include <stdint.h>

#define N 1000000

// These values match the number of instructions and branches in the
// assembly block in check_emulated_instr().
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

/* Enable GLOBAL_CTRL + disable GLOBAL_CTRL + clflush/mfence instructions */
#define EXTRA_INSNS (3 + 3 + 2)
#define LOOP_INSNS (N * 10 + EXTRA_INSNS)
#define LOOP_BRANCHES (N)
#define LOOP_ASM(_wrmsr, _clflush)				\
	_wrmsr "\n\t"						\
	"mov %%ecx, %%edi; mov %%ebx, %%ecx;\n\t"		\
	_clflush "\n\t"						\
	"mfence;\n\t"						\
	"1: mov (%1), %2; add $64, %1;\n\t"			\
	"nop; nop; nop; nop; nop; nop; nop;\n\t"		\
	"loop 1b;\n\t"						\
	"mov %%edi, %%ecx; xor %%eax, %%eax; xor %%edx, %%edx;\n\t" \
	_wrmsr "\n\t"

#define _loop_asm(_wrmsr, _clflush)				\
do {								\
	asm volatile(LOOP_ASM(_wrmsr, _clflush)			\
		     : "=b"(tmp), "=r"(tmp2), "=r"(tmp3)	\
		     : "a"(eax), "d"(edx), "c"(global_ctl),	\
		       "0"(N), "1"(buf)				\
		     : "edi");					\
} while (0)

typedef struct {
	uint32_t ctr;
	uint32_t idx;
	uint64_t config;
	uint64_t count;
} pmu_counter_t;

struct pmu_event {
	const char *name;
	uint32_t unit_sel;
	int min;
	int max;
} intel_gp_events[] = {
	{"core cycles", 0x003c, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"ref cycles", 0x013c, 1*N, 30*N},
	{"llc references", 0x4f2e, 1, 2*N},
	{"llc misses", 0x412e, 1, 1*N},
	{"branches", 0x00c4, 1*N, 1.1*N},
	{"branch misses", 0x00c5, 0, 0.1*N},
}, amd_gp_events[] = {
	{"core cycles", 0x0076, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"branches", 0x00c2, 1*N, 1.1*N},
	{"branch misses", 0x00c3, 0, 0.1*N},
}, fixed_events[] = {
	{"fixed 0", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
};

/*
 * Event indices into intel_gp_events[]; keep these consistent with the
 * intel_gp_events[] array above.
 */
enum {
	INTEL_INSTRUCTIONS_IDX	= 1,
	INTEL_REF_CYCLES_IDX	= 2,
	INTEL_LLC_MISSES_IDX	= 4,
	INTEL_BRANCHES_IDX	= 5,
};

/*
 * Event indices into amd_gp_events[]; keep these consistent with the
 * amd_gp_events[] array above.
 */
enum {
	AMD_INSTRUCTIONS_IDX	= 1,
	AMD_BRANCHES_IDX	= 2,
};

char *buf;

static struct pmu_event *gp_events;
static unsigned int gp_events_size;
static unsigned int fixed_counters_num;

static inline void __loop(void)
{
	unsigned long tmp, tmp2, tmp3;
	u32 global_ctl = 0;
	u32 eax = 0;
	u32 edx = 0;

	if (this_cpu_has(X86_FEATURE_CLFLUSH))
		_loop_asm("nop", "clflush (%1)");
	else
		_loop_asm("nop", "nop");
}

/*
 * Enable and disable counters inside a single asm blob to ensure that no
 * other instructions are counted in the window between enabling the
 * counters and actually executing the LOOP_ASM code.  This lets the
 * instructions and branches events be verified against precise counts
 * instead of a rough valid count range.
 */
static inline void __precise_loop(u64 cntrs)
{
	unsigned long tmp, tmp2, tmp3;
	u32 global_ctl = pmu.msr_global_ctl;
	u32 eax = cntrs & (BIT_ULL(32) - 1);
	u32 edx = cntrs >> 32;

	if (this_cpu_has(X86_FEATURE_CLFLUSH))
		_loop_asm("wrmsr", "clflush (%1)");
	else
		_loop_asm("wrmsr", "nop");
}

static inline void loop(u64 cntrs)
{
	if (!this_cpu_has_perf_global_ctrl())
		__loop();
	else
		__precise_loop(cntrs);
}

static void adjust_events_range(struct pmu_event *gp_events,
				int instruction_idx, int branch_idx)
{
	/*
	 * If HW supports the GLOBAL_CTRL MSR, enabling and disabling PMCs is
	 * moved into __precise_loop(), so the instructions and branches
	 * events can be verified against a precise count instead of a rough
	 * range.
	 *
	 * Skip the precise checks on AMD, as AMD CPUs count VMRUN as a branch
	 * instruction in guest context, which leads to intermittent failures
	 * as the counts will vary depending on how many asynchronous VM-Exits
	 * occur while running the measured code, e.g. if the host takes IRQs.
	 */
	if (pmu.is_intel && this_cpu_has_perf_global_ctrl()) {
		gp_events[instruction_idx].min = LOOP_INSNS;
		gp_events[instruction_idx].max = LOOP_INSNS;
		gp_events[branch_idx].min = LOOP_BRANCHES;
		gp_events[branch_idx].max = LOOP_BRANCHES;
	}
}

volatile uint64_t irq_received;

static void cnt_overflow(isr_regs_t *regs)
{
	irq_received++;
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
	apic_write(APIC_EOI, 0);
}

static bool check_irq(void)
{
	int i;
	irq_received = 0;
	sti();
	for (i = 0; i < 100000 && !irq_received; i++)
		asm volatile("pause");
	cli();
	return irq_received;
}

static bool is_gp(pmu_counter_t *evt)
{
	if (!pmu.is_intel)
		return true;

	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
		evt->ctr >= MSR_IA32_PMC0;
}

static int event_to_global_idx(pmu_counter_t *cnt)
{
	if (pmu.is_intel)
		return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
			(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));

	if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0)
		return (cnt->ctr - pmu.msr_gp_counter_base) / 2;
	else
		return cnt->ctr - pmu.msr_gp_counter_base;
}

static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
{
	if (is_gp(cnt)) {
		int i;

		for (i = 0; i < gp_events_size; i++)
			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
				return &gp_events[i];
	} else {
		unsigned int idx = cnt->ctr - MSR_CORE_PERF_FIXED_CTR0;

		if (idx < ARRAY_SIZE(fixed_events))
			return &fixed_events[idx];
	}

	return (void*)0;
}

static void global_enable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	cnt->idx = event_to_global_idx(cnt);
	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx));
}

static void global_disable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx));
}

static void __start_event(pmu_counter_t *evt, uint64_t count)
{
	evt->count = count;
	wrmsr(evt->ctr, evt->count);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config | EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		uint32_t usrospmi = 0;

		if (evt->config & EVNTSEL_OS)
			usrospmi |= (1 << 0);
		if (evt->config & EVNTSEL_USR)
			usrospmi |= (1 << 1);
		if (evt->config & EVNTSEL_INT)
			usrospmi |= (1 << 3); // PMI on overflow
		ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
	}
	apic_write(APIC_LVTPC, PMI_VECTOR);
}

static void start_event(pmu_counter_t *evt)
{
	__start_event(evt, 0);
	global_enable(evt);
}

static void __stop_event(pmu_counter_t *evt)
{
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config & ~EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
	}
	evt->count = rdmsr(evt->ctr);
}

static void stop_event(pmu_counter_t *evt)
{
	global_disable(evt);
	__stop_event(evt);
}

static noinline void measure_many(pmu_counter_t *evt, int count)
{
	int i;
	u64 cntrs = 0;

	for (i = 0; i < count; i++) {
		__start_event(&evt[i], 0);
		cntrs |= BIT_ULL(event_to_global_idx(&evt[i]));
	}
	loop(cntrs);
	for (i = 0; i < count; i++)
		__stop_event(&evt[i]);
}

static void measure_one(pmu_counter_t *evt)
{
	measure_many(evt, 1);
}

static noinline void __measure(pmu_counter_t *evt, uint64_t count)
{
	u64 cntrs = BIT_ULL(event_to_global_idx(evt));

	__start_event(evt, count);
	loop(cntrs);
	__stop_event(evt);
}

static bool verify_event(uint64_t count, struct pmu_event *e)
{
	bool pass;

	if (!e)
		return false;

	pass = count >= e->min && count <= e->max;
	if (!pass)
		printf("FAIL: %d <= %"PRId64" <= %d\n", e->min, count, e->max);

	return pass;
}

static bool verify_counter(pmu_counter_t *cnt)
{
	return verify_event(cnt->count, get_counter_event(cnt));
}

static void check_gp_counter(struct pmu_event *evt)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
	};
	int i;

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);
		measure_one(&cnt);
		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
	}
}

static void check_gp_counters(void)
{
	int i;

	for (i = 0; i < gp_events_size; i++)
		if (pmu_gp_counter_is_available(i))
			check_gp_counter(&gp_events[i]);
		else
			printf("GP event '%s' is disabled\n",
			       gp_events[i].name);
}

static void check_fixed_counters(void)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR,
	};
	int i;

	for (i = 0; i < fixed_counters_num; i++) {
		cnt.ctr = fixed_events[i].unit_sel;
		measure_one(&cnt);
		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
	}
}

static void check_counters_many(void)
{
	pmu_counter_t cnt[48];
	int i, n;

	for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) {
		if (!pmu_gp_counter_is_available(i))
			continue;

		cnt[n].ctr = MSR_GP_COUNTERx(n);
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
			gp_events[i % gp_events_size].unit_sel;
		n++;
	}
	for (i = 0; i < fixed_counters_num; i++) {
		cnt[n].ctr = fixed_events[i].unit_sel;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
		n++;
	}

	assert(n <= ARRAY_SIZE(cnt));
	measure_many(cnt, n);

	for (i = 0; i < n; i++)
		if (!verify_counter(&cnt[i]))
			break;

	report(i == n, "all counters");
}

static uint64_t measure_for_overflow(pmu_counter_t *cnt)
{
	__measure(cnt, 0);
	/*
	 * To generate overflow, i.e. roll over to '0', the initial count just
	 * needs to be preset to the negative expected count. However, as per
	 * Intel's SDM, the preset count needs to be incremented by 1 to ensure
	 * the overflow interrupt is generated immediately instead of possibly
	 * waiting for the overflow to propagate through the counter.
	 */
	assert(cnt->count > 1);
	return 1 - cnt->count;
}

static void check_counter_overflow(void)
{
	int i;
	uint64_t overflow_preset;
	int instruction_idx = pmu.is_intel ?
			      INTEL_INSTRUCTIONS_IDX :
			      AMD_INSTRUCTIONS_IDX;

	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR |
			  gp_events[instruction_idx].unit_sel /* instructions */,
	};
	overflow_preset = measure_for_overflow(&cnt);

	/* clear status before test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	report_prefix_push("overflow");

	for (i = 0; i < pmu.nr_gp_counters + 1; i++) {
		uint64_t status;
		int idx;

		cnt.count = overflow_preset;
		if (pmu_use_full_writes())
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;

		if (i == pmu.nr_gp_counters) {
			if (!pmu.is_intel)
				break;

			cnt.ctr = fixed_events[0].unit_sel;
			cnt.count = measure_for_overflow(&cnt);
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
		} else {
			cnt.ctr = MSR_GP_COUNTERx(i);
		}

		if (i % 2)
			cnt.config |= EVNTSEL_INT;
		else
			cnt.config &= ~EVNTSEL_INT;
		idx = event_to_global_idx(&cnt);
		__measure(&cnt, cnt.count);
		if (pmu.is_intel)
			report(cnt.count == 1, "cntr-%d", i);
		else
			report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i);

		if (!this_cpu_has_perf_global_status())
			continue;

		status = rdmsr(pmu.msr_global_status);
		report(status & (1ull << idx), "status-%d", i);
		wrmsr(pmu.msr_global_status_clr, status);
		status = rdmsr(pmu.msr_global_status);
		report(!(status & (1ull << idx)), "status clear-%d", i);
		report(check_irq() == (i % 2), "irq-%d", i);
	}

	report_prefix_pop();
}

static void check_gp_counter_cmask(void)
{
	int instruction_idx = pmu.is_intel ?
			      INTEL_INSTRUCTIONS_IDX :
			      AMD_INSTRUCTIONS_IDX;

	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR |
			  gp_events[instruction_idx].unit_sel /* instructions */,
	};
	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
	measure_one(&cnt);
	report(cnt.count < gp_events[instruction_idx].min, "cmask");
}

static void do_rdpmc_fast(void *ptr)
{
	pmu_counter_t *cnt = ptr;
	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);

	if (!is_gp(cnt))
		idx |= 1 << 30;

	cnt->count = rdpmc(idx);
}


static void check_rdpmc(void)
{
	uint64_t val = 0xff0123456789ull;
	bool exc;
	int i;

	report_prefix_push("rdpmc");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		uint64_t x;
		pmu_counter_t cnt = {
			.ctr = MSR_GP_COUNTERx(i),
			.idx = i
		};

		/*
		 * Without full-width writes, only the low 32 bits are writable,
		 * and the value is sign-extended.
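		 * With full-width writes (MSR_IA32_PMC0 as the counter base),
		 * the written value is instead preserved up to the GP counter
		 * width, which the masking below accounts for.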
		 */
		if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0)
			x = (uint64_t)(int64_t)(int32_t)val;
		else
			x = (uint64_t)(int64_t)val;

		/* Mask according to the number of supported bits */
		x &= (1ull << pmu.gp_counter_width) - 1;

		wrmsr(MSR_GP_COUNTERx(i), val);
		report(rdpmc(i) == x, "cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fast-%d", i);
		else
			report(cnt.count == (u32)val, "fast-%d", i);
	}
	for (i = 0; i < fixed_counters_num; i++) {
		uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1);
		pmu_counter_t cnt = {
			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
			.idx = i
		};

		wrmsr(MSR_PERF_FIXED_CTRx(i), x);
		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fixed fast-%d", i);
		else
			report(cnt.count == (u32)x, "fixed fast-%d", i);
	}

	report_prefix_pop();
}

static void check_running_counter_wrmsr(void)
{
	uint64_t status;
	uint64_t count;
	unsigned int instruction_idx = pmu.is_intel ?
				       INTEL_INSTRUCTIONS_IDX :
				       AMD_INSTRUCTIONS_IDX;

	pmu_counter_t evt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR |
			  gp_events[instruction_idx].unit_sel,
	};

	report_prefix_push("running counter wrmsr");

	start_event(&evt);
	__loop();
	wrmsr(MSR_GP_COUNTERx(0), 0);
	stop_event(&evt);
	report(evt.count < gp_events[instruction_idx].min, "cntr");

	/* clear status before overflow test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&evt);

	count = -1;
	if (pmu_use_full_writes())
		count &= (1ull << pmu.gp_counter_width) - 1;

	wrmsr(MSR_GP_COUNTERx(0), count);

	__loop();
	stop_event(&evt);

	if (this_cpu_has_perf_global_status()) {
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "status msr bit");
	}

	report_prefix_pop();
}

static void check_emulated_instr(void)
{
	uint64_t status, instr_start, brnch_start;
	uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1;
	unsigned int branch_idx = pmu.is_intel ?
				  INTEL_BRANCHES_IDX : AMD_BRANCHES_IDX;
	unsigned int instruction_idx = pmu.is_intel ?
				       INTEL_INSTRUCTIONS_IDX :
				       AMD_INSTRUCTIONS_IDX;
	pmu_counter_t brnch_cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		/* branch instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel,
	};
	pmu_counter_t instr_cnt = {
		.ctr = MSR_GP_COUNTERx(1),
		/* instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[instruction_idx].unit_sel,
	};
	report_prefix_push("emulated instruction");

	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&brnch_cnt);
	start_event(&instr_cnt);

	brnch_start = -EXPECTED_BRNCH;
	instr_start = -EXPECTED_INSTR;
	wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width);
	wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width);
	// KVM_FEP is a magic prefix that forces emulation so
	// 'KVM_FEP "jne label\n"' just counts as a single instruction.
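	// The asm blob below executes EXPECTED_INSTR (17) instructions and
	// EXPECTED_BRNCH (5) branches; the counters were preset above to the
	// negated expected counts, so both should overflow by the time the
	// blob finishes.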
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	stop_event(&brnch_cnt);
	stop_event(&instr_cnt);

	// Check that the end count - start count is at least the expected
	// number of instructions and branches.
	report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
	       "instruction count");
	report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
	       "branch count");
	if (this_cpu_has_perf_global_status()) {
		// Additionally check that those counters overflowed properly.
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "branch counter overflow");
		report(status & 2, "instruction counter overflow");
	}

	report_prefix_pop();
}

#define XBEGIN_STARTED (~0u)
static void check_tsx_cycles(void)
{
	pmu_counter_t cnt;
	unsigned int i, ret = 0;

	if (!this_cpu_has(X86_FEATURE_RTM))
		return;

	report_prefix_push("TSX cycles");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);

		if (i == 2) {
			/* Transactional cycles committed only on gp counter 2 */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x30000003c;
		} else {
			/* Transactional cycles */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x10000003c;
		}

		start_event(&cnt);

		asm volatile("xbegin 1f\n\t"
			     "1:\n\t"
			     : "+a" (ret) :: "memory");

		/* Generate a non-canonical #GP to trigger ABORT. */
		if (ret == XBEGIN_STARTED)
			*(int *)NONCANONICAL = 0;

		stop_event(&cnt);

		report(cnt.count > 0, "gp cntr-%d with a value of %" PRId64 "", i, cnt.count);
	}

	report_prefix_pop();
}

static void warm_up(void)
{
	int i;

	/*
	 * Since the cycles event is always run as the first event, the caches
	 * are cold on the first measurement, which can push the measured
	 * cycles value above the pre-defined upper boundary and cause a false
	 * positive.  To avoid this, run a warm-up loop before the real
	 * verification.
	 */
	for (i = 0; i < 10; i++)
		loop(0);
}

static void check_counters(void)
{
	if (is_fep_available())
		check_emulated_instr();

	warm_up();
	check_gp_counters();
	check_fixed_counters();
	check_rdpmc();
	check_counters_many();
	check_counter_overflow();
	check_gp_counter_cmask();
	check_running_counter_wrmsr();
	check_tsx_cycles();
}

static void do_unsupported_width_counter_write(void *index)
{
	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
}

static void check_gp_counters_write_width(void)
{
	u64 val_64 = 0xffffff0123456789ull;
	u64 val_32 = val_64 & ((1ull << 32) - 1);
	u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1);
	int i;

	/*
	 * MSR_IA32_PERFCTRn supports 64-bit writes,
	 * but only the lowest 32 bits are valid.
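	 * Writes of wider values are expected to read back with only the
	 * low 32 bits intact, which the asserts below verify.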
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
	}

	/*
	 * MSR_IA32_PMCn supports writing values up to GP counter width,
	 * and only the lowest bits of GP counter width are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PMC0 + i, val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);

		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);

		report(test_for_exception(GP_VECTOR,
			do_unsupported_width_counter_write, &i),
		       "writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
	}
}

/*
 * Per the SDM, reference cycles are currently implemented using the
 * core crystal clock, TSC, or bus clock. Calibrate to the TSC
 * frequency to set reasonable expectations.
 */
static void set_ref_cycle_expectations(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_IA32_PERFCTR0,
		.config = EVNTSEL_OS | EVNTSEL_USR |
			  intel_gp_events[INTEL_REF_CYCLES_IDX].unit_sel,
	};
	uint64_t tsc_delta;
	uint64_t t0, t1, t2, t3;

	/* Bit 2 enumerates the availability of reference cycles events. */
	if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2))
		return;

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	t0 = fenced_rdtsc();
	start_event(&cnt);
	t1 = fenced_rdtsc();

	/*
	 * This loop has to run long enough to dominate the VM-exit
	 * costs for playing with the PMU MSRs on start and stop.
	 *
	 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
	 * the core crystal clock, this function calculated a guest
	 * TSC : ref cycles ratio of around 105 with ECX initialized
	 * to one billion.
	 */
	asm volatile("loop ." : "+c"((int){1000000000ull}));

	t2 = fenced_rdtsc();
	stop_event(&cnt);
	t3 = fenced_rdtsc();

	tsc_delta = ((t2 - t1) + (t3 - t0)) / 2;

	if (!tsc_delta)
		return;

	intel_gp_events[INTEL_REF_CYCLES_IDX].min =
		(intel_gp_events[INTEL_REF_CYCLES_IDX].min * cnt.count) / tsc_delta;
	intel_gp_events[INTEL_REF_CYCLES_IDX].max =
		(intel_gp_events[INTEL_REF_CYCLES_IDX].max * cnt.count) / tsc_delta;
}

static void check_invalid_rdpmc_gp(void)
{
	uint64_t val;

	report(rdpmc_safe(64, &val) == GP_VECTOR,
	       "Expected #GP on RDPMC(64)");
}

int main(int ac, char **av)
{
	int instruction_idx;
	int branch_idx;

	setup_vm();
	handle_irq(PMI_VECTOR, cnt_overflow);
	buf = malloc(N*64);

	check_invalid_rdpmc_gp();

	if (pmu.is_intel) {
		if (!pmu.version) {
			report_skip("No Intel Arch PMU is detected!");
			return report_summary();
		}
		gp_events = (struct pmu_event *)intel_gp_events;
		gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
		instruction_idx = INTEL_INSTRUCTIONS_IDX;
		branch_idx = INTEL_BRANCHES_IDX;

		/*
		 * For legacy Intel CPUs without clflush/clflushopt support,
		 * there is no way to force an LLC miss, thus set the minimum
		 * value to 0 to avoid false positives.
		 */
		if (!this_cpu_has(X86_FEATURE_CLFLUSH))
			gp_events[INTEL_LLC_MISSES_IDX].min = 0;

		report_prefix_push("Intel");
		set_ref_cycle_expectations();
	} else {
		gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
		gp_events = (struct pmu_event *)amd_gp_events;
		instruction_idx = AMD_INSTRUCTIONS_IDX;
		branch_idx = AMD_BRANCHES_IDX;
		report_prefix_push("AMD");
	}
	adjust_events_range(gp_events, instruction_idx, branch_idx);

	printf("PMU version: %d\n", pmu.version);
	printf("GP counters: %d\n", pmu.nr_gp_counters);
	printf("GP counter width: %d\n", pmu.gp_counter_width);
	printf("Mask length: %d\n", pmu.gp_counter_mask_length);
	printf("Fixed counters: %d\n", pmu.nr_fixed_counters);
	printf("Fixed counter width: %d\n", pmu.fixed_counter_width);

	fixed_counters_num = MIN(pmu.nr_fixed_counters, ARRAY_SIZE(fixed_events));
	if (pmu.nr_fixed_counters > ARRAY_SIZE(fixed_events))
		report_info("Fixed counters number %d > defined fixed events %u. "
			    "Please update test case.", pmu.nr_fixed_counters,
			    (uint32_t)ARRAY_SIZE(fixed_events));

	apic_write(APIC_LVTPC, PMI_VECTOR);

	check_counters();

	if (pmu_has_full_writes()) {
		pmu.msr_gp_counter_base = MSR_IA32_PMC0;

		report_prefix_push("full-width writes");
		check_counters();
		check_gp_counters_write_width();
		report_prefix_pop();
	}

	if (!pmu.is_intel) {
		report_prefix_push("K7");
		pmu.nr_gp_counters = AMD64_NUM_COUNTERS;
		pmu.msr_gp_counter_base = MSR_K7_PERFCTR0;
		pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0;
		check_counters();
		report_prefix_pop();
	}

	return report_summary();
}