1 2 #include "x86/msr.h" 3 #include "x86/processor.h" 4 #include "x86/pmu.h" 5 #include "x86/apic-defs.h" 6 #include "x86/apic.h" 7 #include "x86/desc.h" 8 #include "x86/isr.h" 9 #include "vmalloc.h" 10 #include "alloc.h" 11 12 #include "libcflat.h" 13 #include <stdint.h> 14 15 #define N 1000000 16 17 // These values match the number of instructions and branches in the 18 // assembly block in check_emulated_instr(). 19 #define EXPECTED_INSTR 17 20 #define EXPECTED_BRNCH 5 21 22 /* Enable GLOBAL_CTRL + disable GLOBAL_CTRL + clflush/mfence instructions */ 23 #define EXTRA_INSNS (3 + 3 +2) 24 #define LOOP_INSNS (N * 10 + EXTRA_INSNS) 25 #define LOOP_BRANCHES (N) 26 #define LOOP_ASM(_wrmsr, _clflush) \ 27 _wrmsr "\n\t" \ 28 "mov %%ecx, %%edi; mov %%ebx, %%ecx;\n\t" \ 29 _clflush "\n\t" \ 30 "mfence;\n\t" \ 31 "1: mov (%1), %2; add $64, %1;\n\t" \ 32 "nop; nop; nop; nop; nop; nop; nop;\n\t" \ 33 "loop 1b;\n\t" \ 34 "mov %%edi, %%ecx; xor %%eax, %%eax; xor %%edx, %%edx;\n\t" \ 35 _wrmsr "\n\t" 36 37 #define _loop_asm(_wrmsr, _clflush) \ 38 do { \ 39 asm volatile(LOOP_ASM(_wrmsr, _clflush) \ 40 : "=b"(tmp), "=r"(tmp2), "=r"(tmp3) \ 41 : "a"(eax), "d"(edx), "c"(global_ctl), \ 42 "0"(N), "1"(buf) \ 43 : "edi"); \ 44 } while (0) 45 46 typedef struct { 47 uint32_t ctr; 48 uint32_t idx; 49 uint64_t config; 50 uint64_t count; 51 } pmu_counter_t; 52 53 struct pmu_event { 54 const char *name; 55 uint32_t unit_sel; 56 int min; 57 int max; 58 } intel_gp_events[] = { 59 {"core cycles", 0x003c, 1*N, 50*N}, 60 {"instructions", 0x00c0, 10*N, 10.2*N}, 61 {"ref cycles", 0x013c, 1*N, 30*N}, 62 {"llc references", 0x4f2e, 1, 2*N}, 63 {"llc misses", 0x412e, 1, 1*N}, 64 {"branches", 0x00c4, 1*N, 1.1*N}, 65 {"branch misses", 0x00c5, 0, 0.1*N}, 66 }, amd_gp_events[] = { 67 {"core cycles", 0x0076, 1*N, 50*N}, 68 {"instructions", 0x00c0, 10*N, 10.2*N}, 69 {"branches", 0x00c2, 1*N, 1.1*N}, 70 {"branch misses", 0x00c3, 0, 0.1*N}, 71 }, fixed_events[] = { 72 {"fixed 0", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N}, 73 {"fixed 1", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N}, 74 {"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N} 75 }; 76 77 /* 78 * Events index in intel_gp_events[], ensure consistent with 79 * intel_gp_events[]. 80 */ 81 enum { 82 INTEL_INSTRUCTIONS_IDX = 1, 83 INTEL_REF_CYCLES_IDX = 2, 84 INTEL_BRANCHES_IDX = 5, 85 }; 86 87 /* 88 * Events index in amd_gp_events[], ensure consistent with 89 * amd_gp_events[]. 90 */ 91 enum { 92 AMD_INSTRUCTIONS_IDX = 1, 93 AMD_BRANCHES_IDX = 2, 94 }; 95 96 char *buf; 97 98 static struct pmu_event *gp_events; 99 static unsigned int gp_events_size; 100 static unsigned int fixed_counters_num; 101 102 static inline void __loop(void) 103 { 104 unsigned long tmp, tmp2, tmp3; 105 u32 global_ctl = 0; 106 u32 eax = 0; 107 u32 edx = 0; 108 109 if (this_cpu_has(X86_FEATURE_CLFLUSH)) 110 _loop_asm("nop", "clflush (%1)"); 111 else 112 _loop_asm("nop", "nop"); 113 } 114 115 /* 116 * Enable and disable counters in a whole asm blob to ensure 117 * no other instructions are counted in the window between 118 * counters enabling and really LOOP_ASM code executing. 119 * Thus counters can verify instructions and branches events 120 * against precise counts instead of a rough valid count range. 121 */ 122 static inline void __precise_loop(u64 cntrs) 123 { 124 unsigned long tmp, tmp2, tmp3; 125 u32 global_ctl = pmu.msr_global_ctl; 126 u32 eax = cntrs & (BIT_ULL(32) - 1); 127 u32 edx = cntrs >> 32; 128 129 if (this_cpu_has(X86_FEATURE_CLFLUSH)) 130 _loop_asm("wrmsr", "clflush (%1)"); 131 else 132 _loop_asm("wrmsr", "nop"); 133 } 134 135 static inline void loop(u64 cntrs) 136 { 137 if (!this_cpu_has_perf_global_ctrl()) 138 __loop(); 139 else 140 __precise_loop(cntrs); 141 } 142 143 static void adjust_events_range(struct pmu_event *gp_events, 144 int instruction_idx, int branch_idx) 145 { 146 /* 147 * If HW supports GLOBAL_CTRL MSR, enabling and disabling PMCs are 148 * moved in __precise_loop(). Thus, instructions and branches events 149 * can be verified against a precise count instead of a rough range. 150 * 151 * Skip the precise checks on AMD, as AMD CPUs count VMRUN as a branch 152 * instruction in guest context, which* leads to intermittent failures 153 * as the counts will vary depending on how many asynchronous VM-Exits 154 * occur while running the measured code, e.g. if the host takes IRQs. 155 */ 156 if (pmu.is_intel && this_cpu_has_perf_global_ctrl()) { 157 gp_events[instruction_idx].min = LOOP_INSNS; 158 gp_events[instruction_idx].max = LOOP_INSNS; 159 gp_events[branch_idx].min = LOOP_BRANCHES; 160 gp_events[branch_idx].max = LOOP_BRANCHES; 161 } 162 } 163 164 volatile uint64_t irq_received; 165 166 static void cnt_overflow(isr_regs_t *regs) 167 { 168 irq_received++; 169 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); 170 apic_write(APIC_EOI, 0); 171 } 172 173 static bool check_irq(void) 174 { 175 int i; 176 irq_received = 0; 177 sti(); 178 for (i = 0; i < 100000 && !irq_received; i++) 179 asm volatile("pause"); 180 cli(); 181 return irq_received; 182 } 183 184 static bool is_gp(pmu_counter_t *evt) 185 { 186 if (!pmu.is_intel) 187 return true; 188 189 return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 || 190 evt->ctr >= MSR_IA32_PMC0; 191 } 192 193 static int event_to_global_idx(pmu_counter_t *cnt) 194 { 195 if (pmu.is_intel) 196 return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base : 197 (MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX)); 198 199 if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0) 200 return (cnt->ctr - pmu.msr_gp_counter_base) / 2; 201 else 202 return cnt->ctr - pmu.msr_gp_counter_base; 203 } 204 205 static struct pmu_event* get_counter_event(pmu_counter_t *cnt) 206 { 207 if (is_gp(cnt)) { 208 int i; 209 210 for (i = 0; i < gp_events_size; i++) 211 if (gp_events[i].unit_sel == (cnt->config & 0xffff)) 212 return &gp_events[i]; 213 } else { 214 unsigned int idx = cnt->ctr - MSR_CORE_PERF_FIXED_CTR0; 215 216 if (idx < ARRAY_SIZE(fixed_events)) 217 return &fixed_events[idx]; 218 } 219 220 return (void*)0; 221 } 222 223 static void global_enable(pmu_counter_t *cnt) 224 { 225 if (!this_cpu_has_perf_global_ctrl()) 226 return; 227 228 cnt->idx = event_to_global_idx(cnt); 229 wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx)); 230 } 231 232 static void global_disable(pmu_counter_t *cnt) 233 { 234 if (!this_cpu_has_perf_global_ctrl()) 235 return; 236 237 wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx)); 238 } 239 240 static void __start_event(pmu_counter_t *evt, uint64_t count) 241 { 242 evt->count = count; 243 wrmsr(evt->ctr, evt->count); 244 if (is_gp(evt)) { 245 wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)), 246 evt->config | EVNTSEL_EN); 247 } else { 248 uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL); 249 int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4; 250 uint32_t usrospmi = 0; 251 252 if (evt->config & EVNTSEL_OS) 253 usrospmi |= (1 << 0); 254 if (evt->config & EVNTSEL_USR) 255 usrospmi |= (1 << 1); 256 if (evt->config & EVNTSEL_INT) 257 usrospmi |= (1 << 3); // PMI on overflow 258 ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift); 259 wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl); 260 } 261 apic_write(APIC_LVTPC, PMI_VECTOR); 262 } 263 264 static void start_event(pmu_counter_t *evt) 265 { 266 __start_event(evt, 0); 267 global_enable(evt); 268 } 269 270 static void __stop_event(pmu_counter_t *evt) 271 { 272 if (is_gp(evt)) { 273 wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)), 274 evt->config & ~EVNTSEL_EN); 275 } else { 276 uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL); 277 int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4; 278 wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift)); 279 } 280 evt->count = rdmsr(evt->ctr); 281 } 282 283 static void stop_event(pmu_counter_t *evt) 284 { 285 global_disable(evt); 286 __stop_event(evt); 287 } 288 289 static noinline void measure_many(pmu_counter_t *evt, int count) 290 { 291 int i; 292 u64 cntrs = 0; 293 294 for (i = 0; i < count; i++) { 295 __start_event(&evt[i], 0); 296 cntrs |= BIT_ULL(event_to_global_idx(&evt[i])); 297 } 298 loop(cntrs); 299 for (i = 0; i < count; i++) 300 __stop_event(&evt[i]); 301 } 302 303 static void measure_one(pmu_counter_t *evt) 304 { 305 measure_many(evt, 1); 306 } 307 308 static noinline void __measure(pmu_counter_t *evt, uint64_t count) 309 { 310 u64 cntrs = BIT_ULL(event_to_global_idx(evt)); 311 312 __start_event(evt, count); 313 loop(cntrs); 314 __stop_event(evt); 315 } 316 317 static bool verify_event(uint64_t count, struct pmu_event *e) 318 { 319 bool pass; 320 321 if (!e) 322 return false; 323 324 pass = count >= e->min && count <= e->max; 325 if (!pass) 326 printf("FAIL: %d <= %"PRId64" <= %d\n", e->min, count, e->max); 327 328 return pass; 329 } 330 331 static bool verify_counter(pmu_counter_t *cnt) 332 { 333 return verify_event(cnt->count, get_counter_event(cnt)); 334 } 335 336 static void check_gp_counter(struct pmu_event *evt) 337 { 338 pmu_counter_t cnt = { 339 .config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel, 340 }; 341 int i; 342 343 for (i = 0; i < pmu.nr_gp_counters; i++) { 344 cnt.ctr = MSR_GP_COUNTERx(i); 345 measure_one(&cnt); 346 report(verify_event(cnt.count, evt), "%s-%d", evt->name, i); 347 } 348 } 349 350 static void check_gp_counters(void) 351 { 352 int i; 353 354 for (i = 0; i < gp_events_size; i++) 355 if (pmu_gp_counter_is_available(i)) 356 check_gp_counter(&gp_events[i]); 357 else 358 printf("GP event '%s' is disabled\n", 359 gp_events[i].name); 360 } 361 362 static void check_fixed_counters(void) 363 { 364 pmu_counter_t cnt = { 365 .config = EVNTSEL_OS | EVNTSEL_USR, 366 }; 367 int i; 368 369 for (i = 0; i < fixed_counters_num; i++) { 370 cnt.ctr = fixed_events[i].unit_sel; 371 measure_one(&cnt); 372 report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i); 373 } 374 } 375 376 static void check_counters_many(void) 377 { 378 pmu_counter_t cnt[48]; 379 int i, n; 380 381 for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) { 382 if (!pmu_gp_counter_is_available(i)) 383 continue; 384 385 cnt[n].ctr = MSR_GP_COUNTERx(n); 386 cnt[n].config = EVNTSEL_OS | EVNTSEL_USR | 387 gp_events[i % gp_events_size].unit_sel; 388 n++; 389 } 390 for (i = 0; i < fixed_counters_num; i++) { 391 cnt[n].ctr = fixed_events[i].unit_sel; 392 cnt[n].config = EVNTSEL_OS | EVNTSEL_USR; 393 n++; 394 } 395 396 assert(n <= ARRAY_SIZE(cnt)); 397 measure_many(cnt, n); 398 399 for (i = 0; i < n; i++) 400 if (!verify_counter(&cnt[i])) 401 break; 402 403 report(i == n, "all counters"); 404 } 405 406 static uint64_t measure_for_overflow(pmu_counter_t *cnt) 407 { 408 __measure(cnt, 0); 409 /* 410 * To generate overflow, i.e. roll over to '0', the initial count just 411 * needs to be preset to the negative expected count. However, as per 412 * Intel's SDM, the preset count needs to be incremented by 1 to ensure 413 * the overflow interrupt is generated immediately instead of possibly 414 * waiting for the overflow to propagate through the counter. 415 */ 416 assert(cnt->count > 1); 417 return 1 - cnt->count; 418 } 419 420 static void check_counter_overflow(void) 421 { 422 int i; 423 uint64_t overflow_preset; 424 int instruction_idx = pmu.is_intel ? 425 INTEL_INSTRUCTIONS_IDX : 426 AMD_INSTRUCTIONS_IDX; 427 428 pmu_counter_t cnt = { 429 .ctr = MSR_GP_COUNTERx(0), 430 .config = EVNTSEL_OS | EVNTSEL_USR | 431 gp_events[instruction_idx].unit_sel /* instructions */, 432 }; 433 overflow_preset = measure_for_overflow(&cnt); 434 435 /* clear status before test */ 436 if (this_cpu_has_perf_global_status()) 437 pmu_clear_global_status(); 438 439 report_prefix_push("overflow"); 440 441 for (i = 0; i < pmu.nr_gp_counters + 1; i++) { 442 uint64_t status; 443 int idx; 444 445 cnt.count = overflow_preset; 446 if (pmu_use_full_writes()) 447 cnt.count &= (1ull << pmu.gp_counter_width) - 1; 448 449 if (i == pmu.nr_gp_counters) { 450 if (!pmu.is_intel) 451 break; 452 453 cnt.ctr = fixed_events[0].unit_sel; 454 cnt.count = measure_for_overflow(&cnt); 455 cnt.count &= (1ull << pmu.gp_counter_width) - 1; 456 } else { 457 cnt.ctr = MSR_GP_COUNTERx(i); 458 } 459 460 if (i % 2) 461 cnt.config |= EVNTSEL_INT; 462 else 463 cnt.config &= ~EVNTSEL_INT; 464 idx = event_to_global_idx(&cnt); 465 __measure(&cnt, cnt.count); 466 if (pmu.is_intel) 467 report(cnt.count == 1, "cntr-%d", i); 468 else 469 report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i); 470 471 if (!this_cpu_has_perf_global_status()) 472 continue; 473 474 status = rdmsr(pmu.msr_global_status); 475 report(status & (1ull << idx), "status-%d", i); 476 wrmsr(pmu.msr_global_status_clr, status); 477 status = rdmsr(pmu.msr_global_status); 478 report(!(status & (1ull << idx)), "status clear-%d", i); 479 report(check_irq() == (i % 2), "irq-%d", i); 480 } 481 482 report_prefix_pop(); 483 } 484 485 static void check_gp_counter_cmask(void) 486 { 487 int instruction_idx = pmu.is_intel ? 488 INTEL_INSTRUCTIONS_IDX : 489 AMD_INSTRUCTIONS_IDX; 490 491 pmu_counter_t cnt = { 492 .ctr = MSR_GP_COUNTERx(0), 493 .config = EVNTSEL_OS | EVNTSEL_USR | 494 gp_events[instruction_idx].unit_sel /* instructions */, 495 }; 496 cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT); 497 measure_one(&cnt); 498 report(cnt.count < gp_events[instruction_idx].min, "cmask"); 499 } 500 501 static void do_rdpmc_fast(void *ptr) 502 { 503 pmu_counter_t *cnt = ptr; 504 uint32_t idx = (uint32_t)cnt->idx | (1u << 31); 505 506 if (!is_gp(cnt)) 507 idx |= 1 << 30; 508 509 cnt->count = rdpmc(idx); 510 } 511 512 513 static void check_rdpmc(void) 514 { 515 uint64_t val = 0xff0123456789ull; 516 bool exc; 517 int i; 518 519 report_prefix_push("rdpmc"); 520 521 for (i = 0; i < pmu.nr_gp_counters; i++) { 522 uint64_t x; 523 pmu_counter_t cnt = { 524 .ctr = MSR_GP_COUNTERx(i), 525 .idx = i 526 }; 527 528 /* 529 * Without full-width writes, only the low 32 bits are writable, 530 * and the value is sign-extended. 531 */ 532 if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0) 533 x = (uint64_t)(int64_t)(int32_t)val; 534 else 535 x = (uint64_t)(int64_t)val; 536 537 /* Mask according to the number of supported bits */ 538 x &= (1ull << pmu.gp_counter_width) - 1; 539 540 wrmsr(MSR_GP_COUNTERx(i), val); 541 report(rdpmc(i) == x, "cntr-%d", i); 542 543 exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt); 544 if (exc) 545 report_skip("fast-%d", i); 546 else 547 report(cnt.count == (u32)val, "fast-%d", i); 548 } 549 for (i = 0; i < fixed_counters_num; i++) { 550 uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1); 551 pmu_counter_t cnt = { 552 .ctr = MSR_CORE_PERF_FIXED_CTR0 + i, 553 .idx = i 554 }; 555 556 wrmsr(MSR_PERF_FIXED_CTRx(i), x); 557 report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i); 558 559 exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt); 560 if (exc) 561 report_skip("fixed fast-%d", i); 562 else 563 report(cnt.count == (u32)x, "fixed fast-%d", i); 564 } 565 566 report_prefix_pop(); 567 } 568 569 static void check_running_counter_wrmsr(void) 570 { 571 uint64_t status; 572 uint64_t count; 573 unsigned int instruction_idx = pmu.is_intel ? 574 INTEL_INSTRUCTIONS_IDX : 575 AMD_INSTRUCTIONS_IDX; 576 577 pmu_counter_t evt = { 578 .ctr = MSR_GP_COUNTERx(0), 579 .config = EVNTSEL_OS | EVNTSEL_USR | 580 gp_events[instruction_idx].unit_sel, 581 }; 582 583 report_prefix_push("running counter wrmsr"); 584 585 start_event(&evt); 586 __loop(); 587 wrmsr(MSR_GP_COUNTERx(0), 0); 588 stop_event(&evt); 589 report(evt.count < gp_events[instruction_idx].min, "cntr"); 590 591 /* clear status before overflow test */ 592 if (this_cpu_has_perf_global_status()) 593 pmu_clear_global_status(); 594 595 start_event(&evt); 596 597 count = -1; 598 if (pmu_use_full_writes()) 599 count &= (1ull << pmu.gp_counter_width) - 1; 600 601 wrmsr(MSR_GP_COUNTERx(0), count); 602 603 __loop(); 604 stop_event(&evt); 605 606 if (this_cpu_has_perf_global_status()) { 607 status = rdmsr(pmu.msr_global_status); 608 report(status & 1, "status msr bit"); 609 } 610 611 report_prefix_pop(); 612 } 613 614 static void check_emulated_instr(void) 615 { 616 uint64_t status, instr_start, brnch_start; 617 uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1; 618 unsigned int branch_idx = pmu.is_intel ? 619 INTEL_BRANCHES_IDX : AMD_BRANCHES_IDX; 620 unsigned int instruction_idx = pmu.is_intel ? 621 INTEL_INSTRUCTIONS_IDX : 622 AMD_INSTRUCTIONS_IDX; 623 pmu_counter_t brnch_cnt = { 624 .ctr = MSR_GP_COUNTERx(0), 625 /* branch instructions */ 626 .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel, 627 }; 628 pmu_counter_t instr_cnt = { 629 .ctr = MSR_GP_COUNTERx(1), 630 /* instructions */ 631 .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[instruction_idx].unit_sel, 632 }; 633 report_prefix_push("emulated instruction"); 634 635 if (this_cpu_has_perf_global_status()) 636 pmu_clear_global_status(); 637 638 start_event(&brnch_cnt); 639 start_event(&instr_cnt); 640 641 brnch_start = -EXPECTED_BRNCH; 642 instr_start = -EXPECTED_INSTR; 643 wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width); 644 wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width); 645 // KVM_FEP is a magic prefix that forces emulation so 646 // 'KVM_FEP "jne label\n"' just counts as a single instruction. 647 asm volatile( 648 "mov $0x0, %%eax\n" 649 "cmp $0x0, %%eax\n" 650 KVM_FEP "jne label\n" 651 KVM_FEP "jne label\n" 652 KVM_FEP "jne label\n" 653 KVM_FEP "jne label\n" 654 KVM_FEP "jne label\n" 655 "mov $0xa, %%eax\n" 656 "cpuid\n" 657 "mov $0xa, %%eax\n" 658 "cpuid\n" 659 "mov $0xa, %%eax\n" 660 "cpuid\n" 661 "mov $0xa, %%eax\n" 662 "cpuid\n" 663 "mov $0xa, %%eax\n" 664 "cpuid\n" 665 "label:\n" 666 : 667 : 668 : "eax", "ebx", "ecx", "edx"); 669 670 if (this_cpu_has_perf_global_ctrl()) 671 wrmsr(pmu.msr_global_ctl, 0); 672 673 stop_event(&brnch_cnt); 674 stop_event(&instr_cnt); 675 676 // Check that the end count - start count is at least the expected 677 // number of instructions and branches. 678 report(instr_cnt.count - instr_start >= EXPECTED_INSTR, 679 "instruction count"); 680 report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH, 681 "branch count"); 682 if (this_cpu_has_perf_global_status()) { 683 // Additionally check that those counters overflowed properly. 684 status = rdmsr(pmu.msr_global_status); 685 report(status & 1, "branch counter overflow"); 686 report(status & 2, "instruction counter overflow"); 687 } 688 689 report_prefix_pop(); 690 } 691 692 #define XBEGIN_STARTED (~0u) 693 static void check_tsx_cycles(void) 694 { 695 pmu_counter_t cnt; 696 unsigned int i, ret = 0; 697 698 if (!this_cpu_has(X86_FEATURE_RTM)) 699 return; 700 701 report_prefix_push("TSX cycles"); 702 703 for (i = 0; i < pmu.nr_gp_counters; i++) { 704 cnt.ctr = MSR_GP_COUNTERx(i); 705 706 if (i == 2) { 707 /* Transactional cycles committed only on gp counter 2 */ 708 cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x30000003c; 709 } else { 710 /* Transactional cycles */ 711 cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x10000003c; 712 } 713 714 start_event(&cnt); 715 716 asm volatile("xbegin 1f\n\t" 717 "1:\n\t" 718 : "+a" (ret) :: "memory"); 719 720 /* Generate a non-canonical #GP to trigger ABORT. */ 721 if (ret == XBEGIN_STARTED) 722 *(int *)NONCANONICAL = 0; 723 724 stop_event(&cnt); 725 726 report(cnt.count > 0, "gp cntr-%d with a value of %" PRId64 "", i, cnt.count); 727 } 728 729 report_prefix_pop(); 730 } 731 732 static void warm_up(void) 733 { 734 int i; 735 736 /* 737 * Since cycles event is always run as the first event, there would be 738 * a warm-up state to warm up the cache, it leads to the measured cycles 739 * value may exceed the pre-defined cycles upper boundary and cause 740 * false positive. To avoid this, introduce an warm-up state before 741 * the real verification. 742 */ 743 for (i = 0; i < 10; i++) 744 loop(0); 745 } 746 747 static void check_counters(void) 748 { 749 if (is_fep_available()) 750 check_emulated_instr(); 751 752 warm_up(); 753 check_gp_counters(); 754 check_fixed_counters(); 755 check_rdpmc(); 756 check_counters_many(); 757 check_counter_overflow(); 758 check_gp_counter_cmask(); 759 check_running_counter_wrmsr(); 760 check_tsx_cycles(); 761 } 762 763 static void do_unsupported_width_counter_write(void *index) 764 { 765 wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull); 766 } 767 768 static void check_gp_counters_write_width(void) 769 { 770 u64 val_64 = 0xffffff0123456789ull; 771 u64 val_32 = val_64 & ((1ull << 32) - 1); 772 u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1); 773 int i; 774 775 /* 776 * MSR_IA32_PERFCTRn supports 64-bit writes, 777 * but only the lowest 32 bits are valid. 778 */ 779 for (i = 0; i < pmu.nr_gp_counters; i++) { 780 wrmsr(MSR_IA32_PERFCTR0 + i, val_32); 781 assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32); 782 assert(rdmsr(MSR_IA32_PMC0 + i) == val_32); 783 784 wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width); 785 assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32); 786 assert(rdmsr(MSR_IA32_PMC0 + i) == val_32); 787 788 wrmsr(MSR_IA32_PERFCTR0 + i, val_64); 789 assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32); 790 assert(rdmsr(MSR_IA32_PMC0 + i) == val_32); 791 } 792 793 /* 794 * MSR_IA32_PMCn supports writing values up to GP counter width, 795 * and only the lowest bits of GP counter width are valid. 796 */ 797 for (i = 0; i < pmu.nr_gp_counters; i++) { 798 wrmsr(MSR_IA32_PMC0 + i, val_32); 799 assert(rdmsr(MSR_IA32_PMC0 + i) == val_32); 800 assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32); 801 802 wrmsr(MSR_IA32_PMC0 + i, val_max_width); 803 assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width); 804 assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width); 805 806 report(test_for_exception(GP_VECTOR, 807 do_unsupported_width_counter_write, &i), 808 "writing unsupported width to MSR_IA32_PMC%d raises #GP", i); 809 } 810 } 811 812 /* 813 * Per the SDM, reference cycles are currently implemented using the 814 * core crystal clock, TSC, or bus clock. Calibrate to the TSC 815 * frequency to set reasonable expectations. 816 */ 817 static void set_ref_cycle_expectations(void) 818 { 819 pmu_counter_t cnt = { 820 .ctr = MSR_IA32_PERFCTR0, 821 .config = EVNTSEL_OS | EVNTSEL_USR | 822 intel_gp_events[INTEL_REF_CYCLES_IDX].unit_sel, 823 }; 824 uint64_t tsc_delta; 825 uint64_t t0, t1, t2, t3; 826 827 /* Bit 2 enumerates the availability of reference cycles events. */ 828 if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2)) 829 return; 830 831 if (this_cpu_has_perf_global_ctrl()) 832 wrmsr(pmu.msr_global_ctl, 0); 833 834 t0 = fenced_rdtsc(); 835 start_event(&cnt); 836 t1 = fenced_rdtsc(); 837 838 /* 839 * This loop has to run long enough to dominate the VM-exit 840 * costs for playing with the PMU MSRs on start and stop. 841 * 842 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times 843 * the core crystal clock, this function calculated a guest 844 * TSC : ref cycles ratio of around 105 with ECX initialized 845 * to one billion. 846 */ 847 asm volatile("loop ." : "+c"((int){1000000000ull})); 848 849 t2 = fenced_rdtsc(); 850 stop_event(&cnt); 851 t3 = fenced_rdtsc(); 852 853 tsc_delta = ((t2 - t1) + (t3 - t0)) / 2; 854 855 if (!tsc_delta) 856 return; 857 858 intel_gp_events[INTEL_REF_CYCLES_IDX].min = 859 (intel_gp_events[INTEL_REF_CYCLES_IDX].min * cnt.count) / tsc_delta; 860 intel_gp_events[INTEL_REF_CYCLES_IDX].max = 861 (intel_gp_events[INTEL_REF_CYCLES_IDX].max * cnt.count) / tsc_delta; 862 } 863 864 static void check_invalid_rdpmc_gp(void) 865 { 866 uint64_t val; 867 868 report(rdpmc_safe(64, &val) == GP_VECTOR, 869 "Expected #GP on RDPMC(64)"); 870 } 871 872 int main(int ac, char **av) 873 { 874 int instruction_idx; 875 int branch_idx; 876 877 setup_vm(); 878 handle_irq(PMI_VECTOR, cnt_overflow); 879 buf = malloc(N*64); 880 881 check_invalid_rdpmc_gp(); 882 883 if (pmu.is_intel) { 884 if (!pmu.version) { 885 report_skip("No Intel Arch PMU is detected!"); 886 return report_summary(); 887 } 888 gp_events = (struct pmu_event *)intel_gp_events; 889 gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]); 890 instruction_idx = INTEL_INSTRUCTIONS_IDX; 891 branch_idx = INTEL_BRANCHES_IDX; 892 report_prefix_push("Intel"); 893 set_ref_cycle_expectations(); 894 } else { 895 gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]); 896 gp_events = (struct pmu_event *)amd_gp_events; 897 instruction_idx = AMD_INSTRUCTIONS_IDX; 898 branch_idx = AMD_BRANCHES_IDX; 899 report_prefix_push("AMD"); 900 } 901 adjust_events_range(gp_events, instruction_idx, branch_idx); 902 903 printf("PMU version: %d\n", pmu.version); 904 printf("GP counters: %d\n", pmu.nr_gp_counters); 905 printf("GP counter width: %d\n", pmu.gp_counter_width); 906 printf("Mask length: %d\n", pmu.gp_counter_mask_length); 907 printf("Fixed counters: %d\n", pmu.nr_fixed_counters); 908 printf("Fixed counter width: %d\n", pmu.fixed_counter_width); 909 910 fixed_counters_num = MIN(pmu.nr_fixed_counters, ARRAY_SIZE(fixed_events)); 911 if (pmu.nr_fixed_counters > ARRAY_SIZE(fixed_events)) 912 report_info("Fixed counters number %d > defined fixed events %u. " 913 "Please update test case.", pmu.nr_fixed_counters, 914 (uint32_t)ARRAY_SIZE(fixed_events)); 915 916 apic_write(APIC_LVTPC, PMI_VECTOR); 917 918 check_counters(); 919 920 if (pmu_has_full_writes()) { 921 pmu.msr_gp_counter_base = MSR_IA32_PMC0; 922 923 report_prefix_push("full-width writes"); 924 check_counters(); 925 check_gp_counters_write_width(); 926 report_prefix_pop(); 927 } 928 929 if (!pmu.is_intel) { 930 report_prefix_push("K7"); 931 pmu.nr_gp_counters = AMD64_NUM_COUNTERS; 932 pmu.msr_gp_counter_base = MSR_K7_PERFCTR0; 933 pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0; 934 check_counters(); 935 report_prefix_pop(); 936 } 937 938 return report_summary(); 939 } 940