#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/pmu.h"
#include "x86/apic-defs.h"
#include "x86/apic.h"
#include "x86/desc.h"
#include "x86/isr.h"
#include "vmalloc.h"
#include "alloc.h"

#include "libcflat.h"
#include <stdint.h>

#define N 1000000

// These values match the number of instructions and branches in the
// assembly block in check_emulated_instr().
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

#define IBPB_JMP_INSNS     9
#define IBPB_JMP_BRANCHES  2

#if defined(__i386__) || defined(_M_IX86) /* i386 */
#define IBPB_JMP_ASM(_wrmsr)                            \
        "mov $1, %%eax; xor %%edx, %%edx;\n\t"          \
        "mov $73, %%ecx;\n\t"                           \
        _wrmsr "\n\t"                                   \
        "call 1f\n\t"                                   \
        "1: pop %%eax\n\t"                              \
        "add $(2f-1b), %%eax\n\t"                       \
        "jmp *%%eax;\n\t"                               \
        "nop;\n\t"                                      \
        "2: nop;\n\t"
#else /* x86_64 */
#define IBPB_JMP_ASM(_wrmsr)                            \
        "mov $1, %%eax; xor %%edx, %%edx;\n\t"          \
        "mov $73, %%ecx;\n\t"                           \
        _wrmsr "\n\t"                                   \
        "call 1f\n\t"                                   \
        "1: pop %%rax\n\t"                              \
        "add $(2f-1b), %%rax\n\t"                       \
        "jmp *%%rax;\n\t"                               \
        "nop;\n\t"                                      \
        "2: nop;\n\t"
#endif

/* GLOBAL_CTRL enable + disable + clflush/mfence + IBPB_JMP */
#define EXTRA_INSNS  (3 + 3 + 2 + IBPB_JMP_INSNS)
#define LOOP_INSNS   (N * 10 + EXTRA_INSNS)
#define LOOP_BRANCHES  (N + IBPB_JMP_BRANCHES)
#define LOOP_ASM(_wrmsr1, _clflush, _wrmsr2)                    \
        _wrmsr1 "\n\t"                                          \
        "mov %%ecx, %%edi; mov %%ebx, %%ecx;\n\t"               \
        _clflush "\n\t"                                         \
        "mfence;\n\t"                                           \
        "1: mov (%1), %2; add $64, %1;\n\t"                     \
        "nop; nop; nop; nop; nop; nop; nop;\n\t"                \
        "loop 1b;\n\t"                                          \
        IBPB_JMP_ASM(_wrmsr2)                                   \
        "mov %%edi, %%ecx; xor %%eax, %%eax; xor %%edx, %%edx;\n\t" \
        _wrmsr1 "\n\t"

#define _loop_asm(_wrmsr1, _clflush, _wrmsr2)                   \
do {                                                            \
        asm volatile(LOOP_ASM(_wrmsr1, _clflush, _wrmsr2)       \
                     : "=b"(tmp), "=r"(tmp2), "=r"(tmp3)        \
                     : "a"(eax), "d"(edx), "c"(global_ctl),     \
                       "0"(N), "1"(buf)                         \
                     : "edi");                                  \
} while (0)

typedef struct {
        uint32_t ctr;
        uint32_t idx;
        uint64_t config;
        uint64_t count;
} pmu_counter_t;

struct pmu_event {
        const char *name;
        uint32_t unit_sel;
        int min;
        int max;
} intel_gp_events[] = {
        {"core cycles", 0x003c, 1*N, 50*N},
        {"instructions", 0x00c0, 10*N, 10.2*N},
        {"ref cycles", 0x013c, 1*N, 30*N},
        {"llc references", 0x4f2e, 1, 2*N},
        {"llc misses", 0x412e, 1, 1*N},
        {"branches", 0x00c4, 1*N, 1.1*N},
        {"branch misses", 0x00c5, 1, 0.1*N},
}, amd_gp_events[] = {
        {"core cycles", 0x0076, 1*N, 50*N},
        {"instructions", 0x00c0, 10*N, 10.2*N},
        {"branches", 0x00c2, 1*N, 1.1*N},
        {"branch misses", 0x00c3, 1, 0.1*N},
}, fixed_events[] = {
        {"fixed 0", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
        {"fixed 1", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
        {"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
};

/*
 * Event indices in intel_gp_events[]; keep these consistent with the
 * order of intel_gp_events[] above.
 */
enum {
        INTEL_INSTRUCTIONS_IDX = 1,
        INTEL_REF_CYCLES_IDX   = 2,
        INTEL_LLC_MISSES_IDX   = 4,
        INTEL_BRANCHES_IDX     = 5,
        INTEL_BRANCH_MISS_IDX  = 6,
};

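/*
 * Illustrative example of how the index enums tie into the event tables
 * above (assuming the table order is unchanged): on Intel,
 * gp_events[INTEL_BRANCHES_IDX].unit_sel is 0x00c4, i.e. the raw event
 * select that gets programmed into the event select MSR whenever the
 * "branches" event is measured.
 */
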
/*
 * Event indices in amd_gp_events[]; keep these consistent with the
 * order of amd_gp_events[] above.
 */
enum {
        AMD_INSTRUCTIONS_IDX = 1,
        AMD_BRANCHES_IDX     = 2,
        AMD_BRANCH_MISS_IDX  = 3,
};

char *buf;

static struct pmu_event *gp_events;
static unsigned int gp_events_size;
static unsigned int fixed_counters_num;

static int has_ibpb(void)
{
        return this_cpu_has(X86_FEATURE_SPEC_CTRL) ||
               this_cpu_has(X86_FEATURE_AMD_IBPB);
}

static inline void __loop(void)
{
        unsigned long tmp, tmp2, tmp3;
        u32 global_ctl = 0;
        u32 eax = 0;
        u32 edx = 0;

        if (this_cpu_has(X86_FEATURE_CLFLUSH) && has_ibpb())
                _loop_asm("nop", "clflush (%1)", "wrmsr");
        else if (this_cpu_has(X86_FEATURE_CLFLUSH))
                _loop_asm("nop", "clflush (%1)", "nop");
        else if (has_ibpb())
                _loop_asm("nop", "nop", "wrmsr");
        else
                _loop_asm("nop", "nop", "nop");
}

/*
 * Enable and disable the counters in a single asm blob to ensure that no
 * other instructions are counted in the window between enabling the
 * counters and actually executing the LOOP_ASM code.  This lets the test
 * verify the instructions and branches events against precise counts
 * instead of a rough valid range.
 */
static inline void __precise_loop(u64 cntrs)
{
        unsigned long tmp, tmp2, tmp3;
        u32 global_ctl = pmu.msr_global_ctl;
        u32 eax = cntrs & (BIT_ULL(32) - 1);
        u32 edx = cntrs >> 32;

        if (this_cpu_has(X86_FEATURE_CLFLUSH) && has_ibpb())
                _loop_asm("wrmsr", "clflush (%1)", "wrmsr");
        else if (this_cpu_has(X86_FEATURE_CLFLUSH))
                _loop_asm("wrmsr", "clflush (%1)", "nop");
        else if (has_ibpb())
                _loop_asm("wrmsr", "nop", "wrmsr");
        else
                _loop_asm("wrmsr", "nop", "nop");
}

static inline void loop(u64 cntrs)
{
        if (!this_cpu_has_perf_global_ctrl())
                __loop();
        else
                __precise_loop(cntrs);
}

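/*
 * Usage sketch (illustrative): to measure on GP counters 0 and 1 inside
 * a single precise window, a caller builds the GLOBAL_CTRL mask from the
 * counters' global indices and passes it down, e.g.
 *
 *      u64 cntrs = BIT_ULL(0) | BIT_ULL(1);
 *      loop(cntrs);
 *
 * which is how measure_many() below assembles the mask, so that the
 * enable/disable WRMSRs execute inside the measured asm blob.
 */
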
static void adjust_events_range(struct pmu_event *gp_events,
                                int instruction_idx, int branch_idx,
                                int branch_miss_idx)
{
        /*
         * If the HW supports the GLOBAL_CTRL MSR, enabling and disabling the
         * PMCs is moved into __precise_loop().  Thus, the instructions and
         * branches events can be verified against a precise count instead of
         * a rough range.
         *
         * Skip the precise checks on AMD, as AMD CPUs count VMRUN as a branch
         * instruction in guest context, which leads to intermittent failures
         * as the counts will vary depending on how many asynchronous VM-Exits
         * occur while running the measured code, e.g. if the host takes IRQs.
         */
        if (pmu.is_intel && this_cpu_has_perf_global_ctrl()) {
                gp_events[instruction_idx].min = LOOP_INSNS;
                gp_events[instruction_idx].max = LOOP_INSNS;
                gp_events[branch_idx].min = LOOP_BRANCHES;
                gp_events[branch_idx].max = LOOP_BRANCHES;
        }

        /*
         * For CPUs without IBPB support, there is no way to force a branch
         * miss, and the measured branch misses may legitimately be 0.  Thus
         * overwrite the lower bound of the branch misses event with 0 to
         * avoid false positives.
         */
        if (!has_ibpb())
                gp_events[branch_miss_idx].min = 0;
}

volatile uint64_t irq_received;

static void cnt_overflow(isr_regs_t *regs)
{
        irq_received++;
        apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
        apic_write(APIC_EOI, 0);
}

static bool check_irq(void)
{
        int i;
        irq_received = 0;
        sti();
        for (i = 0; i < 100000 && !irq_received; i++)
                asm volatile("pause");
        cli();
        return irq_received;
}

static bool is_gp(pmu_counter_t *evt)
{
        if (!pmu.is_intel)
                return true;

        return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
                evt->ctr >= MSR_IA32_PMC0;
}

static int event_to_global_idx(pmu_counter_t *cnt)
{
        if (pmu.is_intel)
                return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
                        (MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));

        if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0)
                return (cnt->ctr - pmu.msr_gp_counter_base) / 2;
        else
                return cnt->ctr - pmu.msr_gp_counter_base;
}

static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
{
        if (is_gp(cnt)) {
                int i;

                for (i = 0; i < gp_events_size; i++)
                        if (gp_events[i].unit_sel == (cnt->config & 0xffff))
                                return &gp_events[i];
        } else {
                unsigned int idx = cnt->ctr - MSR_CORE_PERF_FIXED_CTR0;

                if (idx < ARRAY_SIZE(fixed_events))
                        return &fixed_events[idx];
        }

        return (void*)0;
}

static void global_enable(pmu_counter_t *cnt)
{
        if (!this_cpu_has_perf_global_ctrl())
                return;

        cnt->idx = event_to_global_idx(cnt);
        wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx));
}

static void global_disable(pmu_counter_t *cnt)
{
        if (!this_cpu_has_perf_global_ctrl())
                return;

        wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx));
}

static void __start_event(pmu_counter_t *evt, uint64_t count)
{
        evt->count = count;
        wrmsr(evt->ctr, evt->count);
        if (is_gp(evt)) {
                wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
                      evt->config | EVNTSEL_EN);
        } else {
                uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
                int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
                uint32_t usrospmi = 0;

                if (evt->config & EVNTSEL_OS)
                        usrospmi |= (1 << 0);
                if (evt->config & EVNTSEL_USR)
                        usrospmi |= (1 << 1);
                if (evt->config & EVNTSEL_INT)
                        usrospmi |= (1 << 3); // PMI on overflow
                ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
                wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
        }
        apic_write(APIC_LVTPC, PMI_VECTOR);
}

static void start_event(pmu_counter_t *evt)
{
        __start_event(evt, 0);
        global_enable(evt);
}

static void __stop_event(pmu_counter_t *evt)
{
        if (is_gp(evt)) {
                wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
                      evt->config & ~EVNTSEL_EN);
        } else {
                uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
                int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
                wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
        }
        evt->count = rdmsr(evt->ctr);
}

static void stop_event(pmu_counter_t *evt)
{
        global_disable(evt);
        __stop_event(evt);
}

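/*
 * Worked example (illustrative) for the FIXED_CTR_CTRL encoding used by
 * __start_event()/__stop_event() above: each fixed counter owns a 4-bit
 * field at bit position (counter index * 4).  For fixed counter 1 with
 * OS, USR and PMI requested, usrospmi = 0x1 | 0x2 | 0x8 = 0xb and
 * shift = 4, so the nibble written is 0xb << 4.
 */
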
static noinline void measure_many(pmu_counter_t *evt, int count)
{
        int i;
        u64 cntrs = 0;

        for (i = 0; i < count; i++) {
                __start_event(&evt[i], 0);
                cntrs |= BIT_ULL(event_to_global_idx(&evt[i]));
        }
        loop(cntrs);
        for (i = 0; i < count; i++)
                __stop_event(&evt[i]);
}

static void measure_one(pmu_counter_t *evt)
{
        measure_many(evt, 1);
}

static noinline void __measure(pmu_counter_t *evt, uint64_t count)
{
        u64 cntrs = BIT_ULL(event_to_global_idx(evt));

        __start_event(evt, count);
        loop(cntrs);
        __stop_event(evt);
}

static bool verify_event(uint64_t count, struct pmu_event *e)
{
        bool pass;

        if (!e)
                return false;

        pass = count >= e->min && count <= e->max;
        if (!pass)
                printf("FAIL: %d <= %"PRId64" <= %d\n", e->min, count, e->max);

        return pass;
}

static bool verify_counter(pmu_counter_t *cnt)
{
        return verify_event(cnt->count, get_counter_event(cnt));
}

static void check_gp_counter(struct pmu_event *evt)
{
        pmu_counter_t cnt = {
                .config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
        };
        int i;

        for (i = 0; i < pmu.nr_gp_counters; i++) {
                cnt.ctr = MSR_GP_COUNTERx(i);
                measure_one(&cnt);
                report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
        }
}

static void check_gp_counters(void)
{
        int i;

        for (i = 0; i < gp_events_size; i++)
                if (pmu_gp_counter_is_available(i))
                        check_gp_counter(&gp_events[i]);
                else
                        printf("GP event '%s' is disabled\n",
                               gp_events[i].name);
}

static void check_fixed_counters(void)
{
        pmu_counter_t cnt = {
                .config = EVNTSEL_OS | EVNTSEL_USR,
        };
        int i;

        for (i = 0; i < fixed_counters_num; i++) {
                cnt.ctr = fixed_events[i].unit_sel;
                measure_one(&cnt);
                report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
        }
}

static void check_counters_many(void)
{
        pmu_counter_t cnt[48];
        int i, n;

        for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) {
                if (!pmu_gp_counter_is_available(i))
                        continue;

                cnt[n].ctr = MSR_GP_COUNTERx(n);
                cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
                        gp_events[i % gp_events_size].unit_sel;
                n++;
        }
        for (i = 0; i < fixed_counters_num; i++) {
                cnt[n].ctr = fixed_events[i].unit_sel;
                cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
                n++;
        }

        assert(n <= ARRAY_SIZE(cnt));
        measure_many(cnt, n);

        for (i = 0; i < n; i++)
                if (!verify_counter(&cnt[i]))
                        break;

        report(i == n, "all counters");
}

static uint64_t measure_for_overflow(pmu_counter_t *cnt)
{
        __measure(cnt, 0);
        /*
         * To generate overflow, i.e. roll over to '0', the initial count just
         * needs to be preset to the negative expected count.  However, as per
         * Intel's SDM, the preset count needs to be incremented by 1 to ensure
         * the overflow interrupt is generated immediately instead of possibly
         * waiting for the overflow to propagate through the counter.
         */
        assert(cnt->count > 1);
        return 1 - cnt->count;
}

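/*
 * Worked example (illustrative): if the calibration run in
 * measure_for_overflow() counted cnt->count = 100 events, the returned
 * preset is 1 - 100, i.e. the 64-bit two's-complement value -99, so an
 * identical run drives the counter through 0 and sets the overflow
 * condition.
 */
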
static void check_counter_overflow(void)
{
        int i;
        uint64_t overflow_preset;
        int instruction_idx = pmu.is_intel ?
                              INTEL_INSTRUCTIONS_IDX :
                              AMD_INSTRUCTIONS_IDX;

        pmu_counter_t cnt = {
                .ctr = MSR_GP_COUNTERx(0),
                .config = EVNTSEL_OS | EVNTSEL_USR |
                          gp_events[instruction_idx].unit_sel /* instructions */,
        };
        overflow_preset = measure_for_overflow(&cnt);

        /* clear status before test */
        if (this_cpu_has_perf_global_status())
                pmu_clear_global_status();

        report_prefix_push("overflow");

        for (i = 0; i < pmu.nr_gp_counters + 1; i++) {
                uint64_t status;
                int idx;

                cnt.count = overflow_preset;
                if (pmu_use_full_writes())
                        cnt.count &= (1ull << pmu.gp_counter_width) - 1;

                if (i == pmu.nr_gp_counters) {
                        if (!pmu.is_intel)
                                break;

                        cnt.ctr = fixed_events[0].unit_sel;
                        cnt.count = measure_for_overflow(&cnt);
                        cnt.count &= (1ull << pmu.gp_counter_width) - 1;
                } else {
                        cnt.ctr = MSR_GP_COUNTERx(i);
                }

                if (i % 2)
                        cnt.config |= EVNTSEL_INT;
                else
                        cnt.config &= ~EVNTSEL_INT;
                idx = event_to_global_idx(&cnt);
                __measure(&cnt, cnt.count);
                if (pmu.is_intel)
                        report(cnt.count == 1, "cntr-%d", i);
                else
                        report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i);

                if (!this_cpu_has_perf_global_status())
                        continue;

                status = rdmsr(pmu.msr_global_status);
                report(status & (1ull << idx), "status-%d", i);
                wrmsr(pmu.msr_global_status_clr, status);
                status = rdmsr(pmu.msr_global_status);
                report(!(status & (1ull << idx)), "status clear-%d", i);
                report(check_irq() == (i % 2), "irq-%d", i);
        }

        report_prefix_pop();
}

static void check_gp_counter_cmask(void)
{
        int instruction_idx = pmu.is_intel ?
                              INTEL_INSTRUCTIONS_IDX :
                              AMD_INSTRUCTIONS_IDX;

        pmu_counter_t cnt = {
                .ctr = MSR_GP_COUNTERx(0),
                .config = EVNTSEL_OS | EVNTSEL_USR |
                          gp_events[instruction_idx].unit_sel /* instructions */,
        };
        cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
        measure_one(&cnt);
        report(cnt.count < gp_events[instruction_idx].min, "cmask");
}

static void do_rdpmc_fast(void *ptr)
{
        pmu_counter_t *cnt = ptr;
        uint32_t idx = (uint32_t)cnt->idx | (1u << 31);

        if (!is_gp(cnt))
                idx |= 1 << 30;

        cnt->count = rdpmc(idx);
}

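/*
 * RDPMC index encoding used by do_rdpmc_fast() above (illustrative):
 * bit 30 of the index selects the fixed-counter range and bit 31
 * requests the "fast" 32-bit read, so e.g. fixed counter 1 is read via
 * rdpmc(1 | (1u << 30) | (1u << 31)).
 */
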
static void check_rdpmc(void)
{
        uint64_t val = 0xff0123456789ull;
        bool exc;
        int i;

        report_prefix_push("rdpmc");

        for (i = 0; i < pmu.nr_gp_counters; i++) {
                uint64_t x;
                pmu_counter_t cnt = {
                        .ctr = MSR_GP_COUNTERx(i),
                        .idx = i
                };

                /*
                 * Without full-width writes, only the low 32 bits are
                 * writable, and the value is sign-extended.
                 */
                if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0)
                        x = (uint64_t)(int64_t)(int32_t)val;
                else
                        x = (uint64_t)(int64_t)val;

                /* Mask according to the number of supported bits */
                x &= (1ull << pmu.gp_counter_width) - 1;

                wrmsr(MSR_GP_COUNTERx(i), val);
                report(rdpmc(i) == x, "cntr-%d", i);

                exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
                if (exc)
                        report_skip("fast-%d", i);
                else
                        report(cnt.count == (u32)val, "fast-%d", i);
        }
        for (i = 0; i < fixed_counters_num; i++) {
                uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1);
                pmu_counter_t cnt = {
                        .ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
                        .idx = i
                };

                wrmsr(MSR_PERF_FIXED_CTRx(i), x);
                report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);

                exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
                if (exc)
                        report_skip("fixed fast-%d", i);
                else
                        report(cnt.count == (u32)x, "fixed fast-%d", i);
        }

        report_prefix_pop();
}

static void check_running_counter_wrmsr(void)
{
        uint64_t status;
        uint64_t count;
        unsigned int instruction_idx = pmu.is_intel ?
                                       INTEL_INSTRUCTIONS_IDX :
                                       AMD_INSTRUCTIONS_IDX;

        pmu_counter_t evt = {
                .ctr = MSR_GP_COUNTERx(0),
                .config = EVNTSEL_OS | EVNTSEL_USR |
                          gp_events[instruction_idx].unit_sel,
        };

        report_prefix_push("running counter wrmsr");

        start_event(&evt);
        __loop();
        wrmsr(MSR_GP_COUNTERx(0), 0);
        stop_event(&evt);
        report(evt.count < gp_events[instruction_idx].min, "cntr");

        /* clear status before overflow test */
        if (this_cpu_has_perf_global_status())
                pmu_clear_global_status();

        start_event(&evt);

        count = -1;
        if (pmu_use_full_writes())
                count &= (1ull << pmu.gp_counter_width) - 1;

        wrmsr(MSR_GP_COUNTERx(0), count);

        __loop();
        stop_event(&evt);

        if (this_cpu_has_perf_global_status()) {
                status = rdmsr(pmu.msr_global_status);
                report(status & 1, "status msr bit");
        }

        report_prefix_pop();
}

static void check_emulated_instr(void)
{
        uint64_t status, instr_start, brnch_start;
        uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1;
        unsigned int branch_idx = pmu.is_intel ?
                                  INTEL_BRANCHES_IDX : AMD_BRANCHES_IDX;
        unsigned int instruction_idx = pmu.is_intel ?
                                       INTEL_INSTRUCTIONS_IDX :
                                       AMD_INSTRUCTIONS_IDX;
        pmu_counter_t brnch_cnt = {
                .ctr = MSR_GP_COUNTERx(0),
                /* branch instructions */
                .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel,
        };
        pmu_counter_t instr_cnt = {
                .ctr = MSR_GP_COUNTERx(1),
                /* instructions */
                .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[instruction_idx].unit_sel,
        };
        report_prefix_push("emulated instruction");

        if (this_cpu_has_perf_global_status())
                pmu_clear_global_status();

        start_event(&brnch_cnt);
        start_event(&instr_cnt);

        brnch_start = -EXPECTED_BRNCH;
        instr_start = -EXPECTED_INSTR;
        wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width);
        wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width);
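        /*
         * Expected counts for the asm block below (these match EXPECTED_INSTR
         * and EXPECTED_BRNCH at the top of the file): 2 setup instructions,
         * 5 emulated "jne" branches and 5 mov+cpuid pairs, i.e. 17
         * instructions in total, 5 of which are branches.
         */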
        // KVM_FEP is a magic prefix that forces emulation so
        // 'KVM_FEP "jne label\n"' just counts as a single instruction.
        asm volatile(
                "mov $0x0, %%eax\n"
                "cmp $0x0, %%eax\n"
                KVM_FEP "jne label\n"
                KVM_FEP "jne label\n"
                KVM_FEP "jne label\n"
                KVM_FEP "jne label\n"
                KVM_FEP "jne label\n"
                "mov $0xa, %%eax\n"
                "cpuid\n"
                "mov $0xa, %%eax\n"
                "cpuid\n"
                "mov $0xa, %%eax\n"
                "cpuid\n"
                "mov $0xa, %%eax\n"
                "cpuid\n"
                "mov $0xa, %%eax\n"
                "cpuid\n"
                "label:\n"
                :
                :
                : "eax", "ebx", "ecx", "edx");

        if (this_cpu_has_perf_global_ctrl())
                wrmsr(pmu.msr_global_ctl, 0);

        stop_event(&brnch_cnt);
        stop_event(&instr_cnt);

        // Check that the end count - start count is at least the expected
        // number of instructions and branches.
        report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
               "instruction count");
        report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
               "branch count");
        if (this_cpu_has_perf_global_status()) {
                // Additionally check that those counters overflowed properly.
                status = rdmsr(pmu.msr_global_status);
                report(status & 1, "branch counter overflow");
                report(status & 2, "instruction counter overflow");
        }

        report_prefix_pop();
}

#define XBEGIN_STARTED (~0u)
static void check_tsx_cycles(void)
{
        pmu_counter_t cnt;
        unsigned int i, ret = 0;

        if (!this_cpu_has(X86_FEATURE_RTM))
                return;

        report_prefix_push("TSX cycles");

        for (i = 0; i < pmu.nr_gp_counters; i++) {
                cnt.ctr = MSR_GP_COUNTERx(i);

                if (i == 2) {
                        /* Transactional cycles committed only on gp counter 2 */
                        cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x30000003c;
                } else {
                        /* Transactional cycles */
                        cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x10000003c;
                }

                start_event(&cnt);

                asm volatile("xbegin 1f\n\t"
                             "1:\n\t"
                             : "+a" (ret) :: "memory");

                /* Generate a non-canonical #GP to trigger ABORT. */
                if (ret == XBEGIN_STARTED)
                        *(int *)NONCANONICAL = 0;

                stop_event(&cnt);

                report(cnt.count > 0, "gp cntr-%d with a value of %" PRId64 "", i, cnt.count);
        }

        report_prefix_pop();
}

static void warm_up(void)
{
        int i;

        /*
         * Since the cycles event is always run as the first event, there is a
         * cache warm-up phase during which the measured cycles value may
         * exceed the predefined upper bound and cause a false positive.  To
         * avoid this, run a warm-up loop before the real verification.
         */
        for (i = 0; i < 10; i++)
                loop(0);
}

static void check_counters(void)
{
        if (is_fep_available())
                check_emulated_instr();

        warm_up();
        check_gp_counters();
        check_fixed_counters();
        check_rdpmc();
        check_counters_many();
        check_counter_overflow();
        check_gp_counter_cmask();
        check_running_counter_wrmsr();
        check_tsx_cycles();
}

static void do_unsupported_width_counter_write(void *index)
{
        wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
}

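/*
 * Worked example (illustrative) for the width checks below: with
 * val_64 = 0xffffff0123456789, val_32 is 0x23456789, and on a CPU with a
 * 48-bit GP counter width val_max_width is 0xff0123456789.
 */
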
static void check_gp_counters_write_width(void)
{
        u64 val_64 = 0xffffff0123456789ull;
        u64 val_32 = val_64 & ((1ull << 32) - 1);
        u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1);
        int i;

        /*
         * MSR_IA32_PERFCTRn supports 64-bit writes,
         * but only the lowest 32 bits are valid.
         */
        for (i = 0; i < pmu.nr_gp_counters; i++) {
                wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
                assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
                assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

                wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
                assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
                assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

                wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
                assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
                assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
        }

        /*
         * MSR_IA32_PMCn supports writes up to the full GP counter width;
         * only the lowest gp_counter_width bits are valid.
         */
        for (i = 0; i < pmu.nr_gp_counters; i++) {
                wrmsr(MSR_IA32_PMC0 + i, val_32);
                assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
                assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);

                wrmsr(MSR_IA32_PMC0 + i, val_max_width);
                assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
                assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);

                report(test_for_exception(GP_VECTOR,
                        do_unsupported_width_counter_write, &i),
                       "writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
        }
}

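/*
 * Calibration sketch (illustrative) for set_ref_cycle_expectations()
 * below: if the calibration loop observes cnt.count reference cycles
 * across tsc_delta TSC cycles, each bound is rescaled as
 *
 *      bound = bound * cnt.count / tsc_delta;
 *
 * so the "ref cycles" range reflects the measured TSC : ref cycles ratio.
 */
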
/*
 * Per the SDM, reference cycles are currently implemented using the
 * core crystal clock, TSC, or bus clock.  Calibrate to the TSC
 * frequency to set reasonable expectations.
 */
static void set_ref_cycle_expectations(void)
{
        pmu_counter_t cnt = {
                .ctr = MSR_IA32_PERFCTR0,
                .config = EVNTSEL_OS | EVNTSEL_USR |
                          intel_gp_events[INTEL_REF_CYCLES_IDX].unit_sel,
        };
        uint64_t tsc_delta;
        uint64_t t0, t1, t2, t3;

        /* Bit 2 enumerates the availability of reference cycles events. */
        if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2))
                return;

        if (this_cpu_has_perf_global_ctrl())
                wrmsr(pmu.msr_global_ctl, 0);

        t0 = fenced_rdtsc();
        start_event(&cnt);
        t1 = fenced_rdtsc();

        /*
         * This loop has to run long enough to dominate the VM-exit
         * costs for playing with the PMU MSRs on start and stop.
         *
         * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
         * the core crystal clock, this function calculated a guest
         * TSC : ref cycles ratio of around 105 with ECX initialized
         * to one billion.
         */
        asm volatile("loop ." : "+c"((int){1000000000ull}));

        t2 = fenced_rdtsc();
        stop_event(&cnt);
        t3 = fenced_rdtsc();

        tsc_delta = ((t2 - t1) + (t3 - t0)) / 2;

        if (!tsc_delta)
                return;

        intel_gp_events[INTEL_REF_CYCLES_IDX].min =
                (intel_gp_events[INTEL_REF_CYCLES_IDX].min * cnt.count) / tsc_delta;
        intel_gp_events[INTEL_REF_CYCLES_IDX].max =
                (intel_gp_events[INTEL_REF_CYCLES_IDX].max * cnt.count) / tsc_delta;
}

static void check_invalid_rdpmc_gp(void)
{
        uint64_t val;

        report(rdpmc_safe(64, &val) == GP_VECTOR,
               "Expected #GP on RDPMC(64)");
}

int main(int ac, char **av)
{
        int instruction_idx;
        int branch_idx;
        int branch_miss_idx;

        setup_vm();
        handle_irq(PMI_VECTOR, cnt_overflow);
        buf = malloc(N*64);

        check_invalid_rdpmc_gp();

        if (pmu.is_intel) {
                if (!pmu.version) {
                        report_skip("No Intel Arch PMU is detected!");
                        return report_summary();
                }
                gp_events = (struct pmu_event *)intel_gp_events;
                gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
                instruction_idx = INTEL_INSTRUCTIONS_IDX;
                branch_idx = INTEL_BRANCHES_IDX;
                branch_miss_idx = INTEL_BRANCH_MISS_IDX;

                /*
                 * For legacy Intel CPUs without clflush/clflushopt support,
                 * there is no way to force an LLC miss, thus set the minimum
                 * value to 0 to avoid false positives.
                 */
                if (!this_cpu_has(X86_FEATURE_CLFLUSH))
                        gp_events[INTEL_LLC_MISSES_IDX].min = 0;

                report_prefix_push("Intel");
                set_ref_cycle_expectations();
        } else {
                gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
                gp_events = (struct pmu_event *)amd_gp_events;
                instruction_idx = AMD_INSTRUCTIONS_IDX;
                branch_idx = AMD_BRANCHES_IDX;
                branch_miss_idx = AMD_BRANCH_MISS_IDX;
                report_prefix_push("AMD");
        }
        adjust_events_range(gp_events, instruction_idx, branch_idx, branch_miss_idx);

        printf("PMU version: %d\n", pmu.version);
        printf("GP counters: %d\n", pmu.nr_gp_counters);
        printf("GP counter width: %d\n", pmu.gp_counter_width);
        printf("Mask length: %d\n", pmu.gp_counter_mask_length);
        printf("Fixed counters: %d\n", pmu.nr_fixed_counters);
        printf("Fixed counter width: %d\n", pmu.fixed_counter_width);

        fixed_counters_num = MIN(pmu.nr_fixed_counters, ARRAY_SIZE(fixed_events));
        if (pmu.nr_fixed_counters > ARRAY_SIZE(fixed_events))
                report_info("Fixed counters number %d > defined fixed events %u.  "
                            "Please update test case.", pmu.nr_fixed_counters,
                            (uint32_t)ARRAY_SIZE(fixed_events));

        apic_write(APIC_LVTPC, PMI_VECTOR);

        check_counters();

        if (pmu_has_full_writes()) {
                pmu.msr_gp_counter_base = MSR_IA32_PMC0;

                report_prefix_push("full-width writes");
                check_counters();
                check_gp_counters_write_width();
                report_prefix_pop();
        }

        if (!pmu.is_intel) {
                report_prefix_push("K7");
                pmu.nr_gp_counters = AMD64_NUM_COUNTERS;
                pmu.msr_gp_counter_base = MSR_K7_PERFCTR0;
                pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0;
                check_counters();
                report_prefix_pop();
        }

        return report_summary();
}