#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/pmu.h"
#include "x86/apic-defs.h"
#include "x86/apic.h"
#include "x86/desc.h"
#include "x86/isr.h"
#include "vmalloc.h"
#include "alloc.h"

#include "libcflat.h"
#include <stdint.h>

/*
 * Iteration count of the measured loop. It also scales the expected event
 * ranges in the tables below and sizes the buffer allocated in main().
 */
#define N 1000000

// These values match the number of instructions and branches in the
// assembly block in check_emulated_instr().
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

/*
 * Instruction and branch counts of IBPB_JMP_ASM: eight instructions from
 * "mov $1" through the indirect "jmp", plus the "nop" at label 2 (the "nop"
 * in front of label 2 is jumped over). "call" and "jmp" are the two branches.
 */
#define IBPB_JMP_INSNS 9
#define IBPB_JMP_BRANCHES 2

/*
 * Loads EAX=1, EDX=0, ECX=73 and emits _wrmsr (callers pass either "wrmsr" or
 * "nop"), then uses call/pop to obtain the current address and jumps
 * indirectly to label 2. The two variants differ only in register width.
 *
 * NOTE(review): MSR 73 (0x49) written with 1 is presumably the IBPB command,
 * given that the "wrmsr" form is only selected when has_ibpb() is true -
 * confirm against the MSR definitions.
 */
#if defined(__i386__) || defined(_M_IX86) /* i386 */
#define IBPB_JMP_ASM(_wrmsr) \
	"mov $1, %%eax; xor %%edx, %%edx;\n\t" \
	"mov $73, %%ecx;\n\t" \
	_wrmsr "\n\t" \
	"call 1f\n\t" \
	"1: pop %%eax\n\t" \
	"add $(2f-1b), %%eax\n\t" \
	"jmp *%%eax;\n\t" \
	"nop;\n\t" \
	"2: nop;\n\t"
#else /* x86_64 */
#define IBPB_JMP_ASM(_wrmsr) \
	"mov $1, %%eax; xor %%edx, %%edx;\n\t" \
	"mov $73, %%ecx;\n\t" \
	_wrmsr "\n\t" \
	"call 1f\n\t" \
	"1: pop %%rax\n\t" \
	"add $(2f-1b), %%rax\n\t" \
	"jmp *%%rax;\n\t" \
	"nop;\n\t" \
	"2: nop;\n\t"
#endif

/* GLOBAL_CTRL enable + disable + clflush/mfence + IBPB_JMP */
#define EXTRA_INSNS (3 + 3 + 2 + IBPB_JMP_INSNS)
/* Each loop iteration is 10 instructions: mov, add, seven nops and "loop". */
#define LOOP_INSNS (N * 10 + EXTRA_INSNS)
/* One "loop" branch per iteration plus the branches in IBPB_JMP_ASM. */
#define LOOP_BRANCHES (N + IBPB_JMP_BRANCHES)
/*
 * The measured workload. _wrmsr1 is emitted at the very start and very end
 * (counter enable/disable when it is "wrmsr"), _clflush once before the loop
 * and _wrmsr2 inside IBPB_JMP_ASM. ECX is saved in EDI across the loop and
 * restored before the final _wrmsr1, with EAX and EDX zeroed for it.
 */
#define LOOP_ASM(_wrmsr1, _clflush, _wrmsr2) \
	_wrmsr1 "\n\t" \
	"mov %%ecx, %%edi; mov %%ebx, %%ecx;\n\t" \
	_clflush "\n\t" \
	"mfence;\n\t" \
	"1: mov (%1), %2; add $64, %1;\n\t" \
	"nop; nop; nop; nop; nop; nop; nop;\n\t" \
	"loop 1b;\n\t" \
	IBPB_JMP_ASM(_wrmsr2) \
	"mov %%edi, %%ecx; xor %%eax, %%eax; xor %%edx, %%edx;\n\t" \
	_wrmsr1 "\n\t"

/*
 * Emits LOOP_ASM using locals of the expanding function: tmp, tmp2, tmp3,
 * eax, edx and global_ctl must be in scope.
 *   %0 - EBX, preloaded with N and copied into ECX as the loop counter
 *   %1 - pointer preloaded with buf, advanced by 64 bytes per iteration
 *   %2 - destination register of the load from (%1)
 * EAX/EDX/ECX carry eax/edx/global_ctl for the first _wrmsr1.
 *
 * NOTE(review): the template overwrites EAX, EDX and ECX although they are
 * declared as input-only operands; nothing reads them after the asm in the
 * current callers, but confirm this is acceptable for the compilers in use.
 */
#define _loop_asm(_wrmsr1, _clflush, _wrmsr2) \
do { \
	asm volatile(LOOP_ASM(_wrmsr1, _clflush, _wrmsr2) \
		     : "=b"(tmp), "=r"(tmp2), "=r"(tmp3) \
		     : "a"(eax), "d"(edx), "c"(global_ctl), \
		       "0"(N), "1"(buf) \
		     : "edi"); \
} while (0)

/* Software state of one counter under test. */
typedef struct {
	uint32_t ctr;		/* MSR address of the counter */
	uint32_t idx;		/* bit index in the global control MSR, set by global_enable() */
	uint64_t config;	/* event select value; EVNTSEL_EN is OR-ed in when started */
	uint64_t count;		/* preset written on start, value read back on stop */
} pmu_counter_t;

/*
 * An event and the inclusive range [min, max] its count must fall in after
 * one run of the measured loop (see verify_event()). For the GP tables
 * unit_sel is the value matched against the low 16 bits of the counter's
 * config; for fixed_events[] it holds the counter's MSR address.
 */
struct pmu_event {
	const char *name;
	uint32_t unit_sel;
	int min;
	int max;
} intel_gp_events[] = {
	{"core cycles", 0x003c, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"ref cycles", 0x013c, 1*N, 30*N},
	{"llc references", 0x4f2e, 1, 2*N},
	{"llc misses", 0x412e, 1, 1*N},
	{"branches", 0x00c4, 1*N, 1.1*N},
	{"branch misses", 0x00c5, 0, 0.1*N},
}, amd_gp_events[] = {
	{"core cycles", 0x0076, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"branches", 0x00c2, 1*N, 1.1*N},
	{"branch misses", 0x00c3, 0, 0.1*N},
}, fixed_events[] = {
	{"fixed 0", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
};

/*
 * Events index in intel_gp_events[], ensure consistent with
 * intel_gp_events[].
 */
enum {
	INTEL_INSTRUCTIONS_IDX = 1,
	INTEL_REF_CYCLES_IDX = 2,
	INTEL_LLC_MISSES_IDX = 4,
	INTEL_BRANCHES_IDX = 5,
};

/*
 * Events index in amd_gp_events[], ensure consistent with
 * amd_gp_events[].
 */
enum {
	AMD_INSTRUCTIONS_IDX = 1,
	AMD_BRANCHES_IDX = 2,
};

/* Memory walked by the measured loop, 64 bytes per iteration; allocated in main(). */
char *buf;

/* Vendor-specific GP event table selected in main(), and its element count. */
static struct pmu_event *gp_events;
static unsigned int gp_events_size;
/* Number of fixed counters exercised, capped in main() to ARRAY_SIZE(fixed_events). */
static unsigned int fixed_counters_num;

/* Returns non-zero when either CPU feature that gates the IBPB write is present. */
static int has_ibpb(void)
{
	return this_cpu_has(X86_FEATURE_SPEC_CTRL) ||
	       this_cpu_has(X86_FEATURE_AMD_IBPB);
}

/*
 * Run the measured loop without touching the global control MSR: the
 * _wrmsr1 slots are filled with "nop". CLFLUSH and the IBPB write are only
 * emitted when the CPU advertises them, otherwise "nop" stands in so the
 * instruction count stays the same.
 */
static inline void __loop(void)
{
	unsigned long tmp, tmp2, tmp3;
	u32 global_ctl = 0;
	u32 eax = 0;
	u32 edx = 0;

	if (this_cpu_has(X86_FEATURE_CLFLUSH) && has_ibpb())
		_loop_asm("nop", "clflush (%1)", "wrmsr");
	else if (this_cpu_has(X86_FEATURE_CLFLUSH))
		_loop_asm("nop", "clflush (%1)", "nop");
	else if (has_ibpb())
		_loop_asm("nop", "nop", "wrmsr");
	else
		_loop_asm("nop", "nop", "nop");
}

/*
 * Enable and disable counters in a whole asm blob to ensure
 * no other instructions are counted in the window between
 * counters enabling and really LOOP_ASM code executing.
 * Thus counters can verify instructions and branches events
 * against precise counts instead of a rough valid count range.
 *
 * @cntrs: bitmask written to the global control MSR at the start of the
 *         blob (low half in EAX, high half in EDX); zero is written at
 *         the end.
 */
static inline void __precise_loop(u64 cntrs)
{
	unsigned long tmp, tmp2, tmp3;
	u32 global_ctl = pmu.msr_global_ctl;
	u32 eax = cntrs & (BIT_ULL(32) - 1);
	u32 edx = cntrs >> 32;

	if (this_cpu_has(X86_FEATURE_CLFLUSH) && has_ibpb())
		_loop_asm("wrmsr", "clflush (%1)", "wrmsr");
	else if (this_cpu_has(X86_FEATURE_CLFLUSH))
		_loop_asm("wrmsr", "clflush (%1)", "nop");
	else if (has_ibpb())
		_loop_asm("wrmsr", "nop", "wrmsr");
	else
		_loop_asm("wrmsr", "nop", "nop");
}

/*
 * Run the measured loop, using the precise variant whenever the CPU has a
 * global control MSR. @cntrs is ignored by the non-precise variant.
 */
static inline void loop(u64 cntrs)
{
	if (!this_cpu_has_perf_global_ctrl())
		__loop();
	else
		__precise_loop(cntrs);
}

/*
 * Tighten the expected range of the instructions and branches events to the
 * exact LOOP_INSNS / LOOP_BRANCHES values when precise measurement is
 * possible.
 *
 * @gp_events:       event table to patch (shadows the file-scope pointer)
 * @instruction_idx: index of the instructions event in @gp_events
 * @branch_idx:      index of the branches event in @gp_events
 */
static void adjust_events_range(struct pmu_event *gp_events,
				int instruction_idx, int branch_idx)
{
	/*
	 * If HW supports GLOBAL_CTRL MSR, enabling and disabling PMCs are
	 * moved in __precise_loop(). Thus, instructions and branches events
	 * can be verified against a precise count instead of a rough range.
	 *
	 * Skip the precise checks on AMD, as AMD CPUs count VMRUN as a branch
	 * instruction in guest context, which leads to intermittent failures
	 * as the counts will vary depending on how many asynchronous VM-Exits
	 * occur while running the measured code, e.g. if the host takes IRQs.
	 */
	if (pmu.is_intel && this_cpu_has_perf_global_ctrl()) {
		gp_events[instruction_idx].min = LOOP_INSNS;
		gp_events[instruction_idx].max = LOOP_INSNS;
		gp_events[branch_idx].min = LOOP_BRANCHES;
		gp_events[branch_idx].max = LOOP_BRANCHES;
	}
}

/* Incremented by cnt_overflow(); reset and polled by check_irq(). */
volatile uint64_t irq_received;

/*
 * Interrupt handler installed on PMI_VECTOR in main(): records the
 * interrupt, clears the mask bit in the LVTPC entry and signals EOI.
 */
static void cnt_overflow(isr_regs_t *regs)
{
	irq_received++;
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
	apic_write(APIC_EOI, 0);
}

/*
 * Open an interrupt window and spin for up to 100000 "pause" iterations.
 * Returns true if cnt_overflow() ran during the window.
 */
static bool check_irq(void)
{
	int i;
	irq_received = 0;
	sti();
	for (i = 0; i < 100000 && !irq_received; i++)
		asm volatile("pause");
	cli();
	return irq_received;
}

/*
 * Tell whether @evt is a general-purpose counter. On non-Intel CPUs every
 * counter is treated as GP; on Intel the counter is GP when its MSR lies
 * below the fixed counter MSRs or at/above MSR_IA32_PMC0.
 */
static bool is_gp(pmu_counter_t *evt)
{
	if (!pmu.is_intel)
		return true;

	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
		evt->ctr >= MSR_IA32_PMC0;
}

/*
 * Convert a counter MSR address into the counter's bit index in the global
 * control/status MSRs. Fixed counters are offset by FIXED_CNT_INDEX. With
 * the MSR_F15H_PERF_CTR0 base the MSR distance is halved to get the index.
 */
static int event_to_global_idx(pmu_counter_t *cnt)
{
	if (pmu.is_intel)
		return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
			(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));

	if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0)
		return (cnt->ctr - pmu.msr_gp_counter_base) / 2;
	else
		return cnt->ctr - pmu.msr_gp_counter_base;
}

/*
 * Look up the expectation entry for @cnt: GP counters are matched by the low
 * 16 bits of their config against gp_events[], fixed counters by their
 * offset from MSR_CORE_PERF_FIXED_CTR0 into fixed_events[]. Returns a null
 * pointer when nothing matches.
 */
static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
{
	if (is_gp(cnt)) {
		int i;

		for (i = 0; i < gp_events_size; i++)
			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
				return &gp_events[i];
	} else {
		unsigned int idx = cnt->ctr - MSR_CORE_PERF_FIXED_CTR0;

		if (idx < ARRAY_SIZE(fixed_events))
			return &fixed_events[idx];
	}

	return (void*)0;
}

/*
 * Set the counter's bit in the global control MSR and remember the bit
 * index in cnt->idx. No-op when the CPU has no global control MSR.
 */
static void global_enable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	cnt->idx = event_to_global_idx(cnt);
	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx));
}

/*
 * Clear the counter's bit in the global control MSR. Uses cnt->idx as
 * stored by global_enable(). No-op when the CPU has no global control MSR.
 */
static void global_disable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx));
}

/*
 * Program a counter without touching the global control MSR.
 *
 * @evt:   counter to program; evt->count is set to @count
 * @count: initial value written to the counter MSR
 *
 * GP counters get their event select written with EVNTSEL_EN set. Fixed
 * counters get their 4-bit field in MSR_CORE_PERF_FIXED_CTR_CTRL rebuilt
 * from the OS/USR/INT bits of evt->config. The LVTPC entry is rewritten with
 * PMI_VECTOR on every call.
 */
static void __start_event(pmu_counter_t *evt, uint64_t count)
{
    evt->count = count;
    wrmsr(evt->ctr, evt->count);
    if (is_gp(evt)) {
	    wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		  evt->config | EVNTSEL_EN);
    } else {
	    uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
	    int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
	    uint32_t usrospmi = 0;

	    if (evt->config & EVNTSEL_OS)
		    usrospmi |= (1 << 0);
	    if (evt->config & EVNTSEL_USR)
		    usrospmi |= (1 << 1);
	    if (evt->config & EVNTSEL_INT)
		    usrospmi |= (1 << 3); // PMI on overflow
	    ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
	    wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
    }
    apic_write(APIC_LVTPC, PMI_VECTOR);
}

/* Program @evt with an initial count of zero, then enable it globally. */
static void start_event(pmu_counter_t *evt)
{
	__start_event(evt, 0);
	global_enable(evt);
}
312 313 static void __stop_event(pmu_counter_t *evt) 314 { 315 if (is_gp(evt)) { 316 wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)), 317 evt->config & ~EVNTSEL_EN); 318 } else { 319 uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL); 320 int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4; 321 wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift)); 322 } 323 evt->count = rdmsr(evt->ctr); 324 } 325 326 static void stop_event(pmu_counter_t *evt) 327 { 328 global_disable(evt); 329 __stop_event(evt); 330 } 331 332 static noinline void measure_many(pmu_counter_t *evt, int count) 333 { 334 int i; 335 u64 cntrs = 0; 336 337 for (i = 0; i < count; i++) { 338 __start_event(&evt[i], 0); 339 cntrs |= BIT_ULL(event_to_global_idx(&evt[i])); 340 } 341 loop(cntrs); 342 for (i = 0; i < count; i++) 343 __stop_event(&evt[i]); 344 } 345 346 static void measure_one(pmu_counter_t *evt) 347 { 348 measure_many(evt, 1); 349 } 350 351 static noinline void __measure(pmu_counter_t *evt, uint64_t count) 352 { 353 u64 cntrs = BIT_ULL(event_to_global_idx(evt)); 354 355 __start_event(evt, count); 356 loop(cntrs); 357 __stop_event(evt); 358 } 359 360 static bool verify_event(uint64_t count, struct pmu_event *e) 361 { 362 bool pass; 363 364 if (!e) 365 return false; 366 367 pass = count >= e->min && count <= e->max; 368 if (!pass) 369 printf("FAIL: %d <= %"PRId64" <= %d\n", e->min, count, e->max); 370 371 return pass; 372 } 373 374 static bool verify_counter(pmu_counter_t *cnt) 375 { 376 return verify_event(cnt->count, get_counter_event(cnt)); 377 } 378 379 static void check_gp_counter(struct pmu_event *evt) 380 { 381 pmu_counter_t cnt = { 382 .config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel, 383 }; 384 int i; 385 386 for (i = 0; i < pmu.nr_gp_counters; i++) { 387 cnt.ctr = MSR_GP_COUNTERx(i); 388 measure_one(&cnt); 389 report(verify_event(cnt.count, evt), "%s-%d", evt->name, i); 390 } 391 } 392 393 static void check_gp_counters(void) 394 { 395 int i; 396 397 for (i = 0; i < 
gp_events_size; i++) 398 if (pmu_gp_counter_is_available(i)) 399 check_gp_counter(&gp_events[i]); 400 else 401 printf("GP event '%s' is disabled\n", 402 gp_events[i].name); 403 } 404 405 static void check_fixed_counters(void) 406 { 407 pmu_counter_t cnt = { 408 .config = EVNTSEL_OS | EVNTSEL_USR, 409 }; 410 int i; 411 412 for (i = 0; i < fixed_counters_num; i++) { 413 cnt.ctr = fixed_events[i].unit_sel; 414 measure_one(&cnt); 415 report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i); 416 } 417 } 418 419 static void check_counters_many(void) 420 { 421 pmu_counter_t cnt[48]; 422 int i, n; 423 424 for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) { 425 if (!pmu_gp_counter_is_available(i)) 426 continue; 427 428 cnt[n].ctr = MSR_GP_COUNTERx(n); 429 cnt[n].config = EVNTSEL_OS | EVNTSEL_USR | 430 gp_events[i % gp_events_size].unit_sel; 431 n++; 432 } 433 for (i = 0; i < fixed_counters_num; i++) { 434 cnt[n].ctr = fixed_events[i].unit_sel; 435 cnt[n].config = EVNTSEL_OS | EVNTSEL_USR; 436 n++; 437 } 438 439 assert(n <= ARRAY_SIZE(cnt)); 440 measure_many(cnt, n); 441 442 for (i = 0; i < n; i++) 443 if (!verify_counter(&cnt[i])) 444 break; 445 446 report(i == n, "all counters"); 447 } 448 449 static uint64_t measure_for_overflow(pmu_counter_t *cnt) 450 { 451 __measure(cnt, 0); 452 /* 453 * To generate overflow, i.e. roll over to '0', the initial count just 454 * needs to be preset to the negative expected count. However, as per 455 * Intel's SDM, the preset count needs to be incremented by 1 to ensure 456 * the overflow interrupt is generated immediately instead of possibly 457 * waiting for the overflow to propagate through the counter. 458 */ 459 assert(cnt->count > 1); 460 return 1 - cnt->count; 461 } 462 463 static void check_counter_overflow(void) 464 { 465 int i; 466 uint64_t overflow_preset; 467 int instruction_idx = pmu.is_intel ? 
468 INTEL_INSTRUCTIONS_IDX : 469 AMD_INSTRUCTIONS_IDX; 470 471 pmu_counter_t cnt = { 472 .ctr = MSR_GP_COUNTERx(0), 473 .config = EVNTSEL_OS | EVNTSEL_USR | 474 gp_events[instruction_idx].unit_sel /* instructions */, 475 }; 476 overflow_preset = measure_for_overflow(&cnt); 477 478 /* clear status before test */ 479 if (this_cpu_has_perf_global_status()) 480 pmu_clear_global_status(); 481 482 report_prefix_push("overflow"); 483 484 for (i = 0; i < pmu.nr_gp_counters + 1; i++) { 485 uint64_t status; 486 int idx; 487 488 cnt.count = overflow_preset; 489 if (pmu_use_full_writes()) 490 cnt.count &= (1ull << pmu.gp_counter_width) - 1; 491 492 if (i == pmu.nr_gp_counters) { 493 if (!pmu.is_intel) 494 break; 495 496 cnt.ctr = fixed_events[0].unit_sel; 497 cnt.count = measure_for_overflow(&cnt); 498 cnt.count &= (1ull << pmu.gp_counter_width) - 1; 499 } else { 500 cnt.ctr = MSR_GP_COUNTERx(i); 501 } 502 503 if (i % 2) 504 cnt.config |= EVNTSEL_INT; 505 else 506 cnt.config &= ~EVNTSEL_INT; 507 idx = event_to_global_idx(&cnt); 508 __measure(&cnt, cnt.count); 509 if (pmu.is_intel) 510 report(cnt.count == 1, "cntr-%d", i); 511 else 512 report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i); 513 514 if (!this_cpu_has_perf_global_status()) 515 continue; 516 517 status = rdmsr(pmu.msr_global_status); 518 report(status & (1ull << idx), "status-%d", i); 519 wrmsr(pmu.msr_global_status_clr, status); 520 status = rdmsr(pmu.msr_global_status); 521 report(!(status & (1ull << idx)), "status clear-%d", i); 522 report(check_irq() == (i % 2), "irq-%d", i); 523 } 524 525 report_prefix_pop(); 526 } 527 528 static void check_gp_counter_cmask(void) 529 { 530 int instruction_idx = pmu.is_intel ? 
531 INTEL_INSTRUCTIONS_IDX : 532 AMD_INSTRUCTIONS_IDX; 533 534 pmu_counter_t cnt = { 535 .ctr = MSR_GP_COUNTERx(0), 536 .config = EVNTSEL_OS | EVNTSEL_USR | 537 gp_events[instruction_idx].unit_sel /* instructions */, 538 }; 539 cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT); 540 measure_one(&cnt); 541 report(cnt.count < gp_events[instruction_idx].min, "cmask"); 542 } 543 544 static void do_rdpmc_fast(void *ptr) 545 { 546 pmu_counter_t *cnt = ptr; 547 uint32_t idx = (uint32_t)cnt->idx | (1u << 31); 548 549 if (!is_gp(cnt)) 550 idx |= 1 << 30; 551 552 cnt->count = rdpmc(idx); 553 } 554 555 556 static void check_rdpmc(void) 557 { 558 uint64_t val = 0xff0123456789ull; 559 bool exc; 560 int i; 561 562 report_prefix_push("rdpmc"); 563 564 for (i = 0; i < pmu.nr_gp_counters; i++) { 565 uint64_t x; 566 pmu_counter_t cnt = { 567 .ctr = MSR_GP_COUNTERx(i), 568 .idx = i 569 }; 570 571 /* 572 * Without full-width writes, only the low 32 bits are writable, 573 * and the value is sign-extended. 574 */ 575 if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0) 576 x = (uint64_t)(int64_t)(int32_t)val; 577 else 578 x = (uint64_t)(int64_t)val; 579 580 /* Mask according to the number of supported bits */ 581 x &= (1ull << pmu.gp_counter_width) - 1; 582 583 wrmsr(MSR_GP_COUNTERx(i), val); 584 report(rdpmc(i) == x, "cntr-%d", i); 585 586 exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt); 587 if (exc) 588 report_skip("fast-%d", i); 589 else 590 report(cnt.count == (u32)val, "fast-%d", i); 591 } 592 for (i = 0; i < fixed_counters_num; i++) { 593 uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1); 594 pmu_counter_t cnt = { 595 .ctr = MSR_CORE_PERF_FIXED_CTR0 + i, 596 .idx = i 597 }; 598 599 wrmsr(MSR_PERF_FIXED_CTRx(i), x); 600 report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i); 601 602 exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt); 603 if (exc) 604 report_skip("fixed fast-%d", i); 605 else 606 report(cnt.count == (u32)x, "fixed fast-%d", i); 607 } 608 609 
report_prefix_pop(); 610 } 611 612 static void check_running_counter_wrmsr(void) 613 { 614 uint64_t status; 615 uint64_t count; 616 unsigned int instruction_idx = pmu.is_intel ? 617 INTEL_INSTRUCTIONS_IDX : 618 AMD_INSTRUCTIONS_IDX; 619 620 pmu_counter_t evt = { 621 .ctr = MSR_GP_COUNTERx(0), 622 .config = EVNTSEL_OS | EVNTSEL_USR | 623 gp_events[instruction_idx].unit_sel, 624 }; 625 626 report_prefix_push("running counter wrmsr"); 627 628 start_event(&evt); 629 __loop(); 630 wrmsr(MSR_GP_COUNTERx(0), 0); 631 stop_event(&evt); 632 report(evt.count < gp_events[instruction_idx].min, "cntr"); 633 634 /* clear status before overflow test */ 635 if (this_cpu_has_perf_global_status()) 636 pmu_clear_global_status(); 637 638 start_event(&evt); 639 640 count = -1; 641 if (pmu_use_full_writes()) 642 count &= (1ull << pmu.gp_counter_width) - 1; 643 644 wrmsr(MSR_GP_COUNTERx(0), count); 645 646 __loop(); 647 stop_event(&evt); 648 649 if (this_cpu_has_perf_global_status()) { 650 status = rdmsr(pmu.msr_global_status); 651 report(status & 1, "status msr bit"); 652 } 653 654 report_prefix_pop(); 655 } 656 657 static void check_emulated_instr(void) 658 { 659 uint64_t status, instr_start, brnch_start; 660 uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1; 661 unsigned int branch_idx = pmu.is_intel ? 662 INTEL_BRANCHES_IDX : AMD_BRANCHES_IDX; 663 unsigned int instruction_idx = pmu.is_intel ? 
664 INTEL_INSTRUCTIONS_IDX : 665 AMD_INSTRUCTIONS_IDX; 666 pmu_counter_t brnch_cnt = { 667 .ctr = MSR_GP_COUNTERx(0), 668 /* branch instructions */ 669 .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel, 670 }; 671 pmu_counter_t instr_cnt = { 672 .ctr = MSR_GP_COUNTERx(1), 673 /* instructions */ 674 .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[instruction_idx].unit_sel, 675 }; 676 report_prefix_push("emulated instruction"); 677 678 if (this_cpu_has_perf_global_status()) 679 pmu_clear_global_status(); 680 681 start_event(&brnch_cnt); 682 start_event(&instr_cnt); 683 684 brnch_start = -EXPECTED_BRNCH; 685 instr_start = -EXPECTED_INSTR; 686 wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width); 687 wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width); 688 // KVM_FEP is a magic prefix that forces emulation so 689 // 'KVM_FEP "jne label\n"' just counts as a single instruction. 690 asm volatile( 691 "mov $0x0, %%eax\n" 692 "cmp $0x0, %%eax\n" 693 KVM_FEP "jne label\n" 694 KVM_FEP "jne label\n" 695 KVM_FEP "jne label\n" 696 KVM_FEP "jne label\n" 697 KVM_FEP "jne label\n" 698 "mov $0xa, %%eax\n" 699 "cpuid\n" 700 "mov $0xa, %%eax\n" 701 "cpuid\n" 702 "mov $0xa, %%eax\n" 703 "cpuid\n" 704 "mov $0xa, %%eax\n" 705 "cpuid\n" 706 "mov $0xa, %%eax\n" 707 "cpuid\n" 708 "label:\n" 709 : 710 : 711 : "eax", "ebx", "ecx", "edx"); 712 713 if (this_cpu_has_perf_global_ctrl()) 714 wrmsr(pmu.msr_global_ctl, 0); 715 716 stop_event(&brnch_cnt); 717 stop_event(&instr_cnt); 718 719 // Check that the end count - start count is at least the expected 720 // number of instructions and branches. 721 report(instr_cnt.count - instr_start >= EXPECTED_INSTR, 722 "instruction count"); 723 report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH, 724 "branch count"); 725 if (this_cpu_has_perf_global_status()) { 726 // Additionally check that those counters overflowed properly. 
727 status = rdmsr(pmu.msr_global_status); 728 report(status & 1, "branch counter overflow"); 729 report(status & 2, "instruction counter overflow"); 730 } 731 732 report_prefix_pop(); 733 } 734 735 #define XBEGIN_STARTED (~0u) 736 static void check_tsx_cycles(void) 737 { 738 pmu_counter_t cnt; 739 unsigned int i, ret = 0; 740 741 if (!this_cpu_has(X86_FEATURE_RTM)) 742 return; 743 744 report_prefix_push("TSX cycles"); 745 746 for (i = 0; i < pmu.nr_gp_counters; i++) { 747 cnt.ctr = MSR_GP_COUNTERx(i); 748 749 if (i == 2) { 750 /* Transactional cycles committed only on gp counter 2 */ 751 cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x30000003c; 752 } else { 753 /* Transactional cycles */ 754 cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x10000003c; 755 } 756 757 start_event(&cnt); 758 759 asm volatile("xbegin 1f\n\t" 760 "1:\n\t" 761 : "+a" (ret) :: "memory"); 762 763 /* Generate a non-canonical #GP to trigger ABORT. */ 764 if (ret == XBEGIN_STARTED) 765 *(int *)NONCANONICAL = 0; 766 767 stop_event(&cnt); 768 769 report(cnt.count > 0, "gp cntr-%d with a value of %" PRId64 "", i, cnt.count); 770 } 771 772 report_prefix_pop(); 773 } 774 775 static void warm_up(void) 776 { 777 int i; 778 779 /* 780 * Since cycles event is always run as the first event, there would be 781 * a warm-up state to warm up the cache, it leads to the measured cycles 782 * value may exceed the pre-defined cycles upper boundary and cause 783 * false positive. To avoid this, introduce an warm-up state before 784 * the real verification. 
785 */ 786 for (i = 0; i < 10; i++) 787 loop(0); 788 } 789 790 static void check_counters(void) 791 { 792 if (is_fep_available()) 793 check_emulated_instr(); 794 795 warm_up(); 796 check_gp_counters(); 797 check_fixed_counters(); 798 check_rdpmc(); 799 check_counters_many(); 800 check_counter_overflow(); 801 check_gp_counter_cmask(); 802 check_running_counter_wrmsr(); 803 check_tsx_cycles(); 804 } 805 806 static void do_unsupported_width_counter_write(void *index) 807 { 808 wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull); 809 } 810 811 static void check_gp_counters_write_width(void) 812 { 813 u64 val_64 = 0xffffff0123456789ull; 814 u64 val_32 = val_64 & ((1ull << 32) - 1); 815 u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1); 816 int i; 817 818 /* 819 * MSR_IA32_PERFCTRn supports 64-bit writes, 820 * but only the lowest 32 bits are valid. 821 */ 822 for (i = 0; i < pmu.nr_gp_counters; i++) { 823 wrmsr(MSR_IA32_PERFCTR0 + i, val_32); 824 assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32); 825 assert(rdmsr(MSR_IA32_PMC0 + i) == val_32); 826 827 wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width); 828 assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32); 829 assert(rdmsr(MSR_IA32_PMC0 + i) == val_32); 830 831 wrmsr(MSR_IA32_PERFCTR0 + i, val_64); 832 assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32); 833 assert(rdmsr(MSR_IA32_PMC0 + i) == val_32); 834 } 835 836 /* 837 * MSR_IA32_PMCn supports writing values up to GP counter width, 838 * and only the lowest bits of GP counter width are valid. 
839 */ 840 for (i = 0; i < pmu.nr_gp_counters; i++) { 841 wrmsr(MSR_IA32_PMC0 + i, val_32); 842 assert(rdmsr(MSR_IA32_PMC0 + i) == val_32); 843 assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32); 844 845 wrmsr(MSR_IA32_PMC0 + i, val_max_width); 846 assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width); 847 assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width); 848 849 report(test_for_exception(GP_VECTOR, 850 do_unsupported_width_counter_write, &i), 851 "writing unsupported width to MSR_IA32_PMC%d raises #GP", i); 852 } 853 } 854 855 /* 856 * Per the SDM, reference cycles are currently implemented using the 857 * core crystal clock, TSC, or bus clock. Calibrate to the TSC 858 * frequency to set reasonable expectations. 859 */ 860 static void set_ref_cycle_expectations(void) 861 { 862 pmu_counter_t cnt = { 863 .ctr = MSR_IA32_PERFCTR0, 864 .config = EVNTSEL_OS | EVNTSEL_USR | 865 intel_gp_events[INTEL_REF_CYCLES_IDX].unit_sel, 866 }; 867 uint64_t tsc_delta; 868 uint64_t t0, t1, t2, t3; 869 870 /* Bit 2 enumerates the availability of reference cycles events. */ 871 if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2)) 872 return; 873 874 if (this_cpu_has_perf_global_ctrl()) 875 wrmsr(pmu.msr_global_ctl, 0); 876 877 t0 = fenced_rdtsc(); 878 start_event(&cnt); 879 t1 = fenced_rdtsc(); 880 881 /* 882 * This loop has to run long enough to dominate the VM-exit 883 * costs for playing with the PMU MSRs on start and stop. 884 * 885 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times 886 * the core crystal clock, this function calculated a guest 887 * TSC : ref cycles ratio of around 105 with ECX initialized 888 * to one billion. 889 */ 890 asm volatile("loop ." 
: "+c"((int){1000000000ull})); 891 892 t2 = fenced_rdtsc(); 893 stop_event(&cnt); 894 t3 = fenced_rdtsc(); 895 896 tsc_delta = ((t2 - t1) + (t3 - t0)) / 2; 897 898 if (!tsc_delta) 899 return; 900 901 intel_gp_events[INTEL_REF_CYCLES_IDX].min = 902 (intel_gp_events[INTEL_REF_CYCLES_IDX].min * cnt.count) / tsc_delta; 903 intel_gp_events[INTEL_REF_CYCLES_IDX].max = 904 (intel_gp_events[INTEL_REF_CYCLES_IDX].max * cnt.count) / tsc_delta; 905 } 906 907 static void check_invalid_rdpmc_gp(void) 908 { 909 uint64_t val; 910 911 report(rdpmc_safe(64, &val) == GP_VECTOR, 912 "Expected #GP on RDPMC(64)"); 913 } 914 915 int main(int ac, char **av) 916 { 917 int instruction_idx; 918 int branch_idx; 919 920 setup_vm(); 921 handle_irq(PMI_VECTOR, cnt_overflow); 922 buf = malloc(N*64); 923 924 check_invalid_rdpmc_gp(); 925 926 if (pmu.is_intel) { 927 if (!pmu.version) { 928 report_skip("No Intel Arch PMU is detected!"); 929 return report_summary(); 930 } 931 gp_events = (struct pmu_event *)intel_gp_events; 932 gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]); 933 instruction_idx = INTEL_INSTRUCTIONS_IDX; 934 branch_idx = INTEL_BRANCHES_IDX; 935 936 /* 937 * For legacy Intel CPUS without clflush/clflushopt support, 938 * there is no way to force to trigger a LLC miss, thus set 939 * the minimum value to 0 to avoid false positives. 
940 */ 941 if (!this_cpu_has(X86_FEATURE_CLFLUSH)) 942 gp_events[INTEL_LLC_MISSES_IDX].min = 0; 943 944 report_prefix_push("Intel"); 945 set_ref_cycle_expectations(); 946 } else { 947 gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]); 948 gp_events = (struct pmu_event *)amd_gp_events; 949 instruction_idx = AMD_INSTRUCTIONS_IDX; 950 branch_idx = AMD_BRANCHES_IDX; 951 report_prefix_push("AMD"); 952 } 953 adjust_events_range(gp_events, instruction_idx, branch_idx); 954 955 printf("PMU version: %d\n", pmu.version); 956 printf("GP counters: %d\n", pmu.nr_gp_counters); 957 printf("GP counter width: %d\n", pmu.gp_counter_width); 958 printf("Mask length: %d\n", pmu.gp_counter_mask_length); 959 printf("Fixed counters: %d\n", pmu.nr_fixed_counters); 960 printf("Fixed counter width: %d\n", pmu.fixed_counter_width); 961 962 fixed_counters_num = MIN(pmu.nr_fixed_counters, ARRAY_SIZE(fixed_events)); 963 if (pmu.nr_fixed_counters > ARRAY_SIZE(fixed_events)) 964 report_info("Fixed counters number %d > defined fixed events %u. " 965 "Please update test case.", pmu.nr_fixed_counters, 966 (uint32_t)ARRAY_SIZE(fixed_events)); 967 968 apic_write(APIC_LVTPC, PMI_VECTOR); 969 970 check_counters(); 971 972 if (pmu_has_full_writes()) { 973 pmu.msr_gp_counter_base = MSR_IA32_PMC0; 974 975 report_prefix_push("full-width writes"); 976 check_counters(); 977 check_gp_counters_write_width(); 978 report_prefix_pop(); 979 } 980 981 if (!pmu.is_intel) { 982 report_prefix_push("K7"); 983 pmu.nr_gp_counters = AMD64_NUM_COUNTERS; 984 pmu.msr_gp_counter_base = MSR_K7_PERFCTR0; 985 pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0; 986 check_counters(); 987 report_prefix_pop(); 988 } 989 990 return report_summary(); 991 } 992