1 2 #include "x86/msr.h" 3 #include "x86/processor.h" 4 #include "x86/pmu.h" 5 #include "x86/apic-defs.h" 6 #include "x86/apic.h" 7 #include "x86/desc.h" 8 #include "x86/isr.h" 9 #include "vmalloc.h" 10 #include "alloc.h" 11 12 #include "libcflat.h" 13 #include <stdint.h> 14 15 #define N 1000000 16 17 // These values match the number of instructions and branches in the 18 // assembly block in check_emulated_instr(). 19 #define EXPECTED_INSTR 17 20 #define EXPECTED_BRNCH 5 21 22 #define LOOP_ASM(_wrmsr) \ 23 _wrmsr "\n\t" \ 24 "mov %%ecx, %%edi; mov %%ebx, %%ecx;\n\t" \ 25 "1: mov (%1), %2; add $64, %1;\n\t" \ 26 "nop; nop; nop; nop; nop; nop; nop;\n\t" \ 27 "loop 1b;\n\t" \ 28 "mov %%edi, %%ecx; xor %%eax, %%eax; xor %%edx, %%edx;\n\t" \ 29 _wrmsr "\n\t" 30 31 typedef struct { 32 uint32_t ctr; 33 uint32_t idx; 34 uint64_t config; 35 uint64_t count; 36 } pmu_counter_t; 37 38 struct pmu_event { 39 const char *name; 40 uint32_t unit_sel; 41 int min; 42 int max; 43 } intel_gp_events[] = { 44 {"core cycles", 0x003c, 1*N, 50*N}, 45 {"instructions", 0x00c0, 10*N, 10.2*N}, 46 {"ref cycles", 0x013c, 1*N, 30*N}, 47 {"llc references", 0x4f2e, 1, 2*N}, 48 {"llc misses", 0x412e, 1, 1*N}, 49 {"branches", 0x00c4, 1*N, 1.1*N}, 50 {"branch misses", 0x00c5, 0, 0.1*N}, 51 }, amd_gp_events[] = { 52 {"core cycles", 0x0076, 1*N, 50*N}, 53 {"instructions", 0x00c0, 10*N, 10.2*N}, 54 {"branches", 0x00c2, 1*N, 1.1*N}, 55 {"branch misses", 0x00c3, 0, 0.1*N}, 56 }, fixed_events[] = { 57 {"fixed 0", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N}, 58 {"fixed 1", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N}, 59 {"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N} 60 }; 61 62 /* 63 * Events index in intel_gp_events[], ensure consistent with 64 * intel_gp_events[]. 65 */ 66 enum { 67 INTEL_INSTRUCTIONS_IDX = 1, 68 INTEL_REF_CYCLES_IDX = 2, 69 INTEL_BRANCHES_IDX = 5, 70 }; 71 72 /* 73 * Events index in amd_gp_events[], ensure consistent with 74 * amd_gp_events[]. 75 */ 76 enum { 77 AMD_INSTRUCTIONS_IDX = 1, 78 AMD_BRANCHES_IDX = 2, 79 }; 80 81 char *buf; 82 83 static struct pmu_event *gp_events; 84 static unsigned int gp_events_size; 85 static unsigned int fixed_counters_num; 86 87 88 static inline void __loop(void) 89 { 90 unsigned long tmp, tmp2, tmp3; 91 92 asm volatile(LOOP_ASM("nop") 93 : "=c"(tmp), "=r"(tmp2), "=r"(tmp3) 94 : "0"(N), "1"(buf)); 95 } 96 97 /* 98 * Enable and disable counters in a whole asm blob to ensure 99 * no other instructions are counted in the window between 100 * counters enabling and really LOOP_ASM code executing. 101 * Thus counters can verify instructions and branches events 102 * against precise counts instead of a rough valid count range. 103 */ 104 static inline void __precise_loop(u64 cntrs) 105 { 106 unsigned long tmp, tmp2, tmp3; 107 unsigned int global_ctl = pmu.msr_global_ctl; 108 u32 eax = cntrs & (BIT_ULL(32) - 1); 109 u32 edx = cntrs >> 32; 110 111 asm volatile(LOOP_ASM("wrmsr") 112 : "=b"(tmp), "=r"(tmp2), "=r"(tmp3) 113 : "a"(eax), "d"(edx), "c"(global_ctl), 114 "0"(N), "1"(buf) 115 : "edi"); 116 } 117 118 static inline void loop(u64 cntrs) 119 { 120 if (!this_cpu_has_perf_global_ctrl()) 121 __loop(); 122 else 123 __precise_loop(cntrs); 124 } 125 126 volatile uint64_t irq_received; 127 128 static void cnt_overflow(isr_regs_t *regs) 129 { 130 irq_received++; 131 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); 132 apic_write(APIC_EOI, 0); 133 } 134 135 static bool check_irq(void) 136 { 137 int i; 138 irq_received = 0; 139 sti(); 140 for (i = 0; i < 100000 && !irq_received; i++) 141 asm volatile("pause"); 142 cli(); 143 return irq_received; 144 } 145 146 static bool is_gp(pmu_counter_t *evt) 147 { 148 if (!pmu.is_intel) 149 return true; 150 151 return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 || 152 evt->ctr >= MSR_IA32_PMC0; 153 } 154 155 static int event_to_global_idx(pmu_counter_t *cnt) 156 { 157 if (pmu.is_intel) 158 return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base : 159 (MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX)); 160 161 if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0) 162 return (cnt->ctr - pmu.msr_gp_counter_base) / 2; 163 else 164 return cnt->ctr - pmu.msr_gp_counter_base; 165 } 166 167 static struct pmu_event* get_counter_event(pmu_counter_t *cnt) 168 { 169 if (is_gp(cnt)) { 170 int i; 171 172 for (i = 0; i < gp_events_size; i++) 173 if (gp_events[i].unit_sel == (cnt->config & 0xffff)) 174 return &gp_events[i]; 175 } else { 176 unsigned int idx = cnt->ctr - MSR_CORE_PERF_FIXED_CTR0; 177 178 if (idx < ARRAY_SIZE(fixed_events)) 179 return &fixed_events[idx]; 180 } 181 182 return (void*)0; 183 } 184 185 static void global_enable(pmu_counter_t *cnt) 186 { 187 if (!this_cpu_has_perf_global_ctrl()) 188 return; 189 190 cnt->idx = event_to_global_idx(cnt); 191 wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx)); 192 } 193 194 static void global_disable(pmu_counter_t *cnt) 195 { 196 if (!this_cpu_has_perf_global_ctrl()) 197 return; 198 199 wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx)); 200 } 201 202 static void __start_event(pmu_counter_t *evt, uint64_t count) 203 { 204 evt->count = count; 205 wrmsr(evt->ctr, evt->count); 206 if (is_gp(evt)) { 207 wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)), 208 evt->config | EVNTSEL_EN); 209 } else { 210 uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL); 211 int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4; 212 uint32_t usrospmi = 0; 213 214 if (evt->config & EVNTSEL_OS) 215 usrospmi |= (1 << 0); 216 if (evt->config & EVNTSEL_USR) 217 usrospmi |= (1 << 1); 218 if (evt->config & EVNTSEL_INT) 219 usrospmi |= (1 << 3); // PMI on overflow 220 ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift); 221 wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl); 222 } 223 apic_write(APIC_LVTPC, PMI_VECTOR); 224 } 225 226 static void start_event(pmu_counter_t *evt) 227 { 228 __start_event(evt, 0); 229 global_enable(evt); 230 } 231 232 static void __stop_event(pmu_counter_t *evt) 233 { 234 if (is_gp(evt)) { 235 wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)), 236 evt->config & ~EVNTSEL_EN); 237 } else { 238 uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL); 239 int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4; 240 wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift)); 241 } 242 evt->count = rdmsr(evt->ctr); 243 } 244 245 static void stop_event(pmu_counter_t *evt) 246 { 247 global_disable(evt); 248 __stop_event(evt); 249 } 250 251 static noinline void measure_many(pmu_counter_t *evt, int count) 252 { 253 int i; 254 u64 cntrs = 0; 255 256 for (i = 0; i < count; i++) { 257 __start_event(&evt[i], 0); 258 cntrs |= BIT_ULL(event_to_global_idx(&evt[i])); 259 } 260 loop(cntrs); 261 for (i = 0; i < count; i++) 262 __stop_event(&evt[i]); 263 } 264 265 static void measure_one(pmu_counter_t *evt) 266 { 267 measure_many(evt, 1); 268 } 269 270 static noinline void __measure(pmu_counter_t *evt, uint64_t count) 271 { 272 u64 cntrs = BIT_ULL(event_to_global_idx(evt)); 273 274 __start_event(evt, count); 275 loop(cntrs); 276 __stop_event(evt); 277 } 278 279 static bool verify_event(uint64_t count, struct pmu_event *e) 280 { 281 bool pass; 282 283 if (!e) 284 return false; 285 286 pass = count >= e->min && count <= e->max; 287 if (!pass) 288 printf("FAIL: %d <= %"PRId64" <= %d\n", e->min, count, e->max); 289 290 return pass; 291 } 292 293 static bool verify_counter(pmu_counter_t *cnt) 294 { 295 return verify_event(cnt->count, get_counter_event(cnt)); 296 } 297 298 static void check_gp_counter(struct pmu_event *evt) 299 { 300 pmu_counter_t cnt = { 301 .config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel, 302 }; 303 int i; 304 305 for (i = 0; i < pmu.nr_gp_counters; i++) { 306 cnt.ctr = MSR_GP_COUNTERx(i); 307 measure_one(&cnt); 308 report(verify_event(cnt.count, evt), "%s-%d", evt->name, i); 309 } 310 } 311 312 static void check_gp_counters(void) 313 { 314 int i; 315 316 for (i = 0; i < gp_events_size; i++) 317 if (pmu_gp_counter_is_available(i)) 318 check_gp_counter(&gp_events[i]); 319 else 320 printf("GP event '%s' is disabled\n", 321 gp_events[i].name); 322 } 323 324 static void check_fixed_counters(void) 325 { 326 pmu_counter_t cnt = { 327 .config = EVNTSEL_OS | EVNTSEL_USR, 328 }; 329 int i; 330 331 for (i = 0; i < fixed_counters_num; i++) { 332 cnt.ctr = fixed_events[i].unit_sel; 333 measure_one(&cnt); 334 report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i); 335 } 336 } 337 338 static void check_counters_many(void) 339 { 340 pmu_counter_t cnt[48]; 341 int i, n; 342 343 for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) { 344 if (!pmu_gp_counter_is_available(i)) 345 continue; 346 347 cnt[n].ctr = MSR_GP_COUNTERx(n); 348 cnt[n].config = EVNTSEL_OS | EVNTSEL_USR | 349 gp_events[i % gp_events_size].unit_sel; 350 n++; 351 } 352 for (i = 0; i < fixed_counters_num; i++) { 353 cnt[n].ctr = fixed_events[i].unit_sel; 354 cnt[n].config = EVNTSEL_OS | EVNTSEL_USR; 355 n++; 356 } 357 358 assert(n <= ARRAY_SIZE(cnt)); 359 measure_many(cnt, n); 360 361 for (i = 0; i < n; i++) 362 if (!verify_counter(&cnt[i])) 363 break; 364 365 report(i == n, "all counters"); 366 } 367 368 static uint64_t measure_for_overflow(pmu_counter_t *cnt) 369 { 370 __measure(cnt, 0); 371 /* 372 * To generate overflow, i.e. roll over to '0', the initial count just 373 * needs to be preset to the negative expected count. However, as per 374 * Intel's SDM, the preset count needs to be incremented by 1 to ensure 375 * the overflow interrupt is generated immediately instead of possibly 376 * waiting for the overflow to propagate through the counter. 377 */ 378 assert(cnt->count > 1); 379 return 1 - cnt->count; 380 } 381 382 static void check_counter_overflow(void) 383 { 384 int i; 385 uint64_t overflow_preset; 386 int instruction_idx = pmu.is_intel ? 387 INTEL_INSTRUCTIONS_IDX : 388 AMD_INSTRUCTIONS_IDX; 389 390 pmu_counter_t cnt = { 391 .ctr = MSR_GP_COUNTERx(0), 392 .config = EVNTSEL_OS | EVNTSEL_USR | 393 gp_events[instruction_idx].unit_sel /* instructions */, 394 }; 395 overflow_preset = measure_for_overflow(&cnt); 396 397 /* clear status before test */ 398 if (this_cpu_has_perf_global_status()) 399 pmu_clear_global_status(); 400 401 report_prefix_push("overflow"); 402 403 for (i = 0; i < pmu.nr_gp_counters + 1; i++) { 404 uint64_t status; 405 int idx; 406 407 cnt.count = overflow_preset; 408 if (pmu_use_full_writes()) 409 cnt.count &= (1ull << pmu.gp_counter_width) - 1; 410 411 if (i == pmu.nr_gp_counters) { 412 if (!pmu.is_intel) 413 break; 414 415 cnt.ctr = fixed_events[0].unit_sel; 416 cnt.count = measure_for_overflow(&cnt); 417 cnt.count &= (1ull << pmu.gp_counter_width) - 1; 418 } else { 419 cnt.ctr = MSR_GP_COUNTERx(i); 420 } 421 422 if (i % 2) 423 cnt.config |= EVNTSEL_INT; 424 else 425 cnt.config &= ~EVNTSEL_INT; 426 idx = event_to_global_idx(&cnt); 427 __measure(&cnt, cnt.count); 428 if (pmu.is_intel) 429 report(cnt.count == 1, "cntr-%d", i); 430 else 431 report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i); 432 433 if (!this_cpu_has_perf_global_status()) 434 continue; 435 436 status = rdmsr(pmu.msr_global_status); 437 report(status & (1ull << idx), "status-%d", i); 438 wrmsr(pmu.msr_global_status_clr, status); 439 status = rdmsr(pmu.msr_global_status); 440 report(!(status & (1ull << idx)), "status clear-%d", i); 441 report(check_irq() == (i % 2), "irq-%d", i); 442 } 443 444 report_prefix_pop(); 445 } 446 447 static void check_gp_counter_cmask(void) 448 { 449 int instruction_idx = pmu.is_intel ? 450 INTEL_INSTRUCTIONS_IDX : 451 AMD_INSTRUCTIONS_IDX; 452 453 pmu_counter_t cnt = { 454 .ctr = MSR_GP_COUNTERx(0), 455 .config = EVNTSEL_OS | EVNTSEL_USR | 456 gp_events[instruction_idx].unit_sel /* instructions */, 457 }; 458 cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT); 459 measure_one(&cnt); 460 report(cnt.count < gp_events[instruction_idx].min, "cmask"); 461 } 462 463 static void do_rdpmc_fast(void *ptr) 464 { 465 pmu_counter_t *cnt = ptr; 466 uint32_t idx = (uint32_t)cnt->idx | (1u << 31); 467 468 if (!is_gp(cnt)) 469 idx |= 1 << 30; 470 471 cnt->count = rdpmc(idx); 472 } 473 474 475 static void check_rdpmc(void) 476 { 477 uint64_t val = 0xff0123456789ull; 478 bool exc; 479 int i; 480 481 report_prefix_push("rdpmc"); 482 483 for (i = 0; i < pmu.nr_gp_counters; i++) { 484 uint64_t x; 485 pmu_counter_t cnt = { 486 .ctr = MSR_GP_COUNTERx(i), 487 .idx = i 488 }; 489 490 /* 491 * Without full-width writes, only the low 32 bits are writable, 492 * and the value is sign-extended. 493 */ 494 if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0) 495 x = (uint64_t)(int64_t)(int32_t)val; 496 else 497 x = (uint64_t)(int64_t)val; 498 499 /* Mask according to the number of supported bits */ 500 x &= (1ull << pmu.gp_counter_width) - 1; 501 502 wrmsr(MSR_GP_COUNTERx(i), val); 503 report(rdpmc(i) == x, "cntr-%d", i); 504 505 exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt); 506 if (exc) 507 report_skip("fast-%d", i); 508 else 509 report(cnt.count == (u32)val, "fast-%d", i); 510 } 511 for (i = 0; i < fixed_counters_num; i++) { 512 uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1); 513 pmu_counter_t cnt = { 514 .ctr = MSR_CORE_PERF_FIXED_CTR0 + i, 515 .idx = i 516 }; 517 518 wrmsr(MSR_PERF_FIXED_CTRx(i), x); 519 report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i); 520 521 exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt); 522 if (exc) 523 report_skip("fixed fast-%d", i); 524 else 525 report(cnt.count == (u32)x, "fixed fast-%d", i); 526 } 527 528 report_prefix_pop(); 529 } 530 531 static void check_running_counter_wrmsr(void) 532 { 533 uint64_t status; 534 uint64_t count; 535 unsigned int instruction_idx = pmu.is_intel ? 536 INTEL_INSTRUCTIONS_IDX : 537 AMD_INSTRUCTIONS_IDX; 538 539 pmu_counter_t evt = { 540 .ctr = MSR_GP_COUNTERx(0), 541 .config = EVNTSEL_OS | EVNTSEL_USR | 542 gp_events[instruction_idx].unit_sel, 543 }; 544 545 report_prefix_push("running counter wrmsr"); 546 547 start_event(&evt); 548 __loop(); 549 wrmsr(MSR_GP_COUNTERx(0), 0); 550 stop_event(&evt); 551 report(evt.count < gp_events[instruction_idx].min, "cntr"); 552 553 /* clear status before overflow test */ 554 if (this_cpu_has_perf_global_status()) 555 pmu_clear_global_status(); 556 557 start_event(&evt); 558 559 count = -1; 560 if (pmu_use_full_writes()) 561 count &= (1ull << pmu.gp_counter_width) - 1; 562 563 wrmsr(MSR_GP_COUNTERx(0), count); 564 565 __loop(); 566 stop_event(&evt); 567 568 if (this_cpu_has_perf_global_status()) { 569 status = rdmsr(pmu.msr_global_status); 570 report(status & 1, "status msr bit"); 571 } 572 573 report_prefix_pop(); 574 } 575 576 static void check_emulated_instr(void) 577 { 578 uint64_t status, instr_start, brnch_start; 579 uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1; 580 unsigned int branch_idx = pmu.is_intel ? 581 INTEL_BRANCHES_IDX : AMD_BRANCHES_IDX; 582 unsigned int instruction_idx = pmu.is_intel ? 583 INTEL_INSTRUCTIONS_IDX : 584 AMD_INSTRUCTIONS_IDX; 585 pmu_counter_t brnch_cnt = { 586 .ctr = MSR_GP_COUNTERx(0), 587 /* branch instructions */ 588 .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel, 589 }; 590 pmu_counter_t instr_cnt = { 591 .ctr = MSR_GP_COUNTERx(1), 592 /* instructions */ 593 .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[instruction_idx].unit_sel, 594 }; 595 report_prefix_push("emulated instruction"); 596 597 if (this_cpu_has_perf_global_status()) 598 pmu_clear_global_status(); 599 600 start_event(&brnch_cnt); 601 start_event(&instr_cnt); 602 603 brnch_start = -EXPECTED_BRNCH; 604 instr_start = -EXPECTED_INSTR; 605 wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width); 606 wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width); 607 // KVM_FEP is a magic prefix that forces emulation so 608 // 'KVM_FEP "jne label\n"' just counts as a single instruction. 609 asm volatile( 610 "mov $0x0, %%eax\n" 611 "cmp $0x0, %%eax\n" 612 KVM_FEP "jne label\n" 613 KVM_FEP "jne label\n" 614 KVM_FEP "jne label\n" 615 KVM_FEP "jne label\n" 616 KVM_FEP "jne label\n" 617 "mov $0xa, %%eax\n" 618 "cpuid\n" 619 "mov $0xa, %%eax\n" 620 "cpuid\n" 621 "mov $0xa, %%eax\n" 622 "cpuid\n" 623 "mov $0xa, %%eax\n" 624 "cpuid\n" 625 "mov $0xa, %%eax\n" 626 "cpuid\n" 627 "label:\n" 628 : 629 : 630 : "eax", "ebx", "ecx", "edx"); 631 632 if (this_cpu_has_perf_global_ctrl()) 633 wrmsr(pmu.msr_global_ctl, 0); 634 635 stop_event(&brnch_cnt); 636 stop_event(&instr_cnt); 637 638 // Check that the end count - start count is at least the expected 639 // number of instructions and branches. 640 report(instr_cnt.count - instr_start >= EXPECTED_INSTR, 641 "instruction count"); 642 report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH, 643 "branch count"); 644 if (this_cpu_has_perf_global_status()) { 645 // Additionally check that those counters overflowed properly. 646 status = rdmsr(pmu.msr_global_status); 647 report(status & 1, "branch counter overflow"); 648 report(status & 2, "instruction counter overflow"); 649 } 650 651 report_prefix_pop(); 652 } 653 654 #define XBEGIN_STARTED (~0u) 655 static void check_tsx_cycles(void) 656 { 657 pmu_counter_t cnt; 658 unsigned int i, ret = 0; 659 660 if (!this_cpu_has(X86_FEATURE_RTM)) 661 return; 662 663 report_prefix_push("TSX cycles"); 664 665 for (i = 0; i < pmu.nr_gp_counters; i++) { 666 cnt.ctr = MSR_GP_COUNTERx(i); 667 668 if (i == 2) { 669 /* Transactional cycles committed only on gp counter 2 */ 670 cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x30000003c; 671 } else { 672 /* Transactional cycles */ 673 cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x10000003c; 674 } 675 676 start_event(&cnt); 677 678 asm volatile("xbegin 1f\n\t" 679 "1:\n\t" 680 : "+a" (ret) :: "memory"); 681 682 /* Generate a non-canonical #GP to trigger ABORT. */ 683 if (ret == XBEGIN_STARTED) 684 *(int *)NONCANONICAL = 0; 685 686 stop_event(&cnt); 687 688 report(cnt.count > 0, "gp cntr-%d with a value of %" PRId64 "", i, cnt.count); 689 } 690 691 report_prefix_pop(); 692 } 693 694 static void warm_up(void) 695 { 696 int i; 697 698 /* 699 * Since cycles event is always run as the first event, there would be 700 * a warm-up state to warm up the cache, it leads to the measured cycles 701 * value may exceed the pre-defined cycles upper boundary and cause 702 * false positive. To avoid this, introduce an warm-up state before 703 * the real verification. 704 */ 705 for (i = 0; i < 10; i++) 706 loop(0); 707 } 708 709 static void check_counters(void) 710 { 711 if (is_fep_available()) 712 check_emulated_instr(); 713 714 warm_up(); 715 check_gp_counters(); 716 check_fixed_counters(); 717 check_rdpmc(); 718 check_counters_many(); 719 check_counter_overflow(); 720 check_gp_counter_cmask(); 721 check_running_counter_wrmsr(); 722 check_tsx_cycles(); 723 } 724 725 static void do_unsupported_width_counter_write(void *index) 726 { 727 wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull); 728 } 729 730 static void check_gp_counters_write_width(void) 731 { 732 u64 val_64 = 0xffffff0123456789ull; 733 u64 val_32 = val_64 & ((1ull << 32) - 1); 734 u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1); 735 int i; 736 737 /* 738 * MSR_IA32_PERFCTRn supports 64-bit writes, 739 * but only the lowest 32 bits are valid. 740 */ 741 for (i = 0; i < pmu.nr_gp_counters; i++) { 742 wrmsr(MSR_IA32_PERFCTR0 + i, val_32); 743 assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32); 744 assert(rdmsr(MSR_IA32_PMC0 + i) == val_32); 745 746 wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width); 747 assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32); 748 assert(rdmsr(MSR_IA32_PMC0 + i) == val_32); 749 750 wrmsr(MSR_IA32_PERFCTR0 + i, val_64); 751 assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32); 752 assert(rdmsr(MSR_IA32_PMC0 + i) == val_32); 753 } 754 755 /* 756 * MSR_IA32_PMCn supports writing values up to GP counter width, 757 * and only the lowest bits of GP counter width are valid. 758 */ 759 for (i = 0; i < pmu.nr_gp_counters; i++) { 760 wrmsr(MSR_IA32_PMC0 + i, val_32); 761 assert(rdmsr(MSR_IA32_PMC0 + i) == val_32); 762 assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32); 763 764 wrmsr(MSR_IA32_PMC0 + i, val_max_width); 765 assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width); 766 assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width); 767 768 report(test_for_exception(GP_VECTOR, 769 do_unsupported_width_counter_write, &i), 770 "writing unsupported width to MSR_IA32_PMC%d raises #GP", i); 771 } 772 } 773 774 /* 775 * Per the SDM, reference cycles are currently implemented using the 776 * core crystal clock, TSC, or bus clock. Calibrate to the TSC 777 * frequency to set reasonable expectations. 778 */ 779 static void set_ref_cycle_expectations(void) 780 { 781 pmu_counter_t cnt = { 782 .ctr = MSR_IA32_PERFCTR0, 783 .config = EVNTSEL_OS | EVNTSEL_USR | 784 intel_gp_events[INTEL_REF_CYCLES_IDX].unit_sel, 785 }; 786 uint64_t tsc_delta; 787 uint64_t t0, t1, t2, t3; 788 789 /* Bit 2 enumerates the availability of reference cycles events. */ 790 if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2)) 791 return; 792 793 if (this_cpu_has_perf_global_ctrl()) 794 wrmsr(pmu.msr_global_ctl, 0); 795 796 t0 = fenced_rdtsc(); 797 start_event(&cnt); 798 t1 = fenced_rdtsc(); 799 800 /* 801 * This loop has to run long enough to dominate the VM-exit 802 * costs for playing with the PMU MSRs on start and stop. 803 * 804 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times 805 * the core crystal clock, this function calculated a guest 806 * TSC : ref cycles ratio of around 105 with ECX initialized 807 * to one billion. 808 */ 809 asm volatile("loop ." : "+c"((int){1000000000ull})); 810 811 t2 = fenced_rdtsc(); 812 stop_event(&cnt); 813 t3 = fenced_rdtsc(); 814 815 tsc_delta = ((t2 - t1) + (t3 - t0)) / 2; 816 817 if (!tsc_delta) 818 return; 819 820 intel_gp_events[INTEL_REF_CYCLES_IDX].min = 821 (intel_gp_events[INTEL_REF_CYCLES_IDX].min * cnt.count) / tsc_delta; 822 intel_gp_events[INTEL_REF_CYCLES_IDX].max = 823 (intel_gp_events[INTEL_REF_CYCLES_IDX].max * cnt.count) / tsc_delta; 824 } 825 826 static void check_invalid_rdpmc_gp(void) 827 { 828 uint64_t val; 829 830 report(rdpmc_safe(64, &val) == GP_VECTOR, 831 "Expected #GP on RDPMC(64)"); 832 } 833 834 int main(int ac, char **av) 835 { 836 setup_vm(); 837 handle_irq(PMI_VECTOR, cnt_overflow); 838 buf = malloc(N*64); 839 840 check_invalid_rdpmc_gp(); 841 842 if (pmu.is_intel) { 843 if (!pmu.version) { 844 report_skip("No Intel Arch PMU is detected!"); 845 return report_summary(); 846 } 847 gp_events = (struct pmu_event *)intel_gp_events; 848 gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]); 849 report_prefix_push("Intel"); 850 set_ref_cycle_expectations(); 851 } else { 852 gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]); 853 gp_events = (struct pmu_event *)amd_gp_events; 854 report_prefix_push("AMD"); 855 } 856 857 printf("PMU version: %d\n", pmu.version); 858 printf("GP counters: %d\n", pmu.nr_gp_counters); 859 printf("GP counter width: %d\n", pmu.gp_counter_width); 860 printf("Mask length: %d\n", pmu.gp_counter_mask_length); 861 printf("Fixed counters: %d\n", pmu.nr_fixed_counters); 862 printf("Fixed counter width: %d\n", pmu.fixed_counter_width); 863 864 fixed_counters_num = MIN(pmu.nr_fixed_counters, ARRAY_SIZE(fixed_events)); 865 if (pmu.nr_fixed_counters > ARRAY_SIZE(fixed_events)) 866 report_info("Fixed counters number %d > defined fixed events %u. " 867 "Please update test case.", pmu.nr_fixed_counters, 868 (uint32_t)ARRAY_SIZE(fixed_events)); 869 870 apic_write(APIC_LVTPC, PMI_VECTOR); 871 872 check_counters(); 873 874 if (pmu_has_full_writes()) { 875 pmu.msr_gp_counter_base = MSR_IA32_PMC0; 876 877 report_prefix_push("full-width writes"); 878 check_counters(); 879 check_gp_counters_write_width(); 880 report_prefix_pop(); 881 } 882 883 if (!pmu.is_intel) { 884 report_prefix_push("K7"); 885 pmu.nr_gp_counters = AMD64_NUM_COUNTERS; 886 pmu.msr_gp_counter_base = MSR_K7_PERFCTR0; 887 pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0; 888 check_counters(); 889 report_prefix_pop(); 890 } 891 892 return report_summary(); 893 } 894