#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/pmu.h"
#include "x86/apic-defs.h"
#include "x86/apic.h"
#include "x86/desc.h"
#include "x86/isr.h"
#include "vmalloc.h"
#include "alloc.h"

#include "libcflat.h"
#include <stdint.h>

#define N 1000000

// These values match the number of instructions and branches in the
// assembly block in check_emulated_instr().
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

typedef struct {
	uint32_t ctr;
	uint32_t idx;
	uint64_t config;
	uint64_t count;
} pmu_counter_t;

struct pmu_event {
	const char *name;
	uint32_t unit_sel;
	int min;
	int max;
} intel_gp_events[] = {
	{"core cycles", 0x003c, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"ref cycles", 0x013c, 1*N, 30*N},
	{"llc references", 0x4f2e, 1, 2*N},
	{"llc misses", 0x412e, 1, 1*N},
	{"branches", 0x00c4, 1*N, 1.1*N},
	{"branch misses", 0x00c5, 0, 0.1*N},
}, amd_gp_events[] = {
	{"core cycles", 0x0076, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"branches", 0x00c2, 1*N, 1.1*N},
	{"branch misses", 0x00c3, 0, 0.1*N},
}, fixed_events[] = {
	{"fixed 0", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
};

/*
 * Event indices in intel_gp_events[]; keep these consistent with the
 * order of the entries above.
 */
enum {
	INTEL_REF_CYCLES_IDX = 2,
	INTEL_BRANCHES_IDX = 5,
};

/*
 * Event indices in amd_gp_events[]; keep these consistent with the
 * order of the entries above.
 */
enum {
	AMD_BRANCHES_IDX = 2,
};

char *buf;

static struct pmu_event *gp_events;
static unsigned int gp_events_size;
static unsigned int fixed_counters_num;

/*
 * The measured workload: N iterations of a 10-instruction loop (mov, add,
 * seven nops, and the loop branch).
 */
static inline void loop(void)
{
	unsigned long tmp, tmp2, tmp3;

	asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b"
			: "=c"(tmp), "=r"(tmp2), "=r"(tmp3): "0"(N), "1"(buf));
}

volatile uint64_t irq_received;

static void cnt_overflow(isr_regs_t *regs)
{
	irq_received++;
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
	apic_write(APIC_EOI, 0);
}

static bool check_irq(void)
{
	int i;
	irq_received = 0;
	sti();
	for (i = 0; i < 100000 && !irq_received; i++)
		asm volatile("pause");
	cli();
	return irq_received;
}

static bool is_gp(pmu_counter_t *evt)
{
	if (!pmu.is_intel)
		return true;

	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
		evt->ctr >= MSR_IA32_PMC0;
}

static int event_to_global_idx(pmu_counter_t *cnt)
{
	if (pmu.is_intel)
		return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
			(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));

	if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0)
		return (cnt->ctr - pmu.msr_gp_counter_base) / 2;
	else
		return cnt->ctr - pmu.msr_gp_counter_base;
}

static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
{
	if (is_gp(cnt)) {
		int i;

		for (i = 0; i < gp_events_size; i++)
			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
				return &gp_events[i];
	} else {
		unsigned int idx = cnt->ctr - MSR_CORE_PERF_FIXED_CTR0;

		if (idx < ARRAY_SIZE(fixed_events))
			return &fixed_events[idx];
	}

	return (void*)0;
}

static void global_enable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	cnt->idx = event_to_global_idx(cnt);
	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx));
}

static void global_disable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx));
}

static void __start_event(pmu_counter_t *evt, uint64_t count)
{
	evt->count = count;
	wrmsr(evt->ctr, evt->count);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config | EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		uint32_t usrospmi = 0;

		if (evt->config & EVNTSEL_OS)
			usrospmi |= (1 << 0);
		if (evt->config & EVNTSEL_USR)
			usrospmi |= (1 << 1);
		if (evt->config & EVNTSEL_INT)
			usrospmi |= (1 << 3); // PMI on overflow
		ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
	}
	global_enable(evt);
	apic_write(APIC_LVTPC, PMI_VECTOR);
}

static void start_event(pmu_counter_t *evt)
{
	__start_event(evt, 0);
}

static void stop_event(pmu_counter_t *evt)
{
	global_disable(evt);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config & ~EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
	}
	evt->count = rdmsr(evt->ctr);
}

static noinline void measure_many(pmu_counter_t *evt, int count)
{
	int i;
	for (i = 0; i < count; i++)
		start_event(&evt[i]);
	loop();
	for (i = 0; i < count; i++)
		stop_event(&evt[i]);
}

static void measure_one(pmu_counter_t *evt)
{
	measure_many(evt, 1);
}

static noinline void __measure(pmu_counter_t *evt, uint64_t count)
{
	__start_event(evt, count);
	loop();
	stop_event(evt);
}

static bool verify_event(uint64_t count, struct pmu_event *e)
{
	bool pass;

	if (!e)
		return false;

	pass = count >= e->min && count <= e->max;
	if (!pass)
		printf("FAIL: %d <= %"PRId64" <= %d\n", e->min, count, e->max);

	return pass;
}

static bool verify_counter(pmu_counter_t *cnt)
{
	return verify_event(cnt->count, get_counter_event(cnt));
}

static void check_gp_counter(struct pmu_event *evt)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
	};
	int i;

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);
		measure_one(&cnt);
		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
	}
}

static void check_gp_counters(void)
{
	int i;

	for (i = 0; i < gp_events_size; i++)
		if (pmu_gp_counter_is_available(i))
			check_gp_counter(&gp_events[i]);
		else
			printf("GP event '%s' is disabled\n",
					gp_events[i].name);
}

static void check_fixed_counters(void)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR,
	};
	int i;

	for (i = 0; i < fixed_counters_num; i++) {
		cnt.ctr = fixed_events[i].unit_sel;
		measure_one(&cnt);
		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
	}
}

static void check_counters_many(void)
{
	pmu_counter_t cnt[48];
	int i, n;

	for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) {
		if (!pmu_gp_counter_is_available(i))
			continue;

		cnt[n].ctr = MSR_GP_COUNTERx(n);
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
			gp_events[i % gp_events_size].unit_sel;
		n++;
	}
	for (i = 0; i < fixed_counters_num; i++) {
		cnt[n].ctr = fixed_events[i].unit_sel;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
		n++;
	}

	assert(n <= ARRAY_SIZE(cnt));
	measure_many(cnt, n);

	for (i = 0; i < n; i++)
		if (!verify_counter(&cnt[i]))
			break;

	report(i == n, "all counters");
}

static uint64_t measure_for_overflow(pmu_counter_t *cnt)
{
	__measure(cnt, 0);
	/*
	 * To generate overflow, i.e. roll over to '0', the initial count just
	 * needs to be preset to the negative expected count.  However, as per
	 * Intel's SDM, the preset count needs to be incremented by 1 to ensure
	 * the overflow interrupt is generated immediately instead of possibly
	 * waiting for the overflow to propagate through the counter.
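	 *
	 * For example, if the measurement above counted C events, the preset
	 * 1 - C returned below is -(C - 1), so repeating the same measurement
	 * wraps the counter past zero and leaves it at 1, which is what
	 * check_counter_overflow() expects on Intel.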
	 */
	assert(cnt->count > 1);
	return 1 - cnt->count;
}

static void check_counter_overflow(void)
{
	uint64_t overflow_preset;
	int i;
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};
	overflow_preset = measure_for_overflow(&cnt);

	/* clear status before test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	report_prefix_push("overflow");

	for (i = 0; i < pmu.nr_gp_counters + 1; i++) {
		uint64_t status;
		int idx;

		cnt.count = overflow_preset;
		if (pmu_use_full_writes())
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;

		if (i == pmu.nr_gp_counters) {
			if (!pmu.is_intel)
				break;

			cnt.ctr = fixed_events[0].unit_sel;
			cnt.count = measure_for_overflow(&cnt);
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
		} else {
			cnt.ctr = MSR_GP_COUNTERx(i);
		}

		if (i % 2)
			cnt.config |= EVNTSEL_INT;
		else
			cnt.config &= ~EVNTSEL_INT;
		idx = event_to_global_idx(&cnt);
		__measure(&cnt, cnt.count);
		if (pmu.is_intel)
			report(cnt.count == 1, "cntr-%d", i);
		else
			report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i);

		if (!this_cpu_has_perf_global_status())
			continue;

		status = rdmsr(pmu.msr_global_status);
		report(status & (1ull << idx), "status-%d", i);
		wrmsr(pmu.msr_global_status_clr, status);
		status = rdmsr(pmu.msr_global_status);
		report(!(status & (1ull << idx)), "status clear-%d", i);
		report(check_irq() == (i % 2), "irq-%d", i);
	}

	report_prefix_pop();
}

static void check_gp_counter_cmask(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};
	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
	measure_one(&cnt);
	report(cnt.count < gp_events[1].min, "cmask");
}

static void do_rdpmc_fast(void *ptr)
{
	pmu_counter_t *cnt = ptr;
	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);

	if (!is_gp(cnt))
		idx |= 1 << 30;

	cnt->count = rdpmc(idx);
}

static void check_rdpmc(void)
{
	uint64_t val = 0xff0123456789ull;
	bool exc;
	int i;

	report_prefix_push("rdpmc");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		uint64_t x;
		pmu_counter_t cnt = {
			.ctr = MSR_GP_COUNTERx(i),
			.idx = i
		};

		/*
		 * Without full-width writes, only the low 32 bits are writable,
		 * and the value is sign-extended.
		 */
		if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0)
			x = (uint64_t)(int64_t)(int32_t)val;
		else
			x = (uint64_t)(int64_t)val;

		/* Mask according to the number of supported bits */
		x &= (1ull << pmu.gp_counter_width) - 1;

		wrmsr(MSR_GP_COUNTERx(i), val);
		report(rdpmc(i) == x, "cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fast-%d", i);
		else
			report(cnt.count == (u32)val, "fast-%d", i);
	}
	for (i = 0; i < fixed_counters_num; i++) {
		uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1);
		pmu_counter_t cnt = {
			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
			.idx = i
		};

		wrmsr(MSR_PERF_FIXED_CTRx(i), x);
		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fixed fast-%d", i);
		else
			report(cnt.count == (u32)x, "fixed fast-%d", i);
	}

	report_prefix_pop();
}

static void check_running_counter_wrmsr(void)
{
	uint64_t status;
	uint64_t count;
	pmu_counter_t evt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};

	report_prefix_push("running counter wrmsr");

	start_event(&evt);
	loop();
	wrmsr(MSR_GP_COUNTERx(0), 0);
	stop_event(&evt);
	report(evt.count < gp_events[1].min, "cntr");

	/* clear status before overflow test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&evt);

	count = -1;
	if (pmu_use_full_writes())
		count &= (1ull << pmu.gp_counter_width) - 1;

	wrmsr(MSR_GP_COUNTERx(0), count);

	loop();
	stop_event(&evt);

	if (this_cpu_has_perf_global_status()) {
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "status msr bit");
	}

	report_prefix_pop();
}

static void check_emulated_instr(void)
{
	uint64_t status, instr_start, brnch_start;
	uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1;
	unsigned int branch_idx = pmu.is_intel ?
				  INTEL_BRANCHES_IDX : AMD_BRANCHES_IDX;
	pmu_counter_t brnch_cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		/* branch instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel,
	};
	pmu_counter_t instr_cnt = {
		.ctr = MSR_GP_COUNTERx(1),
		/* instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};
	report_prefix_push("emulated instruction");

	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&brnch_cnt);
	start_event(&instr_cnt);

	brnch_start = -EXPECTED_BRNCH;
	instr_start = -EXPECTED_INSTR;
	wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width);
	wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width);
	// KVM_FEP is a magic prefix that forces emulation so
	// 'KVM_FEP "jne label\n"' just counts as a single instruction.
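	// The block below contains EXPECTED_INSTR (17) instructions and
	// EXPECTED_BRNCH (5) branches: mov + cmp, five emulated jne's, and
	// five mov + cpuid pairs.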
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	stop_event(&brnch_cnt);
	stop_event(&instr_cnt);

	// Check that the end count - start count is at least the expected
	// number of instructions and branches.
	report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
	       "instruction count");
	report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
	       "branch count");
	if (this_cpu_has_perf_global_status()) {
		// Additionally check that those counters overflowed properly.
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "branch counter overflow");
		report(status & 2, "instruction counter overflow");
	}

	report_prefix_pop();
}

#define XBEGIN_STARTED (~0u)
static void check_tsx_cycles(void)
{
	pmu_counter_t cnt;
	unsigned int i, ret = 0;

	if (!this_cpu_has(X86_FEATURE_RTM))
		return;

	report_prefix_push("TSX cycles");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);

		if (i == 2) {
			/* Transactional cycles committed only on gp counter 2 */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x30000003c;
		} else {
			/* Transactional cycles */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x10000003c;
		}

		start_event(&cnt);

		asm volatile("xbegin 1f\n\t"
				"1:\n\t"
				: "+a" (ret) :: "memory");

		/* Generate a non-canonical #GP to trigger ABORT. */
		if (ret == XBEGIN_STARTED)
			*(int *)NONCANONICAL = 0;

		stop_event(&cnt);

		report(cnt.count > 0, "gp cntr-%d with a value of %" PRId64 "", i, cnt.count);
	}

	report_prefix_pop();
}

static void warm_up(void)
{
	int i;

	/*
	 * The cycles event is always the first event measured.  With cold
	 * caches, the measured cycle count can exceed the predefined upper
	 * bound and cause a false failure, so warm up the caches with a few
	 * untimed iterations before the real verification.
	 */
	for (i = 0; i < 10; i++)
		loop();
}

static void check_counters(void)
{
	if (is_fep_available())
		check_emulated_instr();

	warm_up();
	check_gp_counters();
	check_fixed_counters();
	check_rdpmc();
	check_counters_many();
	check_counter_overflow();
	check_gp_counter_cmask();
	check_running_counter_wrmsr();
	check_tsx_cycles();
}

static void do_unsupported_width_counter_write(void *index)
{
	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
}

static void check_gp_counters_write_width(void)
{
	u64 val_64 = 0xffffff0123456789ull;
	u64 val_32 = val_64 & ((1ull << 32) - 1);
	u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1);
	int i;

	/*
	 * MSR_IA32_PERFCTRn supports 64-bit writes,
	 * but only the lowest 32 bits are valid.
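	 * The loop below therefore expects writes of val_max_width and val_64
	 * to read back as val_32 through both the legacy and full-width MSR
	 * aliases.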
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
	}

	/*
	 * MSR_IA32_PMCn supports writing values up to GP counter width,
	 * and only the lowest bits of GP counter width are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PMC0 + i, val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);

		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);

		report(test_for_exception(GP_VECTOR,
			do_unsupported_width_counter_write, &i),
		"writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
	}
}

/*
 * Per the SDM, reference cycles are currently implemented using the
 * core crystal clock, TSC, or bus clock. Calibrate to the TSC
 * frequency to set reasonable expectations.
 */
static void set_ref_cycle_expectations(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_IA32_PERFCTR0,
		.config = EVNTSEL_OS | EVNTSEL_USR |
			  intel_gp_events[INTEL_REF_CYCLES_IDX].unit_sel,
	};
	uint64_t tsc_delta;
	uint64_t t0, t1, t2, t3;

	/* Bit 2 enumerates the availability of reference cycles events. */
	if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2))
		return;

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	t0 = fenced_rdtsc();
	start_event(&cnt);
	t1 = fenced_rdtsc();

	/*
	 * This loop has to run long enough to dominate the VM-exit
	 * costs for playing with the PMU MSRs on start and stop.
	 *
	 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
	 * the core crystal clock, this function calculated a guest
	 * TSC : ref cycles ratio of around 105 with ECX initialized
	 * to one billion.
	 */
	asm volatile("loop ." : "+c"((int){1000000000ull}));

	t2 = fenced_rdtsc();
	stop_event(&cnt);
	t3 = fenced_rdtsc();

	tsc_delta = ((t2 - t1) + (t3 - t0)) / 2;

	if (!tsc_delta)
		return;

	intel_gp_events[INTEL_REF_CYCLES_IDX].min =
		(intel_gp_events[INTEL_REF_CYCLES_IDX].min * cnt.count) / tsc_delta;
	intel_gp_events[INTEL_REF_CYCLES_IDX].max =
		(intel_gp_events[INTEL_REF_CYCLES_IDX].max * cnt.count) / tsc_delta;
}

static void check_invalid_rdpmc_gp(void)
{
	uint64_t val;

	report(rdpmc_safe(64, &val) == GP_VECTOR,
	       "Expected #GP on RDPMC(64)");
}

int main(int ac, char **av)
{
	setup_vm();
	handle_irq(PMI_VECTOR, cnt_overflow);
	buf = malloc(N*64);

	check_invalid_rdpmc_gp();

	if (pmu.is_intel) {
		if (!pmu.version) {
			report_skip("No Intel Arch PMU is detected!");
			return report_summary();
		}
		gp_events = (struct pmu_event *)intel_gp_events;
		gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
		report_prefix_push("Intel");
		set_ref_cycle_expectations();
	} else {
		gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
		gp_events = (struct pmu_event *)amd_gp_events;
		report_prefix_push("AMD");
	}

	printf("PMU version: %d\n", pmu.version);
	printf("GP counters: %d\n", pmu.nr_gp_counters);
	printf("GP counter width: %d\n", pmu.gp_counter_width);
	printf("Mask length: %d\n", pmu.gp_counter_mask_length);
	printf("Fixed counters: %d\n", pmu.nr_fixed_counters);
	printf("Fixed counter width: %d\n", pmu.fixed_counter_width);

	fixed_counters_num = MIN(pmu.nr_fixed_counters, ARRAY_SIZE(fixed_events));
	if (pmu.nr_fixed_counters > ARRAY_SIZE(fixed_events))
		report_info("Fixed counters number %d > defined fixed events %u.  "
			    "Please update test case.", pmu.nr_fixed_counters,
			    (uint32_t)ARRAY_SIZE(fixed_events));

	apic_write(APIC_LVTPC, PMI_VECTOR);

	check_counters();

	if (pmu_has_full_writes()) {
		pmu.msr_gp_counter_base = MSR_IA32_PMC0;

		report_prefix_push("full-width writes");
		check_counters();
		check_gp_counters_write_width();
		report_prefix_pop();
	}

	if (!pmu.is_intel) {
		report_prefix_push("K7");
		pmu.nr_gp_counters = AMD64_NUM_COUNTERS;
		pmu.msr_gp_counter_base = MSR_K7_PERFCTR0;
		pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0;
		check_counters();
		report_prefix_pop();
	}

	return report_summary();
}