#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/pmu.h"
#include "x86/apic-defs.h"
#include "x86/apic.h"
#include "x86/desc.h"
#include "x86/isr.h"
#include "vmalloc.h"
#include "alloc.h"

#include "libcflat.h"
#include <stdint.h>

#define N 1000000

// These values match the number of instructions and branches in the
// assembly block in check_emulated_instr().
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

typedef struct {
	uint32_t ctr;
	uint32_t idx;
	uint64_t config;
	uint64_t count;
} pmu_counter_t;

struct pmu_event {
	const char *name;
	uint32_t unit_sel;
	int min;
	int max;
} intel_gp_events[] = {
	{"core cycles", 0x003c, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"ref cycles", 0x013c, 1*N, 30*N},
	{"llc references", 0x4f2e, 1, 2*N},
	{"llc misses", 0x412e, 1, 1*N},
	{"branches", 0x00c4, 1*N, 1.1*N},
	{"branch misses", 0x00c5, 0, 0.1*N},
}, amd_gp_events[] = {
	{"core cycles", 0x0076, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"branches", 0x00c2, 1*N, 1.1*N},
	{"branch misses", 0x00c3, 0, 0.1*N},
}, fixed_events[] = {
	{"fixed 0", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
};

/*
 * Index of the branches event in intel_gp_events[]; keep it consistent
 * with the order of the table above.
 */
enum {
	INTEL_BRANCHES_IDX = 5,
};

/*
 * Index of the branches event in amd_gp_events[]; keep it consistent
 * with the order of the table above.
 */
enum {
	AMD_BRANCHES_IDX = 2,
};

char *buf;

static struct pmu_event *gp_events;
static unsigned int gp_events_size;
static unsigned int fixed_counters_num;

static inline void loop(void)
{
	unsigned long tmp, tmp2, tmp3;

	asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b"
		     : "=c"(tmp), "=r"(tmp2), "=r"(tmp3) : "0"(N), "1"(buf));
}

volatile uint64_t irq_received;

static void cnt_overflow(isr_regs_t *regs)
{
	irq_received++;
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
	apic_write(APIC_EOI, 0);
}

static bool check_irq(void)
{
	int i;
	irq_received = 0;
	sti();
	for (i = 0; i < 100000 && !irq_received; i++)
		asm volatile("pause");
	cli();
	return irq_received;
}

static bool is_gp(pmu_counter_t *evt)
{
	if (!pmu.is_intel)
		return true;

	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
	       evt->ctr >= MSR_IA32_PMC0;
}
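/*
 * Map a counter MSR to its bit position in the global control/status MSRs:
 * GP counters map to their counter index, Intel fixed counters start at
 * FIXED_CNT_INDEX, and AMD F15H counter MSRs are interleaved with their
 * event-select MSRs, hence the divide by two.
 */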
static int event_to_global_idx(pmu_counter_t *cnt)
{
	if (pmu.is_intel)
		return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
			(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));

	if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0)
		return (cnt->ctr - pmu.msr_gp_counter_base) / 2;
	else
		return cnt->ctr - pmu.msr_gp_counter_base;
}

static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
{
	if (is_gp(cnt)) {
		int i;

		for (i = 0; i < gp_events_size; i++)
			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
				return &gp_events[i];
	} else {
		unsigned int idx = cnt->ctr - MSR_CORE_PERF_FIXED_CTR0;

		if (idx < ARRAY_SIZE(fixed_events))
			return &fixed_events[idx];
	}

	return (void*)0;
}

static void global_enable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	cnt->idx = event_to_global_idx(cnt);
	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx));
}

static void global_disable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx));
}

static void __start_event(pmu_counter_t *evt, uint64_t count)
{
	evt->count = count;
	wrmsr(evt->ctr, evt->count);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config | EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		uint32_t usrospmi = 0;

		if (evt->config & EVNTSEL_OS)
			usrospmi |= (1 << 0);
		if (evt->config & EVNTSEL_USR)
			usrospmi |= (1 << 1);
		if (evt->config & EVNTSEL_INT)
			usrospmi |= (1 << 3); // PMI on overflow
		ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
	}
	global_enable(evt);
	apic_write(APIC_LVTPC, PMI_VECTOR);
}

static void start_event(pmu_counter_t *evt)
{
	__start_event(evt, 0);
}

static void stop_event(pmu_counter_t *evt)
{
	global_disable(evt);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config & ~EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
	}
	evt->count = rdmsr(evt->ctr);
}

static noinline void measure_many(pmu_counter_t *evt, int count)
{
	int i;
	for (i = 0; i < count; i++)
		start_event(&evt[i]);
	loop();
	for (i = 0; i < count; i++)
		stop_event(&evt[i]);
}

static void measure_one(pmu_counter_t *evt)
{
	measure_many(evt, 1);
}

static noinline void __measure(pmu_counter_t *evt, uint64_t count)
{
	__start_event(evt, count);
	loop();
	stop_event(evt);
}

static bool verify_event(uint64_t count, struct pmu_event *e)
{
	bool pass;

	if (!e)
		return false;

	pass = count >= e->min && count <= e->max;
	if (!pass)
		printf("FAIL: %d <= %"PRId64" <= %d\n", e->min, count, e->max);

	return pass;
}

static bool verify_counter(pmu_counter_t *cnt)
{
	return verify_event(cnt->count, get_counter_event(cnt));
}
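/* Measure the event on each GP counter in turn and verify the resulting count. */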
static void check_gp_counter(struct pmu_event *evt)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
	};
	int i;

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);
		measure_one(&cnt);
		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
	}
}

static void check_gp_counters(void)
{
	int i;

	for (i = 0; i < gp_events_size; i++)
		if (pmu_gp_counter_is_available(i))
			check_gp_counter(&gp_events[i]);
		else
			printf("GP event '%s' is disabled\n",
			       gp_events[i].name);
}

static void check_fixed_counters(void)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR,
	};
	int i;

	for (i = 0; i < fixed_counters_num; i++) {
		cnt.ctr = fixed_events[i].unit_sel;
		measure_one(&cnt);
		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
	}
}

static void check_counters_many(void)
{
	pmu_counter_t cnt[48];
	int i, n;

	for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) {
		if (!pmu_gp_counter_is_available(i))
			continue;

		cnt[n].ctr = MSR_GP_COUNTERx(n);
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
			gp_events[i % gp_events_size].unit_sel;
		n++;
	}
	for (i = 0; i < fixed_counters_num; i++) {
		cnt[n].ctr = fixed_events[i].unit_sel;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
		n++;
	}

	assert(n <= ARRAY_SIZE(cnt));
	measure_many(cnt, n);

	for (i = 0; i < n; i++)
		if (!verify_counter(&cnt[i]))
			break;

	report(i == n, "all counters");
}

static uint64_t measure_for_overflow(pmu_counter_t *cnt)
{
	__measure(cnt, 0);
	/*
	 * To generate overflow, i.e. roll over to '0', the initial count just
	 * needs to be preset to the negative expected count.  However, as per
	 * Intel's SDM, the preset count needs to be incremented by 1 to ensure
	 * the overflow interrupt is generated immediately instead of possibly
	 * waiting for the overflow to propagate through the counter.
	 */
	assert(cnt->count > 1);
	return 1 - cnt->count;
}

static void check_counter_overflow(void)
{
	uint64_t overflow_preset;
	int i;
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};
	overflow_preset = measure_for_overflow(&cnt);

	/* clear status before test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	report_prefix_push("overflow");

	for (i = 0; i < pmu.nr_gp_counters + 1; i++) {
		uint64_t status;
		int idx;

		cnt.count = overflow_preset;
		if (pmu_use_full_writes())
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;

		if (i == pmu.nr_gp_counters) {
			if (!pmu.is_intel)
				break;

			cnt.ctr = fixed_events[0].unit_sel;
			cnt.count = measure_for_overflow(&cnt);
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
		} else {
			cnt.ctr = MSR_GP_COUNTERx(i);
		}

		if (i % 2)
			cnt.config |= EVNTSEL_INT;
		else
			cnt.config &= ~EVNTSEL_INT;
		idx = event_to_global_idx(&cnt);
		__measure(&cnt, cnt.count);
		if (pmu.is_intel)
			report(cnt.count == 1, "cntr-%d", i);
		else
			report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i);

		if (!this_cpu_has_perf_global_status())
			continue;

		status = rdmsr(pmu.msr_global_status);
		report(status & (1ull << idx), "status-%d", i);
		wrmsr(pmu.msr_global_status_clr, status);
		status = rdmsr(pmu.msr_global_status);
		report(!(status & (1ull << idx)), "status clear-%d", i);
		report(check_irq() == (i % 2), "irq-%d", i);
	}

	report_prefix_pop();
}

static void check_gp_counter_cmask(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};
	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
	measure_one(&cnt);
	report(cnt.count < gp_events[1].min, "cmask");
}

/*
 * Read the counter via RDPMC with bit 31 set, which requests the "fast"
 * read that returns only the low 32 bits of the counter; bit 30 selects
 * the fixed-counter range.  Fast reads are not supported everywhere, so
 * check_rdpmc() tolerates a #GP and skips the check.
 */
static void do_rdpmc_fast(void *ptr)
{
	pmu_counter_t *cnt = ptr;
	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);

	if (!is_gp(cnt))
		idx |= 1 << 30;

	cnt->count = rdpmc(idx);
}

static void check_rdpmc(void)
{
	uint64_t val = 0xff0123456789ull;
	bool exc;
	int i;

	report_prefix_push("rdpmc");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		uint64_t x;
		pmu_counter_t cnt = {
			.ctr = MSR_GP_COUNTERx(i),
			.idx = i
		};

		/*
		 * Without full-width writes, only the low 32 bits are writable,
		 * and the value is sign-extended.
		 */
		if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0)
			x = (uint64_t)(int64_t)(int32_t)val;
		else
			x = (uint64_t)(int64_t)val;

		/* Mask according to the number of supported bits */
		x &= (1ull << pmu.gp_counter_width) - 1;

		wrmsr(MSR_GP_COUNTERx(i), val);
		report(rdpmc(i) == x, "cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fast-%d", i);
		else
			report(cnt.count == (u32)val, "fast-%d", i);
	}
	for (i = 0; i < fixed_counters_num; i++) {
		uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1);
		pmu_counter_t cnt = {
			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
			.idx = i
		};

		wrmsr(MSR_PERF_FIXED_CTRx(i), x);
		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fixed fast-%d", i);
		else
			report(cnt.count == (u32)x, "fixed fast-%d", i);
	}

	report_prefix_pop();
}

static void check_running_counter_wrmsr(void)
{
	uint64_t status;
	uint64_t count;
	pmu_counter_t evt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};

	report_prefix_push("running counter wrmsr");

	start_event(&evt);
	loop();
	wrmsr(MSR_GP_COUNTERx(0), 0);
	stop_event(&evt);
	report(evt.count < gp_events[1].min, "cntr");

	/* clear status before overflow test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&evt);

	count = -1;
	if (pmu_use_full_writes())
		count &= (1ull << pmu.gp_counter_width) - 1;

	wrmsr(MSR_GP_COUNTERx(0), count);

	loop();
	stop_event(&evt);

	if (this_cpu_has_perf_global_status()) {
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "status msr bit");
	}

	report_prefix_pop();
}

static void check_emulated_instr(void)
{
	uint64_t status, instr_start, brnch_start;
	uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1;
	unsigned int branch_idx = pmu.is_intel ?
				  INTEL_BRANCHES_IDX : AMD_BRANCHES_IDX;
	pmu_counter_t brnch_cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		/* branch instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel,
	};
	pmu_counter_t instr_cnt = {
		.ctr = MSR_GP_COUNTERx(1),
		/* instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};
	report_prefix_push("emulated instruction");

	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&brnch_cnt);
	start_event(&instr_cnt);

	brnch_start = -EXPECTED_BRNCH;
	instr_start = -EXPECTED_INSTR;
	wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width);
	wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width);
	// KVM_FEP is a magic prefix that forces emulation so
	// 'KVM_FEP "jne label\n"' just counts as a single instruction.
	//
	// The block executes 17 instructions (mov, cmp, five jne, and five
	// mov+cpuid pairs) and 5 branches (the jne's), matching
	// EXPECTED_INSTR and EXPECTED_BRNCH.
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	stop_event(&brnch_cnt);
	stop_event(&instr_cnt);

	// Check that the end count - start count is at least the expected
	// number of instructions and branches.
	report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
	       "instruction count");
	report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
	       "branch count");
	if (this_cpu_has_perf_global_status()) {
		// Additionally check that those counters overflowed properly.
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "branch counter overflow");
		report(status & 2, "instruction counter overflow");
	}

	report_prefix_pop();
}

#define XBEGIN_STARTED (~0u)
static void check_tsx_cycles(void)
{
	pmu_counter_t cnt;
	unsigned int i, ret = 0;

	if (!this_cpu_has(X86_FEATURE_RTM))
		return;

	report_prefix_push("TSX cycles");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);

		if (i == 2) {
			/* Transactional cycles committed only on gp counter 2 */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x30000003c;
		} else {
			/* Transactional cycles */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x10000003c;
		}

		start_event(&cnt);

		asm volatile("xbegin 1f\n\t"
			     "1:\n\t"
			     : "+a" (ret) :: "memory");

		/* Generate a non-canonical #GP to trigger ABORT. */
		if (ret == XBEGIN_STARTED)
			*(int *)NONCANONICAL = 0;

		stop_event(&cnt);

		report(cnt.count > 0, "gp cntr-%d with a value of %" PRId64 "", i, cnt.count);
	}

	report_prefix_pop();
}

static void warm_up(void)
{
	int i;

	/*
	 * The cycles event is always run as the first event, so the caches
	 * are cold when it is measured, which can push the measured cycles
	 * count above the predefined upper bound and cause a spurious
	 * failure.  Warm things up with a few untimed iterations before the
	 * real measurements.
	 */
	for (i = 0; i < 10; i++)
		loop();
}

static void check_counters(void)
{
	if (is_fep_available())
		check_emulated_instr();

	warm_up();
	check_gp_counters();
	check_fixed_counters();
	check_rdpmc();
	check_counters_many();
	check_counter_overflow();
	check_gp_counter_cmask();
	check_running_counter_wrmsr();
	check_tsx_cycles();
}

/*
 * Used via test_for_exception(): writing a value wider than the GP counter
 * width to a full-width MSR_IA32_PMCn alias is expected to #GP.
 */
static void do_unsupported_width_counter_write(void *index)
{
	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
}

static void check_gp_counters_write_width(void)
{
	u64 val_64 = 0xffffff0123456789ull;
	u64 val_32 = val_64 & ((1ull << 32) - 1);
	u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1);
	int i;

	/*
	 * MSR_IA32_PERFCTRn supports 64-bit writes,
	 * but only the lowest 32 bits are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
	}

	/*
	 * MSR_IA32_PMCn supports writing values up to GP counter width,
	 * and only the lowest bits of GP counter width are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PMC0 + i, val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);

		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);

		report(test_for_exception(GP_VECTOR,
			do_unsupported_width_counter_write, &i),
		       "writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
	}
}

/*
 * Per the SDM, reference cycles are currently implemented using the
 * core crystal clock, TSC, or bus clock. Calibrate to the TSC
 * frequency to set reasonable expectations.
 */
static void set_ref_cycle_expectations(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_IA32_PERFCTR0,
		.config = EVNTSEL_OS | EVNTSEL_USR | intel_gp_events[2].unit_sel,
	};
	uint64_t tsc_delta;
	uint64_t t0, t1, t2, t3;

	/* Bit 2 enumerates the availability of reference cycles events. */
	if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2))
		return;

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	t0 = fenced_rdtsc();
	start_event(&cnt);
	t1 = fenced_rdtsc();

	/*
	 * This loop has to run long enough to dominate the VM-exit
	 * costs for playing with the PMU MSRs on start and stop.
	 *
	 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
	 * the core crystal clock, this function calculated a guest
	 * TSC : ref cycles ratio of around 105 with ECX initialized
	 * to one billion.
	 */
	asm volatile("loop ." : "+c"((int){1000000000ull}));

	t2 = fenced_rdtsc();
	stop_event(&cnt);
	t3 = fenced_rdtsc();

	tsc_delta = ((t2 - t1) + (t3 - t0)) / 2;

	if (!tsc_delta)
		return;

	intel_gp_events[2].min = (intel_gp_events[2].min * cnt.count) / tsc_delta;
	intel_gp_events[2].max = (intel_gp_events[2].max * cnt.count) / tsc_delta;
}
: "+c"((int){1000000000ull})); 738 739 t2 = fenced_rdtsc(); 740 stop_event(&cnt); 741 t3 = fenced_rdtsc(); 742 743 tsc_delta = ((t2 - t1) + (t3 - t0)) / 2; 744 745 if (!tsc_delta) 746 return; 747 748 intel_gp_events[2].min = (intel_gp_events[2].min * cnt.count) / tsc_delta; 749 intel_gp_events[2].max = (intel_gp_events[2].max * cnt.count) / tsc_delta; 750 } 751 752 static void check_invalid_rdpmc_gp(void) 753 { 754 uint64_t val; 755 756 report(rdpmc_safe(64, &val) == GP_VECTOR, 757 "Expected #GP on RDPMC(64)"); 758 } 759 760 int main(int ac, char **av) 761 { 762 setup_vm(); 763 handle_irq(PMI_VECTOR, cnt_overflow); 764 buf = malloc(N*64); 765 766 check_invalid_rdpmc_gp(); 767 768 if (pmu.is_intel) { 769 if (!pmu.version) { 770 report_skip("No Intel Arch PMU is detected!"); 771 return report_summary(); 772 } 773 gp_events = (struct pmu_event *)intel_gp_events; 774 gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]); 775 report_prefix_push("Intel"); 776 set_ref_cycle_expectations(); 777 } else { 778 gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]); 779 gp_events = (struct pmu_event *)amd_gp_events; 780 report_prefix_push("AMD"); 781 } 782 783 printf("PMU version: %d\n", pmu.version); 784 printf("GP counters: %d\n", pmu.nr_gp_counters); 785 printf("GP counter width: %d\n", pmu.gp_counter_width); 786 printf("Mask length: %d\n", pmu.gp_counter_mask_length); 787 printf("Fixed counters: %d\n", pmu.nr_fixed_counters); 788 printf("Fixed counter width: %d\n", pmu.fixed_counter_width); 789 790 fixed_counters_num = MIN(pmu.nr_fixed_counters, ARRAY_SIZE(fixed_events)); 791 if (pmu.nr_fixed_counters > ARRAY_SIZE(fixed_events)) 792 report_info("Fixed counters number %d > defined fixed events %u. " 793 "Please update test case.", pmu.nr_fixed_counters, 794 (uint32_t)ARRAY_SIZE(fixed_events)); 795 796 apic_write(APIC_LVTPC, PMI_VECTOR); 797 798 check_counters(); 799 800 if (pmu_has_full_writes()) { 801 pmu.msr_gp_counter_base = MSR_IA32_PMC0; 802 803 report_prefix_push("full-width writes"); 804 check_counters(); 805 check_gp_counters_write_width(); 806 report_prefix_pop(); 807 } 808 809 if (!pmu.is_intel) { 810 report_prefix_push("K7"); 811 pmu.nr_gp_counters = AMD64_NUM_COUNTERS; 812 pmu.msr_gp_counter_base = MSR_K7_PERFCTR0; 813 pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0; 814 check_counters(); 815 report_prefix_pop(); 816 } 817 818 return report_summary(); 819 } 820