#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/pmu.h"
#include "x86/apic-defs.h"
#include "x86/apic.h"
#include "x86/desc.h"
#include "x86/isr.h"
#include "vmalloc.h"
#include "alloc.h"

#include "libcflat.h"
#include <stdint.h>

#define N 1000000

// These values match the number of instructions and branches in the
// assembly block in check_emulated_instr().
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

typedef struct {
	uint32_t ctr;
	uint32_t idx;
	uint64_t config;
	uint64_t count;
} pmu_counter_t;

struct pmu_event {
	const char *name;
	uint32_t unit_sel;
	int min;
	int max;
} intel_gp_events[] = {
	{"core cycles", 0x003c, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"ref cycles", 0x013c, 1*N, 30*N},
	{"llc references", 0x4f2e, 1, 2*N},
	{"llc misses", 0x412e, 1, 1*N},
	{"branches", 0x00c4, 1*N, 1.1*N},
	{"branch misses", 0x00c5, 0, 0.1*N},
}, amd_gp_events[] = {
	{"core cycles", 0x0076, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"branches", 0x00c2, 1*N, 1.1*N},
	{"branch misses", 0x00c3, 0, 0.1*N},
}, fixed_events[] = {
	{"fixed 0", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
};

char *buf;

static struct pmu_event *gp_events;
static unsigned int gp_events_size;
static unsigned int fixed_counters_num;

static inline void loop(void)
{
	unsigned long tmp, tmp2, tmp3;

	asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b"
			: "=c"(tmp), "=r"(tmp2), "=r"(tmp3): "0"(N), "1"(buf));

}

volatile uint64_t irq_received;

static void cnt_overflow(isr_regs_t *regs)
{
	irq_received++;
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
	apic_write(APIC_EOI, 0);
}

static bool check_irq(void)
{
	int i;
	irq_received = 0;
	sti();
	for (i = 0; i < 100000 && !irq_received; i++)
		asm volatile("pause");
	cli();
	return irq_received;
}

static bool is_gp(pmu_counter_t *evt)
{
	if (!pmu.is_intel)
		return true;

	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
		evt->ctr >= MSR_IA32_PMC0;
}

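/*
 * Map a counter to its bit index in the global control/status MSRs: GP
 * counters occupy the low bits and fixed counters start at FIXED_CNT_INDEX.
 * With AMD's extended (F15h "core") layout, event select and counter MSRs
 * are interleaved, hence the stride of two.
 */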
static int event_to_global_idx(pmu_counter_t *cnt)
{
	if (pmu.is_intel)
		return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
			(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));

	if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0)
		return (cnt->ctr - pmu.msr_gp_counter_base) / 2;
	else
		return cnt->ctr - pmu.msr_gp_counter_base;
}

static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
{
	if (is_gp(cnt)) {
		int i;

		for (i = 0; i < gp_events_size; i++)
			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
				return &gp_events[i];
	} else {
		unsigned int idx = cnt->ctr - MSR_CORE_PERF_FIXED_CTR0;

		if (idx < ARRAY_SIZE(fixed_events))
			return &fixed_events[idx];
	}

	return (void*)0;
}

static void global_enable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	cnt->idx = event_to_global_idx(cnt);
	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx));
}

static void global_disable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx));
}

static void __start_event(pmu_counter_t *evt, uint64_t count)
{
	evt->count = count;
	wrmsr(evt->ctr, evt->count);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config | EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		uint32_t usrospmi = 0;

		if (evt->config & EVNTSEL_OS)
			usrospmi |= (1 << 0);
		if (evt->config & EVNTSEL_USR)
			usrospmi |= (1 << 1);
		if (evt->config & EVNTSEL_INT)
			usrospmi |= (1 << 3); // PMI on overflow
		ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
	}
	global_enable(evt);
	apic_write(APIC_LVTPC, PMI_VECTOR);
}

static void start_event(pmu_counter_t *evt)
{
	__start_event(evt, 0);
}

static void stop_event(pmu_counter_t *evt)
{
	global_disable(evt);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config & ~EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
	}
	evt->count = rdmsr(evt->ctr);
}

static noinline void measure_many(pmu_counter_t *evt, int count)
{
	int i;
	for (i = 0; i < count; i++)
		start_event(&evt[i]);
	loop();
	for (i = 0; i < count; i++)
		stop_event(&evt[i]);
}

static void measure_one(pmu_counter_t *evt)
{
	measure_many(evt, 1);
}

static noinline void __measure(pmu_counter_t *evt, uint64_t count)
{
	__start_event(evt, count);
	loop();
	stop_event(evt);
}

static bool verify_event(uint64_t count, struct pmu_event *e)
{
	bool pass;

	if (!e)
		return false;

	pass = count >= e->min && count <= e->max;
	if (!pass)
		printf("FAIL: %d <= %"PRId64" <= %d\n", e->min, count, e->max);

	return pass;
}

static bool verify_counter(pmu_counter_t *cnt)
{
	return verify_event(cnt->count, get_counter_event(cnt));
}

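/*
 * Program each available GP counter with the given event, run loop(), and
 * verify the observed count falls within the event's [min, max] window.
 */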
static void check_gp_counter(struct pmu_event *evt)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
	};
	int i;

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);
		measure_one(&cnt);
		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
	}
}

static void check_gp_counters(void)
{
	int i;

	for (i = 0; i < gp_events_size; i++)
		if (pmu_gp_counter_is_available(i))
			check_gp_counter(&gp_events[i]);
		else
			printf("GP event '%s' is disabled\n",
					gp_events[i].name);
}

static void check_fixed_counters(void)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR,
	};
	int i;

	for (i = 0; i < fixed_counters_num; i++) {
		cnt.ctr = fixed_events[i].unit_sel;
		measure_one(&cnt);
		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
	}
}

static void check_counters_many(void)
{
	pmu_counter_t cnt[48];
	int i, n;

	for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) {
		if (!pmu_gp_counter_is_available(i))
			continue;

		cnt[n].ctr = MSR_GP_COUNTERx(n);
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
			gp_events[i % gp_events_size].unit_sel;
		n++;
	}
	for (i = 0; i < fixed_counters_num; i++) {
		cnt[n].ctr = fixed_events[i].unit_sel;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
		n++;
	}

	assert(n <= ARRAY_SIZE(cnt));
	measure_many(cnt, n);

	for (i = 0; i < n; i++)
		if (!verify_counter(&cnt[i]))
			break;

	report(i == n, "all counters");
}

static uint64_t measure_for_overflow(pmu_counter_t *cnt)
{
	__measure(cnt, 0);
	/*
	 * To generate overflow, i.e. roll over to '0', the initial count just
	 * needs to be preset to the negative expected count. However, as per
	 * Intel's SDM, the preset count needs to be incremented by 1 to ensure
	 * the overflow interrupt is generated immediately instead of possibly
	 * waiting for the overflow to propagate through the counter.
	 */
	assert(cnt->count > 1);
	return 1 - cnt->count;
}

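/*
 * Illustrative numbers: if __measure() observes a count of C for the
 * workload, measure_for_overflow() returns a preset of 1 - C.  Re-running
 * the same workload from that preset rolls the counter past zero, sets its
 * bit in GLOBAL_STATUS, and leaves the counter at 1, which is what the
 * Intel checks in check_counter_overflow() below expect.
 */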
static void check_counter_overflow(void)
{
	uint64_t overflow_preset;
	int i;
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};
	overflow_preset = measure_for_overflow(&cnt);

	/* clear status before test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	report_prefix_push("overflow");

	for (i = 0; i < pmu.nr_gp_counters + 1; i++) {
		uint64_t status;
		int idx;

		cnt.count = overflow_preset;
		if (pmu_use_full_writes())
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;

		if (i == pmu.nr_gp_counters) {
			if (!pmu.is_intel)
				break;

			cnt.ctr = fixed_events[0].unit_sel;
			cnt.count = measure_for_overflow(&cnt);
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
		} else {
			cnt.ctr = MSR_GP_COUNTERx(i);
		}

		if (i % 2)
			cnt.config |= EVNTSEL_INT;
		else
			cnt.config &= ~EVNTSEL_INT;
		idx = event_to_global_idx(&cnt);
		__measure(&cnt, cnt.count);
		if (pmu.is_intel)
			report(cnt.count == 1, "cntr-%d", i);
		else
			report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i);

		if (!this_cpu_has_perf_global_status())
			continue;

		status = rdmsr(pmu.msr_global_status);
		report(status & (1ull << idx), "status-%d", i);
		wrmsr(pmu.msr_global_status_clr, status);
		status = rdmsr(pmu.msr_global_status);
		report(!(status & (1ull << idx)), "status clear-%d", i);
		report(check_irq() == (i % 2), "irq-%d", i);
	}

	report_prefix_pop();
}

static void check_gp_counter_cmask(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};
	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
	measure_one(&cnt);
	report(cnt.count < gp_events[1].min, "cmask");
}

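/*
 * RDPMC index encoding used below: bit 30 selects the fixed-counter range
 * and bit 31 requests a "fast" read that returns only the low 32 bits.
 */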
static void do_rdpmc_fast(void *ptr)
{
	pmu_counter_t *cnt = ptr;
	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);

	if (!is_gp(cnt))
		idx |= 1 << 30;

	cnt->count = rdpmc(idx);
}


static void check_rdpmc(void)
{
	uint64_t val = 0xff0123456789ull;
	bool exc;
	int i;

	report_prefix_push("rdpmc");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		uint64_t x;
		pmu_counter_t cnt = {
			.ctr = MSR_GP_COUNTERx(i),
			.idx = i
		};

		/*
		 * Without full-width writes, only the low 32 bits are writable,
		 * and the value is sign-extended.
		 */
		if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0)
			x = (uint64_t)(int64_t)(int32_t)val;
		else
			x = (uint64_t)(int64_t)val;

		/* Mask according to the number of supported bits */
		x &= (1ull << pmu.gp_counter_width) - 1;

		wrmsr(MSR_GP_COUNTERx(i), val);
		report(rdpmc(i) == x, "cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fast-%d", i);
		else
			report(cnt.count == (u32)val, "fast-%d", i);
	}
	for (i = 0; i < fixed_counters_num; i++) {
		uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1);
		pmu_counter_t cnt = {
			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
			.idx = i
		};

		wrmsr(MSR_PERF_FIXED_CTRx(i), x);
		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fixed fast-%d", i);
		else
			report(cnt.count == (u32)x, "fixed fast-%d", i);
	}

	report_prefix_pop();
}

static void check_running_counter_wrmsr(void)
{
	uint64_t status;
	uint64_t count;
	pmu_counter_t evt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};

	report_prefix_push("running counter wrmsr");

	start_event(&evt);
	loop();
	wrmsr(MSR_GP_COUNTERx(0), 0);
	stop_event(&evt);
	report(evt.count < gp_events[1].min, "cntr");

	/* clear status before overflow test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&evt);

	count = -1;
	if (pmu_use_full_writes())
		count &= (1ull << pmu.gp_counter_width) - 1;

	wrmsr(MSR_GP_COUNTERx(0), count);

	loop();
	stop_event(&evt);

	if (this_cpu_has_perf_global_status()) {
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "status msr bit");
	}

	report_prefix_pop();
}

static void check_emulated_instr(void)
{
	uint64_t status, instr_start, brnch_start;
	uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1;
	unsigned int branch_idx = pmu.is_intel ? 5 : 2;
	pmu_counter_t brnch_cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		/* branch instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel,
	};
	pmu_counter_t instr_cnt = {
		.ctr = MSR_GP_COUNTERx(1),
		/* instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};
	report_prefix_push("emulated instruction");

	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&brnch_cnt);
	start_event(&instr_cnt);

	brnch_start = -EXPECTED_BRNCH;
	instr_start = -EXPECTED_INSTR;
	wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width);
	wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width);
	// KVM_FEP is a magic prefix that forces emulation so
	// 'KVM_FEP "jne label\n"' just counts as a single instruction.
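	// The asm blob below retires exactly EXPECTED_INSTR (17) instructions
	// (mov + cmp, five emulated jnes, five mov/cpuid pairs) and
	// EXPECTED_BRNCH (5) branches, so both counters preset above overflow.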
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	stop_event(&brnch_cnt);
	stop_event(&instr_cnt);

	// Check that the end count - start count is at least the expected
	// number of instructions and branches.
	report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
	       "instruction count");
	report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
	       "branch count");
	if (this_cpu_has_perf_global_status()) {
		// Additionally check that those counters overflowed properly.
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "branch counter overflow");
		report(status & 2, "instruction counter overflow");
	}

	report_prefix_pop();
}

#define XBEGIN_STARTED (~0u)
static void check_tsx_cycles(void)
{
	pmu_counter_t cnt;
	unsigned int i, ret = 0;

	if (!this_cpu_has(X86_FEATURE_RTM))
		return;

	report_prefix_push("TSX cycles");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);

		if (i == 2) {
			/* Transactional cycles committed only on gp counter 2 */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x30000003c;
		} else {
			/* Transactional cycles */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x10000003c;
		}

		start_event(&cnt);

		asm volatile("xbegin 1f\n\t"
				"1:\n\t"
				: "+a" (ret) :: "memory");

		/* Generate a non-canonical #GP to trigger ABORT. */
		if (ret == XBEGIN_STARTED)
			*(int *)NONCANONICAL = 0;

		stop_event(&cnt);

		report(cnt.count > 0, "gp cntr-%d with a value of %" PRId64 "", i, cnt.count);
	}

	report_prefix_pop();
}

static void check_counters(void)
{
	if (is_fep_available())
		check_emulated_instr();

	check_gp_counters();
	check_fixed_counters();
	check_rdpmc();
	check_counters_many();
	check_counter_overflow();
	check_gp_counter_cmask();
	check_running_counter_wrmsr();
	check_tsx_cycles();
}

static void do_unsupported_width_counter_write(void *index)
{
	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
}

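/*
 * MSR_IA32_PERFCTR0 (legacy) and MSR_IA32_PMC0 (full-width alias) address
 * the same counters; legacy writes keep only the low 32 bits (sign-extended),
 * while the aliases accept values up to the enumerated counter width.
 */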
static void check_gp_counters_write_width(void)
{
	u64 val_64 = 0xffffff0123456789ull;
	u64 val_32 = val_64 & ((1ull << 32) - 1);
	u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1);
	int i;

	/*
	 * MSR_IA32_PERFCTRn supports 64-bit writes,
	 * but only the lowest 32 bits are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
	}

	/*
	 * MSR_IA32_PMCn supports writing values up to GP counter width,
	 * and only the lowest bits of GP counter width are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PMC0 + i, val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);

		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);

		report(test_for_exception(GP_VECTOR,
			do_unsupported_width_counter_write, &i),
		       "writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
	}
}

/*
 * Per the SDM, reference cycles are currently implemented using the
 * core crystal clock, TSC, or bus clock. Calibrate to the TSC
 * frequency to set reasonable expectations.
 */
static void set_ref_cycle_expectations(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_IA32_PERFCTR0,
		.config = EVNTSEL_OS | EVNTSEL_USR | intel_gp_events[2].unit_sel,
	};
	uint64_t tsc_delta;
	uint64_t t0, t1, t2, t3;

	/* Bit 2 enumerates the availability of reference cycles events. */
	if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2))
		return;

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	t0 = fenced_rdtsc();
	start_event(&cnt);
	t1 = fenced_rdtsc();

	/*
	 * This loop has to run long enough to dominate the VM-exit
	 * costs for playing with the PMU MSRs on start and stop.
	 *
	 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
	 * the core crystal clock, this function calculated a guest
	 * TSC : ref cycles ratio of around 105 with ECX initialized
	 * to one billion.
	 */
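	/* "loop ." branches to itself, burning ECX (one billion) iterations. */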
: "+c"((int){1000000000ull})); 705 706 t2 = fenced_rdtsc(); 707 stop_event(&cnt); 708 t3 = fenced_rdtsc(); 709 710 tsc_delta = ((t2 - t1) + (t3 - t0)) / 2; 711 712 if (!tsc_delta) 713 return; 714 715 intel_gp_events[2].min = (intel_gp_events[2].min * cnt.count) / tsc_delta; 716 intel_gp_events[2].max = (intel_gp_events[2].max * cnt.count) / tsc_delta; 717 } 718 719 static void check_invalid_rdpmc_gp(void) 720 { 721 uint64_t val; 722 723 report(rdpmc_safe(64, &val) == GP_VECTOR, 724 "Expected #GP on RDPMC(64)"); 725 } 726 727 int main(int ac, char **av) 728 { 729 setup_vm(); 730 handle_irq(PMI_VECTOR, cnt_overflow); 731 buf = malloc(N*64); 732 733 check_invalid_rdpmc_gp(); 734 735 if (pmu.is_intel) { 736 if (!pmu.version) { 737 report_skip("No Intel Arch PMU is detected!"); 738 return report_summary(); 739 } 740 gp_events = (struct pmu_event *)intel_gp_events; 741 gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]); 742 report_prefix_push("Intel"); 743 set_ref_cycle_expectations(); 744 } else { 745 gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]); 746 gp_events = (struct pmu_event *)amd_gp_events; 747 report_prefix_push("AMD"); 748 } 749 750 printf("PMU version: %d\n", pmu.version); 751 printf("GP counters: %d\n", pmu.nr_gp_counters); 752 printf("GP counter width: %d\n", pmu.gp_counter_width); 753 printf("Mask length: %d\n", pmu.gp_counter_mask_length); 754 printf("Fixed counters: %d\n", pmu.nr_fixed_counters); 755 printf("Fixed counter width: %d\n", pmu.fixed_counter_width); 756 757 fixed_counters_num = MIN(pmu.nr_fixed_counters, ARRAY_SIZE(fixed_events)); 758 if (pmu.nr_fixed_counters > ARRAY_SIZE(fixed_events)) 759 report_info("Fixed counters number %d > defined fixed events %u. " 760 "Please update test case.", pmu.nr_fixed_counters, 761 (uint32_t)ARRAY_SIZE(fixed_events)); 762 763 apic_write(APIC_LVTPC, PMI_VECTOR); 764 765 check_counters(); 766 767 if (pmu_has_full_writes()) { 768 pmu.msr_gp_counter_base = MSR_IA32_PMC0; 769 770 report_prefix_push("full-width writes"); 771 check_counters(); 772 check_gp_counters_write_width(); 773 report_prefix_pop(); 774 } 775 776 if (!pmu.is_intel) { 777 report_prefix_push("K7"); 778 pmu.nr_gp_counters = AMD64_NUM_COUNTERS; 779 pmu.msr_gp_counter_base = MSR_K7_PERFCTR0; 780 pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0; 781 check_counters(); 782 report_prefix_pop(); 783 } 784 785 return report_summary(); 786 } 787