#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/pmu.h"
#include "x86/apic-defs.h"
#include "x86/apic.h"
#include "x86/desc.h"
#include "x86/isr.h"
#include "vmalloc.h"
#include "alloc.h"

#include "libcflat.h"
#include <stdint.h>

#define N 1000000

// These values match the number of instructions and branches in the
// assembly block in check_emulated_instr().
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

typedef struct {
	uint32_t ctr;
	uint64_t config;
	uint64_t count;
	int idx;
} pmu_counter_t;

struct pmu_event {
	const char *name;
	uint32_t unit_sel;
	int min;
	int max;
} intel_gp_events[] = {
	{"core cycles", 0x003c, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"ref cycles", 0x013c, 1*N, 30*N},
	{"llc references", 0x4f2e, 1, 2*N},
	{"llc misses", 0x412e, 1, 1*N},
	{"branches", 0x00c4, 1*N, 1.1*N},
	{"branch misses", 0x00c5, 0, 0.1*N},
}, amd_gp_events[] = {
	{"core cycles", 0x0076, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"branches", 0x00c2, 1*N, 1.1*N},
	{"branch misses", 0x00c3, 0, 0.1*N},
}, fixed_events[] = {
	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
	{"fixed 3", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
};

char *buf;

static struct pmu_event *gp_events;
static unsigned int gp_events_size;

static inline void loop(void)
{
	unsigned long tmp, tmp2, tmp3;

	asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b"
			: "=c"(tmp), "=r"(tmp2), "=r"(tmp3) : "0"(N), "1"(buf));
}

volatile uint64_t irq_received;

static void cnt_overflow(isr_regs_t *regs)
{
	irq_received++;
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
	apic_write(APIC_EOI, 0);
}

static bool check_irq(void)
{
	int i;
	irq_received = 0;
	sti();
	for (i = 0; i < 100000 && !irq_received; i++)
		asm volatile("pause");
	cli();
	return irq_received;
}

static bool is_gp(pmu_counter_t *evt)
{
	if (!pmu.is_intel)
		return true;

	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
		evt->ctr >= MSR_IA32_PMC0;
}

static int event_to_global_idx(pmu_counter_t *cnt)
{
	if (pmu.is_intel)
		return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
			(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));

	if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0)
		return (cnt->ctr - pmu.msr_gp_counter_base) / 2;
	else
		return cnt->ctr - pmu.msr_gp_counter_base;
}

static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
{
	if (is_gp(cnt)) {
		int i;

		for (i = 0; i < gp_events_size; i++)
			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
				return &gp_events[i];
	} else
		return &fixed_events[cnt->ctr - MSR_CORE_PERF_FIXED_CTR0];

	return (void*)0;
}

static void global_enable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	cnt->idx = event_to_global_idx(cnt);
	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx));
}

static void global_disable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx));
}

static void __start_event(pmu_counter_t *evt, uint64_t count)
{
	evt->count = count;
	wrmsr(evt->ctr, evt->count);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config | EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		uint32_t usrospmi = 0;

		if (evt->config & EVNTSEL_OS)
			usrospmi |= (1 << 0);
		if (evt->config & EVNTSEL_USR)
			usrospmi |= (1 << 1);
		if (evt->config & EVNTSEL_INT)
			usrospmi |= (1 << 3); // PMI on overflow
		ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
	}
	global_enable(evt);
	apic_write(APIC_LVTPC, PMI_VECTOR);
}

static void start_event(pmu_counter_t *evt)
{
	__start_event(evt, 0);
}

static void stop_event(pmu_counter_t *evt)
{
	global_disable(evt);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config & ~EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
	}
	evt->count = rdmsr(evt->ctr);
}

static noinline void measure_many(pmu_counter_t *evt, int count)
{
	int i;
	for (i = 0; i < count; i++)
		start_event(&evt[i]);
	loop();
	for (i = 0; i < count; i++)
		stop_event(&evt[i]);
}

static void measure_one(pmu_counter_t *evt)
{
	measure_many(evt, 1);
}

static noinline void __measure(pmu_counter_t *evt, uint64_t count)
{
	__start_event(evt, count);
	loop();
	stop_event(evt);
}

static bool verify_event(uint64_t count, struct pmu_event *e)
{
	// printf("%d <= %ld <= %d\n", e->min, count, e->max);
	return count >= e->min && count <= e->max;
}

static bool verify_counter(pmu_counter_t *cnt)
{
	return verify_event(cnt->count, get_counter_event(cnt));
}

static void check_gp_counter(struct pmu_event *evt)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
	};
	int i;

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);
		measure_one(&cnt);
		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
	}
}

static void check_gp_counters(void)
{
	int i;
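	/* Run each supported GP event on every available GP counter. */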
	for (i = 0; i < gp_events_size; i++)
		if (pmu_gp_counter_is_available(i))
			check_gp_counter(&gp_events[i]);
		else
			printf("GP event '%s' is disabled\n",
					gp_events[i].name);
}

static void check_fixed_counters(void)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR,
	};
	int i;

	for (i = 0; i < pmu.nr_fixed_counters; i++) {
		cnt.ctr = fixed_events[i].unit_sel;
		measure_one(&cnt);
		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
	}
}

static void check_counters_many(void)
{
	pmu_counter_t cnt[10];
	int i, n;

	for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) {
		if (!pmu_gp_counter_is_available(i))
			continue;

		cnt[n].ctr = MSR_GP_COUNTERx(n);
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
			gp_events[i % gp_events_size].unit_sel;
		n++;
	}
	for (i = 0; i < pmu.nr_fixed_counters; i++) {
		cnt[n].ctr = fixed_events[i].unit_sel;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
		n++;
	}

	measure_many(cnt, n);

	for (i = 0; i < n; i++)
		if (!verify_counter(&cnt[i]))
			break;

	report(i == n, "all counters");
}

static uint64_t measure_for_overflow(pmu_counter_t *cnt)
{
	__measure(cnt, 0);
	/*
	 * To generate overflow, i.e. roll over to '0', the initial count just
	 * needs to be preset to the negative expected count.  However, as per
	 * Intel's SDM, the preset count needs to be incremented by 1 to ensure
	 * the overflow interrupt is generated immediately instead of possibly
	 * waiting for the overflow to propagate through the counter.
	 */
	assert(cnt->count > 1);
	return 1 - cnt->count;
}

static void check_counter_overflow(void)
{
	uint64_t overflow_preset;
	int i;
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};
	overflow_preset = measure_for_overflow(&cnt);

	/* clear status before test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	report_prefix_push("overflow");

	for (i = 0; i < pmu.nr_gp_counters + 1; i++) {
		uint64_t status;
		int idx;

		cnt.count = overflow_preset;
		if (pmu_use_full_writes())
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;

		if (i == pmu.nr_gp_counters) {
			if (!pmu.is_intel)
				break;

			cnt.ctr = fixed_events[0].unit_sel;
			cnt.count = measure_for_overflow(&cnt);
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
		} else {
			cnt.ctr = MSR_GP_COUNTERx(i);
		}

		if (i % 2)
			cnt.config |= EVNTSEL_INT;
		else
			cnt.config &= ~EVNTSEL_INT;
		idx = event_to_global_idx(&cnt);
		__measure(&cnt, cnt.count);
		if (pmu.is_intel)
			report(cnt.count == 1, "cntr-%d", i);
		else
			report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i);

		if (!this_cpu_has_perf_global_status())
			continue;

		status = rdmsr(pmu.msr_global_status);
		report(status & (1ull << idx), "status-%d", i);
		wrmsr(pmu.msr_global_status_clr, status);
		status = rdmsr(pmu.msr_global_status);
		report(!(status & (1ull << idx)), "status clear-%d", i);
		report(check_irq() == (i % 2), "irq-%d", i);
	}

	report_prefix_pop();
}

static void check_gp_counter_cmask(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};
	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
	measure_one(&cnt);
	report(cnt.count < gp_events[1].min, "cmask");
}

static void do_rdpmc_fast(void *ptr)
{
	pmu_counter_t *cnt = ptr;
	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);

	if (!is_gp(cnt))
		idx |= 1 << 30;

	cnt->count = rdpmc(idx);
}

static void check_rdpmc(void)
{
	uint64_t val = 0xff0123456789ull;
	bool exc;
	int i;

	report_prefix_push("rdpmc");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		uint64_t x;
		pmu_counter_t cnt = {
			.ctr = MSR_GP_COUNTERx(i),
			.idx = i
		};

		/*
		 * Without full-width writes, only the low 32 bits are writable,
		 * and the value is sign-extended.
		 */
		if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0)
			x = (uint64_t)(int64_t)(int32_t)val;
		else
			x = (uint64_t)(int64_t)val;

		/* Mask according to the number of supported bits */
		x &= (1ull << pmu.gp_counter_width) - 1;

		wrmsr(MSR_GP_COUNTERx(i), val);
		report(rdpmc(i) == x, "cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fast-%d", i);
		else
			report(cnt.count == (u32)val, "fast-%d", i);
	}
	for (i = 0; i < pmu.nr_fixed_counters; i++) {
		uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1);
		pmu_counter_t cnt = {
			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
			.idx = i
		};

		wrmsr(MSR_PERF_FIXED_CTRx(i), x);
		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fixed fast-%d", i);
		else
			report(cnt.count == (u32)x, "fixed fast-%d", i);
	}

	report_prefix_pop();
}

static void check_running_counter_wrmsr(void)
{
	uint64_t status;
	uint64_t count;
	pmu_counter_t evt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};

	report_prefix_push("running counter wrmsr");

	start_event(&evt);
	loop();
	wrmsr(MSR_GP_COUNTERx(0), 0);
	stop_event(&evt);
	report(evt.count < gp_events[1].min, "cntr");

	/* clear status before overflow test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&evt);

	count = -1;
	if (pmu_use_full_writes())
		count &= (1ull << pmu.gp_counter_width) - 1;

	wrmsr(MSR_GP_COUNTERx(0), count);

	loop();
	stop_event(&evt);

	if (this_cpu_has_perf_global_status()) {
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "status msr bit");
	}

	report_prefix_pop();
}

static void check_emulated_instr(void)
{
	uint64_t status, instr_start, brnch_start;
	uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1;
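	/*
	 * Index of the "branches" event in the active gp_events table;
	 * the Intel and AMD tables above order their events differently.
	 */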
	unsigned int branch_idx = pmu.is_intel ? 5 : 2;
	pmu_counter_t brnch_cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		/* branch instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel,
	};
	pmu_counter_t instr_cnt = {
		.ctr = MSR_GP_COUNTERx(1),
		/* instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};
	report_prefix_push("emulated instruction");

	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&brnch_cnt);
	start_event(&instr_cnt);

	brnch_start = -EXPECTED_BRNCH;
	instr_start = -EXPECTED_INSTR;
	wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width);
	wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width);
	// KVM_FEP is a magic prefix that forces emulation so
	// 'KVM_FEP "jne label\n"' just counts as a single instruction.
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	stop_event(&brnch_cnt);
	stop_event(&instr_cnt);

	// Check that the end count - start count is at least the expected
	// number of instructions and branches.
	report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
	       "instruction count");
	report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
	       "branch count");
	if (this_cpu_has_perf_global_status()) {
		// Additionally check that those counters overflowed properly.
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "branch counter overflow");
		report(status & 2, "instruction counter overflow");
	}

	report_prefix_pop();
}

#define XBEGIN_STARTED (~0u)
static void check_tsx_cycles(void)
{
	pmu_counter_t cnt;
	unsigned int i, ret = 0;

	if (!this_cpu_has(X86_FEATURE_RTM))
		return;

	report_prefix_push("TSX cycles");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);

		if (i == 2) {
			/* Transactional cycles committed only on gp counter 2 */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x30000003c;
		} else {
			/* Transactional cycles */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x10000003c;
		}

		start_event(&cnt);

		asm volatile("xbegin 1f\n\t"
				"1:\n\t"
				: "+a" (ret) :: "memory");
		/* Generate a non-canonical #GP to trigger ABORT. */
		if (ret == XBEGIN_STARTED)
			*(int *)NONCANONICAL = 0;

		stop_event(&cnt);

		report(cnt.count > 0, "gp cntr-%d with a value of %" PRId64 "", i, cnt.count);
	}

	report_prefix_pop();
}

static void check_counters(void)
{
	if (is_fep_available())
		check_emulated_instr();

	check_gp_counters();
	check_fixed_counters();
	check_rdpmc();
	check_counters_many();
	check_counter_overflow();
	check_gp_counter_cmask();
	check_running_counter_wrmsr();
	check_tsx_cycles();
}

static void do_unsupported_width_counter_write(void *index)
{
	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
}

static void check_gp_counters_write_width(void)
{
	u64 val_64 = 0xffffff0123456789ull;
	u64 val_32 = val_64 & ((1ull << 32) - 1);
	u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1);
	int i;

	/*
	 * MSR_IA32_PERFCTRn supports 64-bit writes,
	 * but only the lowest 32 bits are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
	}

	/*
	 * MSR_IA32_PMCn supports writing values up to GP counter width,
	 * and only the lowest bits of GP counter width are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PMC0 + i, val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);

		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);

		report(test_for_exception(GP_VECTOR,
			do_unsupported_width_counter_write, &i),
		       "writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
	}
}

/*
 * Per the SDM, reference cycles are currently implemented using the
 * core crystal clock, TSC, or bus clock. Calibrate to the TSC
 * frequency to set reasonable expectations.
 */
static void set_ref_cycle_expectations(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_IA32_PERFCTR0,
		.config = EVNTSEL_OS | EVNTSEL_USR | intel_gp_events[2].unit_sel,
	};
	uint64_t tsc_delta;
	uint64_t t0, t1, t2, t3;

	/* Bit 2 enumerates the availability of reference cycles events. */
	if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2))
		return;

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	t0 = fenced_rdtsc();
	start_event(&cnt);
	t1 = fenced_rdtsc();

	/*
	 * This loop has to run long enough to dominate the VM-exit
	 * costs for playing with the PMU MSRs on start and stop.
	 *
	 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
	 * the core crystal clock, this function calculated a guest
	 * TSC : ref cycles ratio of around 105 with ECX initialized
	 * to one billion.
	 */
	asm volatile("loop ." : "+c"((int){1000000000ull}));

	t2 = fenced_rdtsc();
	stop_event(&cnt);
	t3 = fenced_rdtsc();

	tsc_delta = ((t2 - t1) + (t3 - t0)) / 2;

	if (!tsc_delta)
		return;

	intel_gp_events[2].min = (intel_gp_events[2].min * cnt.count) / tsc_delta;
	intel_gp_events[2].max = (intel_gp_events[2].max * cnt.count) / tsc_delta;
}

static void check_invalid_rdpmc_gp(void)
{
	uint64_t val;

	report(rdpmc_safe(64, &val) == GP_VECTOR,
	       "Expected #GP on RDPMC(64)");
}

int main(int ac, char **av)
{
	setup_vm();
	handle_irq(PMI_VECTOR, cnt_overflow);
	buf = malloc(N*64);

	check_invalid_rdpmc_gp();

	if (pmu.is_intel) {
		if (!pmu.version) {
			report_skip("No Intel Arch PMU is detected!");
			return report_summary();
		}
		gp_events = (struct pmu_event *)intel_gp_events;
		gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
		report_prefix_push("Intel");
		set_ref_cycle_expectations();
	} else {
		gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
		gp_events = (struct pmu_event *)amd_gp_events;
		report_prefix_push("AMD");
	}

	printf("PMU version:         %d\n", pmu.version);
	printf("GP counters:         %d\n", pmu.nr_gp_counters);
	printf("GP counter width:    %d\n", pmu.gp_counter_width);
	printf("Mask length:         %d\n", pmu.gp_counter_mask_length);
	printf("Fixed counters:      %d\n", pmu.nr_fixed_counters);
	printf("Fixed counter width: %d\n", pmu.fixed_counter_width);

	apic_write(APIC_LVTPC, PMI_VECTOR);

	check_counters();

	if (pmu_has_full_writes()) {
		pmu.msr_gp_counter_base = MSR_IA32_PMC0;

		report_prefix_push("full-width writes");
		check_counters();
		check_gp_counters_write_width();
		report_prefix_pop();
	}

	if (!pmu.is_intel) {
		report_prefix_push("K7");
		pmu.nr_gp_counters = AMD64_NUM_COUNTERS;
		pmu.msr_gp_counter_base = MSR_K7_PERFCTR0;
		pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0;
		check_counters();
		report_prefix_pop();
	}

	return report_summary();
}