1 2 #include "x86/msr.h" 3 #include "x86/processor.h" 4 #include "x86/pmu.h" 5 #include "x86/apic-defs.h" 6 #include "x86/apic.h" 7 #include "x86/desc.h" 8 #include "x86/isr.h" 9 #include "alloc.h" 10 11 #include "libcflat.h" 12 #include <stdint.h> 13 14 #define N 1000000 15 16 // These values match the number of instructions and branches in the 17 // assembly block in check_emulated_instr(). 18 #define EXPECTED_INSTR 17 19 #define EXPECTED_BRNCH 5 20 21 typedef struct { 22 uint32_t ctr; 23 uint64_t config; 24 uint64_t count; 25 int idx; 26 } pmu_counter_t; 27 28 struct pmu_event { 29 const char *name; 30 uint32_t unit_sel; 31 int min; 32 int max; 33 } intel_gp_events[] = { 34 {"core cycles", 0x003c, 1*N, 50*N}, 35 {"instructions", 0x00c0, 10*N, 10.2*N}, 36 {"ref cycles", 0x013c, 1*N, 30*N}, 37 {"llc references", 0x4f2e, 1, 2*N}, 38 {"llc misses", 0x412e, 1, 1*N}, 39 {"branches", 0x00c4, 1*N, 1.1*N}, 40 {"branch misses", 0x00c5, 0, 0.1*N}, 41 }, amd_gp_events[] = { 42 {"core cycles", 0x0076, 1*N, 50*N}, 43 {"instructions", 0x00c0, 10*N, 10.2*N}, 44 {"branches", 0x00c2, 1*N, 1.1*N}, 45 {"branch misses", 0x00c3, 0, 0.1*N}, 46 }, fixed_events[] = { 47 {"fixed 1", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N}, 48 {"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N}, 49 {"fixed 3", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N} 50 }; 51 52 char *buf; 53 54 static struct pmu_event *gp_events; 55 static unsigned int gp_events_size; 56 57 static inline void loop(void) 58 { 59 unsigned long tmp, tmp2, tmp3; 60 61 asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b" 62 : "=c"(tmp), "=r"(tmp2), "=r"(tmp3): "0"(N), "1"(buf)); 63 64 } 65 66 volatile uint64_t irq_received; 67 68 static void cnt_overflow(isr_regs_t *regs) 69 { 70 irq_received++; 71 apic_write(APIC_EOI, 0); 72 } 73 74 static bool check_irq(void) 75 { 76 int i; 77 irq_received = 0; 78 sti(); 79 for (i = 0; i < 100000 && !irq_received; i++) 80 asm volatile("pause"); 81 cli(); 82 return irq_received; 83 } 84 85 static bool is_gp(pmu_counter_t *evt) 86 { 87 if (!pmu.is_intel) 88 return true; 89 90 return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 || 91 evt->ctr >= MSR_IA32_PMC0; 92 } 93 94 static int event_to_global_idx(pmu_counter_t *cnt) 95 { 96 if (pmu.is_intel) 97 return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base : 98 (MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX)); 99 100 if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0) 101 return (cnt->ctr - pmu.msr_gp_counter_base) / 2; 102 else 103 return cnt->ctr - pmu.msr_gp_counter_base; 104 } 105 106 static struct pmu_event* get_counter_event(pmu_counter_t *cnt) 107 { 108 if (is_gp(cnt)) { 109 int i; 110 111 for (i = 0; i < gp_events_size; i++) 112 if (gp_events[i].unit_sel == (cnt->config & 0xffff)) 113 return &gp_events[i]; 114 } else 115 return &fixed_events[cnt->ctr - MSR_CORE_PERF_FIXED_CTR0]; 116 117 return (void*)0; 118 } 119 120 static void global_enable(pmu_counter_t *cnt) 121 { 122 if (!this_cpu_has_perf_global_ctrl()) 123 return; 124 125 cnt->idx = event_to_global_idx(cnt); 126 wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx)); 127 } 128 129 static void global_disable(pmu_counter_t *cnt) 130 { 131 if (!this_cpu_has_perf_global_ctrl()) 132 return; 133 134 wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx)); 135 } 136 137 static void __start_event(pmu_counter_t *evt, uint64_t count) 138 { 139 evt->count = count; 140 wrmsr(evt->ctr, evt->count); 141 if (is_gp(evt)) { 142 wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)), 143 evt->config | EVNTSEL_EN); 144 } else { 145 uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL); 146 int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4; 147 uint32_t usrospmi = 0; 148 149 if (evt->config & EVNTSEL_OS) 150 usrospmi |= (1 << 0); 151 if (evt->config & EVNTSEL_USR) 152 usrospmi |= (1 << 1); 153 if (evt->config & EVNTSEL_INT) 154 usrospmi |= (1 << 3); // PMI on overflow 155 ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift); 156 wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl); 157 } 158 global_enable(evt); 159 apic_write(APIC_LVTPC, PMI_VECTOR); 160 } 161 162 static void start_event(pmu_counter_t *evt) 163 { 164 __start_event(evt, 0); 165 } 166 167 static void stop_event(pmu_counter_t *evt) 168 { 169 global_disable(evt); 170 if (is_gp(evt)) { 171 wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)), 172 evt->config & ~EVNTSEL_EN); 173 } else { 174 uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL); 175 int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4; 176 wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift)); 177 } 178 evt->count = rdmsr(evt->ctr); 179 } 180 181 static noinline void measure_many(pmu_counter_t *evt, int count) 182 { 183 int i; 184 for (i = 0; i < count; i++) 185 start_event(&evt[i]); 186 loop(); 187 for (i = 0; i < count; i++) 188 stop_event(&evt[i]); 189 } 190 191 static void measure_one(pmu_counter_t *evt) 192 { 193 measure_many(evt, 1); 194 } 195 196 static noinline void __measure(pmu_counter_t *evt, uint64_t count) 197 { 198 __start_event(evt, count); 199 loop(); 200 stop_event(evt); 201 } 202 203 static bool verify_event(uint64_t count, struct pmu_event *e) 204 { 205 // printf("%d <= %ld <= %d\n", e->min, count, e->max); 206 return count >= e->min && count <= e->max; 207 208 } 209 210 static bool verify_counter(pmu_counter_t *cnt) 211 { 212 return verify_event(cnt->count, get_counter_event(cnt)); 213 } 214 215 static void check_gp_counter(struct pmu_event *evt) 216 { 217 pmu_counter_t cnt = { 218 .config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel, 219 }; 220 int i; 221 222 for (i = 0; i < pmu.nr_gp_counters; i++) { 223 cnt.ctr = MSR_GP_COUNTERx(i); 224 measure_one(&cnt); 225 report(verify_event(cnt.count, evt), "%s-%d", evt->name, i); 226 } 227 } 228 229 static void check_gp_counters(void) 230 { 231 int i; 232 233 for (i = 0; i < gp_events_size; i++) 234 if (pmu_gp_counter_is_available(i)) 235 check_gp_counter(&gp_events[i]); 236 else 237 printf("GP event '%s' is disabled\n", 238 gp_events[i].name); 239 } 240 241 static void check_fixed_counters(void) 242 { 243 pmu_counter_t cnt = { 244 .config = EVNTSEL_OS | EVNTSEL_USR, 245 }; 246 int i; 247 248 for (i = 0; i < pmu.nr_fixed_counters; i++) { 249 cnt.ctr = fixed_events[i].unit_sel; 250 measure_one(&cnt); 251 report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i); 252 } 253 } 254 255 static void check_counters_many(void) 256 { 257 pmu_counter_t cnt[10]; 258 int i, n; 259 260 for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) { 261 if (!pmu_gp_counter_is_available(i)) 262 continue; 263 264 cnt[n].ctr = MSR_GP_COUNTERx(n); 265 cnt[n].config = EVNTSEL_OS | EVNTSEL_USR | 266 gp_events[i % gp_events_size].unit_sel; 267 n++; 268 } 269 for (i = 0; i < pmu.nr_fixed_counters; i++) { 270 cnt[n].ctr = fixed_events[i].unit_sel; 271 cnt[n].config = EVNTSEL_OS | EVNTSEL_USR; 272 n++; 273 } 274 275 measure_many(cnt, n); 276 277 for (i = 0; i < n; i++) 278 if (!verify_counter(&cnt[i])) 279 break; 280 281 report(i == n, "all counters"); 282 } 283 284 static uint64_t measure_for_overflow(pmu_counter_t *cnt) 285 { 286 __measure(cnt, 0); 287 /* 288 * To generate overflow, i.e. roll over to '0', the initial count just 289 * needs to be preset to the negative expected count. However, as per 290 * Intel's SDM, the preset count needs to be incremented by 1 to ensure 291 * the overflow interrupt is generated immediately instead of possibly 292 * waiting for the overflow to propagate through the counter. 293 */ 294 assert(cnt->count > 1); 295 return 1 - cnt->count; 296 } 297 298 static void check_counter_overflow(void) 299 { 300 uint64_t overflow_preset; 301 int i; 302 pmu_counter_t cnt = { 303 .ctr = MSR_GP_COUNTERx(0), 304 .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */, 305 }; 306 overflow_preset = measure_for_overflow(&cnt); 307 308 /* clear status before test */ 309 if (this_cpu_has_perf_global_status()) 310 pmu_clear_global_status(); 311 312 report_prefix_push("overflow"); 313 314 for (i = 0; i < pmu.nr_gp_counters + 1; i++) { 315 uint64_t status; 316 int idx; 317 318 cnt.count = overflow_preset; 319 if (pmu_use_full_writes()) 320 cnt.count &= (1ull << pmu.gp_counter_width) - 1; 321 322 if (i == pmu.nr_gp_counters) { 323 if (!pmu.is_intel) 324 break; 325 326 cnt.ctr = fixed_events[0].unit_sel; 327 cnt.count = measure_for_overflow(&cnt); 328 cnt.count &= (1ull << pmu.gp_counter_width) - 1; 329 } else { 330 cnt.ctr = MSR_GP_COUNTERx(i); 331 } 332 333 if (i % 2) 334 cnt.config |= EVNTSEL_INT; 335 else 336 cnt.config &= ~EVNTSEL_INT; 337 idx = event_to_global_idx(&cnt); 338 __measure(&cnt, cnt.count); 339 if (pmu.is_intel) 340 report(cnt.count == 1, "cntr-%d", i); 341 else 342 report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i); 343 344 if (!this_cpu_has_perf_global_status()) 345 continue; 346 347 status = rdmsr(pmu.msr_global_status); 348 report(status & (1ull << idx), "status-%d", i); 349 wrmsr(pmu.msr_global_status_clr, status); 350 status = rdmsr(pmu.msr_global_status); 351 report(!(status & (1ull << idx)), "status clear-%d", i); 352 report(check_irq() == (i % 2), "irq-%d", i); 353 } 354 355 report_prefix_pop(); 356 } 357 358 static void check_gp_counter_cmask(void) 359 { 360 pmu_counter_t cnt = { 361 .ctr = MSR_GP_COUNTERx(0), 362 .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */, 363 }; 364 cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT); 365 measure_one(&cnt); 366 report(cnt.count < gp_events[1].min, "cmask"); 367 } 368 369 static void do_rdpmc_fast(void *ptr) 370 { 371 pmu_counter_t *cnt = ptr; 372 uint32_t idx = (uint32_t)cnt->idx | (1u << 31); 373 374 if (!is_gp(cnt)) 375 idx |= 1 << 30; 376 377 cnt->count = rdpmc(idx); 378 } 379 380 381 static void check_rdpmc(void) 382 { 383 uint64_t val = 0xff0123456789ull; 384 bool exc; 385 int i; 386 387 report_prefix_push("rdpmc"); 388 389 for (i = 0; i < pmu.nr_gp_counters; i++) { 390 uint64_t x; 391 pmu_counter_t cnt = { 392 .ctr = MSR_GP_COUNTERx(i), 393 .idx = i 394 }; 395 396 /* 397 * Without full-width writes, only the low 32 bits are writable, 398 * and the value is sign-extended. 399 */ 400 if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0) 401 x = (uint64_t)(int64_t)(int32_t)val; 402 else 403 x = (uint64_t)(int64_t)val; 404 405 /* Mask according to the number of supported bits */ 406 x &= (1ull << pmu.gp_counter_width) - 1; 407 408 wrmsr(MSR_GP_COUNTERx(i), val); 409 report(rdpmc(i) == x, "cntr-%d", i); 410 411 exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt); 412 if (exc) 413 report_skip("fast-%d", i); 414 else 415 report(cnt.count == (u32)val, "fast-%d", i); 416 } 417 for (i = 0; i < pmu.nr_fixed_counters; i++) { 418 uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1); 419 pmu_counter_t cnt = { 420 .ctr = MSR_CORE_PERF_FIXED_CTR0 + i, 421 .idx = i 422 }; 423 424 wrmsr(MSR_PERF_FIXED_CTRx(i), x); 425 report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i); 426 427 exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt); 428 if (exc) 429 report_skip("fixed fast-%d", i); 430 else 431 report(cnt.count == (u32)x, "fixed fast-%d", i); 432 } 433 434 report_prefix_pop(); 435 } 436 437 static void check_running_counter_wrmsr(void) 438 { 439 uint64_t status; 440 uint64_t count; 441 pmu_counter_t evt = { 442 .ctr = MSR_GP_COUNTERx(0), 443 .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel, 444 }; 445 446 report_prefix_push("running counter wrmsr"); 447 448 start_event(&evt); 449 loop(); 450 wrmsr(MSR_GP_COUNTERx(0), 0); 451 stop_event(&evt); 452 report(evt.count < gp_events[1].min, "cntr"); 453 454 /* clear status before overflow test */ 455 if (this_cpu_has_perf_global_status()) 456 pmu_clear_global_status(); 457 458 start_event(&evt); 459 460 count = -1; 461 if (pmu_use_full_writes()) 462 count &= (1ull << pmu.gp_counter_width) - 1; 463 464 wrmsr(MSR_GP_COUNTERx(0), count); 465 466 loop(); 467 stop_event(&evt); 468 469 if (this_cpu_has_perf_global_status()) { 470 status = rdmsr(pmu.msr_global_status); 471 report(status & 1, "status msr bit"); 472 } 473 474 report_prefix_pop(); 475 } 476 477 static void check_emulated_instr(void) 478 { 479 uint64_t status, instr_start, brnch_start; 480 uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1; 481 unsigned int branch_idx = pmu.is_intel ? 5 : 2; 482 pmu_counter_t brnch_cnt = { 483 .ctr = MSR_GP_COUNTERx(0), 484 /* branch instructions */ 485 .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel, 486 }; 487 pmu_counter_t instr_cnt = { 488 .ctr = MSR_GP_COUNTERx(1), 489 /* instructions */ 490 .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel, 491 }; 492 report_prefix_push("emulated instruction"); 493 494 if (this_cpu_has_perf_global_status()) 495 pmu_clear_global_status(); 496 497 start_event(&brnch_cnt); 498 start_event(&instr_cnt); 499 500 brnch_start = -EXPECTED_BRNCH; 501 instr_start = -EXPECTED_INSTR; 502 wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width); 503 wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width); 504 // KVM_FEP is a magic prefix that forces emulation so 505 // 'KVM_FEP "jne label\n"' just counts as a single instruction. 506 asm volatile( 507 "mov $0x0, %%eax\n" 508 "cmp $0x0, %%eax\n" 509 KVM_FEP "jne label\n" 510 KVM_FEP "jne label\n" 511 KVM_FEP "jne label\n" 512 KVM_FEP "jne label\n" 513 KVM_FEP "jne label\n" 514 "mov $0xa, %%eax\n" 515 "cpuid\n" 516 "mov $0xa, %%eax\n" 517 "cpuid\n" 518 "mov $0xa, %%eax\n" 519 "cpuid\n" 520 "mov $0xa, %%eax\n" 521 "cpuid\n" 522 "mov $0xa, %%eax\n" 523 "cpuid\n" 524 "label:\n" 525 : 526 : 527 : "eax", "ebx", "ecx", "edx"); 528 529 if (this_cpu_has_perf_global_ctrl()) 530 wrmsr(pmu.msr_global_ctl, 0); 531 532 stop_event(&brnch_cnt); 533 stop_event(&instr_cnt); 534 535 // Check that the end count - start count is at least the expected 536 // number of instructions and branches. 537 report(instr_cnt.count - instr_start >= EXPECTED_INSTR, 538 "instruction count"); 539 report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH, 540 "branch count"); 541 if (this_cpu_has_perf_global_status()) { 542 // Additionally check that those counters overflowed properly. 543 status = rdmsr(pmu.msr_global_status); 544 report(status & 1, "branch counter overflow"); 545 report(status & 2, "instruction counter overflow"); 546 } 547 548 report_prefix_pop(); 549 } 550 551 #define XBEGIN_STARTED (~0u) 552 static void check_tsx_cycles(void) 553 { 554 pmu_counter_t cnt; 555 unsigned int i, ret = 0; 556 557 if (!this_cpu_has(X86_FEATURE_RTM)) 558 return; 559 560 report_prefix_push("TSX cycles"); 561 562 for (i = 0; i < pmu.nr_gp_counters; i++) { 563 cnt.ctr = MSR_GP_COUNTERx(i); 564 565 if (i == 2) { 566 /* Transactional cycles commited only on gp counter 2 */ 567 cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x30000003c; 568 } else { 569 /* Transactional cycles */ 570 cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x10000003c; 571 } 572 573 start_event(&cnt); 574 575 asm volatile("xbegin 1f\n\t" 576 "1:\n\t" 577 : "+a" (ret) :: "memory"); 578 579 /* Generate a non-canonical #GP to trigger ABORT. */ 580 if (ret == XBEGIN_STARTED) 581 *(int *)NONCANONICAL = 0; 582 583 stop_event(&cnt); 584 585 report(cnt.count > 0, "gp cntr-%d with a value of %" PRId64 "", i, cnt.count); 586 } 587 588 report_prefix_pop(); 589 } 590 591 static void check_counters(void) 592 { 593 if (is_fep_available()) 594 check_emulated_instr(); 595 596 check_gp_counters(); 597 check_fixed_counters(); 598 check_rdpmc(); 599 check_counters_many(); 600 check_counter_overflow(); 601 check_gp_counter_cmask(); 602 check_running_counter_wrmsr(); 603 check_tsx_cycles(); 604 } 605 606 static void do_unsupported_width_counter_write(void *index) 607 { 608 wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull); 609 } 610 611 static void check_gp_counters_write_width(void) 612 { 613 u64 val_64 = 0xffffff0123456789ull; 614 u64 val_32 = val_64 & ((1ull << 32) - 1); 615 u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1); 616 int i; 617 618 /* 619 * MSR_IA32_PERFCTRn supports 64-bit writes, 620 * but only the lowest 32 bits are valid. 621 */ 622 for (i = 0; i < pmu.nr_gp_counters; i++) { 623 wrmsr(MSR_IA32_PERFCTR0 + i, val_32); 624 assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32); 625 assert(rdmsr(MSR_IA32_PMC0 + i) == val_32); 626 627 wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width); 628 assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32); 629 assert(rdmsr(MSR_IA32_PMC0 + i) == val_32); 630 631 wrmsr(MSR_IA32_PERFCTR0 + i, val_64); 632 assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32); 633 assert(rdmsr(MSR_IA32_PMC0 + i) == val_32); 634 } 635 636 /* 637 * MSR_IA32_PMCn supports writing values up to GP counter width, 638 * and only the lowest bits of GP counter width are valid. 639 */ 640 for (i = 0; i < pmu.nr_gp_counters; i++) { 641 wrmsr(MSR_IA32_PMC0 + i, val_32); 642 assert(rdmsr(MSR_IA32_PMC0 + i) == val_32); 643 assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32); 644 645 wrmsr(MSR_IA32_PMC0 + i, val_max_width); 646 assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width); 647 assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width); 648 649 report(test_for_exception(GP_VECTOR, 650 do_unsupported_width_counter_write, &i), 651 "writing unsupported width to MSR_IA32_PMC%d raises #GP", i); 652 } 653 } 654 655 /* 656 * Per the SDM, reference cycles are currently implemented using the 657 * core crystal clock, TSC, or bus clock. Calibrate to the TSC 658 * frequency to set reasonable expectations. 659 */ 660 static void set_ref_cycle_expectations(void) 661 { 662 pmu_counter_t cnt = { 663 .ctr = MSR_IA32_PERFCTR0, 664 .config = EVNTSEL_OS | EVNTSEL_USR | intel_gp_events[2].unit_sel, 665 }; 666 uint64_t tsc_delta; 667 uint64_t t0, t1, t2, t3; 668 669 /* Bit 2 enumerates the availability of reference cycles events. */ 670 if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2)) 671 return; 672 673 if (this_cpu_has_perf_global_ctrl()) 674 wrmsr(pmu.msr_global_ctl, 0); 675 676 t0 = fenced_rdtsc(); 677 start_event(&cnt); 678 t1 = fenced_rdtsc(); 679 680 /* 681 * This loop has to run long enough to dominate the VM-exit 682 * costs for playing with the PMU MSRs on start and stop. 683 * 684 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times 685 * the core crystal clock, this function calculated a guest 686 * TSC : ref cycles ratio of around 105 with ECX initialized 687 * to one billion. 688 */ 689 asm volatile("loop ." : "+c"((int){1000000000ull})); 690 691 t2 = fenced_rdtsc(); 692 stop_event(&cnt); 693 t3 = fenced_rdtsc(); 694 695 tsc_delta = ((t2 - t1) + (t3 - t0)) / 2; 696 697 if (!tsc_delta) 698 return; 699 700 intel_gp_events[2].min = (intel_gp_events[2].min * cnt.count) / tsc_delta; 701 intel_gp_events[2].max = (intel_gp_events[2].max * cnt.count) / tsc_delta; 702 } 703 704 static void check_invalid_rdpmc_gp(void) 705 { 706 uint64_t val; 707 708 report(rdpmc_safe(64, &val) == GP_VECTOR, 709 "Expected #GP on RDPMC(64)"); 710 } 711 712 int main(int ac, char **av) 713 { 714 setup_vm(); 715 handle_irq(PMI_VECTOR, cnt_overflow); 716 buf = malloc(N*64); 717 718 check_invalid_rdpmc_gp(); 719 720 if (pmu.is_intel) { 721 if (!pmu.version) { 722 report_skip("No Intel Arch PMU is detected!"); 723 return report_summary(); 724 } 725 gp_events = (struct pmu_event *)intel_gp_events; 726 gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]); 727 report_prefix_push("Intel"); 728 set_ref_cycle_expectations(); 729 } else { 730 gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]); 731 gp_events = (struct pmu_event *)amd_gp_events; 732 report_prefix_push("AMD"); 733 } 734 735 printf("PMU version: %d\n", pmu.version); 736 printf("GP counters: %d\n", pmu.nr_gp_counters); 737 printf("GP counter width: %d\n", pmu.gp_counter_width); 738 printf("Mask length: %d\n", pmu.gp_counter_mask_length); 739 printf("Fixed counters: %d\n", pmu.nr_fixed_counters); 740 printf("Fixed counter width: %d\n", pmu.fixed_counter_width); 741 742 apic_write(APIC_LVTPC, PMI_VECTOR); 743 744 check_counters(); 745 746 if (pmu_has_full_writes()) { 747 pmu.msr_gp_counter_base = MSR_IA32_PMC0; 748 749 report_prefix_push("full-width writes"); 750 check_counters(); 751 check_gp_counters_write_width(); 752 report_prefix_pop(); 753 } 754 755 if (!pmu.is_intel) { 756 report_prefix_push("K7"); 757 pmu.nr_gp_counters = AMD64_NUM_COUNTERS; 758 pmu.msr_gp_counter_base = MSR_K7_PERFCTR0; 759 pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0; 760 check_counters(); 761 report_prefix_pop(); 762 } 763 764 return report_summary(); 765 } 766