#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/pmu.h"
#include "x86/apic-defs.h"
#include "x86/apic.h"
#include "x86/desc.h"
#include "x86/isr.h"
#include "vmalloc.h"
#include "alloc.h"

#include "libcflat.h"
#include <stdint.h>

/* Iteration count of the measured loop; the event bounds below scale with N. */
#define N 1000000

// These values match the number of instructions and branches in the
// assembly block in check_emulated_instr().
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

/* Enable GLOBAL_CTRL + disable GLOBAL_CTRL instructions */
#define EXTRA_INSNS  (3 + 3)
/*
 * Exact totals executed by LOOP_ASM("wrmsr"): each of the N iterations runs
 * 10 instructions (load, add, 7 nops, loop), plus the enable/disable
 * sequences counted by EXTRA_INSNS; 'loop' is the only branch per iteration.
 */
#define LOOP_INSNS   (N * 10 + EXTRA_INSNS)
#define LOOP_BRANCHES  (N)
/*
 * The measured workload.  _wrmsr is either "nop" (counters are started and
 * stopped by the caller) or "wrmsr" (counters are enabled/disabled inside
 * this very blob, see __precise_loop()).  The body clobbers ecx (loop
 * count), so the caller's ecx is parked in edi across the loop.
 */
#define LOOP_ASM(_wrmsr)						\
	_wrmsr "\n\t"							\
	"mov %%ecx, %%edi; mov %%ebx, %%ecx;\n\t"			\
	"1: mov (%1), %2; add $64, %1;\n\t"				\
	"nop; nop; nop; nop; nop; nop; nop;\n\t"			\
	"loop 1b;\n\t"							\
	"mov %%edi, %%ecx; xor %%eax, %%eax; xor %%edx, %%edx;\n\t"	\
	_wrmsr "\n\t"

/*
 * One hardware counter under test: its counter MSR address (ctr), its bit
 * index in the global control/status MSRs (idx), the event-select config
 * bits, and the last value written to / read from the counter.
 */
typedef struct {
	uint32_t ctr;
	uint32_t idx;
	uint64_t config;
	uint64_t count;
} pmu_counter_t;

/*
 * Expected [min, max] count per event.  For GP events unit_sel is the
 * event-select/umask pair; for fixed_events it (ab)uses the field to hold
 * the fixed counter's MSR address.
 */
struct pmu_event {
	const char *name;
	uint32_t unit_sel;
	int min;
	int max;
} intel_gp_events[] = {
	{"core cycles", 0x003c, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"ref cycles", 0x013c, 1*N, 30*N},
	{"llc references", 0x4f2e, 1, 2*N},
	{"llc misses", 0x412e, 1, 1*N},
	{"branches", 0x00c4, 1*N, 1.1*N},
	{"branch misses", 0x00c5, 0, 0.1*N},
}, amd_gp_events[] = {
	{"core cycles", 0x0076, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"branches", 0x00c2, 1*N, 1.1*N},
	{"branch misses", 0x00c3, 0, 0.1*N},
}, fixed_events[] = {
	{"fixed 0", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
};

/*
 * Events index in intel_gp_events[], ensure consistent with
 * intel_gp_events[].
 */
enum {
	INTEL_INSTRUCTIONS_IDX  = 1,
	INTEL_REF_CYCLES_IDX	= 2,
	INTEL_BRANCHES_IDX	= 5,
};

/*
 * Events index in amd_gp_events[], ensure consistent with
 * amd_gp_events[].
 */
enum {
	AMD_INSTRUCTIONS_IDX	= 1,
	AMD_BRANCHES_IDX	= 2,
};

/* Scratch buffer read by the measured loop (allocated in main()). */
char *buf;

/* Vendor-specific GP event table selected in main(). */
static struct pmu_event *gp_events;
static unsigned int gp_events_size;
static unsigned int fixed_counters_num;


/*
 * Run the workload with the counters untouched; the caller must have
 * already enabled them (and will disable them afterwards).
 */
static inline void __loop(void)
{
	unsigned long tmp, tmp2, tmp3;

	asm volatile(LOOP_ASM("nop")
		     : "=c"(tmp), "=r"(tmp2), "=r"(tmp3)
		     : "0"(N), "1"(buf));
}

/*
 * Enable and disable counters in a whole asm blob to ensure
 * no other instructions are counted in the window between
 * counters enabling and really LOOP_ASM code executing.
 * Thus counters can verify instructions and branches events
 * against precise counts instead of a rough valid count range.
 *
 * cntrs is the GLOBAL_CTRL bitmask to write via wrmsr (eax:edx), with
 * the MSR index preloaded in ecx; the trailing xor/wrmsr pair writes 0
 * to disable all counters again.
 */
static inline void __precise_loop(u64 cntrs)
{
	unsigned long tmp, tmp2, tmp3;
	unsigned int global_ctl = pmu.msr_global_ctl;
	u32 eax = cntrs & (BIT_ULL(32) - 1);
	u32 edx = cntrs >> 32;

	asm volatile(LOOP_ASM("wrmsr")
		     : "=b"(tmp), "=r"(tmp2), "=r"(tmp3)
		     : "a"(eax), "d"(edx), "c"(global_ctl),
		       "0"(N), "1"(buf)
		     : "edi");
}

/* Run the workload, preferring the precise variant when GLOBAL_CTRL exists. */
static inline void loop(u64 cntrs)
{
	if (!this_cpu_has_perf_global_ctrl())
		__loop();
	else
		__precise_loop(cntrs);
}

/* Tighten the instructions/branches bounds to exact counts when possible. */
static void adjust_events_range(struct pmu_event *gp_events,
				int instruction_idx, int branch_idx)
{
	/*
	 * If HW supports GLOBAL_CTRL MSR, enabling and disabling PMCs are
	 * moved in __precise_loop(). Thus, instructions and branches events
	 * can be verified against a precise count instead of a rough range.
	 *
	 * Skip the precise checks on AMD, as AMD CPUs count VMRUN as a branch
	 * instruction in guest context, which leads to intermittent failures
	 * as the counts will vary depending on how many asynchronous VM-Exits
	 * occur while running the measured code, e.g. if the host takes IRQs.
	 */
	if (pmu.is_intel && this_cpu_has_perf_global_ctrl()) {
		gp_events[instruction_idx].min = LOOP_INSNS;
		gp_events[instruction_idx].max = LOOP_INSNS;
		gp_events[branch_idx].min = LOOP_BRANCHES;
		gp_events[branch_idx].max = LOOP_BRANCHES;
	}
}

volatile uint64_t irq_received;

/* PMI handler: count the interrupt, unmask LVTPC, and ack the local APIC. */
static void cnt_overflow(isr_regs_t *regs)
{
	irq_received++;
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
	apic_write(APIC_EOI, 0);
}

/* Spin briefly with IRQs enabled; returns true iff a PMI was delivered. */
static bool check_irq(void)
{
	int i;
	irq_received = 0;
	sti();
	for (i = 0; i < 100000 && !irq_received; i++)
		asm volatile("pause");
	cli();
	return irq_received;
}

/*
 * A counter is general-purpose unless its MSR falls in the Intel fixed
 * counter range; AMD has no fixed counters.
 */
static bool is_gp(pmu_counter_t *evt)
{
	if (!pmu.is_intel)
		return true;

	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
		evt->ctr >= MSR_IA32_PMC0;
}

/* Translate a counter MSR address to its GLOBAL_CTRL/GLOBAL_STATUS bit index. */
static int event_to_global_idx(pmu_counter_t *cnt)
{
	if (pmu.is_intel)
		return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
			(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));

	/* AMD PerfMonV2 counter MSRs are spaced two apart. */
	if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0)
		return (cnt->ctr - pmu.msr_gp_counter_base) / 2;
	else
		return cnt->ctr - pmu.msr_gp_counter_base;
}

/* Look up the expected-range entry for a programmed counter, or NULL. */
static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
{
	if (is_gp(cnt)) {
		int i;

		for (i = 0; i < gp_events_size; i++)
			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
				return &gp_events[i];
	} else {
		unsigned int idx = cnt->ctr - MSR_CORE_PERF_FIXED_CTR0;

		if (idx < ARRAY_SIZE(fixed_events))
			return &fixed_events[idx];
	}

	return (void*)0;
}

/* Set the counter's enable bit in GLOBAL_CTRL (no-op without that MSR). */
static void global_enable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	cnt->idx = event_to_global_idx(cnt);
	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx));
}

/* Clear the counter's enable bit in GLOBAL_CTRL (no-op without that MSR). */
static void global_disable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx));
}

/*
 * Preset the counter to 'count' and program its event select (GP) or its
 * 4-bit field in FIXED_CTR_CTRL (fixed).  Does not touch GLOBAL_CTRL;
 * pair with global_enable()/loop() to actually start counting.
 */
static void __start_event(pmu_counter_t *evt, uint64_t count)
{
	evt->count = count;
	wrmsr(evt->ctr, evt->count);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config | EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		uint32_t usrospmi = 0;

		if (evt->config & EVNTSEL_OS)
			usrospmi |= (1 << 0);
		if (evt->config & EVNTSEL_USR)
			usrospmi |= (1 << 1);
		if (evt->config & EVNTSEL_INT)
			usrospmi |= (1 << 3); // PMI on overflow
		ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
	}
	apic_write(APIC_LVTPC, PMI_VECTOR);
}

/* Start a counter from zero and enable it in GLOBAL_CTRL. */
static void start_event(pmu_counter_t *evt)
{
	__start_event(evt, 0);
	global_enable(evt);
}
/* Disable the counter and latch its final value into evt->count. */
static void __stop_event(pmu_counter_t *evt)
{
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config & ~EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
	}
	evt->count = rdmsr(evt->ctr);
}

/* Counterpart of start_event(): clear GLOBAL_CTRL bit, then stop and read. */
static void stop_event(pmu_counter_t *evt)
{
	global_disable(evt);
	__stop_event(evt);
}

/*
 * Program 'count' counters, run the workload once with all of them armed
 * (precisely, via loop()'s GLOBAL_CTRL blob when available), then read
 * them all back.
 */
static noinline void measure_many(pmu_counter_t *evt, int count)
{
	int i;
	u64 cntrs = 0;

	for (i = 0; i < count; i++) {
		__start_event(&evt[i], 0);
		cntrs |= BIT_ULL(event_to_global_idx(&evt[i]));
	}
	loop(cntrs);
	for (i = 0; i < count; i++)
		__stop_event(&evt[i]);
}

static void measure_one(pmu_counter_t *evt)
{
	measure_many(evt, 1);
}

/* Like measure_one(), but presets the counter to an explicit start value. */
static noinline void __measure(pmu_counter_t *evt, uint64_t count)
{
	u64 cntrs = BIT_ULL(event_to_global_idx(evt));

	__start_event(evt, count);
	loop(cntrs);
	__stop_event(evt);
}

/* Check 'count' against the event's [min, max] window; logs on failure. */
static bool verify_event(uint64_t count, struct pmu_event *e)
{
	bool pass;

	if (!e)
		return false;

	pass = count >= e->min && count <= e->max;
	if (!pass)
		printf("FAIL: %d <= %"PRId64" <= %d\n", e->min, count, e->max);

	return pass;
}

static bool verify_counter(pmu_counter_t *cnt)
{
	return verify_event(cnt->count, get_counter_event(cnt));
}

/* Measure one GP event on every GP counter and verify each result. */
static void check_gp_counter(struct pmu_event *evt)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
	};
	int i;

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);
		measure_one(&cnt);
		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
	}
}

static void check_gp_counters(void)
{
	int i;

	for (i = 0; i < gp_events_size; i++)
		if (pmu_gp_counter_is_available(i))
			check_gp_counter(&gp_events[i]);
		else
			printf("GP event '%s' is disabled\n",
					gp_events[i].name);
}

static void check_fixed_counters(void)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR,
	};
	int i;

	for (i = 0; i < fixed_counters_num; i++) {
		/* fixed_events[].unit_sel holds the fixed counter's MSR. */
		cnt.ctr = fixed_events[i].unit_sel;
		measure_one(&cnt);
		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
	}
}

/* Arm every available GP counter plus all fixed counters simultaneously. */
static void check_counters_many(void)
{
	pmu_counter_t cnt[48];
	int i, n;

	for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) {
		if (!pmu_gp_counter_is_available(i))
			continue;

		cnt[n].ctr = MSR_GP_COUNTERx(n);
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
			gp_events[i % gp_events_size].unit_sel;
		n++;
	}
	for (i = 0; i < fixed_counters_num; i++) {
		cnt[n].ctr = fixed_events[i].unit_sel;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
		n++;
	}

	assert(n <= ARRAY_SIZE(cnt));
	measure_many(cnt, n);

	for (i = 0; i < n; i++)
		if (!verify_counter(&cnt[i]))
			break;

	report(i == n, "all counters");
}

/* Measure once to learn the count, then derive the overflow preset. */
static uint64_t measure_for_overflow(pmu_counter_t *cnt)
{
	__measure(cnt, 0);
	/*
	 * To generate overflow, i.e. roll over to '0', the initial count just
	 * needs to be preset to the negative expected count. However, as per
	 * Intel's SDM, the preset count needs to be incremented by 1 to ensure
	 * the overflow interrupt is generated immediately instead of possibly
	 * waiting for the overflow to propagate through the counter.
	 */
	assert(cnt->count > 1);
	return 1 - cnt->count;
}

/*
 * Overflow each GP counter (and Intel fixed counter 0) and verify the
 * final count, the GLOBAL_STATUS bit, its write-1-to-clear behavior, and
 * PMI delivery on alternating iterations (EVNTSEL_INT toggled via i % 2).
 */
static void check_counter_overflow(void)
{
	int i;
	uint64_t overflow_preset;
	int instruction_idx = pmu.is_intel ?
			      INTEL_INSTRUCTIONS_IDX :
			      AMD_INSTRUCTIONS_IDX;

	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR |
			  gp_events[instruction_idx].unit_sel /* instructions */,
	};
	overflow_preset = measure_for_overflow(&cnt);

	/* clear status before test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	report_prefix_push("overflow");

	/* The extra iteration (i == nr_gp_counters) tests fixed counter 0. */
	for (i = 0; i < pmu.nr_gp_counters + 1; i++) {
		uint64_t status;
		int idx;

		cnt.count = overflow_preset;
		if (pmu_use_full_writes())
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;

		if (i == pmu.nr_gp_counters) {
			if (!pmu.is_intel)
				break;

			cnt.ctr = fixed_events[0].unit_sel;
			cnt.count = measure_for_overflow(&cnt);
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
		} else {
			cnt.ctr = MSR_GP_COUNTERx(i);
		}

		if (i % 2)
			cnt.config |= EVNTSEL_INT;
		else
			cnt.config &= ~EVNTSEL_INT;
		idx = event_to_global_idx(&cnt);
		__measure(&cnt, cnt.count);
		if (pmu.is_intel)
			report(cnt.count == 1, "cntr-%d", i);
		else
			report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i);

		if (!this_cpu_has_perf_global_status())
			continue;

		status = rdmsr(pmu.msr_global_status);
		report(status & (1ull << idx), "status-%d", i);
		wrmsr(pmu.msr_global_status_clr, status);
		status = rdmsr(pmu.msr_global_status);
		report(!(status & (1ull << idx)), "status clear-%d", i);
		report(check_irq() == (i % 2), "irq-%d", i);
	}

	report_prefix_pop();
}

/*
 * With CMASK=2, only cycles retiring >= 2 instructions are counted, so
 * the total must land below the event's normal minimum.
 */
static void check_gp_counter_cmask(void)
{
	int instruction_idx = pmu.is_intel ?
			      INTEL_INSTRUCTIONS_IDX :
			      AMD_INSTRUCTIONS_IDX;

	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR |
			  gp_events[instruction_idx].unit_sel /* instructions */,
	};
	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
	measure_one(&cnt);
	report(cnt.count < gp_events[instruction_idx].min, "cmask");
}

/* RDPMC with bit 31 set requests "fast" mode; bit 30 selects fixed counters. */
static void do_rdpmc_fast(void *ptr)
{
	pmu_counter_t *cnt = ptr;
	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);

	if (!is_gp(cnt))
		idx |= 1 << 30;

	cnt->count = rdpmc(idx);
}


/*
 * Verify RDPMC read-back of written values for GP and fixed counters, in
 * both normal and fast mode (fast mode may legitimately #GP).
 */
static void check_rdpmc(void)
{
	uint64_t val = 0xff0123456789ull;
	bool exc;
	int i;

	report_prefix_push("rdpmc");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		uint64_t x;
		pmu_counter_t cnt = {
			.ctr = MSR_GP_COUNTERx(i),
			.idx = i
		};

		/*
		 * Without full-width writes, only the low 32 bits are writable,
		 * and the value is sign-extended.
		 */
		if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0)
			x = (uint64_t)(int64_t)(int32_t)val;
		else
			x = (uint64_t)(int64_t)val;

		/* Mask according to the number of supported bits */
		x &= (1ull << pmu.gp_counter_width) - 1;

		wrmsr(MSR_GP_COUNTERx(i), val);
		report(rdpmc(i) == x, "cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fast-%d", i);
		else
			report(cnt.count == (u32)val, "fast-%d", i);
	}
	for (i = 0; i < fixed_counters_num; i++) {
		uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1);
		pmu_counter_t cnt = {
			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
			.idx = i
		};

		wrmsr(MSR_PERF_FIXED_CTRx(i), x);
		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fixed fast-%d", i);
		else
			report(cnt.count == (u32)x, "fixed fast-%d", i);
	}

	report_prefix_pop();
}

/*
 * Writing a running counter must take effect: first zeroing mid-run keeps
 * the final count small, then writing -1 forces an immediate overflow.
 */
static void check_running_counter_wrmsr(void)
{
	uint64_t status;
	uint64_t count;
	unsigned int instruction_idx = pmu.is_intel ?
				       INTEL_INSTRUCTIONS_IDX :
				       AMD_INSTRUCTIONS_IDX;

	pmu_counter_t evt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR |
			  gp_events[instruction_idx].unit_sel,
	};

	report_prefix_push("running counter wrmsr");

	start_event(&evt);
	__loop();
	wrmsr(MSR_GP_COUNTERx(0), 0);
	stop_event(&evt);
	report(evt.count < gp_events[instruction_idx].min, "cntr");

	/* clear status before overflow test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&evt);

	count = -1;
	if (pmu_use_full_writes())
		count &= (1ull << pmu.gp_counter_width) - 1;

	wrmsr(MSR_GP_COUNTERx(0), count);

	__loop();
	stop_event(&evt);

	if (this_cpu_has_perf_global_status()) {
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "status msr bit");
	}

	report_prefix_pop();
}

/*
 * Verify that emulated instructions (forced via KVM_FEP) are counted by
 * the instructions and branches events; the counters are preset to
 * -EXPECTED_* so the sequence also overflows both.
 */
static void check_emulated_instr(void)
{
	uint64_t status, instr_start, brnch_start;
	/* NOTE: despite the name, this is the counter width *mask*. */
	uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1;
	unsigned int branch_idx = pmu.is_intel ?
				  INTEL_BRANCHES_IDX : AMD_BRANCHES_IDX;
	unsigned int instruction_idx = pmu.is_intel ?
				       INTEL_INSTRUCTIONS_IDX :
				       AMD_INSTRUCTIONS_IDX;
	pmu_counter_t brnch_cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		/* branch instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel,
	};
	pmu_counter_t instr_cnt = {
		.ctr = MSR_GP_COUNTERx(1),
		/* instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[instruction_idx].unit_sel,
	};
	report_prefix_push("emulated instruction");

	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&brnch_cnt);
	start_event(&instr_cnt);

	brnch_start = -EXPECTED_BRNCH;
	instr_start = -EXPECTED_INSTR;
	wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width);
	wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width);
	// KVM_FEP is a magic prefix that forces emulation so
	// 'KVM_FEP "jne label\n"' just counts as a single instruction.
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	stop_event(&brnch_cnt);
	stop_event(&instr_cnt);

	// Check that the end count - start count is at least the expected
	// number of instructions and branches.
	report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
	       "instruction count");
	report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
	       "branch count");
	if (this_cpu_has_perf_global_status()) {
		// Additionally check that those counters overflowed properly.
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "branch counter overflow");
		report(status & 2, "instruction counter overflow");
	}

	report_prefix_pop();
}

#define XBEGIN_STARTED (~0u)
/*
 * Verify transactional-cycle events count inside an RTM region; the
 * transaction is deliberately aborted via a non-canonical write.
 */
static void check_tsx_cycles(void)
{
	pmu_counter_t cnt;
	unsigned int i, ret = 0;

	if (!this_cpu_has(X86_FEATURE_RTM))
		return;

	report_prefix_push("TSX cycles");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);

		if (i == 2) {
			/* Transactional cycles committed only on gp counter 2 */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x30000003c;
		} else {
			/* Transactional cycles */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x10000003c;
		}

		start_event(&cnt);

		asm volatile("xbegin 1f\n\t"
			     "1:\n\t"
			     : "+a" (ret) :: "memory");

		/* Generate a non-canonical #GP to trigger ABORT. */
		if (ret == XBEGIN_STARTED)
			*(int *)NONCANONICAL = 0;

		stop_event(&cnt);

		report(cnt.count > 0, "gp cntr-%d with a value of %" PRId64 "", i, cnt.count);
	}

	report_prefix_pop();
}

static void warm_up(void)
{
	int i;

	/*
	 * Since cycles event is always run as the first event, there would be
	 * a warm-up state to warm up the cache, it leads to the measured cycles
	 * value may exceed the pre-defined cycles upper boundary and cause
	 * false positive. To avoid this, introduce an warm-up state before
	 * the real verification.
	 */
	for (i = 0; i < 10; i++)
		loop(0);
}

/* Full test pass; rerun under each counter-MSR base configured in main(). */
static void check_counters(void)
{
	if (is_fep_available())
		check_emulated_instr();

	warm_up();
	check_gp_counters();
	check_fixed_counters();
	check_rdpmc();
	check_counters_many();
	check_counter_overflow();
	check_gp_counter_cmask();
	check_running_counter_wrmsr();
	check_tsx_cycles();
}

/* Helper for test_for_exception(): write beyond the counter width. */
static void do_unsupported_width_counter_write(void *index)
{
	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
}

/*
 * Verify the differing write semantics of the legacy (PERFCTRn, 32-bit
 * sign-extended) and full-width (PMCn, counter-width) counter MSRs.
 */
static void check_gp_counters_write_width(void)
{
	u64 val_64 = 0xffffff0123456789ull;
	u64 val_32 = val_64 & ((1ull << 32) - 1);
	u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1);
	int i;

	/*
	 * MSR_IA32_PERFCTRn supports 64-bit writes,
	 * but only the lowest 32 bits are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
	}

	/*
	 * MSR_IA32_PMCn supports writing values up to GP counter width,
	 * and only the lowest bits of GP counter width are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PMC0 + i, val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);

		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);

		report(test_for_exception(GP_VECTOR,
			do_unsupported_width_counter_write, &i),
		"writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
	}
}

/*
 * Per the SDM, reference cycles are currently implemented using the
 * core crystal clock, TSC, or bus clock. Calibrate to the TSC
 * frequency to set reasonable expectations.
 */
static void set_ref_cycle_expectations(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_IA32_PERFCTR0,
		.config = EVNTSEL_OS | EVNTSEL_USR |
			  intel_gp_events[INTEL_REF_CYCLES_IDX].unit_sel,
	};
	uint64_t tsc_delta;
	uint64_t t0, t1, t2, t3;

	/* Bit 2 enumerates the availability of reference cycles events. */
	if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2))
		return;

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	t0 = fenced_rdtsc();
	start_event(&cnt);
	t1 = fenced_rdtsc();

	/*
	 * This loop has to run long enough to dominate the VM-exit
	 * costs for playing with the PMU MSRs on start and stop.
	 *
	 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
	 * the core crystal clock, this function calculated a guest
	 * TSC : ref cycles ratio of around 105 with ECX initialized
	 * to one billion.
	 */
	asm volatile("loop ." : "+c"((int){1000000000ull}));

	t2 = fenced_rdtsc();
	stop_event(&cnt);
	t3 = fenced_rdtsc();

	/* Average the two bracketing TSC measurements of the loop. */
	tsc_delta = ((t2 - t1) + (t3 - t0)) / 2;

	if (!tsc_delta)
		return;

	intel_gp_events[INTEL_REF_CYCLES_IDX].min =
		(intel_gp_events[INTEL_REF_CYCLES_IDX].min * cnt.count) / tsc_delta;
	intel_gp_events[INTEL_REF_CYCLES_IDX].max =
		(intel_gp_events[INTEL_REF_CYCLES_IDX].max * cnt.count) / tsc_delta;
}

/* RDPMC of an out-of-range index must #GP. */
static void check_invalid_rdpmc_gp(void)
{
	uint64_t val;

	report(rdpmc_safe(64, &val) == GP_VECTOR,
	       "Expected #GP on RDPMC(64)");
}

int main(int ac, char **av)
{
	int instruction_idx;
	int branch_idx;

	setup_vm();
	handle_irq(PMI_VECTOR, cnt_overflow);
	buf = malloc(N*64);

	check_invalid_rdpmc_gp();

	/* Select the vendor's event table and instruction/branch indices. */
	if (pmu.is_intel) {
		if (!pmu.version) {
			report_skip("No Intel Arch PMU is detected!");
			return report_summary();
		}
		gp_events = (struct pmu_event *)intel_gp_events;
		gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
		instruction_idx = INTEL_INSTRUCTIONS_IDX;
		branch_idx = INTEL_BRANCHES_IDX;
		report_prefix_push("Intel");
		set_ref_cycle_expectations();
	} else {
		gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
		gp_events = (struct pmu_event *)amd_gp_events;
		instruction_idx = AMD_INSTRUCTIONS_IDX;
		branch_idx = AMD_BRANCHES_IDX;
		report_prefix_push("AMD");
	}
	adjust_events_range(gp_events, instruction_idx, branch_idx);

	printf("PMU version: %d\n", pmu.version);
	printf("GP counters: %d\n", pmu.nr_gp_counters);
	printf("GP counter width: %d\n", pmu.gp_counter_width);
	printf("Mask length: %d\n", pmu.gp_counter_mask_length);
	printf("Fixed counters: %d\n", pmu.nr_fixed_counters);
	printf("Fixed counter width: %d\n", pmu.fixed_counter_width);

	fixed_counters_num = MIN(pmu.nr_fixed_counters, ARRAY_SIZE(fixed_events));
	if (pmu.nr_fixed_counters > ARRAY_SIZE(fixed_events))
		report_info("Fixed counters number %d > defined fixed events %u. "
			    "Please update test case.", pmu.nr_fixed_counters,
			    (uint32_t)ARRAY_SIZE(fixed_events));

	apic_write(APIC_LVTPC, PMI_VECTOR);

	check_counters();

	/* Rerun everything using the full-width counter MSRs, if supported. */
	if (pmu_has_full_writes()) {
		pmu.msr_gp_counter_base = MSR_IA32_PMC0;

		report_prefix_push("full-width writes");
		check_counters();
		check_gp_counters_write_width();
		report_prefix_pop();
	}

	/* On AMD, also exercise the legacy K7 counter MSR aliases. */
	if (!pmu.is_intel) {
		report_prefix_push("K7");
		pmu.nr_gp_counters = AMD64_NUM_COUNTERS;
		pmu.msr_gp_counter_base = MSR_K7_PERFCTR0;
		pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0;
		check_counters();
		report_prefix_pop();
	}

	return report_summary();
}