// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023, Tencent, Inc.
 */
#include <x86intrin.h>

#include "pmu.h"
#include "processor.h"

/* Number of iterations of the loop for the guest measurement payload. */
#define NUM_LOOPS 10

/* Each iteration of the loop retires one branch instruction. */
#define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS)

/*
 * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
 * 1 MOV, 1 LOOP.
 */
#define NUM_INSNS_PER_LOOP 4

/*
 * Number of "extra" instructions that will be counted, i.e. the number of
 * instructions that are needed to set up the loop and then disable the
 * counter. 2 MOV, 2 XOR, 1 WRMSR.
 */
#define NUM_EXTRA_INSNS 5

/* Total number of instructions retired within the measured section. */
#define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
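/* With the default NUM_LOOPS of 10, this works out to 10 * 4 + 5 = 45. */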

/* Track which architectural events are supported by hardware. */
static uint32_t hardware_pmu_arch_events;

static uint8_t kvm_pmu_version;
static bool kvm_has_perf_caps;

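/*
 * An all-zeroes feature acts as a sentinel for architectural events that have
 * no associated fixed counter (see pmu_is_null_feature()).
 */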
#define X86_PMU_FEATURE_NULL						\
({									\
	struct kvm_x86_pmu_feature feature = {};			\
									\
	feature;							\
})

static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
{
	return !(*(u64 *)&event);
}

struct kvm_intel_pmu_event {
	struct kvm_x86_pmu_feature gp_event;
	struct kvm_x86_pmu_feature fixed_event;
};

/*
 * Wrap the array to appease the compiler, as the macros used to construct each
 * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the
 * compiler often thinks the feature definitions aren't compile-time constants.
 */
static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx)
{
	const struct kvm_intel_pmu_event __intel_event_to_feature[] = {
		[INTEL_ARCH_CPU_CYCLES_INDEX]		 = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
		[INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX]	 = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
		/*
		 * Note, the fixed counter for reference cycles is NOT the same
		 * as the general purpose architectural event.  The fixed
		 * counter explicitly counts at the same frequency as the TSC,
		 * whereas the GP event counts at a fixed, but uarch specific,
		 * frequency.  Bundle them here for simplicity.
		 */
		[INTEL_ARCH_REFERENCE_CYCLES_INDEX]	 = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
		[INTEL_ARCH_LLC_REFERENCES_INDEX]	 = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_LLC_MISSES_INDEX]		 = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_BRANCHES_RETIRED_INDEX]	 = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
		[INTEL_ARCH_TOPDOWN_SLOTS_INDEX]	 = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
	};

	kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS);

	return __intel_event_to_feature[idx];
}

static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
						  void *guest_code,
						  uint8_t pmu_version,
						  uint64_t perf_capabilities)
{
	struct kvm_vm *vm;

	vm = vm_create_with_one_vcpu(vcpu, guest_code);
	sync_global_to_guest(vm, kvm_pmu_version);
	sync_global_to_guest(vm, hardware_pmu_arch_events);

	/*
	 * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
	 * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
	 */
	if (kvm_has_perf_caps)
		vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);

	vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
	return vm;
}

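/*
 * Run the vCPU until the guest signals UCALL_DONE, forwarding guest printfs
 * and reporting guest assertion failures to the host.
 */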
static void run_vcpu(struct kvm_vcpu *vcpu)
{
	struct ucall uc;

	do {
		vcpu_run(vcpu);
		switch (get_ucall(vcpu, &uc)) {
		case UCALL_SYNC:
			break;
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			break;
		case UCALL_PRINTF:
			pr_info("%s", uc.buffer);
			break;
		case UCALL_DONE:
			break;
		default:
			TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
		}
	} while (uc.cmd != UCALL_DONE);
}

static uint8_t guest_get_pmu_version(void)
{
	/*
	 * Return the effective PMU version, i.e. the minimum between what KVM
	 * supports and what is enumerated to the guest.  The host deliberately
	 * advertises a PMU version to the guest beyond what is actually
	 * supported by KVM to verify KVM doesn't freak out and do something
	 * bizarre with an architecturally valid, but unsupported, version.
	 */
	return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
}

/*
 * If an architectural event is supported and guaranteed to generate at least
 * one "hit", assert that its count is non-zero.  If an event isn't supported
 * or the test can't guarantee the associated action will occur, then all bets
 * are off regarding the count, i.e. no checks can be done.
 *
 * Sanity check that in all cases, the event doesn't count when it's disabled,
 * and that KVM correctly emulates the write of an arbitrary value.
 */
static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr)
{
	uint64_t count;

	count = _rdpmc(pmc);
	if (!(hardware_pmu_arch_events & BIT(idx)))
		goto sanity_checks;

	switch (idx) {
	case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
		GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
		break;
	case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
		GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
		break;
	case INTEL_ARCH_LLC_REFERENCES_INDEX:
	case INTEL_ARCH_LLC_MISSES_INDEX:
		if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
		    !this_cpu_has(X86_FEATURE_CLFLUSH))
			break;
		fallthrough;
	case INTEL_ARCH_CPU_CYCLES_INDEX:
	case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
		GUEST_ASSERT_NE(count, 0);
		break;
	case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
		__GUEST_ASSERT(count >= NUM_INSNS_RETIRED,
			       "Expected top-down slots >= %u, got count = %lu",
			       NUM_INSNS_RETIRED, count);
		break;
	default:
		break;
	}

sanity_checks:
	__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
	GUEST_ASSERT_EQ(_rdpmc(pmc), count);

	wrmsr(pmc_msr, 0xdead);
	GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
}

/*
 * Enable and disable the PMC in a monolithic asm blob to ensure that the
 * compiler can't insert _any_ code into the measured sequence.  Note, ECX
 * doesn't need to be clobbered as the input value, @_msr, is restored before
 * the end of the sequence.
 *
 * If CLFLUSH{,OPT} is supported, flush the cacheline containing the memory
 * operand that is loaded on each loop iteration to force LLC references and
 * misses, i.e. to allow testing that those events actually count.
 *
 * If forced emulation is enabled (and specified), force emulation on a subset
 * of the measured code to verify that KVM correctly emulates instructions and
 * branches retired events in conjunction with hardware also counting said
 * events.
 */
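/*
 * Register usage below: ECX holds @_msr for the initial WRMSR, is then loaded
 * with NUM_LOOPS for the LOOP instruction, and is restored from EDI (which
 * also holds @_msr) before the final WRMSR that disables the event.  The MOV
 * from %[m] reloads the cacheline flushed by CLFLUSH{,OPT}.
 */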
#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP)				\
do {										\
	__asm__ __volatile__("wrmsr\n\t"					\
			     " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t"	\
			     "1:\n\t"						\
			     clflush "\n\t"					\
			     "mfence\n\t"					\
			     "mov %[m], %%eax\n\t"				\
			     FEP "loop 1b\n\t"					\
			     FEP "mov %%edi, %%ecx\n\t"				\
			     FEP "xor %%eax, %%eax\n\t"				\
			     FEP "xor %%edx, %%edx\n\t"				\
			     "wrmsr\n\t"					\
			     :: "a"((uint32_t)_value), "d"(_value >> 32),	\
				"c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version)	\
	);									\
} while (0)

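/*
 * Zero the PMC, run the measured payload with the event enabled (preferring
 * CLFLUSHOPT, then CLFLUSH, else a NOP placeholder), and then verify the
 * resulting count via guest_assert_event_count().
 */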
#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP)		\
do {										\
	wrmsr(_pmc_msr, 0);							\
										\
	if (this_cpu_has(X86_FEATURE_CLFLUSHOPT))				\
		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP);	\
	else if (this_cpu_has(X86_FEATURE_CLFLUSH))				\
		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP);	\
	else									\
		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP);		\
										\
	guest_assert_event_count(_idx, _pmc, _pmc_msr);				\
} while (0)

static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr,
				    uint32_t ctrl_msr, uint64_t ctrl_msr_value)
{
	GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");

	if (is_forced_emulation_enabled)
		GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
}

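/*
 * Verify the given architectural event on all GP counters, and on its
 * associated fixed counter if one exists and is supported.
 */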
static void guest_test_arch_event(uint8_t idx)
{
	uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
	uint32_t pmu_version = guest_get_pmu_version();
	/* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
	bool guest_has_perf_global_ctrl = pmu_version >= 2;
	struct kvm_x86_pmu_feature gp_event, fixed_event;
	uint32_t base_pmc_msr;
	unsigned int i;

	/* The host side shouldn't invoke this without a guest PMU. */
	GUEST_ASSERT(pmu_version);

	if (this_cpu_has(X86_FEATURE_PDCM) &&
	    rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
		base_pmc_msr = MSR_IA32_PMC0;
	else
		base_pmc_msr = MSR_IA32_PERFCTR0;

	gp_event = intel_event_to_feature(idx).gp_event;
	GUEST_ASSERT_EQ(idx, gp_event.f.bit);

	GUEST_ASSERT(nr_gp_counters);

	for (i = 0; i < nr_gp_counters; i++) {
		uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
				    ARCH_PERFMON_EVENTSEL_ENABLE |
				    intel_pmu_arch_events[idx];

		wrmsr(MSR_P6_EVNTSEL0 + i, 0);
		if (guest_has_perf_global_ctrl)
			wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));

		__guest_test_arch_event(idx, i, base_pmc_msr + i,
					MSR_P6_EVNTSEL0 + i, eventsel);
	}

	if (!guest_has_perf_global_ctrl)
		return;

	fixed_event = intel_event_to_feature(idx).fixed_event;
	if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
		return;

	i = fixed_event.f.bit;

	wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));

	__guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED,
				MSR_CORE_PERF_FIXED_CTR0 + i,
				MSR_CORE_PERF_GLOBAL_CTRL,
				FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
}

static void guest_test_arch_events(void)
{
	uint8_t i;

	for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
		guest_test_arch_event(i);

	GUEST_DONE();
}

static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
			     uint8_t length, uint8_t unavailable_mask)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	/* Testing arch events requires a vPMU (there are no negative tests). */
	if (!pmu_version)
		return;

	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
					 pmu_version, perf_capabilities);

	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
				length);
	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
				unavailable_mask);

	run_vcpu(vcpu);

	kvm_vm_free(vm);
}

/*
 * Limit testing to MSRs that are actually defined by Intel (in the SDM).  MSRs
 * that aren't defined counter MSRs *probably* don't exist, but there's no
 * guarantee that currently undefined MSR indices won't be used for something
 * other than PMCs in the future.
 */
#define MAX_NR_GP_COUNTERS	8
#define MAX_NR_FIXED_COUNTERS	3

#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector)		\
	__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector,		\
		       "Expected %s on " #insn "(0x%x), got vector %u",		\
		       expect_gp ? "#GP" : "no fault", msr, vector)		\

#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected)			\
	__GUEST_ASSERT(val == expected,						\
		       "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx",	\
		       msr, expected, val);

static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
			     uint64_t expected_val)
{
	uint8_t vector;
	uint64_t val;

	vector = rdpmc_safe(rdpmc_idx, &val);
	GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
	if (expect_success)
		GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);

	if (!is_forced_emulation_enabled)
		return;

	vector = rdpmc_safe_fep(rdpmc_idx, &val);
	GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
	if (expect_success)
		GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
}

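/*
 * Exercise reads and writes of all possible counter MSRs of the given type,
 * verifying that supported counters can be written and read back, while
 * unsupported counters #GP (except the legacy P6 PERFCTRs, whose writes KVM
 * silently drops).
 */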
static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
				 uint8_t nr_counters, uint32_t or_mask)
{
	const bool pmu_has_fast_mode = !guest_get_pmu_version();
	uint8_t i;

	for (i = 0; i < nr_possible_counters; i++) {
		/*
		 * TODO: Test a value that validates full-width writes and the
		 * width of the counters.
		 */
		const uint64_t test_val = 0xffff;
		const uint32_t msr = base_msr + i;

		/*
		 * Fixed counters are supported if the counter is less than the
		 * number of enumerated contiguous counters *or* the counter is
		 * explicitly enumerated in the supported counters mask.
		 */
		const bool expect_success = i < nr_counters || (or_mask & BIT(i));

		/*
		 * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
		 * unsupported, i.e. doesn't #GP and reads back '0'.
		 */
		const uint64_t expected_val = expect_success ? test_val : 0;
		const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
				       msr != MSR_P6_PERFCTR1;
		uint32_t rdpmc_idx;
		uint8_t vector;
		uint64_t val;

		vector = wrmsr_safe(msr, test_val);
		GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);

		vector = rdmsr_safe(msr, &val);
		GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);

		/* On #GP, the result of RDMSR is undefined. */
		if (!expect_gp)
			GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);

		/*
		 * Redo the read tests with RDPMC, which has different indexing
		 * semantics and additional capabilities.
		 */
		rdpmc_idx = i;
		if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
			rdpmc_idx |= INTEL_RDPMC_FIXED;

		guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);

		/*
		 * KVM doesn't support non-architectural PMUs, i.e. it should
		 * be impossible to have fast mode RDPMC.  Verify that
		 * attempting to use fast RDPMC always #GPs.
		 */
		GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
		rdpmc_idx |= INTEL_RDPMC_FAST;
		guest_test_rdpmc(rdpmc_idx, false, -1ull);

		vector = wrmsr_safe(msr, 0);
		GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
	}
}

static void guest_test_gp_counters(void)
{
	uint8_t pmu_version = guest_get_pmu_version();
	uint8_t nr_gp_counters = 0;
	uint32_t base_msr;

	if (pmu_version)
		nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);

	/*
	 * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is
	 * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number
	 * of GP counters.  If there are no GP counters, require KVM to leave
	 * PERF_GLOBAL_CTRL '0'.  This edge case isn't covered by the SDM, but
	 * follow the spirit of the architecture and only globally enable GP
	 * counters, of which there are none.
	 */
	if (pmu_version > 1) {
		uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);

		if (nr_gp_counters)
			GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
		else
			GUEST_ASSERT_EQ(global_ctrl, 0);
	}

	if (this_cpu_has(X86_FEATURE_PDCM) &&
	    rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
		base_msr = MSR_IA32_PMC0;
	else
		base_msr = MSR_IA32_PERFCTR0;

	guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
	GUEST_DONE();
}

static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
			     uint8_t nr_gp_counters)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
					 pmu_version, perf_capabilities);

	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
				nr_gp_counters);

	run_vcpu(vcpu);

	kvm_vm_free(vm);
}

static void guest_test_fixed_counters(void)
{
	uint64_t supported_bitmask = 0;
	uint8_t nr_fixed_counters = 0;
	uint8_t i;

	/* Fixed counters require Architectural vPMU Version 2+. */
	if (guest_get_pmu_version() >= 2)
		nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);

	/*
	 * The supported bitmask for fixed counters was introduced in PMU
	 * version 5.
	 */
	if (guest_get_pmu_version() >= 5)
		supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);

	guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
			     nr_fixed_counters, supported_bitmask);

	for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
		uint8_t vector;
		uint64_t val;

		if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
			vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
					    FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
			__GUEST_ASSERT(vector == GP_VECTOR,
				       "Expected #GP for counter %u in FIXED_CTR_CTRL", i);

			vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
					    FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
			__GUEST_ASSERT(vector == GP_VECTOR,
				       "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
			continue;
		}

		wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
		__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
		val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);

		GUEST_ASSERT_NE(val, 0);
	}
	GUEST_DONE();
}

static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
				uint8_t nr_fixed_counters,
				uint32_t supported_bitmask)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
					 pmu_version, perf_capabilities);

	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
				supported_bitmask);
	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
				nr_fixed_counters);

	run_vcpu(vcpu);

	kvm_vm_free(vm);
}

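/*
 * Sweep all interesting combinations of PMU version, PERF_CAPABILITIES, arch
 * event bit-vector length and unavailable-events mask, and GP/fixed counter
 * counts, running the corresponding guest tests for each combination.
 */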
static void test_intel_counters(void)
{
	uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
	uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
	uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
	unsigned int i;
	uint8_t v, j;
	uint32_t k;

	const uint64_t perf_caps[] = {
		0,
		PMU_CAP_FW_WRITES,
	};

	/*
	 * Test up to PMU v5, which is the current maximum version defined by
	 * Intel, i.e. the last version that is guaranteed to be backwards
	 * compatible with KVM's existing behavior.
	 */
	uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);

	/*
	 * Detect the existence of events that aren't supported by selftests.
	 * This will (obviously) fail any time hardware adds support for a new
	 * event, but it's worth paying that price to keep the test fresh.
	 */
	TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS,
		    "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
		    this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH),
		    this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));

	/*
	 * Iterate over known arch events irrespective of KVM/hardware support
	 * to verify that KVM doesn't reject programming of events just because
	 * the *architectural* encoding is unsupported.  Track which events are
	 * supported in hardware; the guest side will validate that supported
	 * events count correctly, even if *enumeration* of the event is
	 * unsupported by KVM and/or isn't exposed to the guest.
	 */
	for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) {
		if (this_pmu_has(intel_event_to_feature(i).gp_event))
			hardware_pmu_arch_events |= BIT(i);
	}

	for (v = 0; v <= max_pmu_version; v++) {
		for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
			if (!kvm_has_perf_caps && perf_caps[i])
				continue;

			pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
				v, perf_caps[i]);
			/*
			 * To keep the total runtime reasonable, test every
			 * possible non-zero, non-reserved bitmap combination
			 * only with the native PMU version and the full bit
			 * vector length.
			 */
			if (v == pmu_version) {
				for (k = 1; k < (BIT(NR_INTEL_ARCH_EVENTS) - 1); k++)
					test_arch_events(v, perf_caps[i], NR_INTEL_ARCH_EVENTS, k);
			}
			/*
			 * Test single bits for all PMU versions and lengths up
			 * to the number of events + 1 (to verify KVM doesn't do
			 * weird things if the guest length is greater than the
			 * host length).  Explicitly test a mask of '0' and all
			 * ones, i.e. all events being available and
			 * unavailable, respectively.
			 */
			for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) {
				test_arch_events(v, perf_caps[i], j, 0);
				test_arch_events(v, perf_caps[i], j, 0xff);

				for (k = 0; k < NR_INTEL_ARCH_EVENTS; k++)
					test_arch_events(v, perf_caps[i], j, BIT(k));
			}

			pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
				v, perf_caps[i]);
			for (j = 0; j <= nr_gp_counters; j++)
				test_gp_counters(v, perf_caps[i], j);

			pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
				v, perf_caps[i]);
			for (j = 0; j <= nr_fixed_counters; j++) {
				for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
					test_fixed_counters(v, perf_caps[i], j, k);
			}
		}
	}
}

int main(int argc, char *argv[])
{
	TEST_REQUIRE(kvm_is_pmu_enabled());

	TEST_REQUIRE(host_cpu_is_intel);
	TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
	TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);

	kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
	kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);

	test_intel_counters();

	return 0;
}