1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2023, Tencent, Inc.
4  */
#include <string.h>
#include <x86intrin.h>

#include "pmu.h"
#include "processor.h"
9 
10 /* Number of iterations of the loop for the guest measurement payload. */
11 #define NUM_LOOPS			10
12 
13 /* Each iteration of the loop retires one branch instruction. */
14 #define NUM_BRANCH_INSNS_RETIRED	(NUM_LOOPS)
15 
/*
 * Number of instructions in each loop.  1 MOV, 1 CLFLUSH/CLFLUSHOPT/NOP,
 * 1 MFENCE, 1 LOOP.
 */
20 #define NUM_INSNS_PER_LOOP		4
21 
22 /*
23  * Number of "extra" instructions that will be counted, i.e. the number of
24  * instructions that are needed to set up the loop and then disable the
25  * counter.  2 MOV, 2 XOR, 1 WRMSR.
26  */
27 #define NUM_EXTRA_INSNS			5
28 
29 /* Total number of instructions retired within the measured section. */
30 #define NUM_INSNS_RETIRED		(NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
31 
32 /* Track which architectural events are supported by hardware. */
33 static uint32_t hardware_pmu_arch_events;
34 
35 static uint8_t kvm_pmu_version;
36 static bool kvm_has_perf_caps;
37 
38 #define X86_PMU_FEATURE_NULL						\
39 ({									\
40 	struct kvm_x86_pmu_feature feature = {};			\
41 									\
42 	feature;							\
43 })
44 
pmu_is_null_feature(struct kvm_x86_pmu_feature event)45 static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
46 {
47 	return !(*(u64 *)&event);
48 }
49 
50 struct kvm_intel_pmu_event {
51 	struct kvm_x86_pmu_feature gp_event;
52 	struct kvm_x86_pmu_feature fixed_event;
53 };
54 
55 /*
56  * Wrap the array to appease the compiler, as the macros used to construct each
57  * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the
58  * compiler often thinks the feature definitions aren't compile-time constants.
59  */
intel_event_to_feature(uint8_t idx)60 static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx)
61 {
62 	const struct kvm_intel_pmu_event __intel_event_to_feature[] = {
63 		[INTEL_ARCH_CPU_CYCLES_INDEX]		 = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
64 		[INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX]	 = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
65 		/*
66 		 * Note, the fixed counter for reference cycles is NOT the same as the
67 		 * general purpose architectural event.  The fixed counter explicitly
68 		 * counts at the same frequency as the TSC, whereas the GP event counts
69 		 * at a fixed, but uarch specific, frequency.  Bundle them here for
70 		 * simplicity.
71 		 */
72 		[INTEL_ARCH_REFERENCE_CYCLES_INDEX]	 = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
73 		[INTEL_ARCH_LLC_REFERENCES_INDEX]	 = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
74 		[INTEL_ARCH_LLC_MISSES_INDEX]		 = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
75 		[INTEL_ARCH_BRANCHES_RETIRED_INDEX]	 = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
76 		[INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
77 		[INTEL_ARCH_TOPDOWN_SLOTS_INDEX]	 = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
78 	};
79 
80 	kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS);
81 
82 	return __intel_event_to_feature[idx];
83 }
84 
pmu_vm_create_with_one_vcpu(struct kvm_vcpu ** vcpu,void * guest_code,uint8_t pmu_version,uint64_t perf_capabilities)85 static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
86 						  void *guest_code,
87 						  uint8_t pmu_version,
88 						  uint64_t perf_capabilities)
89 {
90 	struct kvm_vm *vm;
91 
92 	vm = vm_create_with_one_vcpu(vcpu, guest_code);
93 	sync_global_to_guest(vm, kvm_pmu_version);
94 	sync_global_to_guest(vm, hardware_pmu_arch_events);
95 
96 	/*
97 	 * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
98 	 * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
99 	 */
100 	if (kvm_has_perf_caps)
101 		vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
102 
103 	vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
104 	return vm;
105 }
106 
run_vcpu(struct kvm_vcpu * vcpu)107 static void run_vcpu(struct kvm_vcpu *vcpu)
108 {
109 	struct ucall uc;
110 
111 	do {
112 		vcpu_run(vcpu);
113 		switch (get_ucall(vcpu, &uc)) {
114 		case UCALL_SYNC:
115 			break;
116 		case UCALL_ABORT:
117 			REPORT_GUEST_ASSERT(uc);
118 			break;
119 		case UCALL_PRINTF:
120 			pr_info("%s", uc.buffer);
121 			break;
122 		case UCALL_DONE:
123 			break;
124 		default:
125 			TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
126 		}
127 	} while (uc.cmd != UCALL_DONE);
128 }
129 
guest_get_pmu_version(void)130 static uint8_t guest_get_pmu_version(void)
131 {
132 	/*
133 	 * Return the effective PMU version, i.e. the minimum between what KVM
134 	 * supports and what is enumerated to the guest.  The host deliberately
135 	 * advertises a PMU version to the guest beyond what is actually
136 	 * supported by KVM to verify KVM doesn't freak out and do something
137 	 * bizarre with an architecturally valid, but unsupported, version.
138 	 */
139 	return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
140 }
141 
142 /*
143  * If an architectural event is supported and guaranteed to generate at least
 * one "hit", assert that its count is non-zero.  If an event isn't supported or
145  * the test can't guarantee the associated action will occur, then all bets are
146  * off regarding the count, i.e. no checks can be done.
147  *
148  * Sanity check that in all cases, the event doesn't count when it's disabled,
149  * and that KVM correctly emulates the write of an arbitrary value.
150  */
guest_assert_event_count(uint8_t idx,uint32_t pmc,uint32_t pmc_msr)151 static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr)
152 {
153 	uint64_t count;
154 
155 	count = _rdpmc(pmc);
156 	if (!(hardware_pmu_arch_events & BIT(idx)))
157 		goto sanity_checks;
158 
159 	switch (idx) {
160 	case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
161 		GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
162 		break;
163 	case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
164 		GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
165 		break;
166 	case INTEL_ARCH_LLC_REFERENCES_INDEX:
167 	case INTEL_ARCH_LLC_MISSES_INDEX:
168 		if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
169 		    !this_cpu_has(X86_FEATURE_CLFLUSH))
170 			break;
171 		fallthrough;
172 	case INTEL_ARCH_CPU_CYCLES_INDEX:
173 	case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
174 		GUEST_ASSERT_NE(count, 0);
175 		break;
176 	case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
177 		__GUEST_ASSERT(count >= NUM_INSNS_RETIRED,
178 			       "Expected top-down slots >= %u, got count = %lu",
179 			       NUM_INSNS_RETIRED, count);
180 		break;
181 	default:
182 		break;
183 	}
184 
185 sanity_checks:
186 	__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
187 	GUEST_ASSERT_EQ(_rdpmc(pmc), count);
188 
189 	wrmsr(pmc_msr, 0xdead);
190 	GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
191 }
192 
193 /*
194  * Enable and disable the PMC in a monolithic asm blob to ensure that the
195  * compiler can't insert _any_ code into the measured sequence.  Note, ECX
196  * doesn't need to be clobbered as the input value, @pmc_msr, is restored
197  * before the end of the sequence.
198  *
 * If CLFLUSH{,OPT} is supported, flush the cacheline containing (at least) the
 * CLFLUSH{,OPT} instruction on each loop iteration to force LLC references and
201  * misses, i.e. to allow testing that those events actually count.
202  *
203  * If forced emulation is enabled (and specified), force emulation on a subset
204  * of the measured code to verify that KVM correctly emulates instructions and
205  * branches retired events in conjunction with hardware also counting said
206  * events.
207  */
208 #define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP)				\
209 do {										\
210 	__asm__ __volatile__("wrmsr\n\t"					\
211 			     " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t"	\
212 			     "1:\n\t"						\
213 			     clflush "\n\t"					\
214 			     "mfence\n\t"					\
215 			     "mov %[m], %%eax\n\t"				\
216 			     FEP "loop 1b\n\t"					\
217 			     FEP "mov %%edi, %%ecx\n\t"				\
218 			     FEP "xor %%eax, %%eax\n\t"				\
219 			     FEP "xor %%edx, %%edx\n\t"				\
220 			     "wrmsr\n\t"					\
221 			     :: "a"((uint32_t)_value), "d"(_value >> 32),	\
222 				"c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version)	\
223 	);									\
224 } while (0)
225 
226 #define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP)		\
227 do {										\
228 	wrmsr(_pmc_msr, 0);							\
229 										\
230 	if (this_cpu_has(X86_FEATURE_CLFLUSHOPT))				\
231 		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP);	\
232 	else if (this_cpu_has(X86_FEATURE_CLFLUSH))				\
233 		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush  %[m]", FEP);	\
234 	else									\
235 		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP);		\
236 										\
237 	guest_assert_event_count(_idx, _pmc, _pmc_msr);				\
238 } while (0)
239 
__guest_test_arch_event(uint8_t idx,uint32_t pmc,uint32_t pmc_msr,uint32_t ctrl_msr,uint64_t ctrl_msr_value)240 static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr,
241 				    uint32_t ctrl_msr, uint64_t ctrl_msr_value)
242 {
243 	GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
244 
245 	if (is_forced_emulation_enabled)
246 		GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
247 }
248 
guest_test_arch_event(uint8_t idx)249 static void guest_test_arch_event(uint8_t idx)
250 {
251 	uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
252 	uint32_t pmu_version = guest_get_pmu_version();
253 	/* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
254 	bool guest_has_perf_global_ctrl = pmu_version >= 2;
255 	struct kvm_x86_pmu_feature gp_event, fixed_event;
256 	uint32_t base_pmc_msr;
257 	unsigned int i;
258 
259 	/* The host side shouldn't invoke this without a guest PMU. */
260 	GUEST_ASSERT(pmu_version);
261 
262 	if (this_cpu_has(X86_FEATURE_PDCM) &&
263 	    rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
264 		base_pmc_msr = MSR_IA32_PMC0;
265 	else
266 		base_pmc_msr = MSR_IA32_PERFCTR0;
267 
268 	gp_event = intel_event_to_feature(idx).gp_event;
269 	GUEST_ASSERT_EQ(idx, gp_event.f.bit);
270 
271 	GUEST_ASSERT(nr_gp_counters);
272 
273 	for (i = 0; i < nr_gp_counters; i++) {
274 		uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
275 				    ARCH_PERFMON_EVENTSEL_ENABLE |
276 				    intel_pmu_arch_events[idx];
277 
278 		wrmsr(MSR_P6_EVNTSEL0 + i, 0);
279 		if (guest_has_perf_global_ctrl)
280 			wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
281 
282 		__guest_test_arch_event(idx, i, base_pmc_msr + i,
283 					MSR_P6_EVNTSEL0 + i, eventsel);
284 	}
285 
286 	if (!guest_has_perf_global_ctrl)
287 		return;
288 
289 	fixed_event = intel_event_to_feature(idx).fixed_event;
290 	if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
291 		return;
292 
293 	i = fixed_event.f.bit;
294 
295 	wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
296 
297 	__guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED,
298 				MSR_CORE_PERF_FIXED_CTR0 + i,
299 				MSR_CORE_PERF_GLOBAL_CTRL,
300 				FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
301 }
302 
guest_test_arch_events(void)303 static void guest_test_arch_events(void)
304 {
305 	uint8_t i;
306 
307 	for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
308 		guest_test_arch_event(i);
309 
310 	GUEST_DONE();
311 }
312 
test_arch_events(uint8_t pmu_version,uint64_t perf_capabilities,uint8_t length,uint8_t unavailable_mask)313 static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
314 			     uint8_t length, uint8_t unavailable_mask)
315 {
316 	struct kvm_vcpu *vcpu;
317 	struct kvm_vm *vm;
318 
319 	/* Testing arch events requires a vPMU (there are no negative tests). */
320 	if (!pmu_version)
321 		return;
322 
323 	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
324 					 pmu_version, perf_capabilities);
325 
326 	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
327 				length);
328 	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
329 				unavailable_mask);
330 
331 	run_vcpu(vcpu);
332 
333 	kvm_vm_free(vm);
334 }
335 
336 /*
337  * Limit testing to MSRs that are actually defined by Intel (in the SDM).  MSRs
338  * that aren't defined counter MSRs *probably* don't exist, but there's no
339  * guarantee that currently undefined MSR indices won't be used for something
340  * other than PMCs in the future.
341  */
342 #define MAX_NR_GP_COUNTERS	8
343 #define MAX_NR_FIXED_COUNTERS	3
344 
345 #define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector)		\
346 __GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector,			\
347 	       "Expected %s on " #insn "(0x%x), got vector %u",			\
348 	       expect_gp ? "#GP" : "no fault", msr, vector)			\
349 
350 #define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected)			\
351 	__GUEST_ASSERT(val == expected,					\
352 		       "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx",	\
353 		       msr, expected, val);
354 
guest_test_rdpmc(uint32_t rdpmc_idx,bool expect_success,uint64_t expected_val)355 static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
356 			     uint64_t expected_val)
357 {
358 	uint8_t vector;
359 	uint64_t val;
360 
361 	vector = rdpmc_safe(rdpmc_idx, &val);
362 	GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
363 	if (expect_success)
364 		GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
365 
366 	if (!is_forced_emulation_enabled)
367 		return;
368 
369 	vector = rdpmc_safe_fep(rdpmc_idx, &val);
370 	GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
371 	if (expect_success)
372 		GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
373 }
374 
guest_rd_wr_counters(uint32_t base_msr,uint8_t nr_possible_counters,uint8_t nr_counters,uint32_t or_mask)375 static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
376 				 uint8_t nr_counters, uint32_t or_mask)
377 {
378 	const bool pmu_has_fast_mode = !guest_get_pmu_version();
379 	uint8_t i;
380 
381 	for (i = 0; i < nr_possible_counters; i++) {
382 		/*
383 		 * TODO: Test a value that validates full-width writes and the
384 		 * width of the counters.
385 		 */
386 		const uint64_t test_val = 0xffff;
387 		const uint32_t msr = base_msr + i;
388 
389 		/*
390 		 * Fixed counters are supported if the counter is less than the
391 		 * number of enumerated contiguous counters *or* the counter is
392 		 * explicitly enumerated in the supported counters mask.
393 		 */
394 		const bool expect_success = i < nr_counters || (or_mask & BIT(i));
395 
396 		/*
397 		 * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
398 		 * unsupported, i.e. doesn't #GP and reads back '0'.
399 		 */
400 		const uint64_t expected_val = expect_success ? test_val : 0;
401 		const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
402 				       msr != MSR_P6_PERFCTR1;
403 		uint32_t rdpmc_idx;
404 		uint8_t vector;
405 		uint64_t val;
406 
407 		vector = wrmsr_safe(msr, test_val);
408 		GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
409 
410 		vector = rdmsr_safe(msr, &val);
411 		GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);
412 
413 		/* On #GP, the result of RDMSR is undefined. */
414 		if (!expect_gp)
415 			GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);
416 
417 		/*
418 		 * Redo the read tests with RDPMC, which has different indexing
419 		 * semantics and additional capabilities.
420 		 */
421 		rdpmc_idx = i;
422 		if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
423 			rdpmc_idx |= INTEL_RDPMC_FIXED;
424 
425 		guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);
426 
427 		/*
428 		 * KVM doesn't support non-architectural PMUs, i.e. it should
429 		 * impossible to have fast mode RDPMC.  Verify that attempting
430 		 * to use fast RDPMC always #GPs.
431 		 */
432 		GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
433 		rdpmc_idx |= INTEL_RDPMC_FAST;
434 		guest_test_rdpmc(rdpmc_idx, false, -1ull);
435 
436 		vector = wrmsr_safe(msr, 0);
437 		GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
438 	}
439 }
440 
guest_test_gp_counters(void)441 static void guest_test_gp_counters(void)
442 {
443 	uint8_t pmu_version = guest_get_pmu_version();
444 	uint8_t nr_gp_counters = 0;
445 	uint32_t base_msr;
446 
447 	if (pmu_version)
448 		nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
449 
450 	/*
451 	 * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is
452 	 * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number
453 	 * of GP counters.  If there are no GP counters, require KVM to leave
454 	 * PERF_GLOBAL_CTRL '0'.  This edge case isn't covered by the SDM, but
455 	 * follow the spirit of the architecture and only globally enable GP
456 	 * counters, of which there are none.
457 	 */
458 	if (pmu_version > 1) {
459 		uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);
460 
461 		if (nr_gp_counters)
462 			GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
463 		else
464 			GUEST_ASSERT_EQ(global_ctrl, 0);
465 	}
466 
467 	if (this_cpu_has(X86_FEATURE_PDCM) &&
468 	    rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
469 		base_msr = MSR_IA32_PMC0;
470 	else
471 		base_msr = MSR_IA32_PERFCTR0;
472 
473 	guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
474 	GUEST_DONE();
475 }
476 
test_gp_counters(uint8_t pmu_version,uint64_t perf_capabilities,uint8_t nr_gp_counters)477 static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
478 			     uint8_t nr_gp_counters)
479 {
480 	struct kvm_vcpu *vcpu;
481 	struct kvm_vm *vm;
482 
483 	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
484 					 pmu_version, perf_capabilities);
485 
486 	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
487 				nr_gp_counters);
488 
489 	run_vcpu(vcpu);
490 
491 	kvm_vm_free(vm);
492 }
493 
guest_test_fixed_counters(void)494 static void guest_test_fixed_counters(void)
495 {
496 	uint64_t supported_bitmask = 0;
497 	uint8_t nr_fixed_counters = 0;
498 	uint8_t i;
499 
500 	/* Fixed counters require Architectural vPMU Version 2+. */
501 	if (guest_get_pmu_version() >= 2)
502 		nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
503 
504 	/*
505 	 * The supported bitmask for fixed counters was introduced in PMU
506 	 * version 5.
507 	 */
508 	if (guest_get_pmu_version() >= 5)
509 		supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);
510 
511 	guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
512 			     nr_fixed_counters, supported_bitmask);
513 
514 	for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
515 		uint8_t vector;
516 		uint64_t val;
517 
518 		if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
519 			vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
520 					    FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
521 			__GUEST_ASSERT(vector == GP_VECTOR,
522 				       "Expected #GP for counter %u in FIXED_CTR_CTRL", i);
523 
524 			vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
525 					    FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
526 			__GUEST_ASSERT(vector == GP_VECTOR,
527 				       "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
528 			continue;
529 		}
530 
531 		wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
532 		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
533 		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
534 		__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
535 		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
536 		val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);
537 
538 		GUEST_ASSERT_NE(val, 0);
539 	}
540 	GUEST_DONE();
541 }
542 
test_fixed_counters(uint8_t pmu_version,uint64_t perf_capabilities,uint8_t nr_fixed_counters,uint32_t supported_bitmask)543 static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
544 				uint8_t nr_fixed_counters,
545 				uint32_t supported_bitmask)
546 {
547 	struct kvm_vcpu *vcpu;
548 	struct kvm_vm *vm;
549 
550 	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
551 					 pmu_version, perf_capabilities);
552 
553 	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
554 				supported_bitmask);
555 	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
556 				nr_fixed_counters);
557 
558 	run_vcpu(vcpu);
559 
560 	kvm_vm_free(vm);
561 }
562 
test_intel_counters(void)563 static void test_intel_counters(void)
564 {
565 	uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
566 	uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
567 	uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
568 	unsigned int i;
569 	uint8_t v, j;
570 	uint32_t k;
571 
572 	const uint64_t perf_caps[] = {
573 		0,
574 		PMU_CAP_FW_WRITES,
575 	};
576 
577 	/*
578 	 * Test up to PMU v5, which is the current maximum version defined by
579 	 * Intel, i.e. is the last version that is guaranteed to be backwards
580 	 * compatible with KVM's existing behavior.
581 	 */
582 	uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
583 
584 	/*
585 	 * Detect the existence of events that aren't supported by selftests.
586 	 * This will (obviously) fail any time hardware adds support for a new
587 	 * event, but it's worth paying that price to keep the test fresh.
588 	 */
589 	TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS,
590 		    "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
591 		    this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH),
592 		    this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
593 
594 	/*
595 	 * Iterate over known arch events irrespective of KVM/hardware support
596 	 * to verify that KVM doesn't reject programming of events just because
597 	 * the *architectural* encoding is unsupported.  Track which events are
598 	 * supported in hardware; the guest side will validate supported events
599 	 * count correctly, even if *enumeration* of the event is unsupported
600 	 * by KVM and/or isn't exposed to the guest.
601 	 */
602 	for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) {
603 		if (this_pmu_has(intel_event_to_feature(i).gp_event))
604 			hardware_pmu_arch_events |= BIT(i);
605 	}
606 
607 	for (v = 0; v <= max_pmu_version; v++) {
608 		for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
609 			if (!kvm_has_perf_caps && perf_caps[i])
610 				continue;
611 
612 			pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
613 				v, perf_caps[i]);
614 			/*
615 			 * To keep the total runtime reasonable, test every
616 			 * possible non-zero, non-reserved bitmap combination
617 			 * only with the native PMU version and the full bit
618 			 * vector length.
619 			 */
620 			if (v == pmu_version) {
621 				for (k = 1; k < (BIT(NR_INTEL_ARCH_EVENTS) - 1); k++)
622 					test_arch_events(v, perf_caps[i], NR_INTEL_ARCH_EVENTS, k);
623 			}
624 			/*
625 			 * Test single bits for all PMU version and lengths up
626 			 * the number of events +1 (to verify KVM doesn't do
627 			 * weird things if the guest length is greater than the
628 			 * host length).  Explicitly test a mask of '0' and all
629 			 * ones i.e. all events being available and unavailable.
630 			 */
631 			for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) {
632 				test_arch_events(v, perf_caps[i], j, 0);
633 				test_arch_events(v, perf_caps[i], j, 0xff);
634 
635 				for (k = 0; k < NR_INTEL_ARCH_EVENTS; k++)
636 					test_arch_events(v, perf_caps[i], j, BIT(k));
637 			}
638 
639 			pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
640 				v, perf_caps[i]);
641 			for (j = 0; j <= nr_gp_counters; j++)
642 				test_gp_counters(v, perf_caps[i], j);
643 
644 			pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
645 				v, perf_caps[i]);
646 			for (j = 0; j <= nr_fixed_counters; j++) {
647 				for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
648 					test_fixed_counters(v, perf_caps[i], j, k);
649 			}
650 		}
651 	}
652 }
653 
main(int argc,char * argv[])654 int main(int argc, char *argv[])
655 {
656 	TEST_REQUIRE(kvm_is_pmu_enabled());
657 
658 	TEST_REQUIRE(host_cpu_is_intel);
659 	TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
660 	TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
661 
662 	kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
663 	kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);
664 
665 	test_intel_counters();
666 
667 	return 0;
668 }
669