
#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/pmu.h"
#include "x86/apic-defs.h"
#include "x86/apic.h"
#include "x86/desc.h"
#include "x86/isr.h"
#include "vmalloc.h"
#include "alloc.h"

#include "libcflat.h"
#include <stdint.h>

#define N 1000000

// These values match the number of instructions and branches in the
// assembly block in check_emulated_instr().
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

typedef struct {
	uint32_t ctr;
	uint32_t idx;
	uint64_t config;
	uint64_t count;
} pmu_counter_t;

struct pmu_event {
	const char *name;
	uint32_t unit_sel;
	int min;
	int max;
} intel_gp_events[] = {
	{"core cycles", 0x003c, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"ref cycles", 0x013c, 1*N, 30*N},
	{"llc references", 0x4f2e, 1, 2*N},
	{"llc misses", 0x412e, 1, 1*N},
	{"branches", 0x00c4, 1*N, 1.1*N},
	{"branch misses", 0x00c5, 0, 0.1*N},
}, amd_gp_events[] = {
	{"core cycles", 0x0076, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"branches", 0x00c2, 1*N, 1.1*N},
	{"branch misses", 0x00c3, 0, 0.1*N},
}, fixed_events[] = {
	{"fixed 0", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
};

/*
 * Indices into intel_gp_events[]; keep them consistent with the table
 * above.
 */
enum {
	INTEL_BRANCHES_IDX	= 5,
};

/*
 * Indices into amd_gp_events[]; keep them consistent with the table
 * above.
 */
enum {
	AMD_BRANCHES_IDX	= 2,
};

char *buf;

static struct pmu_event *gp_events;
static unsigned int gp_events_size;
static unsigned int fixed_counters_num;

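/*
 * Measured workload: N iterations of a tight loop that does one 64-byte
 * stride load from buf, a pointer update, seven NOPs and the LOOP branch,
 * i.e. ten instructions and one taken branch per iteration.  The min/max
 * bounds in the event tables above are expressed against this loop.
 */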
static inline void loop(void)
{
	unsigned long tmp, tmp2, tmp3;

	asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b"
			: "=c"(tmp), "=r"(tmp2), "=r"(tmp3): "0"(N), "1"(buf));
}

volatile uint64_t irq_received;

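/*
 * PMI handler: count the interrupt, re-enable the LVT PMC entry (delivery of
 * a PMI sets its mask bit) and signal EOI so further overflow interrupts can
 * be delivered.
 */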
static void cnt_overflow(isr_regs_t *regs)
{
	irq_received++;
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
	apic_write(APIC_EOI, 0);
}

static bool check_irq(void)
{
	int i;
	irq_received = 0;
	sti();
	for (i = 0; i < 100000 && !irq_received; i++)
		asm volatile("pause");
	cli();
	return irq_received;
}

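/*
 * AMD has no fixed counters; on Intel, anything outside the fixed-counter
 * MSR range counts as a general-purpose counter.
 */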
static bool is_gp(pmu_counter_t *evt)
{
	if (!pmu.is_intel)
		return true;

	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
		evt->ctr >= MSR_IA32_PMC0;
}

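/*
 * Map a counter MSR to its bit index in the global control/status MSRs.
 * Intel fixed counters start at FIXED_CNT_INDEX; AMD's F15H MSR space
 * interleaves event selects and counters, hence the divide by two.
 */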
static int event_to_global_idx(pmu_counter_t *cnt)
{
	if (pmu.is_intel)
		return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
			(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));

	if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0)
		return (cnt->ctr - pmu.msr_gp_counter_base) / 2;
	else
		return cnt->ctr - pmu.msr_gp_counter_base;
}

static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
{
	if (is_gp(cnt)) {
		int i;

		for (i = 0; i < gp_events_size; i++)
			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
				return &gp_events[i];
	} else {
		unsigned int idx = cnt->ctr - MSR_CORE_PERF_FIXED_CTR0;

		if (idx < ARRAY_SIZE(fixed_events))
			return &fixed_events[idx];
	}

	return (void*)0;
}

static void global_enable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	cnt->idx = event_to_global_idx(cnt);
	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx));
}

static void global_disable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx));
}

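/*
 * Load a counter with an initial value and enable it: GP counters via their
 * event select MSR, fixed counters via their 4-bit field in
 * MSR_CORE_PERF_FIXED_CTR_CTRL (bit 0 = OS, bit 1 = USR, bit 3 = PMI).
 */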
static void __start_event(pmu_counter_t *evt, uint64_t count)
{
	evt->count = count;
	wrmsr(evt->ctr, evt->count);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config | EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		uint32_t usrospmi = 0;

		if (evt->config & EVNTSEL_OS)
			usrospmi |= (1 << 0);
		if (evt->config & EVNTSEL_USR)
			usrospmi |= (1 << 1);
		if (evt->config & EVNTSEL_INT)
			usrospmi |= (1 << 3); // PMI on overflow
		ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
	}
	global_enable(evt);
	apic_write(APIC_LVTPC, PMI_VECTOR);
}


static void start_event(pmu_counter_t *evt)
{
	__start_event(evt, 0);
}

static void stop_event(pmu_counter_t *evt)
{
	global_disable(evt);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config & ~EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
	}
	evt->count = rdmsr(evt->ctr);
}

static noinline void measure_many(pmu_counter_t *evt, int count)
{
	int i;
	for (i = 0; i < count; i++)
		start_event(&evt[i]);
	loop();
	for (i = 0; i < count; i++)
		stop_event(&evt[i]);
}

static void measure_one(pmu_counter_t *evt)
{
	measure_many(evt, 1);
}

static noinline void __measure(pmu_counter_t *evt, uint64_t count)
{
	__start_event(evt, count);
	loop();
	stop_event(evt);
}

static bool verify_event(uint64_t count, struct pmu_event *e)
{
	bool pass;

	if (!e)
		return false;

	pass = count >= e->min && count <= e->max;
	if (!pass)
		printf("FAIL: %d <= %"PRId64" <= %d\n", e->min, count, e->max);

	return pass;
}

static bool verify_counter(pmu_counter_t *cnt)
{
	return verify_event(cnt->count, get_counter_event(cnt));
}

static void check_gp_counter(struct pmu_event *evt)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
	};
	int i;

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);
		measure_one(&cnt);
		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
	}
}

static void check_gp_counters(void)
{
	int i;

	for (i = 0; i < gp_events_size; i++)
		if (pmu_gp_counter_is_available(i))
			check_gp_counter(&gp_events[i]);
		else
			printf("GP event '%s' is disabled\n",
					gp_events[i].name);
}

static void check_fixed_counters(void)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR,
	};
	int i;

	for (i = 0; i < fixed_counters_num; i++) {
		cnt.ctr = fixed_events[i].unit_sel;
		measure_one(&cnt);
		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
	}
}

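/*
 * Program every available GP counter and every fixed counter at once and
 * verify that they all count concurrently.
 */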
static void check_counters_many(void)
{
	pmu_counter_t cnt[48];
	int i, n;

	for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) {
		if (!pmu_gp_counter_is_available(i))
			continue;

		cnt[n].ctr = MSR_GP_COUNTERx(n);
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
			gp_events[i % gp_events_size].unit_sel;
		n++;
	}
	for (i = 0; i < fixed_counters_num; i++) {
		cnt[n].ctr = fixed_events[i].unit_sel;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
		n++;
	}

	assert(n <= ARRAY_SIZE(cnt));
	measure_many(cnt, n);

	for (i = 0; i < n; i++)
		if (!verify_counter(&cnt[i]))
			break;

	report(i == n, "all counters");
}

static uint64_t measure_for_overflow(pmu_counter_t *cnt)
{
	__measure(cnt, 0);
	/*
	 * To generate overflow, i.e. roll over to '0', the initial count just
	 * needs to be preset to the negative expected count.  However, as per
	 * Intel's SDM, the preset count needs to be incremented by 1 to ensure
	 * the overflow interrupt is generated immediately instead of possibly
	 * waiting for the overflow to propagate through the counter.
	 */
	assert(cnt->count > 1);
	return 1 - cnt->count;
}

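/*
 * For each GP counter (plus fixed counter 0 on Intel), preset the counter so
 * the measured workload makes it overflow, then verify the final count, the
 * global status and status-clear bits, and that a PMI is delivered only on
 * the iterations that set EVNTSEL_INT.
 */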
static void check_counter_overflow(void)
{
	uint64_t overflow_preset;
	int i;
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};
	overflow_preset = measure_for_overflow(&cnt);

	/* clear status before test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	report_prefix_push("overflow");

	for (i = 0; i < pmu.nr_gp_counters + 1; i++) {
		uint64_t status;
		int idx;

		cnt.count = overflow_preset;
		if (pmu_use_full_writes())
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;

		if (i == pmu.nr_gp_counters) {
			if (!pmu.is_intel)
				break;

			cnt.ctr = fixed_events[0].unit_sel;
			cnt.count = measure_for_overflow(&cnt);
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
		} else {
			cnt.ctr = MSR_GP_COUNTERx(i);
		}

		if (i % 2)
			cnt.config |= EVNTSEL_INT;
		else
			cnt.config &= ~EVNTSEL_INT;
		idx = event_to_global_idx(&cnt);
		__measure(&cnt, cnt.count);
		if (pmu.is_intel)
			report(cnt.count == 1, "cntr-%d", i);
		else
			report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i);

		if (!this_cpu_has_perf_global_status())
			continue;

		status = rdmsr(pmu.msr_global_status);
		report(status & (1ull << idx), "status-%d", i);
		wrmsr(pmu.msr_global_status_clr, status);
		status = rdmsr(pmu.msr_global_status);
		report(!(status & (1ull << idx)), "status clear-%d", i);
		report(check_irq() == (i % 2), "irq-%d", i);
	}

	report_prefix_pop();
}

static void check_gp_counter_cmask(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};
	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
	measure_one(&cnt);
	report(cnt.count < gp_events[1].min, "cmask");
}

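/*
 * RDPMC with bit 31 set in ECX requests a "fast" read that returns only the
 * low 32 bits; bit 30 selects the fixed-counter range.  Run via
 * test_for_exception() since CPUs without fast-read support raise #GP.
 */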
static void do_rdpmc_fast(void *ptr)
{
	pmu_counter_t *cnt = ptr;
	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);

	if (!is_gp(cnt))
		idx |= 1 << 30;

	cnt->count = rdpmc(idx);
}

static void check_rdpmc(void)
{
	uint64_t val = 0xff0123456789ull;
	bool exc;
	int i;

	report_prefix_push("rdpmc");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		uint64_t x;
		pmu_counter_t cnt = {
			.ctr = MSR_GP_COUNTERx(i),
			.idx = i
		};

		/*
		 * Without full-width writes, only the low 32 bits are writable,
		 * and the value is sign-extended.
		 */
		if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0)
			x = (uint64_t)(int64_t)(int32_t)val;
		else
			x = (uint64_t)(int64_t)val;

		/* Mask according to the number of supported bits */
		x &= (1ull << pmu.gp_counter_width) - 1;

		wrmsr(MSR_GP_COUNTERx(i), val);
		report(rdpmc(i) == x, "cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fast-%d", i);
		else
			report(cnt.count == (u32)val, "fast-%d", i);
	}
	for (i = 0; i < fixed_counters_num; i++) {
		uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1);
		pmu_counter_t cnt = {
			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
			.idx = i
		};

		wrmsr(MSR_PERF_FIXED_CTRx(i), x);
		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fixed fast-%d", i);
		else
			report(cnt.count == (u32)x, "fixed fast-%d", i);
	}

	report_prefix_pop();
}

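/*
 * Writes to a running counter must take effect: zeroing it mid-run keeps the
 * final count small, and loading a near-overflow value while it is running
 * sets the corresponding global overflow status bit.
 */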
static void check_running_counter_wrmsr(void)
{
	uint64_t status;
	uint64_t count;
	pmu_counter_t evt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};

	report_prefix_push("running counter wrmsr");

	start_event(&evt);
	loop();
	wrmsr(MSR_GP_COUNTERx(0), 0);
	stop_event(&evt);
	report(evt.count < gp_events[1].min, "cntr");

	/* clear status before overflow test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&evt);

	count = -1;
	if (pmu_use_full_writes())
		count &= (1ull << pmu.gp_counter_width) - 1;

	wrmsr(MSR_GP_COUNTERx(0), count);

	loop();
	stop_event(&evt);

	if (this_cpu_has_perf_global_status()) {
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "status msr bit");
	}

	report_prefix_pop();
}

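/*
 * Count instructions and branches across a block that mixes natively
 * executed instructions with KVM_FEP-forced emulation, and verify that both
 * counters advance by at least the expected amounts and overflow.
 */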
static void check_emulated_instr(void)
{
	uint64_t status, instr_start, brnch_start;
	uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1;
	unsigned int branch_idx = pmu.is_intel ?
				  INTEL_BRANCHES_IDX : AMD_BRANCHES_IDX;
	pmu_counter_t brnch_cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		/* branch instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel,
	};
	pmu_counter_t instr_cnt = {
		.ctr = MSR_GP_COUNTERx(1),
		/* instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};
	report_prefix_push("emulated instruction");

	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&brnch_cnt);
	start_event(&instr_cnt);

	brnch_start = -EXPECTED_BRNCH;
	instr_start = -EXPECTED_INSTR;
	wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width);
	wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width);
	// KVM_FEP is a magic prefix that forces emulation so
	// 'KVM_FEP "jne label\n"' just counts as a single instruction.
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	stop_event(&brnch_cnt);
	stop_event(&instr_cnt);

	// Check that the end count - start count is at least the expected
	// number of instructions and branches.
	report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
	       "instruction count");
	report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
	       "branch count");
	if (this_cpu_has_perf_global_status()) {
		// Additionally check that those counters overflowed properly.
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "branch counter overflow");
		report(status & 2, "instruction counter overflow");
	}

	report_prefix_pop();
}

#define XBEGIN_STARTED (~0u)
static void check_tsx_cycles(void)
{
	pmu_counter_t cnt;
	unsigned int i, ret = 0;

	if (!this_cpu_has(X86_FEATURE_RTM))
		return;

	report_prefix_push("TSX cycles");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);

		if (i == 2) {
			/* Transactional cycles committed, supported only on GP counter 2 */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x30000003c;
		} else {
			/* Transactional cycles */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x10000003c;
		}

		start_event(&cnt);

		asm volatile("xbegin 1f\n\t"
				"1:\n\t"
				: "+a" (ret) :: "memory");

		/* Generate a non-canonical #GP to trigger ABORT. */
		if (ret == XBEGIN_STARTED)
			*(int *)NONCANONICAL = 0;

		stop_event(&cnt);

		report(cnt.count > 0, "gp cntr-%d with a value of %" PRId64 "", i, cnt.count);
	}

	report_prefix_pop();
}

static void warm_up(void)
{
	int i;

	/*
	 * The cycles event is always programmed first, so the first
	 * measurement also pays for warming up the cache.  That extra work
	 * can push the measured cycles value above the predefined upper
	 * bound and cause a false positive.  To avoid this, run the loop a
	 * few times before the real verification.
	 */
	for (i = 0; i < 10; i++)
		loop();
}

static void check_counters(void)
{
	if (is_fep_available())
		check_emulated_instr();

	warm_up();
	check_gp_counters();
	check_fixed_counters();
	check_rdpmc();
	check_counters_many();
	check_counter_overflow();
	check_gp_counter_cmask();
	check_running_counter_wrmsr();
	check_tsx_cycles();
}

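/*
 * With full-width writes enabled, verify that the legacy MSR_IA32_PERFCTRn
 * aliases retain only the low 32 bits of a write, while MSR_IA32_PMCn
 * accepts values up to the enumerated counter width and #GPs on wider ones.
 */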
static void do_unsupported_width_counter_write(void *index)
{
	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
}

static void check_gp_counters_write_width(void)
{
	u64 val_64 = 0xffffff0123456789ull;
	u64 val_32 = val_64 & ((1ull << 32) - 1);
	u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1);
	int i;

	/*
	 * MSR_IA32_PERFCTRn supports 64-bit writes,
	 * but only the lowest 32 bits are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
	}

	/*
	 * MSR_IA32_PMCn supports writes up to the full GP counter width;
	 * only the lowest gp_counter_width bits are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PMC0 + i, val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);

		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);

		report(test_for_exception(GP_VECTOR,
			do_unsupported_width_counter_write, &i),
		"writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
	}
}

/*
 * Per the SDM, reference cycles are currently implemented using the
 * core crystal clock, TSC, or bus clock. Calibrate to the TSC
 * frequency to set reasonable expectations.
 */
static void set_ref_cycle_expectations(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_IA32_PERFCTR0,
		.config = EVNTSEL_OS | EVNTSEL_USR | intel_gp_events[2].unit_sel,
	};
	uint64_t tsc_delta;
	uint64_t t0, t1, t2, t3;

	/* Bit 2 enumerates the availability of reference cycles events. */
	if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2))
		return;

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	t0 = fenced_rdtsc();
	start_event(&cnt);
	t1 = fenced_rdtsc();

	/*
	 * This loop has to run long enough to dominate the VM-exit
	 * costs for playing with the PMU MSRs on start and stop.
	 *
	 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
	 * the core crystal clock, this function calculated a guest
	 * TSC : ref cycles ratio of around 105 with ECX initialized
	 * to one billion.
	 */
	asm volatile("loop ." : "+c"((int){1000000000ull}));

	t2 = fenced_rdtsc();
	stop_event(&cnt);
	t3 = fenced_rdtsc();

	tsc_delta = ((t2 - t1) + (t3 - t0)) / 2;

	if (!tsc_delta)
		return;

	intel_gp_events[2].min = (intel_gp_events[2].min * cnt.count) / tsc_delta;
	intel_gp_events[2].max = (intel_gp_events[2].max * cnt.count) / tsc_delta;
}

static void check_invalid_rdpmc_gp(void)
{
	uint64_t val;

	report(rdpmc_safe(64, &val) == GP_VECTOR,
	       "Expected #GP on RDPMC(64)");
}

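/*
 * Run the full suite up to three ways: with the default counter MSRs, with
 * full-width writes via MSR_IA32_PMCn where supported, and (on AMD) with
 * the legacy K7 MSR layout.
 */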
int main(int ac, char **av)
{
	setup_vm();
	handle_irq(PMI_VECTOR, cnt_overflow);
	buf = malloc(N*64);

	check_invalid_rdpmc_gp();

	if (pmu.is_intel) {
		if (!pmu.version) {
			report_skip("No Intel Arch PMU is detected!");
			return report_summary();
		}
		gp_events = (struct pmu_event *)intel_gp_events;
		gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
		report_prefix_push("Intel");
		set_ref_cycle_expectations();
	} else {
		gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
		gp_events = (struct pmu_event *)amd_gp_events;
		report_prefix_push("AMD");
	}

	printf("PMU version:         %d\n", pmu.version);
	printf("GP counters:         %d\n", pmu.nr_gp_counters);
	printf("GP counter width:    %d\n", pmu.gp_counter_width);
	printf("Mask length:         %d\n", pmu.gp_counter_mask_length);
	printf("Fixed counters:      %d\n", pmu.nr_fixed_counters);
	printf("Fixed counter width: %d\n", pmu.fixed_counter_width);

	fixed_counters_num = MIN(pmu.nr_fixed_counters, ARRAY_SIZE(fixed_events));
	if (pmu.nr_fixed_counters > ARRAY_SIZE(fixed_events))
		report_info("Fixed counters number %d > defined fixed events %u.  "
			    "Please update test case.", pmu.nr_fixed_counters,
			    (uint32_t)ARRAY_SIZE(fixed_events));

	apic_write(APIC_LVTPC, PMI_VECTOR);

	check_counters();

	if (pmu_has_full_writes()) {
		pmu.msr_gp_counter_base = MSR_IA32_PMC0;

		report_prefix_push("full-width writes");
		check_counters();
		check_gp_counters_write_width();
		report_prefix_pop();
	}

	if (!pmu.is_intel) {
		report_prefix_push("K7");
		pmu.nr_gp_counters = AMD64_NUM_COUNTERS;
		pmu.msr_gp_counter_base = MSR_K7_PERFCTR0;
		pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0;
		check_counters();
		report_prefix_pop();
	}

	return report_summary();
}