xref: /kvm-unit-tests/x86/pmu.c (revision 846737f068d95d5d4652a8bc17332cdfd1e8d74b)

#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/pmu.h"
#include "x86/apic-defs.h"
#include "x86/apic.h"
#include "x86/desc.h"
#include "x86/isr.h"
#include "vmalloc.h"
#include "alloc.h"

#include "libcflat.h"
#include <stdint.h>

#define N 1000000

// These values match the number of instructions and branches in the
// assembly block in check_emulated_instr().
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

typedef struct {
	uint32_t ctr;
	uint64_t config;
	uint64_t count;
	int idx;
} pmu_counter_t;

struct pmu_event {
	const char *name;
	uint32_t unit_sel;
	int min;
	int max;
} intel_gp_events[] = {
	{"core cycles", 0x003c, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"ref cycles", 0x013c, 1*N, 30*N},
	{"llc references", 0x4f2e, 1, 2*N},
	{"llc misses", 0x412e, 1, 1*N},
	{"branches", 0x00c4, 1*N, 1.1*N},
	{"branch misses", 0x00c5, 0, 0.1*N},
}, amd_gp_events[] = {
	{"core cycles", 0x0076, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"branches", 0x00c2, 1*N, 1.1*N},
	{"branch misses", 0x00c3, 0, 0.1*N},
}, fixed_events[] = {
	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
	{"fixed 3", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
};

char *buf;

static struct pmu_event *gp_events;
static unsigned int gp_events_size;

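/*
 * Measurement workload: each of the N iterations executes ten instructions
 * (a load, an add, seven nops and the backward "loop" branch), so the
 * expected instruction and branch ranges in the event tables above follow
 * directly from N.
 */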
static inline void loop(void)
{
	unsigned long tmp, tmp2, tmp3;

	asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b"
			: "=c"(tmp), "=r"(tmp2), "=r"(tmp3) : "0"(N), "1"(buf));
}

volatile uint64_t irq_received;

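/* PMI handler: record the interrupt, unmask LVTPC and signal EOI. */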
static void cnt_overflow(isr_regs_t *regs)
{
	irq_received++;
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
	apic_write(APIC_EOI, 0);
}

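/* Enable interrupts and briefly poll for a PMI; returns true if one was delivered. */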
static bool check_irq(void)
{
	int i;
	irq_received = 0;
	sti();
	for (i = 0; i < 100000 && !irq_received; i++)
		asm volatile("pause");
	cli();
	return irq_received;
}

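/*
 * Intel fixed counters occupy the MSR range from MSR_CORE_PERF_FIXED_CTR0
 * up to (but not including) MSR_IA32_PMC0; everything else, and every AMD
 * counter, is a general-purpose counter.
 */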
static bool is_gp(pmu_counter_t *evt)
{
	if (!pmu.is_intel)
		return true;

	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
		evt->ctr >= MSR_IA32_PMC0;
}

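/*
 * Translate a counter MSR into its bit position in the global control and
 * status MSRs.  On AMD, the core-extension MSRs (MSR_F15H_PERF_CTR0 base)
 * interleave event-select and counter registers, hence the division by two.
 */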
static int event_to_global_idx(pmu_counter_t *cnt)
{
	if (pmu.is_intel)
		return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
			(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));

	if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0)
		return (cnt->ctr - pmu.msr_gp_counter_base) / 2;
	else
		return cnt->ctr - pmu.msr_gp_counter_base;
}

static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
{
	if (is_gp(cnt)) {
		int i;

		for (i = 0; i < gp_events_size; i++)
			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
				return &gp_events[i];
	} else
		return &fixed_events[cnt->ctr - MSR_CORE_PERF_FIXED_CTR0];

	return NULL;
}

static void global_enable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	cnt->idx = event_to_global_idx(cnt);
	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx));
}

static void global_disable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx));
}

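/*
 * Arm a counter with an initial value: program either the GP event-select
 * MSR or the counter's field in FIXED_CTR_CTRL, set its global enable bit
 * (if supported), and unmask the PMI LVT entry.
 */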
static void __start_event(pmu_counter_t *evt, uint64_t count)
{
	evt->count = count;
	wrmsr(evt->ctr, evt->count);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config | EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		uint32_t usrospmi = 0;

		if (evt->config & EVNTSEL_OS)
			usrospmi |= (1 << 0);
		if (evt->config & EVNTSEL_USR)
			usrospmi |= (1 << 1);
		if (evt->config & EVNTSEL_INT)
			usrospmi |= (1 << 3); // PMI on overflow
		ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
	}
	global_enable(evt);
	apic_write(APIC_LVTPC, PMI_VECTOR);
}

static void start_event(pmu_counter_t *evt)
{
	__start_event(evt, 0);
}

static void stop_event(pmu_counter_t *evt)
{
	global_disable(evt);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config & ~EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
	}
	evt->count = rdmsr(evt->ctr);
}

static noinline void measure_many(pmu_counter_t *evt, int count)
{
	int i;
	for (i = 0; i < count; i++)
		start_event(&evt[i]);
	loop();
	for (i = 0; i < count; i++)
		stop_event(&evt[i]);
}

static void measure_one(pmu_counter_t *evt)
{
	measure_many(evt, 1);
}

static noinline void __measure(pmu_counter_t *evt, uint64_t count)
{
	__start_event(evt, count);
	loop();
	stop_event(evt);
}

static bool verify_event(uint64_t count, struct pmu_event *e)
{
	// printf("%d <= %ld <= %d\n", e->min, count, e->max);
	return count >= e->min && count <= e->max;
}

static bool verify_counter(pmu_counter_t *cnt)
{
	return verify_event(cnt->count, get_counter_event(cnt));
}

static void check_gp_counter(struct pmu_event *evt)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
	};
	int i;

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);
		measure_one(&cnt);
		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
	}
}

static void check_gp_counters(void)
{
	int i;

	for (i = 0; i < gp_events_size; i++)
		if (pmu_gp_counter_is_available(i))
			check_gp_counter(&gp_events[i]);
		else
			printf("GP event '%s' is disabled\n",
					gp_events[i].name);
}

static void check_fixed_counters(void)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR,
	};
	int i;

	for (i = 0; i < pmu.nr_fixed_counters; i++) {
		cnt.ctr = fixed_events[i].unit_sel;
		measure_one(&cnt);
		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
	}
}

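/* Program all available GP and fixed counters simultaneously and verify each one. */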
static void check_counters_many(void)
{
	pmu_counter_t cnt[10];
	int i, n;

	for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) {
		if (!pmu_gp_counter_is_available(i))
			continue;

		cnt[n].ctr = MSR_GP_COUNTERx(n);
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
			gp_events[i % gp_events_size].unit_sel;
		n++;
	}
	for (i = 0; i < pmu.nr_fixed_counters; i++) {
		cnt[n].ctr = fixed_events[i].unit_sel;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
		n++;
	}

	measure_many(cnt, n);

	for (i = 0; i < n; i++)
		if (!verify_counter(&cnt[i]))
			break;

	report(i == n, "all counters");
}

static uint64_t measure_for_overflow(pmu_counter_t *cnt)
{
	__measure(cnt, 0);
	/*
	 * To generate overflow, i.e. roll over to '0', the initial count just
	 * needs to be preset to the negative expected count.  However, as per
	 * Intel's SDM, the preset count needs to be incremented by 1 to ensure
	 * the overflow interrupt is generated immediately instead of possibly
	 * waiting for the overflow to propagate through the counter.
	 */
	assert(cnt->count > 1);
	return 1 - cnt->count;
}

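/*
 * Preset each counter so that it overflows during loop(), then check the
 * final count, the global overflow status bit, clearing of that bit, and
 * PMI delivery (EVNTSEL_INT is set on odd iterations only).  On Intel, one
 * extra iteration repeats the test with fixed counter 0.
 */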
static void check_counter_overflow(void)
{
	uint64_t overflow_preset;
	int i;
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};
	overflow_preset = measure_for_overflow(&cnt);

	/* clear status before test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	report_prefix_push("overflow");

	for (i = 0; i < pmu.nr_gp_counters + 1; i++) {
		uint64_t status;
		int idx;

		cnt.count = overflow_preset;
		if (pmu_use_full_writes())
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;

		if (i == pmu.nr_gp_counters) {
			if (!pmu.is_intel)
				break;

			cnt.ctr = fixed_events[0].unit_sel;
			cnt.count = measure_for_overflow(&cnt);
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
		} else {
			cnt.ctr = MSR_GP_COUNTERx(i);
		}

		if (i % 2)
			cnt.config |= EVNTSEL_INT;
		else
			cnt.config &= ~EVNTSEL_INT;
		idx = event_to_global_idx(&cnt);
		__measure(&cnt, cnt.count);
		if (pmu.is_intel)
			report(cnt.count == 1, "cntr-%d", i);
		else
			report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i);

		if (!this_cpu_has_perf_global_status())
			continue;

		status = rdmsr(pmu.msr_global_status);
		report(status & (1ull << idx), "status-%d", i);
		wrmsr(pmu.msr_global_status_clr, status);
		status = rdmsr(pmu.msr_global_status);
		report(!(status & (1ull << idx)), "status clear-%d", i);
		report(check_irq() == (i % 2), "irq-%d", i);
	}

	report_prefix_pop();
}

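/*
 * With a counter mask of 2, the counter increments once per cycle in which
 * the event fires at least twice, so the result must land below the normal
 * minimum for the instructions event.
 */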
static void check_gp_counter_cmask(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};
	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
	measure_one(&cnt);
	report(cnt.count < gp_events[1].min, "cmask");
}

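/*
 * RDPMC with bit 31 of ECX set requests a "fast" read of the low 32 bits;
 * bit 30 selects the fixed-counter range.  Called via test_for_exception()
 * because fast reads may #GP when unsupported.
 */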
static void do_rdpmc_fast(void *ptr)
{
	pmu_counter_t *cnt = ptr;
	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);

	if (!is_gp(cnt))
		idx |= 1 << 30;

	cnt->count = rdpmc(idx);
}

static void check_rdpmc(void)
{
	uint64_t val = 0xff0123456789ull;
	bool exc;
	int i;

	report_prefix_push("rdpmc");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		uint64_t x;
		pmu_counter_t cnt = {
			.ctr = MSR_GP_COUNTERx(i),
			.idx = i
		};

		/*
		 * Without full-width writes, only the low 32 bits are writable,
		 * and the value is sign-extended.
		 */
		if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0)
			x = (uint64_t)(int64_t)(int32_t)val;
		else
			x = (uint64_t)(int64_t)val;

		/* Mask according to the number of supported bits */
		x &= (1ull << pmu.gp_counter_width) - 1;

		wrmsr(MSR_GP_COUNTERx(i), val);
		report(rdpmc(i) == x, "cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fast-%d", i);
		else
			report(cnt.count == (u32)val, "fast-%d", i);
	}
	for (i = 0; i < pmu.nr_fixed_counters; i++) {
		uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1);
		pmu_counter_t cnt = {
			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
			.idx = i
		};

		wrmsr(MSR_PERF_FIXED_CTRx(i), x);
		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fixed fast-%d", i);
		else
			report(cnt.count == (u32)x, "fixed fast-%d", i);
	}

	report_prefix_pop();
}

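/*
 * WRMSR to a running counter must take effect: zeroing it mid-run should
 * yield a small final count, and presetting it to -1 should set the
 * counter's overflow bit in the global status MSR.
 */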
static void check_running_counter_wrmsr(void)
{
	uint64_t status;
	uint64_t count;
	pmu_counter_t evt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};

	report_prefix_push("running counter wrmsr");

	start_event(&evt);
	loop();
	wrmsr(MSR_GP_COUNTERx(0), 0);
	stop_event(&evt);
	report(evt.count < gp_events[1].min, "cntr");

	/* clear status before overflow test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&evt);

	count = -1;
	if (pmu_use_full_writes())
		count &= (1ull << pmu.gp_counter_width) - 1;

	wrmsr(MSR_GP_COUNTERx(0), count);

	loop();
	stop_event(&evt);

	if (this_cpu_has_perf_global_status()) {
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "status msr bit");
	}

	report_prefix_pop();
}

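/*
 * Verify that instructions and branches executed by the instruction
 * emulator (forced via KVM_FEP) are counted, and that both counters
 * overflow from their negative preset values.
 */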
static void check_emulated_instr(void)
{
	uint64_t status, instr_start, brnch_start;
	uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1;
	unsigned int branch_idx = pmu.is_intel ? 5 : 2;
	pmu_counter_t brnch_cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		/* branch instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel,
	};
	pmu_counter_t instr_cnt = {
		.ctr = MSR_GP_COUNTERx(1),
		/* instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};
	report_prefix_push("emulated instruction");

	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&brnch_cnt);
	start_event(&instr_cnt);

	brnch_start = -EXPECTED_BRNCH;
	instr_start = -EXPECTED_INSTR;
	wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width);
	wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width);
	// KVM_FEP is a magic prefix that forces emulation so
	// 'KVM_FEP "jne label\n"' just counts as a single instruction.
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	stop_event(&brnch_cnt);
	stop_event(&instr_cnt);

	// Check that the end count - start count is at least the expected
	// number of instructions and branches.
	report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
	       "instruction count");
	report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
	       "branch count");
	if (this_cpu_has_perf_global_status()) {
		// Additionally check that those counters overflowed properly.
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "branch counter overflow");
		report(status & 2, "instruction counter overflow");
	}

	report_prefix_pop();
}

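/*
 * Count core cycles (event 0x3c) inside an RTM transaction that is aborted
 * by a non-canonical #GP, using the transactional filter bits in the upper
 * half of the event select; as noted below, the "committed" variant is only
 * supported on GP counter 2.
 */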
#define XBEGIN_STARTED (~0u)
static void check_tsx_cycles(void)
{
	pmu_counter_t cnt;
	unsigned int i, ret = 0;

	if (!this_cpu_has(X86_FEATURE_RTM))
		return;

	report_prefix_push("TSX cycles");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);

		if (i == 2) {
			/* Transactional cycles committed only on gp counter 2 */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x30000003c;
		} else {
			/* Transactional cycles */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x10000003c;
		}

		start_event(&cnt);

		asm volatile("xbegin 1f\n\t"
				"1:\n\t"
				: "+a" (ret) :: "memory");

		/* Generate a non-canonical #GP to trigger ABORT. */
		if (ret == XBEGIN_STARTED)
			*(int *)NONCANONICAL = 0;

		stop_event(&cnt);

		report(cnt.count > 0, "gp cntr-%d with a value of %" PRId64 "", i, cnt.count);
	}

	report_prefix_pop();
}

static void check_counters(void)
{
	if (is_fep_available())
		check_emulated_instr();

	check_gp_counters();
	check_fixed_counters();
	check_rdpmc();
	check_counters_many();
	check_counter_overflow();
	check_gp_counter_cmask();
	check_running_counter_wrmsr();
	check_tsx_cycles();
}

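/*
 * Helper for check_gp_counters_write_width(): writing a value wider than
 * the supported counter width to MSR_IA32_PMCx is expected to #GP.
 */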
static void do_unsupported_width_counter_write(void *index)
{
	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
}

static void check_gp_counters_write_width(void)
{
	u64 val_64 = 0xffffff0123456789ull;
	u64 val_32 = val_64 & ((1ull << 32) - 1);
	u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1);
	int i;

	/*
	 * MSR_IA32_PERFCTRn supports 64-bit writes,
	 * but only the lowest 32 bits are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
	}

	/*
	 * MSR_IA32_PMCn supports writing values up to GP counter width,
	 * and only the lowest bits of GP counter width are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PMC0 + i, val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);

		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);

		report(test_for_exception(GP_VECTOR,
			do_unsupported_width_counter_write, &i),
		"writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
	}
}

/*
 * Per the SDM, reference cycles are currently implemented using the
 * core crystal clock, TSC, or bus clock. Calibrate to the TSC
 * frequency to set reasonable expectations.
 */
static void set_ref_cycle_expectations(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_IA32_PERFCTR0,
		.config = EVNTSEL_OS | EVNTSEL_USR | intel_gp_events[2].unit_sel,
	};
	uint64_t tsc_delta;
	uint64_t t0, t1, t2, t3;

	/* Bit 2 enumerates the availability of reference cycles events. */
	if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2))
		return;

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	t0 = fenced_rdtsc();
	start_event(&cnt);
	t1 = fenced_rdtsc();

	/*
	 * This loop has to run long enough to dominate the VM-exit
	 * costs for playing with the PMU MSRs on start and stop.
	 *
	 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
	 * the core crystal clock, this function calculated a guest
	 * TSC : ref cycles ratio of around 105 with ECX initialized
	 * to one billion.
	 */
	asm volatile("loop ." : "+c"((int){1000000000ull}));

	t2 = fenced_rdtsc();
	stop_event(&cnt);
	t3 = fenced_rdtsc();

	tsc_delta = ((t2 - t1) + (t3 - t0)) / 2;

	if (!tsc_delta)
		return;

	intel_gp_events[2].min = (intel_gp_events[2].min * cnt.count) / tsc_delta;
	intel_gp_events[2].max = (intel_gp_events[2].max * cnt.count) / tsc_delta;
}

static void check_invalid_rdpmc_gp(void)
{
	uint64_t val;

	report(rdpmc_safe(64, &val) == GP_VECTOR,
	       "Expected #GP on RDPMC(64)");
}

int main(int ac, char **av)
{
	setup_vm();
	handle_irq(PMI_VECTOR, cnt_overflow);
	buf = malloc(N*64);

	check_invalid_rdpmc_gp();

	if (pmu.is_intel) {
		if (!pmu.version) {
			report_skip("No Intel Arch PMU is detected!");
			return report_summary();
		}
		gp_events = (struct pmu_event *)intel_gp_events;
		gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
		report_prefix_push("Intel");
		set_ref_cycle_expectations();
	} else {
		gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
		gp_events = (struct pmu_event *)amd_gp_events;
		report_prefix_push("AMD");
	}

	printf("PMU version:         %d\n", pmu.version);
	printf("GP counters:         %d\n", pmu.nr_gp_counters);
	printf("GP counter width:    %d\n", pmu.gp_counter_width);
	printf("Mask length:         %d\n", pmu.gp_counter_mask_length);
	printf("Fixed counters:      %d\n", pmu.nr_fixed_counters);
	printf("Fixed counter width: %d\n", pmu.fixed_counter_width);

	apic_write(APIC_LVTPC, PMI_VECTOR);

	check_counters();

	if (pmu_has_full_writes()) {
		pmu.msr_gp_counter_base = MSR_IA32_PMC0;

		report_prefix_push("full-width writes");
		check_counters();
		check_gp_counters_write_width();
		report_prefix_pop();
	}

	if (!pmu.is_intel) {
		report_prefix_push("K7");
		pmu.nr_gp_counters = AMD64_NUM_COUNTERS;
		pmu.msr_gp_counter_base = MSR_K7_PERFCTR0;
		pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0;
		check_counters();
		report_prefix_pop();
	}

	return report_summary();
}