xref: /kvm-unit-tests/x86/pmu.c (revision 9c07c92b2d89ca2a2af566b6e6ada9a4dfc3ac83)

#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/pmu.h"
#include "x86/apic-defs.h"
#include "x86/apic.h"
#include "x86/desc.h"
#include "x86/isr.h"
#include "vmalloc.h"
#include "alloc.h"

#include "libcflat.h"
#include <stdint.h>

#define N 1000000

// These values match the number of instructions and branches in the
// assembly block in check_emulated_instr().
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

typedef struct {
	uint32_t ctr;
	uint32_t idx;
	uint64_t config;
	uint64_t count;
} pmu_counter_t;

struct pmu_event {
	const char *name;
	uint32_t unit_sel;
	int min;
	int max;
} intel_gp_events[] = {
	{"core cycles", 0x003c, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"ref cycles", 0x013c, 1*N, 30*N},
	{"llc references", 0x4f2e, 1, 2*N},
	{"llc misses", 0x412e, 1, 1*N},
	{"branches", 0x00c4, 1*N, 1.1*N},
	{"branch misses", 0x00c5, 0, 0.1*N},
}, amd_gp_events[] = {
	{"core cycles", 0x0076, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"branches", 0x00c2, 1*N, 1.1*N},
	{"branch misses", 0x00c3, 0, 0.1*N},
}, fixed_events[] = {
	{"fixed 0", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
};

char *buf;

static struct pmu_event *gp_events;
static unsigned int gp_events_size;
static unsigned int fixed_counters_num;

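/*
 * The measured workload: N iterations of a fixed instruction mix (a load,
 * an add, seven NOPs and the backwards LOOP branch) walking through the
 * global buffer.  The gp_events/fixed_events bounds above assume one pass
 * through this loop.
 */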
static inline void loop(void)
{
	unsigned long tmp, tmp2, tmp3;

	asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b"
			: "=c"(tmp), "=r"(tmp2), "=r"(tmp3): "0"(N), "1"(buf));

}

volatile uint64_t irq_received;

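/*
 * PMI handler: record the interrupt, clear the mask bit in the LVTPC entry
 * so further PMIs can be delivered, and ack the local APIC.
 */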
static void cnt_overflow(isr_regs_t *regs)
{
	irq_received++;
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
	apic_write(APIC_EOI, 0);
}

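/*
 * Enable interrupts and briefly spin, waiting for a PMI.  Returns true if
 * at least one overflow interrupt was delivered.
 */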
static bool check_irq(void)
{
	int i;
	irq_received = 0;
	sti();
	for (i = 0; i < 100000 && !irq_received; i++)
		asm volatile("pause");
	cli();
	return irq_received;
}

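/*
 * A counter is general-purpose unless its MSR falls in the Intel fixed
 * counter range, i.e. at or above MSR_CORE_PERF_FIXED_CTR0 and below
 * MSR_IA32_PMC0.  AMD has no fixed counters.
 */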
static bool is_gp(pmu_counter_t *evt)
{
	if (!pmu.is_intel)
		return true;

	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
		evt->ctr >= MSR_IA32_PMC0;
}

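/*
 * Translate a counter to its bit position in the global control/status
 * MSRs.  Intel fixed counters are numbered from FIXED_CNT_INDEX; AMD's
 * F15H MSR space interleaves event select and counter registers, hence
 * the divide-by-two.
 */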
static int event_to_global_idx(pmu_counter_t *cnt)
{
	if (pmu.is_intel)
		return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
			(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));

	if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0)
		return (cnt->ctr - pmu.msr_gp_counter_base) / 2;
	else
		return cnt->ctr - pmu.msr_gp_counter_base;
}

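/*
 * Look up the expected count range for a counter: GP counters are matched
 * by the event select/umask in the low 16 bits of the config, fixed
 * counters by their offset from MSR_CORE_PERF_FIXED_CTR0.
 */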
static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
{
	if (is_gp(cnt)) {
		int i;

		for (i = 0; i < gp_events_size; i++)
			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
				return &gp_events[i];
	} else {
		unsigned int idx = cnt->ctr - MSR_CORE_PERF_FIXED_CTR0;

		if (idx < ARRAY_SIZE(fixed_events))
			return &fixed_events[idx];
	}

	return NULL;
}

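/*
 * Set or clear a counter's enable bit in the global control MSR, if the
 * CPU provides one.  global_enable() also caches the counter's global
 * index for global_disable().
 */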
static void global_enable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	cnt->idx = event_to_global_idx(cnt);
	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx));
}

static void global_disable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx));
}

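/*
 * Program a counter with an initial count and enable it: GP counters via
 * their event select MSR, fixed counters via the OS/USR/PMI fields in
 * MSR_CORE_PERF_FIXED_CTR_CTRL.  The LVTPC entry is (re)programmed so that
 * overflow PMIs reach cnt_overflow().
 */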
static void __start_event(pmu_counter_t *evt, uint64_t count)
{
	evt->count = count;
	wrmsr(evt->ctr, evt->count);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config | EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		uint32_t usrospmi = 0;

		if (evt->config & EVNTSEL_OS)
			usrospmi |= (1 << 0);
		if (evt->config & EVNTSEL_USR)
			usrospmi |= (1 << 1);
		if (evt->config & EVNTSEL_INT)
			usrospmi |= (1 << 3); // PMI on overflow
		ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
	}
	global_enable(evt);
	apic_write(APIC_LVTPC, PMI_VECTOR);
}

static void start_event(pmu_counter_t *evt)
{
	__start_event(evt, 0);
}

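/* Disable a counter and latch its final value into evt->count. */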
static void stop_event(pmu_counter_t *evt)
{
	global_disable(evt);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config & ~EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
	}
	evt->count = rdmsr(evt->ctr);
}

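/*
 * Start every counter in the array, run the measured loop once, then stop
 * them all and latch the final counts.
 */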
static noinline void measure_many(pmu_counter_t *evt, int count)
{
	int i;
	for (i = 0; i < count; i++)
		start_event(&evt[i]);
	loop();
	for (i = 0; i < count; i++)
		stop_event(&evt[i]);
}

static void measure_one(pmu_counter_t *evt)
{
	measure_many(evt, 1);
}

static noinline void __measure(pmu_counter_t *evt, uint64_t count)
{
	__start_event(evt, count);
	loop();
	stop_event(evt);
}

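/*
 * Check that a measured count lies within the expected [min, max] range
 * for the event.
 */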
static bool verify_event(uint64_t count, struct pmu_event *e)
{
	bool pass;

	if (!e)
		return false;

	pass = count >= e->min && count <= e->max;
	if (!pass)
		printf("FAIL: %d <= %"PRId64" <= %d\n", e->min, count, e->max);

	return pass;
}

static bool verify_counter(pmu_counter_t *cnt)
{
	return verify_event(cnt->count, get_counter_event(cnt));
}

static void check_gp_counter(struct pmu_event *evt)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
	};
	int i;

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);
		measure_one(&cnt);
		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
	}
}

static void check_gp_counters(void)
{
	int i;

	for (i = 0; i < gp_events_size; i++)
		if (pmu_gp_counter_is_available(i))
			check_gp_counter(&gp_events[i]);
		else
			printf("GP event '%s' is disabled\n",
					gp_events[i].name);
}

static void check_fixed_counters(void)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR,
	};
	int i;

	for (i = 0; i < fixed_counters_num; i++) {
		cnt.ctr = fixed_events[i].unit_sel;
		measure_one(&cnt);
		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
	}
}

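/*
 * Program all available GP counters (cycling through the event list) plus
 * all fixed counters at once, run the workload, and verify every counter.
 */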
static void check_counters_many(void)
{
	pmu_counter_t cnt[48];
	int i, n;

	for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) {
		if (!pmu_gp_counter_is_available(i))
			continue;

		cnt[n].ctr = MSR_GP_COUNTERx(n);
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
			gp_events[i % gp_events_size].unit_sel;
		n++;
	}
	for (i = 0; i < fixed_counters_num; i++) {
		cnt[n].ctr = fixed_events[i].unit_sel;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
		n++;
	}

	assert(n <= ARRAY_SIZE(cnt));
	measure_many(cnt, n);

	for (i = 0; i < n; i++)
		if (!verify_counter(&cnt[i]))
			break;

	report(i == n, "all counters");
}

static uint64_t measure_for_overflow(pmu_counter_t *cnt)
{
	__measure(cnt, 0);
	/*
	 * To generate overflow, i.e. roll over to '0', the initial count just
	 * needs to be preset to the negative expected count.  However, as per
	 * Intel's SDM, the preset count needs to be incremented by 1 to ensure
	 * the overflow interrupt is generated immediately instead of possibly
	 * waiting for the overflow to propagate through the counter.
	 */
	assert(cnt->count > 1);
	return 1 - cnt->count;
}

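/*
 * Preset each counter so that it overflows during the workload, then check
 * the resulting count, the global overflow status bit, the status-clear
 * MSR, and PMI delivery (requested on odd iterations only).  The final
 * iteration repeats the test on fixed counter 0, Intel only.
 */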
static void check_counter_overflow(void)
{
	uint64_t overflow_preset;
	int i;
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};
	overflow_preset = measure_for_overflow(&cnt);

	/* clear status before test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	report_prefix_push("overflow");

	for (i = 0; i < pmu.nr_gp_counters + 1; i++) {
		uint64_t status;
		int idx;

		cnt.count = overflow_preset;
		if (pmu_use_full_writes())
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;

		if (i == pmu.nr_gp_counters) {
			if (!pmu.is_intel)
				break;

			cnt.ctr = fixed_events[0].unit_sel;
			cnt.count = measure_for_overflow(&cnt);
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
		} else {
			cnt.ctr = MSR_GP_COUNTERx(i);
		}

		if (i % 2)
			cnt.config |= EVNTSEL_INT;
		else
			cnt.config &= ~EVNTSEL_INT;
		idx = event_to_global_idx(&cnt);
		__measure(&cnt, cnt.count);
		if (pmu.is_intel)
			report(cnt.count == 1, "cntr-%d", i);
		else
			report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i);

		if (!this_cpu_has_perf_global_status())
			continue;

		status = rdmsr(pmu.msr_global_status);
		report(status & (1ull << idx), "status-%d", i);
		wrmsr(pmu.msr_global_status_clr, status);
		status = rdmsr(pmu.msr_global_status);
		report(!(status & (1ull << idx)), "status clear-%d", i);
		report(check_irq() == (i % 2), "irq-%d", i);
	}

	report_prefix_pop();
}

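/*
 * With a counter mask (CMASK) of 2, the counter increments only on cycles
 * in which at least two instructions retire, so the result must fall below
 * the plain instruction count's lower bound.
 */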
static void check_gp_counter_cmask(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};
	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
	measure_one(&cnt);
	report(cnt.count < gp_events[1].min, "cmask");
}

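/*
 * RDPMC with bit 31 set in ECX requests a "fast" read, which is expected
 * to return only the low 32 bits of the counter (or #GP if the fast
 * variant is unsupported); bit 30 selects the fixed-counter range.
 */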
static void do_rdpmc_fast(void *ptr)
{
	pmu_counter_t *cnt = ptr;
	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);

	if (!is_gp(cnt))
		idx |= 1 << 30;

	cnt->count = rdpmc(idx);
}

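/*
 * Write a known value to each GP and fixed counter via WRMSR and read it
 * back with RDPMC, checking the expected width and sign-extension
 * behavior for both normal and "fast" reads.
 */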
static void check_rdpmc(void)
{
	uint64_t val = 0xff0123456789ull;
	bool exc;
	int i;

	report_prefix_push("rdpmc");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		uint64_t x;
		pmu_counter_t cnt = {
			.ctr = MSR_GP_COUNTERx(i),
			.idx = i
		};

		/*
		 * Without full-width writes, only the low 32 bits are writable,
		 * and the value is sign-extended.
		 */
		if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0)
			x = (uint64_t)(int64_t)(int32_t)val;
		else
			x = (uint64_t)(int64_t)val;

		/* Mask according to the number of supported bits */
		x &= (1ull << pmu.gp_counter_width) - 1;

		wrmsr(MSR_GP_COUNTERx(i), val);
		report(rdpmc(i) == x, "cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fast-%d", i);
		else
			report(cnt.count == (u32)val, "fast-%d", i);
	}
	for (i = 0; i < fixed_counters_num; i++) {
		uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1);
		pmu_counter_t cnt = {
			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
			.idx = i
		};

		wrmsr(MSR_PERF_FIXED_CTRx(i), x);
		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fixed fast-%d", i);
		else
			report(cnt.count == (u32)x, "fixed fast-%d", i);
	}

	report_prefix_pop();
}

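/*
 * Verify that WRMSR to a live counter takes effect: zeroing it mid-run
 * keeps the final count below the workload minimum, and writing -1 makes
 * it overflow and set the global status bit.
 */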
static void check_running_counter_wrmsr(void)
{
	uint64_t status;
	uint64_t count;
	pmu_counter_t evt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};

	report_prefix_push("running counter wrmsr");

	start_event(&evt);
	loop();
	wrmsr(MSR_GP_COUNTERx(0), 0);
	stop_event(&evt);
	report(evt.count < gp_events[1].min, "cntr");

	/* clear status before overflow test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&evt);

	count = -1;
	if (pmu_use_full_writes())
		count &= (1ull << pmu.gp_counter_width) - 1;

	wrmsr(MSR_GP_COUNTERx(0), count);

	loop();
	stop_event(&evt);

	if (this_cpu_has_perf_global_status()) {
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "status msr bit");
	}

	report_prefix_pop();
}

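/*
 * Verify that instructions and branches emulated by KVM (forced with the
 * KVM_FEP prefix) are counted.  Both counters are preset to overflow, so
 * the global overflow status bits are checked as well.
 */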
static void check_emulated_instr(void)
{
	uint64_t status, instr_start, brnch_start;
	uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1;
	unsigned int branch_idx = pmu.is_intel ? 5 : 2;
	pmu_counter_t brnch_cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		/* branch instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel,
	};
	pmu_counter_t instr_cnt = {
		.ctr = MSR_GP_COUNTERx(1),
		/* instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};
	report_prefix_push("emulated instruction");

	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&brnch_cnt);
	start_event(&instr_cnt);

	brnch_start = -EXPECTED_BRNCH;
	instr_start = -EXPECTED_INSTR;
	wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width);
	wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width);
	// KVM_FEP is a magic prefix that forces emulation so
	// 'KVM_FEP "jne label\n"' just counts as a single instruction.
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	stop_event(&brnch_cnt);
	stop_event(&instr_cnt);

	// Check that the end count - start count is at least the expected
	// number of instructions and branches.
	report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
	       "instruction count");
	report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
	       "branch count");
	if (this_cpu_has_perf_global_status()) {
		// Additionally check that those counters overflowed properly.
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "branch counter overflow");
		report(status & 2, "instruction counter overflow");
	}

	report_prefix_pop();
}

#define XBEGIN_STARTED (~0u)
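/*
 * Count core cycles with the IN_TX modifier (bit 32 of the event select)
 * and, on GP counter 2 only, the IN_TXCP modifier (bit 33) while executing
 * an RTM transaction, and verify that each counter records a non-zero
 * count.
 */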
static void check_tsx_cycles(void)
{
	pmu_counter_t cnt;
	unsigned int i, ret = 0;

	if (!this_cpu_has(X86_FEATURE_RTM))
		return;

	report_prefix_push("TSX cycles");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);

		if (i == 2) {
			/* Transactional cycles committed only on gp counter 2 */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x30000003c;
		} else {
			/* Transactional cycles */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x10000003c;
		}

		start_event(&cnt);

		asm volatile("xbegin 1f\n\t"
				"1:\n\t"
				: "+a" (ret) :: "memory");

		/* Generate a non-canonical #GP to trigger ABORT. */
		if (ret == XBEGIN_STARTED)
			*(int *)NONCANONICAL = 0;

		stop_event(&cnt);

		report(cnt.count > 0, "gp cntr-%d with a value of %" PRId64 "", i, cnt.count);
	}

	report_prefix_pop();
}

static void check_counters(void)
{
	if (is_fep_available())
		check_emulated_instr();

	check_gp_counters();
	check_fixed_counters();
	check_rdpmc();
	check_counters_many();
	check_counter_overflow();
	check_gp_counter_cmask();
	check_running_counter_wrmsr();
	check_tsx_cycles();
}

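/* Helper for test_for_exception(): write a value wider than the counter. */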
static void do_unsupported_width_counter_write(void *index)
{
	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
}

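/*
 * Compare the write behavior of the legacy MSR_IA32_PERFCTRx aliases,
 * which preserve only the low 32 bits, with the full-width MSR_IA32_PMCx
 * aliases, which accept values up to the counter width and #GP beyond it.
 */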
static void check_gp_counters_write_width(void)
{
	u64 val_64 = 0xffffff0123456789ull;
	u64 val_32 = val_64 & ((1ull << 32) - 1);
	u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1);
	int i;

	/*
	 * MSR_IA32_PERFCTRn supports 64-bit writes,
	 * but only the lowest 32 bits are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
	}

	/*
	 * MSR_IA32_PMCn supports writing values up to GP counter width,
	 * and only the lowest bits of GP counter width are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PMC0 + i, val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);

		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);

		report(test_for_exception(GP_VECTOR,
			do_unsupported_width_counter_write, &i),
		"writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
	}
}

/*
 * Per the SDM, reference cycles are currently implemented using the
 * core crystal clock, TSC, or bus clock. Calibrate to the TSC
 * frequency to set reasonable expectations.
 */
static void set_ref_cycle_expectations(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_IA32_PERFCTR0,
		.config = EVNTSEL_OS | EVNTSEL_USR | intel_gp_events[2].unit_sel,
	};
	uint64_t tsc_delta;
	uint64_t t0, t1, t2, t3;

	/* Bit 2 enumerates the availability of reference cycles events. */
	if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2))
		return;

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	t0 = fenced_rdtsc();
	start_event(&cnt);
	t1 = fenced_rdtsc();

	/*
	 * This loop has to run long enough to dominate the VM-exit
	 * costs for playing with the PMU MSRs on start and stop.
	 *
	 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
	 * the core crystal clock, this function calculated a guest
	 * TSC : ref cycles ratio of around 105 with ECX initialized
	 * to one billion.
	 */
	asm volatile("loop ." : "+c"((int){1000000000ull}));

	t2 = fenced_rdtsc();
	stop_event(&cnt);
	t3 = fenced_rdtsc();

	tsc_delta = ((t2 - t1) + (t3 - t0)) / 2;

	if (!tsc_delta)
		return;

	intel_gp_events[2].min = (intel_gp_events[2].min * cnt.count) / tsc_delta;
	intel_gp_events[2].max = (intel_gp_events[2].max * cnt.count) / tsc_delta;
}

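/* RDPMC with an out-of-range index (64) must raise #GP. */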
static void check_invalid_rdpmc_gp(void)
{
	uint64_t val;

	report(rdpmc_safe(64, &val) == GP_VECTOR,
	       "Expected #GP on RDPMC(64)");
}

int main(int ac, char **av)
{
	setup_vm();
	handle_irq(PMI_VECTOR, cnt_overflow);
	buf = malloc(N*64);

	check_invalid_rdpmc_gp();

	if (pmu.is_intel) {
		if (!pmu.version) {
			report_skip("No Intel Arch PMU is detected!");
			return report_summary();
		}
		gp_events = (struct pmu_event *)intel_gp_events;
		gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
		report_prefix_push("Intel");
		set_ref_cycle_expectations();
	} else {
		gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
		gp_events = (struct pmu_event *)amd_gp_events;
		report_prefix_push("AMD");
	}

	printf("PMU version:         %d\n", pmu.version);
	printf("GP counters:         %d\n", pmu.nr_gp_counters);
	printf("GP counter width:    %d\n", pmu.gp_counter_width);
	printf("Mask length:         %d\n", pmu.gp_counter_mask_length);
	printf("Fixed counters:      %d\n", pmu.nr_fixed_counters);
	printf("Fixed counter width: %d\n", pmu.fixed_counter_width);

	fixed_counters_num = MIN(pmu.nr_fixed_counters, ARRAY_SIZE(fixed_events));
	if (pmu.nr_fixed_counters > ARRAY_SIZE(fixed_events))
		report_info("Number of fixed counters (%d) exceeds the number of "
			    "defined fixed events (%u).  Please update the test case.",
			    pmu.nr_fixed_counters,
			    (uint32_t)ARRAY_SIZE(fixed_events));

	apic_write(APIC_LVTPC, PMI_VECTOR);

	check_counters();

	if (pmu_has_full_writes()) {
		pmu.msr_gp_counter_base = MSR_IA32_PMC0;

		report_prefix_push("full-width writes");
		check_counters();
		check_gp_counters_write_width();
		report_prefix_pop();
	}

	if (!pmu.is_intel) {
		report_prefix_push("K7");
		pmu.nr_gp_counters = AMD64_NUM_COUNTERS;
		pmu.msr_gp_counter_base = MSR_K7_PERFCTR0;
		pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0;
		check_counters();
		report_prefix_pop();
	}

	return report_summary();
}