#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/pmu.h"
#include "x86/apic-defs.h"
#include "x86/apic.h"
#include "x86/desc.h"
#include "x86/isr.h"
#include "vmalloc.h"
#include "alloc.h"

#include "libcflat.h"
#include <stdint.h>

#define N 1000000

// These values match the number of instructions and branches in the
// assembly block in check_emulated_instr().
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

typedef struct {
	uint32_t ctr;
	uint32_t idx;
	uint64_t config;
	uint64_t count;
} pmu_counter_t;

struct pmu_event {
	const char *name;
	uint32_t unit_sel;
	int min;
	int max;
} intel_gp_events[] = {
	{"core cycles", 0x003c, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"ref cycles", 0x013c, 1*N, 30*N},
	{"llc references", 0x4f2e, 1, 2*N},
	{"llc misses", 0x412e, 1, 1*N},
	{"branches", 0x00c4, 1*N, 1.1*N},
	{"branch misses", 0x00c5, 0, 0.1*N},
}, amd_gp_events[] = {
	{"core cycles", 0x0076, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"branches", 0x00c2, 1*N, 1.1*N},
	{"branch misses", 0x00c3, 0, 0.1*N},
}, fixed_events[] = {
	{"fixed 0", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
};

/*
 * Indexes of specific events in intel_gp_events[]; keep these in sync
 * with the event table above.
 */
enum {
	INTEL_REF_CYCLES_IDX	= 2,
	INTEL_BRANCHES_IDX	= 5,
};

/*
 * Index of specific events in amd_gp_events[]; keep this in sync with
 * the event table above.
 */
enum {
	AMD_BRANCHES_IDX	= 2,
};

char *buf;

static struct pmu_event *gp_events;
static unsigned int gp_events_size;
static unsigned int fixed_counters_num;

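/*
 * Run a tight loop exactly N times.  Each iteration executes ten
 * instructions (a load, an add, seven nops, and the taken "loop"
 * branch) and touches a new cache line, which is what the instruction,
 * branch, and LLC bounds in the event tables above are calibrated
 * against.
 */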
static inline void loop(void)
{
	unsigned long tmp, tmp2, tmp3;

	asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b"
			: "=c"(tmp), "=r"(tmp2), "=r"(tmp3): "0"(N), "1"(buf));
}

volatile uint64_t irq_received;

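/*
 * PMI handler: count the interrupt, unmask the LVT entry (the local
 * APIC sets the mask bit when a PMI is delivered), and ack with EOI.
 */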
static void cnt_overflow(isr_regs_t *regs)
{
	irq_received++;
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
	apic_write(APIC_EOI, 0);
}

static bool check_irq(void)
{
	int i;

	irq_received = 0;
	sti();
	for (i = 0; i < 100000 && !irq_received; i++)
		asm volatile("pause");
	cli();
	return irq_received;
}

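/*
 * All AMD counters are general purpose.  On Intel, the fixed counters
 * occupy the MSR range starting at MSR_CORE_PERF_FIXED_CTR0; anything
 * below that range (the legacy GP counters) or at MSR_IA32_PMC0 and
 * above (the full-width aliases) is a GP counter.
 */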
static bool is_gp(pmu_counter_t *evt)
{
	if (!pmu.is_intel)
		return true;

	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
		evt->ctr >= MSR_IA32_PMC0;
}

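/*
 * Map a counter MSR to the counter's bit position in the global
 * control/status MSRs.  The F15H counter MSRs are interleaved with
 * their event-select MSRs, hence the stride of two.
 */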
static int event_to_global_idx(pmu_counter_t *cnt)
{
	if (pmu.is_intel)
		return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
			(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));

	if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0)
		return (cnt->ctr - pmu.msr_gp_counter_base) / 2;
	else
		return cnt->ctr - pmu.msr_gp_counter_base;
}

static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
{
	if (is_gp(cnt)) {
		int i;

		for (i = 0; i < gp_events_size; i++)
			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
				return &gp_events[i];
	} else {
		unsigned int idx = cnt->ctr - MSR_CORE_PERF_FIXED_CTR0;

		if (idx < ARRAY_SIZE(fixed_events))
			return &fixed_events[idx];
	}

	return NULL;
}

static void global_enable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	cnt->idx = event_to_global_idx(cnt);
	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx));
}

static void global_disable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx));
}

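/*
 * Arm a counter with an initial count.  GP counters are enabled via
 * their event-select MSR; fixed counters via their per-counter 4-bit
 * OS/USR/PMI enable field in MSR_CORE_PERF_FIXED_CTR_CTRL.
 */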
static void __start_event(pmu_counter_t *evt, uint64_t count)
{
	evt->count = count;
	wrmsr(evt->ctr, evt->count);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config | EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		uint32_t usrospmi = 0;

		if (evt->config & EVNTSEL_OS)
			usrospmi |= (1 << 0);
		if (evt->config & EVNTSEL_USR)
			usrospmi |= (1 << 1);
		if (evt->config & EVNTSEL_INT)
			usrospmi |= (1 << 3); // PMI on overflow
		ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
	}
	global_enable(evt);
	apic_write(APIC_LVTPC, PMI_VECTOR);
}

185 
186 static void start_event(pmu_counter_t *evt)
187 {
188 	__start_event(evt, 0);
189 }
190 
191 static void stop_event(pmu_counter_t *evt)
192 {
193 	global_disable(evt);
194 	if (is_gp(evt)) {
195 		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
196 		      evt->config & ~EVNTSEL_EN);
197 	} else {
198 		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
199 		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
200 		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
201 	}
202 	evt->count = rdmsr(evt->ctr);
203 }
204 
205 static noinline void measure_many(pmu_counter_t *evt, int count)
206 {
207 	int i;
208 	for (i = 0; i < count; i++)
209 		start_event(&evt[i]);
210 	loop();
211 	for (i = 0; i < count; i++)
212 		stop_event(&evt[i]);
213 }
214 
215 static void measure_one(pmu_counter_t *evt)
216 {
217 	measure_many(evt, 1);
218 }
219 
220 static noinline void __measure(pmu_counter_t *evt, uint64_t count)
221 {
222 	__start_event(evt, count);
223 	loop();
224 	stop_event(evt);
225 }
226 
227 static bool verify_event(uint64_t count, struct pmu_event *e)
228 {
229 	bool pass;
230 
231 	if (!e)
232 		return false;
233 
234 	pass = count >= e->min && count <= e->max;
235 	if (!pass)
236 		printf("FAIL: %d <= %"PRId64" <= %d\n", e->min, count, e->max);
237 
238 	return pass;
239 }
240 
241 static bool verify_counter(pmu_counter_t *cnt)
242 {
243 	return verify_event(cnt->count, get_counter_event(cnt));
244 }
245 
246 static void check_gp_counter(struct pmu_event *evt)
247 {
248 	pmu_counter_t cnt = {
249 		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
250 	};
251 	int i;
252 
253 	for (i = 0; i < pmu.nr_gp_counters; i++) {
254 		cnt.ctr = MSR_GP_COUNTERx(i);
255 		measure_one(&cnt);
256 		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
257 	}
258 }
259 
260 static void check_gp_counters(void)
261 {
262 	int i;
263 
264 	for (i = 0; i < gp_events_size; i++)
265 		if (pmu_gp_counter_is_available(i))
266 			check_gp_counter(&gp_events[i]);
267 		else
268 			printf("GP event '%s' is disabled\n",
269 					gp_events[i].name);
270 }
271 
272 static void check_fixed_counters(void)
273 {
274 	pmu_counter_t cnt = {
275 		.config = EVNTSEL_OS | EVNTSEL_USR,
276 	};
277 	int i;
278 
279 	for (i = 0; i < fixed_counters_num; i++) {
280 		cnt.ctr = fixed_events[i].unit_sel;
281 		measure_one(&cnt);
282 		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
283 	}
284 }
285 
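/*
 * Program every available GP counter and every fixed counter at once,
 * then verify that each one still counts within its expected bounds.
 */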
static void check_counters_many(void)
{
	pmu_counter_t cnt[48];
	int i, n;

	for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) {
		if (!pmu_gp_counter_is_available(i))
			continue;

		cnt[n].ctr = MSR_GP_COUNTERx(n);
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
			gp_events[i % gp_events_size].unit_sel;
		n++;
	}
	for (i = 0; i < fixed_counters_num; i++) {
		cnt[n].ctr = fixed_events[i].unit_sel;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
		n++;
	}

	assert(n <= ARRAY_SIZE(cnt));
	measure_many(cnt, n);

	for (i = 0; i < n; i++)
		if (!verify_counter(&cnt[i]))
			break;

	report(i == n, "all counters");
}

static uint64_t measure_for_overflow(pmu_counter_t *cnt)
{
	__measure(cnt, 0);
	/*
	 * To generate overflow, i.e. roll over to '0', the initial count just
	 * needs to be preset to the negative expected count.  However, as per
	 * Intel's SDM, the preset count needs to be incremented by 1 to ensure
	 * the overflow interrupt is generated immediately instead of possibly
	 * waiting for the overflow to propagate through the counter.
	 */
	assert(cnt->count > 1);
	return 1 - cnt->count;
}

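/*
 * Preset each counter so that it rolls over during loop().  Odd
 * iterations additionally set EVNTSEL_INT to verify PMI delivery; the
 * extra final iteration exercises fixed counter 0 on Intel.
 */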
static void check_counter_overflow(void)
{
	uint64_t overflow_preset;
	int i;
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};

	overflow_preset = measure_for_overflow(&cnt);

	/* clear status before test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	report_prefix_push("overflow");

	for (i = 0; i < pmu.nr_gp_counters + 1; i++) {
		uint64_t status;
		int idx;

		cnt.count = overflow_preset;
		if (pmu_use_full_writes())
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;

		if (i == pmu.nr_gp_counters) {
			if (!pmu.is_intel)
				break;

			cnt.ctr = fixed_events[0].unit_sel;
			cnt.count = measure_for_overflow(&cnt);
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
		} else {
			cnt.ctr = MSR_GP_COUNTERx(i);
		}

		if (i % 2)
			cnt.config |= EVNTSEL_INT;
		else
			cnt.config &= ~EVNTSEL_INT;
		idx = event_to_global_idx(&cnt);
		__measure(&cnt, cnt.count);
		if (pmu.is_intel)
			report(cnt.count == 1, "cntr-%d", i);
		else
			report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i);

		if (!this_cpu_has_perf_global_status())
			continue;

		status = rdmsr(pmu.msr_global_status);
		report(status & (1ull << idx), "status-%d", i);
		wrmsr(pmu.msr_global_status_clr, status);
		status = rdmsr(pmu.msr_global_status);
		report(!(status & (1ull << idx)), "status clear-%d", i);
		report(check_irq() == (i % 2), "irq-%d", i);
	}

	report_prefix_pop();
}

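/*
 * With a counter-mask (CMASK) of 2, the counter only increments on
 * cycles in which at least two instructions retire, so the result must
 * come in below the raw instruction count.
 */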
static void check_gp_counter_cmask(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};

	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
	measure_one(&cnt);
	report(cnt.count < gp_events[1].min, "cmask");
}

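/*
 * RDPMC with bit 31 of the index set requests a "fast" read that
 * returns only the low 32 bits of the counter; bit 30 selects the
 * fixed-counter range.  Fast reads #GP on processors that don't
 * support them, which the caller detects via test_for_exception().
 */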
static void do_rdpmc_fast(void *ptr)
{
	pmu_counter_t *cnt = ptr;
	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);

	if (!is_gp(cnt))
		idx |= 1 << 30;

	cnt->count = rdpmc(idx);
}

static void check_rdpmc(void)
{
	uint64_t val = 0xff0123456789ull;
	bool exc;
	int i;

	report_prefix_push("rdpmc");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		uint64_t x;
		pmu_counter_t cnt = {
			.ctr = MSR_GP_COUNTERx(i),
			.idx = i
		};

		/*
		 * Without full-width writes, only the low 32 bits are writable,
		 * and the value is sign-extended.
		 */
		if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0)
			x = (uint64_t)(int64_t)(int32_t)val;
		else
			x = (uint64_t)(int64_t)val;

		/* Mask according to the number of supported bits */
		x &= (1ull << pmu.gp_counter_width) - 1;

		wrmsr(MSR_GP_COUNTERx(i), val);
		report(rdpmc(i) == x, "cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fast-%d", i);
		else
			report(cnt.count == (u32)val, "fast-%d", i);
	}
	for (i = 0; i < fixed_counters_num; i++) {
		uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1);
		pmu_counter_t cnt = {
			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
			.idx = i
		};

		wrmsr(MSR_PERF_FIXED_CTRx(i), x);
		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fixed fast-%d", i);
		else
			report(cnt.count == (u32)x, "fixed fast-%d", i);
	}

	report_prefix_pop();
}

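/*
 * Verify that writing a running counter takes effect: zeroing it
 * mid-run must leave the final count below the event minimum, and
 * loading -1 must make the next event increment overflow it.
 */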
static void check_running_counter_wrmsr(void)
{
	uint64_t status;
	uint64_t count;
	pmu_counter_t evt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};

	report_prefix_push("running counter wrmsr");

	start_event(&evt);
	loop();
	wrmsr(MSR_GP_COUNTERx(0), 0);
	stop_event(&evt);
	report(evt.count < gp_events[1].min, "cntr");

	/* clear status before overflow test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&evt);

	count = -1;
	if (pmu_use_full_writes())
		count &= (1ull << pmu.gp_counter_width) - 1;

	wrmsr(MSR_GP_COUNTERx(0), count);

	loop();
	stop_event(&evt);

	if (this_cpu_has_perf_global_status()) {
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "status msr bit");
	}

	report_prefix_pop();
}

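/*
 * Preset a branch counter and an instruction counter to overflow after
 * exactly the expected number of events, execute a block whose branches
 * are forced through the emulator with KVM_FEP, then check both the
 * final counts and the overflow bits in the global status MSR.
 */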
static void check_emulated_instr(void)
{
	uint64_t status, instr_start, brnch_start;
	uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1;
	unsigned int branch_idx = pmu.is_intel ?
				  INTEL_BRANCHES_IDX : AMD_BRANCHES_IDX;
	pmu_counter_t brnch_cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		/* branch instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel,
	};
	pmu_counter_t instr_cnt = {
		.ctr = MSR_GP_COUNTERx(1),
		/* instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};

	report_prefix_push("emulated instruction");

	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&brnch_cnt);
	start_event(&instr_cnt);

	brnch_start = -EXPECTED_BRNCH;
	instr_start = -EXPECTED_INSTR;
	wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width);
	wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width);
	// KVM_FEP is a magic prefix that forces emulation so
	// 'KVM_FEP "jne label\n"' just counts as a single instruction.
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	stop_event(&brnch_cnt);
	stop_event(&instr_cnt);

	// Check that the end count - start count is at least the expected
	// number of instructions and branches.
	report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
	       "instruction count");
	report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
	       "branch count");
	if (this_cpu_has_perf_global_status()) {
		// Additionally check that those counters overflowed properly.
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "branch counter overflow");
		report(status & 2, "instruction counter overflow");
	}

	report_prefix_pop();
}

#define XBEGIN_STARTED (~0u)
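/*
 * Unhalted core cycles (0x3c) qualified by EVNTSEL bit 32 (IN_TX)
 * counts only cycles spent inside a transaction; adding bit 33
 * (IN_TXCP) excludes cycles from aborted transactions and is only
 * supported on GP counter 2.
 */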
static void check_tsx_cycles(void)
{
	pmu_counter_t cnt;
	unsigned int i, ret = 0;

	if (!this_cpu_has(X86_FEATURE_RTM))
		return;

	report_prefix_push("TSX cycles");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);

		if (i == 2) {
			/* Transactional cycles committed only on gp counter 2 */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x30000003c;
		} else {
			/* Transactional cycles */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x10000003c;
		}

		start_event(&cnt);

		asm volatile("xbegin 1f\n\t"
				"1:\n\t"
				: "+a" (ret) :: "memory");

		/* Generate a non-canonical #GP to trigger ABORT. */
		if (ret == XBEGIN_STARTED)
			*(int *)NONCANONICAL = 0;

		stop_event(&cnt);

		report(cnt.count > 0, "gp cntr-%d with a value of %" PRId64 "", i, cnt.count);
	}

	report_prefix_pop();
}

static void warm_up(void)
{
	int i;

	/*
	 * The cycles event always runs first, while the caches and branch
	 * predictors are still cold, so its measured value can exceed the
	 * predefined upper bound and cause a false positive.  Run the loop
	 * a few times to warm everything up before the real measurements.
	 */
	for (i = 0; i < 10; i++)
		loop();
}

static void check_counters(void)
{
	if (is_fep_available())
		check_emulated_instr();

	warm_up();
	check_gp_counters();
	check_fixed_counters();
	check_rdpmc();
	check_counters_many();
	check_counter_overflow();
	check_gp_counter_cmask();
	check_running_counter_wrmsr();
	check_tsx_cycles();
}

static void do_unsupported_width_counter_write(void *index)
{
	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
}

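/*
 * MSR_IA32_PERFCTRx are the legacy counter MSRs, which accept 64-bit
 * writes but only honor (and sign-extend) the low 32 bits;
 * MSR_IA32_PMCx are full-width aliases of the same counters.
 */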
static void check_gp_counters_write_width(void)
{
	u64 val_64 = 0xffffff0123456789ull;
	u64 val_32 = val_64 & ((1ull << 32) - 1);
	u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1);
	int i;

	/*
	 * MSR_IA32_PERFCTRn supports 64-bit writes,
	 * but only the lowest 32 bits are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
	}

	/*
	 * MSR_IA32_PMCn supports writing values up to GP counter width,
	 * and only the lowest bits of GP counter width are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PMC0 + i, val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);

		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);

		report(test_for_exception(GP_VECTOR,
			do_unsupported_width_counter_write, &i),
		"writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
	}
}

/*
 * Per the SDM, reference cycles are currently implemented using the
 * core crystal clock, TSC, or bus clock. Calibrate to the TSC
 * frequency to set reasonable expectations.
 */
static void set_ref_cycle_expectations(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_IA32_PERFCTR0,
		.config = EVNTSEL_OS | EVNTSEL_USR |
			  intel_gp_events[INTEL_REF_CYCLES_IDX].unit_sel,
	};
	uint64_t tsc_delta;
	uint64_t t0, t1, t2, t3;

	/* Bit 2 enumerates the availability of reference cycles events. */
	if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2))
		return;

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	t0 = fenced_rdtsc();
	start_event(&cnt);
	t1 = fenced_rdtsc();

	/*
	 * This loop has to run long enough to dominate the VM-exit
	 * costs for playing with the PMU MSRs on start and stop.
	 *
	 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
	 * the core crystal clock, this function calculated a guest
	 * TSC : ref cycles ratio of around 105 with ECX initialized
	 * to one billion.
	 */
	asm volatile("loop ." : "+c"((int){1000000000ull}));

	t2 = fenced_rdtsc();
	stop_event(&cnt);
	t3 = fenced_rdtsc();

	tsc_delta = ((t2 - t1) + (t3 - t0)) / 2;

	if (!tsc_delta)
		return;

	intel_gp_events[INTEL_REF_CYCLES_IDX].min =
		(intel_gp_events[INTEL_REF_CYCLES_IDX].min * cnt.count) / tsc_delta;
	intel_gp_events[INTEL_REF_CYCLES_IDX].max =
		(intel_gp_events[INTEL_REF_CYCLES_IDX].max * cnt.count) / tsc_delta;
}

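/* RDPMC with an index that selects no existing counter must #GP. */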
static void check_invalid_rdpmc_gp(void)
{
	uint64_t val;

	report(rdpmc_safe(64, &val) == GP_VECTOR,
	       "Expected #GP on RDPMC(64)");
}

int main(int ac, char **av)
{
	setup_vm();
	handle_irq(PMI_VECTOR, cnt_overflow);
	buf = malloc(N*64);

	check_invalid_rdpmc_gp();

	if (pmu.is_intel) {
		if (!pmu.version) {
			report_skip("No Intel Arch PMU is detected!");
			return report_summary();
		}
		gp_events = (struct pmu_event *)intel_gp_events;
		gp_events_size = ARRAY_SIZE(intel_gp_events);
		report_prefix_push("Intel");
		set_ref_cycle_expectations();
	} else {
		gp_events_size = ARRAY_SIZE(amd_gp_events);
		gp_events = (struct pmu_event *)amd_gp_events;
		report_prefix_push("AMD");
	}

	printf("PMU version:         %d\n", pmu.version);
	printf("GP counters:         %d\n", pmu.nr_gp_counters);
	printf("GP counter width:    %d\n", pmu.gp_counter_width);
	printf("Mask length:         %d\n", pmu.gp_counter_mask_length);
	printf("Fixed counters:      %d\n", pmu.nr_fixed_counters);
	printf("Fixed counter width: %d\n", pmu.fixed_counter_width);

	fixed_counters_num = MIN(pmu.nr_fixed_counters, ARRAY_SIZE(fixed_events));
	if (pmu.nr_fixed_counters > ARRAY_SIZE(fixed_events))
		report_info("Fixed counters number %d > defined fixed events %u.  "
			    "Please update test case.", pmu.nr_fixed_counters,
			    (uint32_t)ARRAY_SIZE(fixed_events));

	apic_write(APIC_LVTPC, PMI_VECTOR);

	check_counters();

	if (pmu_has_full_writes()) {
		pmu.msr_gp_counter_base = MSR_IA32_PMC0;

		report_prefix_push("full-width writes");
		check_counters();
		check_gp_counters_write_width();
		report_prefix_pop();
	}

	if (!pmu.is_intel) {
		report_prefix_push("K7");
		pmu.nr_gp_counters = AMD64_NUM_COUNTERS;
		pmu.msr_gp_counter_base = MSR_K7_PERFCTR0;
		pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0;
		check_counters();
		report_prefix_pop();
	}

	return report_summary();
}