xref: /kvm-unit-tests/x86/pmu.c (revision 5d6a3a547c3c066ddb4648492aeafc751fab0110)
1 
2 #include "x86/msr.h"
3 #include "x86/processor.h"
4 #include "x86/pmu.h"
5 #include "x86/apic-defs.h"
6 #include "x86/apic.h"
7 #include "x86/desc.h"
8 #include "x86/isr.h"
9 #include "vmalloc.h"
10 #include "alloc.h"
11 
12 #include "libcflat.h"
13 #include <stdint.h>
14 
/* Iteration count of the measured loop(); the event bounds below scale with it. */
#define N 1000000

/*
 * Instruction and branch totals of the assembly block in
 * check_emulated_instr(): 2 setup instructions, 5 forced-emulation jumps
 * and 5 mov/cpuid pairs give 17 instructions, 5 of which are branches.
 * Keep these in sync with that block.
 */
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5
21 
/* Software view of one counter under test. */
typedef struct pmu_counter {
	uint32_t ctr;		/* MSR address of the counter itself */
	uint64_t config;	/* EVNTSEL_* flags OR'ed with the event's unit_sel */
	uint64_t count;		/* preset written on start, value read back on stop */
	int idx;		/* bit position in the global control MSR, set by global_enable() */
} pmu_counter_t;
28 
29 struct pmu_event {
30 	const char *name;
31 	uint32_t unit_sel;
32 	int min;
33 	int max;
34 } intel_gp_events[] = {
35 	{"core cycles", 0x003c, 1*N, 50*N},
36 	{"instructions", 0x00c0, 10*N, 10.2*N},
37 	{"ref cycles", 0x013c, 1*N, 30*N},
38 	{"llc references", 0x4f2e, 1, 2*N},
39 	{"llc misses", 0x412e, 1, 1*N},
40 	{"branches", 0x00c4, 1*N, 1.1*N},
41 	{"branch misses", 0x00c5, 0, 0.1*N},
42 }, amd_gp_events[] = {
43 	{"core cycles", 0x0076, 1*N, 50*N},
44 	{"instructions", 0x00c0, 10*N, 10.2*N},
45 	{"branches", 0x00c2, 1*N, 1.1*N},
46 	{"branch misses", 0x00c3, 0, 0.1*N},
47 }, fixed_events[] = {
48 	{"fixed 0", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
49 	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
50 	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
51 };
52 
/* Memory walked by loop(); allocated once in main(). */
char *buf;

/* Vendor-specific GP event table selected in main(), and its entry count. */
static struct pmu_event *gp_events;
static unsigned int gp_events_size;
57 
58 static inline void loop(void)
59 {
60 	unsigned long tmp, tmp2, tmp3;
61 
62 	asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b"
63 			: "=c"(tmp), "=r"(tmp2), "=r"(tmp3): "0"(N), "1"(buf));
64 
65 }
66 
/* Number of PMIs seen by cnt_overflow(); reset and polled by check_irq(). */
volatile uint64_t irq_received;
68 
69 static void cnt_overflow(isr_regs_t *regs)
70 {
71 	irq_received++;
72 	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
73 	apic_write(APIC_EOI, 0);
74 }
75 
76 static bool check_irq(void)
77 {
78 	int i;
79 	irq_received = 0;
80 	sti();
81 	for (i = 0; i < 100000 && !irq_received; i++)
82 		asm volatile("pause");
83 	cli();
84 	return irq_received;
85 }
86 
87 static bool is_gp(pmu_counter_t *evt)
88 {
89 	if (!pmu.is_intel)
90 		return true;
91 
92 	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
93 		evt->ctr >= MSR_IA32_PMC0;
94 }
95 
96 static int event_to_global_idx(pmu_counter_t *cnt)
97 {
98 	if (pmu.is_intel)
99 		return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
100 			(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));
101 
102 	if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0)
103 		return (cnt->ctr - pmu.msr_gp_counter_base) / 2;
104 	else
105 		return cnt->ctr - pmu.msr_gp_counter_base;
106 }
107 
108 static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
109 {
110 	if (is_gp(cnt)) {
111 		int i;
112 
113 		for (i = 0; i < gp_events_size; i++)
114 			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
115 				return &gp_events[i];
116 	} else
117 		return &fixed_events[cnt->ctr - MSR_CORE_PERF_FIXED_CTR0];
118 
119 	return (void*)0;
120 }
121 
122 static void global_enable(pmu_counter_t *cnt)
123 {
124 	if (!this_cpu_has_perf_global_ctrl())
125 		return;
126 
127 	cnt->idx = event_to_global_idx(cnt);
128 	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx));
129 }
130 
131 static void global_disable(pmu_counter_t *cnt)
132 {
133 	if (!this_cpu_has_perf_global_ctrl())
134 		return;
135 
136 	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx));
137 }
138 
139 static void __start_event(pmu_counter_t *evt, uint64_t count)
140 {
141     evt->count = count;
142     wrmsr(evt->ctr, evt->count);
143     if (is_gp(evt)) {
144 	    wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
145 		  evt->config | EVNTSEL_EN);
146     } else {
147 	    uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
148 	    int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
149 	    uint32_t usrospmi = 0;
150 
151 	    if (evt->config & EVNTSEL_OS)
152 		    usrospmi |= (1 << 0);
153 	    if (evt->config & EVNTSEL_USR)
154 		    usrospmi |= (1 << 1);
155 	    if (evt->config & EVNTSEL_INT)
156 		    usrospmi |= (1 << 3); // PMI on overflow
157 	    ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
158 	    wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
159     }
160     global_enable(evt);
161     apic_write(APIC_LVTPC, PMI_VECTOR);
162 }
163 
164 static void start_event(pmu_counter_t *evt)
165 {
166 	__start_event(evt, 0);
167 }
168 
169 static void stop_event(pmu_counter_t *evt)
170 {
171 	global_disable(evt);
172 	if (is_gp(evt)) {
173 		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
174 		      evt->config & ~EVNTSEL_EN);
175 	} else {
176 		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
177 		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
178 		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
179 	}
180 	evt->count = rdmsr(evt->ctr);
181 }
182 
183 static noinline void measure_many(pmu_counter_t *evt, int count)
184 {
185 	int i;
186 	for (i = 0; i < count; i++)
187 		start_event(&evt[i]);
188 	loop();
189 	for (i = 0; i < count; i++)
190 		stop_event(&evt[i]);
191 }
192 
193 static void measure_one(pmu_counter_t *evt)
194 {
195 	measure_many(evt, 1);
196 }
197 
198 static noinline void __measure(pmu_counter_t *evt, uint64_t count)
199 {
200 	__start_event(evt, count);
201 	loop();
202 	stop_event(evt);
203 }
204 
205 static bool verify_event(uint64_t count, struct pmu_event *e)
206 {
207 	// printf("%d <= %ld <= %d\n", e->min, count, e->max);
208 	return count >= e->min && count <= e->max;
209 }
210 
211 static bool verify_counter(pmu_counter_t *cnt)
212 {
213 	return verify_event(cnt->count, get_counter_event(cnt));
214 }
215 
216 static void check_gp_counter(struct pmu_event *evt)
217 {
218 	pmu_counter_t cnt = {
219 		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
220 	};
221 	int i;
222 
223 	for (i = 0; i < pmu.nr_gp_counters; i++) {
224 		cnt.ctr = MSR_GP_COUNTERx(i);
225 		measure_one(&cnt);
226 		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
227 	}
228 }
229 
230 static void check_gp_counters(void)
231 {
232 	int i;
233 
234 	for (i = 0; i < gp_events_size; i++)
235 		if (pmu_gp_counter_is_available(i))
236 			check_gp_counter(&gp_events[i]);
237 		else
238 			printf("GP event '%s' is disabled\n",
239 					gp_events[i].name);
240 }
241 
242 static void check_fixed_counters(void)
243 {
244 	pmu_counter_t cnt = {
245 		.config = EVNTSEL_OS | EVNTSEL_USR,
246 	};
247 	int i;
248 
249 	for (i = 0; i < pmu.nr_fixed_counters; i++) {
250 		cnt.ctr = fixed_events[i].unit_sel;
251 		measure_one(&cnt);
252 		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
253 	}
254 }
255 
256 static void check_counters_many(void)
257 {
258 	pmu_counter_t cnt[10];
259 	int i, n;
260 
261 	for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) {
262 		if (!pmu_gp_counter_is_available(i))
263 			continue;
264 
265 		cnt[n].ctr = MSR_GP_COUNTERx(n);
266 		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
267 			gp_events[i % gp_events_size].unit_sel;
268 		n++;
269 	}
270 	for (i = 0; i < pmu.nr_fixed_counters; i++) {
271 		cnt[n].ctr = fixed_events[i].unit_sel;
272 		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
273 		n++;
274 	}
275 
276 	measure_many(cnt, n);
277 
278 	for (i = 0; i < n; i++)
279 		if (!verify_counter(&cnt[i]))
280 			break;
281 
282 	report(i == n, "all counters");
283 }
284 
285 static uint64_t measure_for_overflow(pmu_counter_t *cnt)
286 {
287 	__measure(cnt, 0);
288 	/*
289 	 * To generate overflow, i.e. roll over to '0', the initial count just
290 	 * needs to be preset to the negative expected count.  However, as per
291 	 * Intel's SDM, the preset count needs to be incremented by 1 to ensure
292 	 * the overflow interrupt is generated immediately instead of possibly
293 	 * waiting for the overflow to propagate through the counter.
294 	 */
295 	assert(cnt->count > 1);
296 	return 1 - cnt->count;
297 }
298 
299 static void check_counter_overflow(void)
300 {
301 	uint64_t overflow_preset;
302 	int i;
303 	pmu_counter_t cnt = {
304 		.ctr = MSR_GP_COUNTERx(0),
305 		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
306 	};
307 	overflow_preset = measure_for_overflow(&cnt);
308 
309 	/* clear status before test */
310 	if (this_cpu_has_perf_global_status())
311 		pmu_clear_global_status();
312 
313 	report_prefix_push("overflow");
314 
315 	for (i = 0; i < pmu.nr_gp_counters + 1; i++) {
316 		uint64_t status;
317 		int idx;
318 
319 		cnt.count = overflow_preset;
320 		if (pmu_use_full_writes())
321 			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
322 
323 		if (i == pmu.nr_gp_counters) {
324 			if (!pmu.is_intel)
325 				break;
326 
327 			cnt.ctr = fixed_events[0].unit_sel;
328 			cnt.count = measure_for_overflow(&cnt);
329 			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
330 		} else {
331 			cnt.ctr = MSR_GP_COUNTERx(i);
332 		}
333 
334 		if (i % 2)
335 			cnt.config |= EVNTSEL_INT;
336 		else
337 			cnt.config &= ~EVNTSEL_INT;
338 		idx = event_to_global_idx(&cnt);
339 		__measure(&cnt, cnt.count);
340 		if (pmu.is_intel)
341 			report(cnt.count == 1, "cntr-%d", i);
342 		else
343 			report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i);
344 
345 		if (!this_cpu_has_perf_global_status())
346 			continue;
347 
348 		status = rdmsr(pmu.msr_global_status);
349 		report(status & (1ull << idx), "status-%d", i);
350 		wrmsr(pmu.msr_global_status_clr, status);
351 		status = rdmsr(pmu.msr_global_status);
352 		report(!(status & (1ull << idx)), "status clear-%d", i);
353 		report(check_irq() == (i % 2), "irq-%d", i);
354 	}
355 
356 	report_prefix_pop();
357 }
358 
359 static void check_gp_counter_cmask(void)
360 {
361 	pmu_counter_t cnt = {
362 		.ctr = MSR_GP_COUNTERx(0),
363 		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
364 	};
365 	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
366 	measure_one(&cnt);
367 	report(cnt.count < gp_events[1].min, "cmask");
368 }
369 
370 static void do_rdpmc_fast(void *ptr)
371 {
372 	pmu_counter_t *cnt = ptr;
373 	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);
374 
375 	if (!is_gp(cnt))
376 		idx |= 1 << 30;
377 
378 	cnt->count = rdpmc(idx);
379 }
380 
381 
382 static void check_rdpmc(void)
383 {
384 	uint64_t val = 0xff0123456789ull;
385 	bool exc;
386 	int i;
387 
388 	report_prefix_push("rdpmc");
389 
390 	for (i = 0; i < pmu.nr_gp_counters; i++) {
391 		uint64_t x;
392 		pmu_counter_t cnt = {
393 			.ctr = MSR_GP_COUNTERx(i),
394 			.idx = i
395 		};
396 
397 	        /*
398 	         * Without full-width writes, only the low 32 bits are writable,
399 	         * and the value is sign-extended.
400 	         */
401 		if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0)
402 			x = (uint64_t)(int64_t)(int32_t)val;
403 		else
404 			x = (uint64_t)(int64_t)val;
405 
406 		/* Mask according to the number of supported bits */
407 		x &= (1ull << pmu.gp_counter_width) - 1;
408 
409 		wrmsr(MSR_GP_COUNTERx(i), val);
410 		report(rdpmc(i) == x, "cntr-%d", i);
411 
412 		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
413 		if (exc)
414 			report_skip("fast-%d", i);
415 		else
416 			report(cnt.count == (u32)val, "fast-%d", i);
417 	}
418 	for (i = 0; i < pmu.nr_fixed_counters; i++) {
419 		uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1);
420 		pmu_counter_t cnt = {
421 			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
422 			.idx = i
423 		};
424 
425 		wrmsr(MSR_PERF_FIXED_CTRx(i), x);
426 		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);
427 
428 		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
429 		if (exc)
430 			report_skip("fixed fast-%d", i);
431 		else
432 			report(cnt.count == (u32)x, "fixed fast-%d", i);
433 	}
434 
435 	report_prefix_pop();
436 }
437 
438 static void check_running_counter_wrmsr(void)
439 {
440 	uint64_t status;
441 	uint64_t count;
442 	pmu_counter_t evt = {
443 		.ctr = MSR_GP_COUNTERx(0),
444 		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
445 	};
446 
447 	report_prefix_push("running counter wrmsr");
448 
449 	start_event(&evt);
450 	loop();
451 	wrmsr(MSR_GP_COUNTERx(0), 0);
452 	stop_event(&evt);
453 	report(evt.count < gp_events[1].min, "cntr");
454 
455 	/* clear status before overflow test */
456 	if (this_cpu_has_perf_global_status())
457 		pmu_clear_global_status();
458 
459 	start_event(&evt);
460 
461 	count = -1;
462 	if (pmu_use_full_writes())
463 		count &= (1ull << pmu.gp_counter_width) - 1;
464 
465 	wrmsr(MSR_GP_COUNTERx(0), count);
466 
467 	loop();
468 	stop_event(&evt);
469 
470 	if (this_cpu_has_perf_global_status()) {
471 		status = rdmsr(pmu.msr_global_status);
472 		report(status & 1, "status msr bit");
473 	}
474 
475 	report_prefix_pop();
476 }
477 
478 static void check_emulated_instr(void)
479 {
480 	uint64_t status, instr_start, brnch_start;
481 	uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1;
482 	unsigned int branch_idx = pmu.is_intel ? 5 : 2;
483 	pmu_counter_t brnch_cnt = {
484 		.ctr = MSR_GP_COUNTERx(0),
485 		/* branch instructions */
486 		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel,
487 	};
488 	pmu_counter_t instr_cnt = {
489 		.ctr = MSR_GP_COUNTERx(1),
490 		/* instructions */
491 		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
492 	};
493 	report_prefix_push("emulated instruction");
494 
495 	if (this_cpu_has_perf_global_status())
496 		pmu_clear_global_status();
497 
498 	start_event(&brnch_cnt);
499 	start_event(&instr_cnt);
500 
501 	brnch_start = -EXPECTED_BRNCH;
502 	instr_start = -EXPECTED_INSTR;
503 	wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width);
504 	wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width);
505 	// KVM_FEP is a magic prefix that forces emulation so
506 	// 'KVM_FEP "jne label\n"' just counts as a single instruction.
507 	asm volatile(
508 		"mov $0x0, %%eax\n"
509 		"cmp $0x0, %%eax\n"
510 		KVM_FEP "jne label\n"
511 		KVM_FEP "jne label\n"
512 		KVM_FEP "jne label\n"
513 		KVM_FEP "jne label\n"
514 		KVM_FEP "jne label\n"
515 		"mov $0xa, %%eax\n"
516 		"cpuid\n"
517 		"mov $0xa, %%eax\n"
518 		"cpuid\n"
519 		"mov $0xa, %%eax\n"
520 		"cpuid\n"
521 		"mov $0xa, %%eax\n"
522 		"cpuid\n"
523 		"mov $0xa, %%eax\n"
524 		"cpuid\n"
525 		"label:\n"
526 		:
527 		:
528 		: "eax", "ebx", "ecx", "edx");
529 
530 	if (this_cpu_has_perf_global_ctrl())
531 		wrmsr(pmu.msr_global_ctl, 0);
532 
533 	stop_event(&brnch_cnt);
534 	stop_event(&instr_cnt);
535 
536 	// Check that the end count - start count is at least the expected
537 	// number of instructions and branches.
538 	report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
539 	       "instruction count");
540 	report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
541 	       "branch count");
542 	if (this_cpu_has_perf_global_status()) {
543 		// Additionally check that those counters overflowed properly.
544 		status = rdmsr(pmu.msr_global_status);
545 		report(status & 1, "branch counter overflow");
546 		report(status & 2, "instruction counter overflow");
547 	}
548 
549 	report_prefix_pop();
550 }
551 
552 #define XBEGIN_STARTED (~0u)
553 static void check_tsx_cycles(void)
554 {
555 	pmu_counter_t cnt;
556 	unsigned int i, ret = 0;
557 
558 	if (!this_cpu_has(X86_FEATURE_RTM))
559 		return;
560 
561 	report_prefix_push("TSX cycles");
562 
563 	for (i = 0; i < pmu.nr_gp_counters; i++) {
564 		cnt.ctr = MSR_GP_COUNTERx(i);
565 
566 		if (i == 2) {
567 			/* Transactional cycles committed only on gp counter 2 */
568 			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x30000003c;
569 		} else {
570 			/* Transactional cycles */
571 			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x10000003c;
572 		}
573 
574 		start_event(&cnt);
575 
576 		asm volatile("xbegin 1f\n\t"
577 				"1:\n\t"
578 				: "+a" (ret) :: "memory");
579 
580 		/* Generate a non-canonical #GP to trigger ABORT. */
581 		if (ret == XBEGIN_STARTED)
582 			*(int *)NONCANONICAL = 0;
583 
584 		stop_event(&cnt);
585 
586 		report(cnt.count > 0, "gp cntr-%d with a value of %" PRId64 "", i, cnt.count);
587 	}
588 
589 	report_prefix_pop();
590 }
591 
592 static void check_counters(void)
593 {
594 	if (is_fep_available())
595 		check_emulated_instr();
596 
597 	check_gp_counters();
598 	check_fixed_counters();
599 	check_rdpmc();
600 	check_counters_many();
601 	check_counter_overflow();
602 	check_gp_counter_cmask();
603 	check_running_counter_wrmsr();
604 	check_tsx_cycles();
605 }
606 
607 static void do_unsupported_width_counter_write(void *index)
608 {
609 	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
610 }
611 
612 static void check_gp_counters_write_width(void)
613 {
614 	u64 val_64 = 0xffffff0123456789ull;
615 	u64 val_32 = val_64 & ((1ull << 32) - 1);
616 	u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1);
617 	int i;
618 
619 	/*
620 	 * MSR_IA32_PERFCTRn supports 64-bit writes,
621 	 * but only the lowest 32 bits are valid.
622 	 */
623 	for (i = 0; i < pmu.nr_gp_counters; i++) {
624 		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
625 		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
626 		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
627 
628 		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
629 		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
630 		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
631 
632 		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
633 		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
634 		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
635 	}
636 
637 	/*
638 	 * MSR_IA32_PMCn supports writing values up to GP counter width,
639 	 * and only the lowest bits of GP counter width are valid.
640 	 */
641 	for (i = 0; i < pmu.nr_gp_counters; i++) {
642 		wrmsr(MSR_IA32_PMC0 + i, val_32);
643 		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
644 		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
645 
646 		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
647 		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
648 		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);
649 
650 		report(test_for_exception(GP_VECTOR,
651 			do_unsupported_width_counter_write, &i),
652 		"writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
653 	}
654 }
655 
656 /*
657  * Per the SDM, reference cycles are currently implemented using the
658  * core crystal clock, TSC, or bus clock. Calibrate to the TSC
659  * frequency to set reasonable expectations.
660  */
661 static void set_ref_cycle_expectations(void)
662 {
663 	pmu_counter_t cnt = {
664 		.ctr = MSR_IA32_PERFCTR0,
665 		.config = EVNTSEL_OS | EVNTSEL_USR | intel_gp_events[2].unit_sel,
666 	};
667 	uint64_t tsc_delta;
668 	uint64_t t0, t1, t2, t3;
669 
670 	/* Bit 2 enumerates the availability of reference cycles events. */
671 	if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2))
672 		return;
673 
674 	if (this_cpu_has_perf_global_ctrl())
675 		wrmsr(pmu.msr_global_ctl, 0);
676 
677 	t0 = fenced_rdtsc();
678 	start_event(&cnt);
679 	t1 = fenced_rdtsc();
680 
681 	/*
682 	 * This loop has to run long enough to dominate the VM-exit
683 	 * costs for playing with the PMU MSRs on start and stop.
684 	 *
685 	 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
686 	 * the core crystal clock, this function calculated a guest
687 	 * TSC : ref cycles ratio of around 105 with ECX initialized
688 	 * to one billion.
689 	 */
690 	asm volatile("loop ." : "+c"((int){1000000000ull}));
691 
692 	t2 = fenced_rdtsc();
693 	stop_event(&cnt);
694 	t3 = fenced_rdtsc();
695 
696 	tsc_delta = ((t2 - t1) + (t3 - t0)) / 2;
697 
698 	if (!tsc_delta)
699 		return;
700 
701 	intel_gp_events[2].min = (intel_gp_events[2].min * cnt.count) / tsc_delta;
702 	intel_gp_events[2].max = (intel_gp_events[2].max * cnt.count) / tsc_delta;
703 }
704 
705 static void check_invalid_rdpmc_gp(void)
706 {
707 	uint64_t val;
708 
709 	report(rdpmc_safe(64, &val) == GP_VECTOR,
710 	       "Expected #GP on RDPMC(64)");
711 }
712 
713 int main(int ac, char **av)
714 {
715 	setup_vm();
716 	handle_irq(PMI_VECTOR, cnt_overflow);
717 	buf = malloc(N*64);
718 
719 	check_invalid_rdpmc_gp();
720 
721 	if (pmu.is_intel) {
722 		if (!pmu.version) {
723 			report_skip("No Intel Arch PMU is detected!");
724 			return report_summary();
725 		}
726 		gp_events = (struct pmu_event *)intel_gp_events;
727 		gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
728 		report_prefix_push("Intel");
729 		set_ref_cycle_expectations();
730 	} else {
731 		gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
732 		gp_events = (struct pmu_event *)amd_gp_events;
733 		report_prefix_push("AMD");
734 	}
735 
736 	printf("PMU version:         %d\n", pmu.version);
737 	printf("GP counters:         %d\n", pmu.nr_gp_counters);
738 	printf("GP counter width:    %d\n", pmu.gp_counter_width);
739 	printf("Mask length:         %d\n", pmu.gp_counter_mask_length);
740 	printf("Fixed counters:      %d\n", pmu.nr_fixed_counters);
741 	printf("Fixed counter width: %d\n", pmu.fixed_counter_width);
742 
743 	apic_write(APIC_LVTPC, PMI_VECTOR);
744 
745 	check_counters();
746 
747 	if (pmu_has_full_writes()) {
748 		pmu.msr_gp_counter_base = MSR_IA32_PMC0;
749 
750 		report_prefix_push("full-width writes");
751 		check_counters();
752 		check_gp_counters_write_width();
753 		report_prefix_pop();
754 	}
755 
756 	if (!pmu.is_intel) {
757 		report_prefix_push("K7");
758 		pmu.nr_gp_counters = AMD64_NUM_COUNTERS;
759 		pmu.msr_gp_counter_base = MSR_K7_PERFCTR0;
760 		pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0;
761 		check_counters();
762 		report_prefix_pop();
763 	}
764 
765 	return report_summary();
766 }
767