/* kvm-unit-tests x86/pmu.c (revision dfc1fec2fbde04ad607e1aed560cf7059350c70f) */

#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/pmu.h"
#include "x86/apic-defs.h"
#include "x86/apic.h"
#include "x86/desc.h"
#include "x86/isr.h"
#include "alloc.h"

#include "libcflat.h"
#include <stdint.h>

#define N 1000000

// These values match the number of instructions and branches in the
// assembly block in check_emulated_instr().
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

typedef struct {
	uint32_t ctr;
	uint64_t config;
	uint64_t count;
	int idx;
} pmu_counter_t;

struct pmu_event {
	const char *name;
	uint32_t unit_sel;
	int min;
	int max;
} intel_gp_events[] = {
	{"core cycles", 0x003c, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"ref cycles", 0x013c, 1*N, 30*N},
	{"llc references", 0x4f2e, 1, 2*N},
	{"llc misses", 0x412e, 1, 1*N},
	{"branches", 0x00c4, 1*N, 1.1*N},
	{"branch misses", 0x00c5, 0, 0.1*N},
}, amd_gp_events[] = {
	{"core cycles", 0x0076, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"branches", 0x00c2, 1*N, 1.1*N},
	{"branch misses", 0x00c3, 0, 0.1*N},
}, fixed_events[] = {
	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
	{"fixed 3", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
};

char *buf;

static struct pmu_event *gp_events;
static unsigned int gp_events_size;

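/*
 * Measurement workload: N iterations of a tight loop that reads 'buf' in
 * 64-byte (cache line) strides.  Each iteration executes 10 instructions
 * (mov, add, 7 nops, loop) and one branch, which is what the min/max bounds
 * in the event tables above are calibrated against.
 */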
static inline void loop(void)
{
	unsigned long tmp, tmp2, tmp3;

	asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b"
			: "=c"(tmp), "=r"(tmp2), "=r"(tmp3): "0"(N), "1"(buf));

}

volatile uint64_t irq_received;

static void cnt_overflow(isr_regs_t *regs)
{
	irq_received++;
	apic_write(APIC_EOI, 0);
}

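/*
 * Enable interrupts and spin (with pause) until the PMI handler has run or
 * the iteration limit is hit; returns true if a PMI was delivered.
 */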
static bool check_irq(void)
{
	int i;
	irq_received = 0;
	sti();
	for (i = 0; i < 100000 && !irq_received; i++)
		asm volatile("pause");
	cli();
	return irq_received;
}

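/*
 * AMD has only general purpose counters.  On Intel, anything outside the
 * fixed counter MSR range [MSR_CORE_PERF_FIXED_CTR0, MSR_IA32_PMC0) is a
 * general purpose counter.
 */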
static bool is_gp(pmu_counter_t *evt)
{
	if (!pmu.is_intel)
		return true;

	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
		evt->ctr >= MSR_IA32_PMC0;
}

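/*
 * Map a counter MSR to its bit index in the global control/status MSRs.
 * Intel fixed counters start at FIXED_CNT_INDEX; the AMD F15H layout
 * interleaves event selects and counters, hence the divide by two.
 */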
static int event_to_global_idx(pmu_counter_t *cnt)
{
	if (pmu.is_intel)
		return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
			(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));

	if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0)
		return (cnt->ctr - pmu.msr_gp_counter_base) / 2;
	else
		return cnt->ctr - pmu.msr_gp_counter_base;
}

static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
{
	if (is_gp(cnt)) {
		int i;

		for (i = 0; i < gp_events_size; i++)
			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
				return &gp_events[i];
	} else
		return &fixed_events[cnt->ctr - MSR_CORE_PERF_FIXED_CTR0];

	return (void*)0;
}

static void global_enable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	cnt->idx = event_to_global_idx(cnt);
	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx));
}

static void global_disable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx));
}

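/*
 * Preset the counter to 'count', program the event (the event select MSR for
 * GP counters, or the OS/USR/PMI field in FIXED_CTR_CTRL for fixed counters),
 * enable it in the global control MSR if one exists, and arm the APIC's
 * performance counter LVT entry.
 */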
static void __start_event(pmu_counter_t *evt, uint64_t count)
{
	evt->count = count;
	wrmsr(evt->ctr, evt->count);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config | EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		uint32_t usrospmi = 0;

		if (evt->config & EVNTSEL_OS)
			usrospmi |= (1 << 0);
		if (evt->config & EVNTSEL_USR)
			usrospmi |= (1 << 1);
		if (evt->config & EVNTSEL_INT)
			usrospmi |= (1 << 3); // PMI on overflow
		ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
	}
	global_enable(evt);
	apic_write(APIC_LVTPC, PMI_VECTOR);
}

static void start_event(pmu_counter_t *evt)
{
	__start_event(evt, 0);
}

static void stop_event(pmu_counter_t *evt)
{
	global_disable(evt);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config & ~EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
	}
	evt->count = rdmsr(evt->ctr);
}

static noinline void measure_many(pmu_counter_t *evt, int count)
{
	int i;
	for (i = 0; i < count; i++)
		start_event(&evt[i]);
	loop();
	for (i = 0; i < count; i++)
		stop_event(&evt[i]);
}

static void measure_one(pmu_counter_t *evt)
{
	measure_many(evt, 1);
}

static noinline void __measure(pmu_counter_t *evt, uint64_t count)
{
	__start_event(evt, count);
	loop();
	stop_event(evt);
}

static bool verify_event(uint64_t count, struct pmu_event *e)
{
	// printf("%d <= %ld <= %d\n", e->min, count, e->max);
	return count >= e->min && count <= e->max;
}

static bool verify_counter(pmu_counter_t *cnt)
{
	return verify_event(cnt->count, get_counter_event(cnt));
}

static void check_gp_counter(struct pmu_event *evt)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
	};
	int i;

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);
		measure_one(&cnt);
		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
	}
}

static void check_gp_counters(void)
{
	int i;

	for (i = 0; i < gp_events_size; i++)
		if (pmu_gp_counter_is_available(i))
			check_gp_counter(&gp_events[i]);
		else
			printf("GP event '%s' is disabled\n",
					gp_events[i].name);
}

static void check_fixed_counters(void)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR,
	};
	int i;

	for (i = 0; i < pmu.nr_fixed_counters; i++) {
		cnt.ctr = fixed_events[i].unit_sel;
		measure_one(&cnt);
		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
	}
}

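/*
 * Program every available GP counter (each with a different event) plus all
 * fixed counters at once, run the workload, and verify that every counter
 * landed in its expected range.
 */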
static void check_counters_many(void)
{
	pmu_counter_t cnt[10];
	int i, n;

	for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) {
		if (!pmu_gp_counter_is_available(i))
			continue;

		cnt[n].ctr = MSR_GP_COUNTERx(n);
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
			gp_events[i % gp_events_size].unit_sel;
		n++;
	}
	for (i = 0; i < pmu.nr_fixed_counters; i++) {
		cnt[n].ctr = fixed_events[i].unit_sel;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
		n++;
	}

	measure_many(cnt, n);

	for (i = 0; i < n; i++)
		if (!verify_counter(&cnt[i]))
			break;

	report(i == n, "all counters");
}

static uint64_t measure_for_overflow(pmu_counter_t *cnt)
{
	__measure(cnt, 0);
	/*
	 * To generate overflow, i.e. roll over to '0', the initial count just
	 * needs to be preset to the negative expected count.  However, as per
	 * Intel's SDM, the preset count needs to be incremented by 1 to ensure
	 * the overflow interrupt is generated immediately instead of possibly
	 * waiting for the overflow to propagate through the counter.
	 */
	assert(cnt->count > 1);
	return 1 - cnt->count;
}

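/*
 * Preset each counter so the workload overflows it, enabling the PMI on every
 * other iteration.  Verify the final count, the counter's GLOBAL_STATUS bit,
 * that the bit can be cleared, and that a PMI was (or was not) delivered.
 * The extra Intel-only iteration covers fixed counter 0.
 */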
static void check_counter_overflow(void)
{
	uint64_t overflow_preset;
	int i;
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};
	overflow_preset = measure_for_overflow(&cnt);

	/* clear status before test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	report_prefix_push("overflow");

	for (i = 0; i < pmu.nr_gp_counters + 1; i++) {
		uint64_t status;
		int idx;

		cnt.count = overflow_preset;
		if (pmu_use_full_writes())
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;

		if (i == pmu.nr_gp_counters) {
			if (!pmu.is_intel)
				break;

			cnt.ctr = fixed_events[0].unit_sel;
			cnt.count = measure_for_overflow(&cnt);
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
		} else {
			cnt.ctr = MSR_GP_COUNTERx(i);
		}

		if (i % 2)
			cnt.config |= EVNTSEL_INT;
		else
			cnt.config &= ~EVNTSEL_INT;
		idx = event_to_global_idx(&cnt);
		__measure(&cnt, cnt.count);
		if (pmu.is_intel)
			report(cnt.count == 1, "cntr-%d", i);
		else
			report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i);

		if (!this_cpu_has_perf_global_status())
			continue;

		status = rdmsr(pmu.msr_global_status);
		report(status & (1ull << idx), "status-%d", i);
		wrmsr(pmu.msr_global_status_clr, status);
		status = rdmsr(pmu.msr_global_status);
		report(!(status & (1ull << idx)), "status clear-%d", i);
		report(check_irq() == (i % 2), "irq-%d", i);
	}

	report_prefix_pop();
}

static void check_gp_counter_cmask(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};
	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
	measure_one(&cnt);
	report(cnt.count < gp_events[1].min, "cmask");
}

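/*
 * Read a counter via RDPMC with bit 31 set in the index, which requests a
 * "fast" read of only the low 32 bits; bit 30 selects the fixed counter
 * range.  Callers run this under test_for_exception() since the fast form
 * may #GP when it is not supported.
 */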
static void do_rdpmc_fast(void *ptr)
{
	pmu_counter_t *cnt = ptr;
	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);

	if (!is_gp(cnt))
		idx |= 1 << 30;

	cnt->count = rdpmc(idx);
}

static void check_rdpmc(void)
{
	uint64_t val = 0xff0123456789ull;
	bool exc;
	int i;

	report_prefix_push("rdpmc");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		uint64_t x;
		pmu_counter_t cnt = {
			.ctr = MSR_GP_COUNTERx(i),
			.idx = i
		};

		/*
		 * Without full-width writes, only the low 32 bits are writable,
		 * and the value is sign-extended.
		 */
		if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0)
			x = (uint64_t)(int64_t)(int32_t)val;
		else
			x = (uint64_t)(int64_t)val;

		/* Mask according to the number of supported bits */
		x &= (1ull << pmu.gp_counter_width) - 1;

		wrmsr(MSR_GP_COUNTERx(i), val);
		report(rdpmc(i) == x, "cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fast-%d", i);
		else
			report(cnt.count == (u32)val, "fast-%d", i);
	}
	for (i = 0; i < pmu.nr_fixed_counters; i++) {
		uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1);
		pmu_counter_t cnt = {
			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
			.idx = i
		};

		wrmsr(MSR_PERF_FIXED_CTRx(i), x);
		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fixed fast-%d", i);
		else
			report(cnt.count == (u32)x, "fixed fast-%d", i);
	}

	report_prefix_pop();
}

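/*
 * Verify that writes to a running counter take effect: zeroing the counter
 * mid-run keeps the final count below the expected minimum, and writing -1
 * makes the next event overflow the counter and set its GLOBAL_STATUS bit.
 */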
static void check_running_counter_wrmsr(void)
{
	uint64_t status;
	uint64_t count;
	pmu_counter_t evt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};

	report_prefix_push("running counter wrmsr");

	start_event(&evt);
	loop();
	wrmsr(MSR_GP_COUNTERx(0), 0);
	stop_event(&evt);
	report(evt.count < gp_events[1].min, "cntr");

	/* clear status before overflow test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&evt);

	count = -1;
	if (pmu_use_full_writes())
		count &= (1ull << pmu.gp_counter_width) - 1;

	wrmsr(MSR_GP_COUNTERx(0), count);

	loop();
	stop_event(&evt);

	if (this_cpu_has_perf_global_status()) {
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "status msr bit");
	}

	report_prefix_pop();
}

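/*
 * Count instructions and branches executed through KVM's instruction
 * emulator (forced via the KVM_FEP prefix).  Both counters are preset to
 * overflow on the expected counts, so this also verifies that emulated
 * instructions set the overflow bits in GLOBAL_STATUS.
 */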
static void check_emulated_instr(void)
{
	uint64_t status, instr_start, brnch_start;
	uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1;
	unsigned int branch_idx = pmu.is_intel ? 5 : 2;
	pmu_counter_t brnch_cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		/* branch instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel,
	};
	pmu_counter_t instr_cnt = {
		.ctr = MSR_GP_COUNTERx(1),
		/* instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};
	report_prefix_push("emulated instruction");

	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&brnch_cnt);
	start_event(&instr_cnt);

	brnch_start = -EXPECTED_BRNCH;
	instr_start = -EXPECTED_INSTR;
	wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width);
	wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width);
	// KVM_FEP is a magic prefix that forces emulation so
	// 'KVM_FEP "jne label\n"' just counts as a single instruction.
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	stop_event(&brnch_cnt);
	stop_event(&instr_cnt);

	// Check that the end count - start count is at least the expected
	// number of instructions and branches.
	report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
	       "instruction count");
	report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
	       "branch count");
	if (this_cpu_has_perf_global_status()) {
		// Additionally check that those counters overflowed properly.
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "branch counter overflow");
		report(status & 2, "instruction counter overflow");
	}

	report_prefix_pop();
}

#define XBEGIN_STARTED (~0u)
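/*
 * Count unhalted core cycles (event 0x3c) inside RTM transactions.  Per the
 * SDM, event select bit 32 is IN_TX and bit 33 is IN_TXCP, and IN_TXCP is
 * only valid on GP counter 2, hence the different config used there.
 */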
static void check_tsx_cycles(void)
{
	pmu_counter_t cnt;
	unsigned int i, ret = 0;

	if (!this_cpu_has(X86_FEATURE_RTM))
		return;

	report_prefix_push("TSX cycles");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);

		if (i == 2) {
			/* Transactional cycles committed only on gp counter 2 */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x30000003c;
		} else {
			/* Transactional cycles */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x10000003c;
		}

		start_event(&cnt);

		asm volatile("xbegin 1f\n\t"
				"1:\n\t"
				: "+a" (ret) :: "memory");

		/* Generate a non-canonical #GP to trigger ABORT. */
		if (ret == XBEGIN_STARTED)
			*(int *)NONCANONICAL = 0;

		stop_event(&cnt);

		report(cnt.count > 0, "gp cntr-%d with a value of %" PRId64 "", i, cnt.count);
	}

	report_prefix_pop();
}

static void check_counters(void)
{
	if (is_fep_available())
		check_emulated_instr();

	check_gp_counters();
	check_fixed_counters();
	check_rdpmc();
	check_counters_many();
	check_counter_overflow();
	check_gp_counter_cmask();
	check_running_counter_wrmsr();
	check_tsx_cycles();
}

static void do_unsupported_width_counter_write(void *index)
{
	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
}

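/*
 * Compare the legacy MSR_IA32_PERFCTRx interface, which only takes the low
 * 32 bits of a write (sign-extended into the counter), against the
 * full-width MSR_IA32_PMCx aliases, which accept any value up to the
 * enumerated counter width and #GP on anything wider.
 */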
static void check_gp_counters_write_width(void)
{
	u64 val_64 = 0xffffff0123456789ull;
	u64 val_32 = val_64 & ((1ull << 32) - 1);
	u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1);
	int i;

	/*
	 * MSR_IA32_PERFCTRn supports 64-bit writes,
	 * but only the lowest 32 bits are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
	}

	/*
	 * MSR_IA32_PMCn supports full-width writes up to the GP counter
	 * width; wider values are rejected with #GP.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PMC0 + i, val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);

		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);

		report(test_for_exception(GP_VECTOR,
			do_unsupported_width_counter_write, &i),
		"writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
	}
}

/*
 * Per the SDM, reference cycles are currently implemented using the
 * core crystal clock, TSC, or bus clock. Calibrate to the TSC
 * frequency to set reasonable expectations.
 */
static void set_ref_cycle_expectations(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_IA32_PERFCTR0,
		.config = EVNTSEL_OS | EVNTSEL_USR | intel_gp_events[2].unit_sel,
	};
	uint64_t tsc_delta;
	uint64_t t0, t1, t2, t3;

	/* Bit 2 enumerates the availability of reference cycles events. */
	if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2))
		return;

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	t0 = fenced_rdtsc();
	start_event(&cnt);
	t1 = fenced_rdtsc();

	/*
	 * This loop has to run long enough to dominate the VM-exit
	 * costs for playing with the PMU MSRs on start and stop.
	 *
	 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
	 * the core crystal clock, this function calculated a guest
	 * TSC : ref cycles ratio of around 105 with ECX initialized
	 * to one billion.
	 */
	asm volatile("loop ." : "+c"((int){1000000000ull}));

	t2 = fenced_rdtsc();
	stop_event(&cnt);
	t3 = fenced_rdtsc();

	tsc_delta = ((t2 - t1) + (t3 - t0)) / 2;

	if (!tsc_delta)
		return;

	intel_gp_events[2].min = (intel_gp_events[2].min * cnt.count) / tsc_delta;
	intel_gp_events[2].max = (intel_gp_events[2].max * cnt.count) / tsc_delta;
}

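/* RDPMC with an out-of-range index (no counter 64 exists) must #GP. */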
static void check_invalid_rdpmc_gp(void)
{
	uint64_t val;

	report(rdpmc_safe(64, &val) == GP_VECTOR,
	       "Expected #GP on RDPMC(64)");
}

int main(int ac, char **av)
{
	setup_vm();
	handle_irq(PMI_VECTOR, cnt_overflow);
	buf = malloc(N*64);

	check_invalid_rdpmc_gp();

	if (pmu.is_intel) {
		if (!pmu.version) {
			report_skip("No Intel Arch PMU is detected!");
			return report_summary();
		}
		gp_events = (struct pmu_event *)intel_gp_events;
		gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
		report_prefix_push("Intel");
		set_ref_cycle_expectations();
	} else {
		gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
		gp_events = (struct pmu_event *)amd_gp_events;
		report_prefix_push("AMD");
	}

	printf("PMU version:         %d\n", pmu.version);
	printf("GP counters:         %d\n", pmu.nr_gp_counters);
	printf("GP counter width:    %d\n", pmu.gp_counter_width);
	printf("Mask length:         %d\n", pmu.gp_counter_mask_length);
	printf("Fixed counters:      %d\n", pmu.nr_fixed_counters);
	printf("Fixed counter width: %d\n", pmu.fixed_counter_width);

	apic_write(APIC_LVTPC, PMI_VECTOR);

	check_counters();

	if (pmu_has_full_writes()) {
		pmu.msr_gp_counter_base = MSR_IA32_PMC0;

		report_prefix_push("full-width writes");
		check_counters();
		check_gp_counters_write_width();
		report_prefix_pop();
	}

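	/* On AMD, re-run the whole suite using the legacy K7 counter MSRs. */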
	if (!pmu.is_intel) {
		report_prefix_push("K7");
		pmu.nr_gp_counters = AMD64_NUM_COUNTERS;
		pmu.msr_gp_counter_base = MSR_K7_PERFCTR0;
		pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0;
		check_counters();
		report_prefix_pop();
	}

	return report_summary();
}