/* kvm-unit-tests: x86/pmu.c (revision 85c755786de4dd98ca6f52225f0ee84309ba4e2f) */

#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/pmu.h"
#include "x86/apic-defs.h"
#include "x86/apic.h"
#include "x86/desc.h"
#include "x86/isr.h"
#include "vmalloc.h"
#include "alloc.h"

#include "libcflat.h"
#include <stdint.h>

#define N 1000000

// These values match the number of instructions and branches in the
// assembly block in check_emulated_instr().
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

typedef struct {
	uint32_t ctr;
	uint32_t idx;
	uint64_t config;
	uint64_t count;
} pmu_counter_t;

struct pmu_event {
	const char *name;
	uint32_t unit_sel;
	int min;
	int max;
} intel_gp_events[] = {
	{"core cycles", 0x003c, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"ref cycles", 0x013c, 1*N, 30*N},
	{"llc references", 0x4f2e, 1, 2*N},
	{"llc misses", 0x412e, 1, 1*N},
	{"branches", 0x00c4, 1*N, 1.1*N},
	{"branch misses", 0x00c5, 0, 0.1*N},
}, amd_gp_events[] = {
	{"core cycles", 0x0076, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"branches", 0x00c2, 1*N, 1.1*N},
	{"branch misses", 0x00c3, 0, 0.1*N},
}, fixed_events[] = {
	{"fixed 0", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
};

/*
 * Indices into intel_gp_events[]; keep these in sync with the array
 * above.
 */
enum {
	INTEL_INSTRUCTIONS_IDX  = 1,
	INTEL_REF_CYCLES_IDX	= 2,
	INTEL_BRANCHES_IDX	= 5,
};

/*
 * Indices into amd_gp_events[]; keep these in sync with the array
 * above.
 */
enum {
	AMD_INSTRUCTIONS_IDX    = 1,
	AMD_BRANCHES_IDX	= 2,
};

char *buf;

static struct pmu_event *gp_events;
static unsigned int gp_events_size;
static unsigned int fixed_counters_num;

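/*
 * Run a short, predictable workload: execute the LOOP instruction N times,
 * each iteration doing one load from 'buf' plus a handful of NOPs.  This
 * gives the programmed events a known order-of-magnitude number of
 * instructions, branches and memory references to count.
 */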
static inline void loop(void)
{
	unsigned long tmp, tmp2, tmp3;

	asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b"
			: "=c"(tmp), "=r"(tmp2), "=r"(tmp3): "0"(N), "1"(buf));
}

volatile uint64_t irq_received;

static void cnt_overflow(isr_regs_t *regs)
{
	irq_received++;
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
	apic_write(APIC_EOI, 0);
}

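/*
 * Enable interrupts and spin briefly, waiting for the PMI handler above to
 * run.  Returns true if at least one overflow interrupt was received.
 */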
static bool check_irq(void)
{
	int i;
	irq_received = 0;
	sti();
	for (i = 0; i < 100000 && !irq_received; i++)
		asm volatile("pause");
	cli();
	return irq_received;
}

static bool is_gp(pmu_counter_t *evt)
{
	if (!pmu.is_intel)
		return true;

	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
		evt->ctr >= MSR_IA32_PMC0;
}

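/*
 * Translate a counter MSR into its bit index in the global control/status
 * MSRs.  On Intel, fixed counters start at FIXED_CNT_INDEX; on AMD, the
 * F15h-style counter MSRs are interleaved with their event-select MSRs,
 * hence the divide by two.
 */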
static int event_to_global_idx(pmu_counter_t *cnt)
{
	if (pmu.is_intel)
		return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
			(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));

	if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0)
		return (cnt->ctr - pmu.msr_gp_counter_base) / 2;
	else
		return cnt->ctr - pmu.msr_gp_counter_base;
}

static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
{
	if (is_gp(cnt)) {
		int i;

		for (i = 0; i < gp_events_size; i++)
			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
				return &gp_events[i];
	} else {
		unsigned int idx = cnt->ctr - MSR_CORE_PERF_FIXED_CTR0;

		if (idx < ARRAY_SIZE(fixed_events))
			return &fixed_events[idx];
	}

	return NULL;
}

static void global_enable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	cnt->idx = event_to_global_idx(cnt);
	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx));
}

static void global_disable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx));
}

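/*
 * Program and enable a counter, starting it at 'count'.  GP counters get the
 * event select written directly; for Intel fixed counters the
 * EVNTSEL_{OS,USR,INT} bits are translated into the counter's 4-bit field in
 * MSR_CORE_PERF_FIXED_CTR_CTRL.
 */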
static void __start_event(pmu_counter_t *evt, uint64_t count)
{
	evt->count = count;
	wrmsr(evt->ctr, evt->count);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config | EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		uint32_t usrospmi = 0;

		if (evt->config & EVNTSEL_OS)
			usrospmi |= (1 << 0);
		if (evt->config & EVNTSEL_USR)
			usrospmi |= (1 << 1);
		if (evt->config & EVNTSEL_INT)
			usrospmi |= (1 << 3); // PMI on overflow
		ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
	}
	global_enable(evt);
	apic_write(APIC_LVTPC, PMI_VECTOR);
}

static void start_event(pmu_counter_t *evt)
{
	__start_event(evt, 0);
}

static void stop_event(pmu_counter_t *evt)
{
	global_disable(evt);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config & ~EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
	}
	evt->count = rdmsr(evt->ctr);
}

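/* Start 'count' counters, run the workload once, then stop and read them. */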
static noinline void measure_many(pmu_counter_t *evt, int count)
{
	int i;
	for (i = 0; i < count; i++)
		start_event(&evt[i]);
	loop();
	for (i = 0; i < count; i++)
		stop_event(&evt[i]);
}

static void measure_one(pmu_counter_t *evt)
{
	measure_many(evt, 1);
}

static noinline void __measure(pmu_counter_t *evt, uint64_t count)
{
	__start_event(evt, count);
	loop();
	stop_event(evt);
}

static bool verify_event(uint64_t count, struct pmu_event *e)
{
	bool pass;

	if (!e)
		return false;

	pass = count >= e->min && count <= e->max;
	if (!pass)
		printf("FAIL: %d <= %"PRId64" <= %d\n", e->min, count, e->max);

	return pass;
}

static bool verify_counter(pmu_counter_t *cnt)
{
	return verify_event(cnt->count, get_counter_event(cnt));
}

static void check_gp_counter(struct pmu_event *evt)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
	};
	int i;

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);
		measure_one(&cnt);
		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
	}
}

static void check_gp_counters(void)
{
	int i;

	for (i = 0; i < gp_events_size; i++)
		if (pmu_gp_counter_is_available(i))
			check_gp_counter(&gp_events[i]);
		else
			printf("GP event '%s' is disabled\n",
					gp_events[i].name);
}

static void check_fixed_counters(void)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR,
	};
	int i;

	for (i = 0; i < fixed_counters_num; i++) {
		cnt.ctr = fixed_events[i].unit_sel;
		measure_one(&cnt);
		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
	}
}

static void check_counters_many(void)
{
	pmu_counter_t cnt[48];
	int i, n;

	for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) {
		if (!pmu_gp_counter_is_available(i))
			continue;

		cnt[n].ctr = MSR_GP_COUNTERx(n);
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
			gp_events[i % gp_events_size].unit_sel;
		n++;
	}
	for (i = 0; i < fixed_counters_num; i++) {
		cnt[n].ctr = fixed_events[i].unit_sel;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
		n++;
	}

	assert(n <= ARRAY_SIZE(cnt));
	measure_many(cnt, n);

	for (i = 0; i < n; i++)
		if (!verify_counter(&cnt[i]))
			break;

	report(i == n, "all counters");
}

static uint64_t measure_for_overflow(pmu_counter_t *cnt)
{
	__measure(cnt, 0);
	/*
	 * To generate overflow, i.e. roll over to '0', the initial count just
	 * needs to be preset to the negative expected count.  However, as per
	 * Intel's SDM, the preset count needs to be incremented by 1 to ensure
	 * the overflow interrupt is generated immediately instead of possibly
	 * waiting for the overflow to propagate through the counter.
	 */
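	/*
	 * For example (hypothetical numbers): if the calibration run above
	 * counted 10,000,000 events, the value returned is 1 - 10,000,000;
	 * once truncated to the counter width and written back, the counter
	 * overflows after roughly the same workload.
	 */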
	assert(cnt->count > 1);
	return 1 - cnt->count;
}

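/*
 * Preset each counter just below overflow, re-run the workload, and verify
 * the final count, the global overflow status bit and its write-to-clear
 * behavior, and, for odd iterations (where EVNTSEL_INT is set), that a PMI
 * was delivered.
 */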
static void check_counter_overflow(void)
{
	int i;
	uint64_t overflow_preset;
	int instruction_idx = pmu.is_intel ?
			      INTEL_INSTRUCTIONS_IDX :
			      AMD_INSTRUCTIONS_IDX;

	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR |
			  gp_events[instruction_idx].unit_sel /* instructions */,
	};
	overflow_preset = measure_for_overflow(&cnt);

	/* clear status before test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	report_prefix_push("overflow");

	for (i = 0; i < pmu.nr_gp_counters + 1; i++) {
		uint64_t status;
		int idx;

		cnt.count = overflow_preset;
		if (pmu_use_full_writes())
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;

		if (i == pmu.nr_gp_counters) {
			if (!pmu.is_intel)
				break;

			cnt.ctr = fixed_events[0].unit_sel;
			cnt.count = measure_for_overflow(&cnt);
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
		} else {
			cnt.ctr = MSR_GP_COUNTERx(i);
		}

		if (i % 2)
			cnt.config |= EVNTSEL_INT;
		else
			cnt.config &= ~EVNTSEL_INT;
		idx = event_to_global_idx(&cnt);
		__measure(&cnt, cnt.count);
		if (pmu.is_intel)
			report(cnt.count == 1, "cntr-%d", i);
		else
			report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i);

		if (!this_cpu_has_perf_global_status())
			continue;

		status = rdmsr(pmu.msr_global_status);
		report(status & (1ull << idx), "status-%d", i);
		wrmsr(pmu.msr_global_status_clr, status);
		status = rdmsr(pmu.msr_global_status);
		report(!(status & (1ull << idx)), "status clear-%d", i);
		report(check_irq() == (i % 2), "irq-%d", i);
	}

	report_prefix_pop();
}

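/*
 * With a non-zero counter mask (CMASK), the counter is expected to count
 * qualifying cycles rather than individual instructions, so the result
 * should stay below the instruction event's expected minimum.
 */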
static void check_gp_counter_cmask(void)
{
	int instruction_idx = pmu.is_intel ?
			      INTEL_INSTRUCTIONS_IDX :
			      AMD_INSTRUCTIONS_IDX;

	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR |
			  gp_events[instruction_idx].unit_sel /* instructions */,
	};
	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
	measure_one(&cnt);
	report(cnt.count < gp_events[instruction_idx].min, "cmask");
}

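/*
 * Attempt a "fast" RDPMC with bit 31 set in the index; on processors that
 * support it this is expected to return only the low 32 bits of the counter,
 * otherwise it raises #GP and the caller skips the check.  Bit 30 selects
 * the fixed-counter range.
 */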
static void do_rdpmc_fast(void *ptr)
{
	pmu_counter_t *cnt = ptr;
	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);

	if (!is_gp(cnt))
		idx |= 1 << 30;

	cnt->count = rdpmc(idx);
}

static void check_rdpmc(void)
{
	uint64_t val = 0xff0123456789ull;
	bool exc;
	int i;

	report_prefix_push("rdpmc");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		uint64_t x;
		pmu_counter_t cnt = {
			.ctr = MSR_GP_COUNTERx(i),
			.idx = i
		};

		/*
		 * Without full-width writes, only the low 32 bits are writable,
		 * and the value is sign-extended.
		 */
		if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0)
			x = (uint64_t)(int64_t)(int32_t)val;
		else
			x = (uint64_t)(int64_t)val;

		/* Mask according to the number of supported bits */
		x &= (1ull << pmu.gp_counter_width) - 1;

		wrmsr(MSR_GP_COUNTERx(i), val);
		report(rdpmc(i) == x, "cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fast-%d", i);
		else
			report(cnt.count == (u32)val, "fast-%d", i);
	}
	for (i = 0; i < fixed_counters_num; i++) {
		uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1);
		pmu_counter_t cnt = {
			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
			.idx = i
		};

		wrmsr(MSR_PERF_FIXED_CTRx(i), x);
		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fixed fast-%d", i);
		else
			report(cnt.count == (u32)x, "fixed fast-%d", i);
	}

	report_prefix_pop();
}

static void check_running_counter_wrmsr(void)
{
	uint64_t status;
	uint64_t count;
	unsigned int instruction_idx = pmu.is_intel ?
				       INTEL_INSTRUCTIONS_IDX :
				       AMD_INSTRUCTIONS_IDX;

	pmu_counter_t evt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR |
			  gp_events[instruction_idx].unit_sel,
	};

	report_prefix_push("running counter wrmsr");

	start_event(&evt);
	loop();
	wrmsr(MSR_GP_COUNTERx(0), 0);
	stop_event(&evt);
	report(evt.count < gp_events[instruction_idx].min, "cntr");

	/* clear status before overflow test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&evt);

	count = -1;
	if (pmu_use_full_writes())
		count &= (1ull << pmu.gp_counter_width) - 1;

	wrmsr(MSR_GP_COUNTERx(0), count);

	loop();
	stop_event(&evt);

	if (this_cpu_has_perf_global_status()) {
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "status msr bit");
	}

	report_prefix_pop();
}

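/*
 * Verify that instructions and branches emulated by KVM (forced via the
 * KVM_FEP prefix) are still counted by the PMU.  Counters 0 and 1 are preset
 * so that the expected branch/instruction counts push them past zero, which
 * should also set the corresponding global overflow status bits.
 */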
static void check_emulated_instr(void)
{
	uint64_t status, instr_start, brnch_start;
	uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1;
	unsigned int branch_idx = pmu.is_intel ?
				  INTEL_BRANCHES_IDX : AMD_BRANCHES_IDX;
	unsigned int instruction_idx = pmu.is_intel ?
				       INTEL_INSTRUCTIONS_IDX :
				       AMD_INSTRUCTIONS_IDX;
	pmu_counter_t brnch_cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		/* branch instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel,
	};
	pmu_counter_t instr_cnt = {
		.ctr = MSR_GP_COUNTERx(1),
		/* instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[instruction_idx].unit_sel,
	};
	report_prefix_push("emulated instruction");

	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&brnch_cnt);
	start_event(&instr_cnt);

	brnch_start = -EXPECTED_BRNCH;
	instr_start = -EXPECTED_INSTR;
	wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width);
	wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width);
	// KVM_FEP is a magic prefix that forces emulation so
	// 'KVM_FEP "jne label\n"' just counts as a single instruction.
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	stop_event(&brnch_cnt);
	stop_event(&instr_cnt);

	// Check that the end count - start count is at least the expected
	// number of instructions and branches.
	report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
	       "instruction count");
	report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
	       "branch count");
	if (this_cpu_has_perf_global_status()) {
		// Additionally check that those counters overflowed properly.
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "branch counter overflow");
		report(status & 2, "instruction counter overflow");
	}

	report_prefix_pop();
}

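/*
 * Count cycles with the transactional-execution modifiers set (the encoding
 * used on GP counter 2 counts only committed transactional cycles, per the
 * inline comments below) around a short RTM region, and verify that each
 * counter saw a non-zero count.
 */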
#define XBEGIN_STARTED (~0u)
static void check_tsx_cycles(void)
{
	pmu_counter_t cnt;
	unsigned int i, ret = 0;

	if (!this_cpu_has(X86_FEATURE_RTM))
		return;

	report_prefix_push("TSX cycles");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);

		if (i == 2) {
			/* Transactional cycles committed only on gp counter 2 */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x30000003c;
		} else {
			/* Transactional cycles */
			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x10000003c;
		}

		start_event(&cnt);

		asm volatile("xbegin 1f\n\t"
				"1:\n\t"
				: "+a" (ret) :: "memory");

		/* Generate a non-canonical #GP to trigger ABORT. */
		if (ret == XBEGIN_STARTED)
			*(int *)NONCANONICAL = 0;

		stop_event(&cnt);

		report(cnt.count > 0, "gp cntr-%d with a value of %" PRId64 "", i, cnt.count);
	}

	report_prefix_pop();
}

static void warm_up(void)
{
	int i;

	/*
	 * The cycles event is always measured first.  On a cold cache the
	 * measured cycle count can exceed the predefined upper bound and
	 * cause a false positive, so run the loop a few times to warm up
	 * the cache before the real verification.
	 */
	for (i = 0; i < 10; i++)
		loop();
}

static void check_counters(void)
{
	if (is_fep_available())
		check_emulated_instr();

	warm_up();
	check_gp_counters();
	check_fixed_counters();
	check_rdpmc();
	check_counters_many();
	check_counter_overflow();
	check_gp_counter_cmask();
	check_running_counter_wrmsr();
	check_tsx_cycles();
}

static void do_unsupported_width_counter_write(void *index)
{
	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
}

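/*
 * With full-width writes enabled, the legacy MSR_IA32_PERFCTRx aliases still
 * preserve only the low 32 bits of a write, while MSR_IA32_PMCx accepts
 * values up to the enumerated counter width; anything wider must #GP.
 */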
static void check_gp_counters_write_width(void)
{
	u64 val_64 = 0xffffff0123456789ull;
	u64 val_32 = val_64 & ((1ull << 32) - 1);
	u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1);
	int i;

	/*
	 * MSR_IA32_PERFCTRn supports 64-bit writes,
	 * but only the lowest 32 bits are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
	}

	/*
	 * MSR_IA32_PMCn supports writing values up to GP counter width,
	 * and only the lowest bits of GP counter width are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PMC0 + i, val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);

		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);

		report(test_for_exception(GP_VECTOR,
			do_unsupported_width_counter_write, &i),
		"writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
	}
}

/*
 * Per the SDM, reference cycles are currently implemented using the
 * core crystal clock, TSC, or bus clock. Calibrate to the TSC
 * frequency to set reasonable expectations.
 */
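/*
 * The bounds are scaled by the measured ratio of reference cycles to TSC
 * cycles, i.e. new_min = min * cnt.count / tsc_delta, and likewise for max.
 */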
static void set_ref_cycle_expectations(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_IA32_PERFCTR0,
		.config = EVNTSEL_OS | EVNTSEL_USR |
			  intel_gp_events[INTEL_REF_CYCLES_IDX].unit_sel,
	};
	uint64_t tsc_delta;
	uint64_t t0, t1, t2, t3;

	/* Bit 2 enumerates the availability of reference cycles events. */
	if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2))
		return;

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	t0 = fenced_rdtsc();
	start_event(&cnt);
	t1 = fenced_rdtsc();

	/*
	 * This loop has to run long enough to dominate the VM-exit
	 * costs for playing with the PMU MSRs on start and stop.
	 *
	 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
	 * the core crystal clock, this function calculated a guest
	 * TSC : ref cycles ratio of around 105 with ECX initialized
	 * to one billion.
	 */
	asm volatile("loop ." : "+c"((int){1000000000ull}));

	t2 = fenced_rdtsc();
	stop_event(&cnt);
	t3 = fenced_rdtsc();

	tsc_delta = ((t2 - t1) + (t3 - t0)) / 2;

	if (!tsc_delta)
		return;

	intel_gp_events[INTEL_REF_CYCLES_IDX].min =
		(intel_gp_events[INTEL_REF_CYCLES_IDX].min * cnt.count) / tsc_delta;
	intel_gp_events[INTEL_REF_CYCLES_IDX].max =
		(intel_gp_events[INTEL_REF_CYCLES_IDX].max * cnt.count) / tsc_delta;
}

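/* RDPMC with an out-of-range index (64 here) must raise #GP. */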
static void check_invalid_rdpmc_gp(void)
{
	uint64_t val;

	report(rdpmc_safe(64, &val) == GP_VECTOR,
	       "Expected #GP on RDPMC(64)");
}

int main(int ac, char **av)
{
	setup_vm();
	handle_irq(PMI_VECTOR, cnt_overflow);
	buf = malloc(N*64);

	check_invalid_rdpmc_gp();

	if (pmu.is_intel) {
		if (!pmu.version) {
			report_skip("No Intel Arch PMU is detected!");
			return report_summary();
		}
		gp_events = (struct pmu_event *)intel_gp_events;
		gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
		report_prefix_push("Intel");
		set_ref_cycle_expectations();
	} else {
		gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
		gp_events = (struct pmu_event *)amd_gp_events;
		report_prefix_push("AMD");
	}

	printf("PMU version:         %d\n", pmu.version);
	printf("GP counters:         %d\n", pmu.nr_gp_counters);
	printf("GP counter width:    %d\n", pmu.gp_counter_width);
	printf("Mask length:         %d\n", pmu.gp_counter_mask_length);
	printf("Fixed counters:      %d\n", pmu.nr_fixed_counters);
	printf("Fixed counter width: %d\n", pmu.fixed_counter_width);

	fixed_counters_num = MIN(pmu.nr_fixed_counters, ARRAY_SIZE(fixed_events));
	if (pmu.nr_fixed_counters > ARRAY_SIZE(fixed_events))
		report_info("Fixed counters number %d > defined fixed events %u.  "
			    "Please update test case.", pmu.nr_fixed_counters,
			    (uint32_t)ARRAY_SIZE(fixed_events));

	apic_write(APIC_LVTPC, PMI_VECTOR);

	check_counters();

	if (pmu_has_full_writes()) {
		pmu.msr_gp_counter_base = MSR_IA32_PMC0;

		report_prefix_push("full-width writes");
		check_counters();
		check_gp_counters_write_width();
		report_prefix_pop();
	}

	if (!pmu.is_intel) {
		report_prefix_push("K7");
		pmu.nr_gp_counters = AMD64_NUM_COUNTERS;
		pmu.msr_gp_counter_base = MSR_K7_PERFCTR0;
		pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0;
		check_counters();
		report_prefix_pop();
	}

	return report_summary();
}