xref: /kvm-unit-tests/x86/pmu.c (revision 50f8e27e95e5d8f66d564cd8db7e3aa4866e5d77)
1 
2 #include "x86/msr.h"
3 #include "x86/processor.h"
4 #include "x86/pmu.h"
5 #include "x86/apic-defs.h"
6 #include "x86/apic.h"
7 #include "x86/desc.h"
8 #include "x86/isr.h"
9 #include "vmalloc.h"
10 #include "alloc.h"
11 
12 #include "libcflat.h"
13 #include <stdint.h>
14 
15 #define N 1000000
16 
17 // These values match the number of instructions and branches in the
18 // assembly block in check_emulated_instr().
19 #define EXPECTED_INSTR 17
20 #define EXPECTED_BRNCH 5
21 
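/*
 * Body of the measured loop.  RCX is saved in RDI and the iteration count
 * is taken from RBX; each iteration performs one 64-byte-strided load from
 * buf plus seven NOPs.  RCX is then restored and RAX/RDX are zeroed before
 * the trailing _wrmsr slot, which is either a NOP (plain loop) or a WRMSR
 * to the global control MSR (precise loop).  Callers must treat EAX, EDX
 * and EDI as clobbered.
 */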
22 #define LOOP_ASM(_wrmsr)						\
23 	_wrmsr "\n\t"							\
24 	"mov %%ecx, %%edi; mov %%ebx, %%ecx;\n\t"			\
25 	"1: mov (%1), %2; add $64, %1;\n\t"				\
26 	"nop; nop; nop; nop; nop; nop; nop;\n\t"			\
27 	"loop 1b;\n\t"							\
28 	"mov %%edi, %%ecx; xor %%eax, %%eax; xor %%edx, %%edx;\n\t"	\
29 	_wrmsr "\n\t"
30 
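/*
 * A single programmed counter: @ctr is the counter MSR address, @idx the
 * counter's bit index in the global control/status MSRs, @config the event
 * select value (or, for fixed counters, the OS/USR/INT enable bits), and
 * @count holds the start value when the counter is programmed and the
 * measured value once it is stopped.
 */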
31 typedef struct {
32 	uint32_t ctr;
33 	uint32_t idx;
34 	uint64_t config;
35 	uint64_t count;
36 } pmu_counter_t;
37 
38 struct pmu_event {
39 	const char *name;
40 	uint32_t unit_sel;
41 	int min;
42 	int max;
43 } intel_gp_events[] = {
44 	{"core cycles", 0x003c, 1*N, 50*N},
45 	{"instructions", 0x00c0, 10*N, 10.2*N},
46 	{"ref cycles", 0x013c, 1*N, 30*N},
47 	{"llc references", 0x4f2e, 1, 2*N},
48 	{"llc misses", 0x412e, 1, 1*N},
49 	{"branches", 0x00c4, 1*N, 1.1*N},
50 	{"branch misses", 0x00c5, 0, 0.1*N},
51 }, amd_gp_events[] = {
52 	{"core cycles", 0x0076, 1*N, 50*N},
53 	{"instructions", 0x00c0, 10*N, 10.2*N},
54 	{"branches", 0x00c2, 1*N, 1.1*N},
55 	{"branch misses", 0x00c3, 0, 0.1*N},
56 }, fixed_events[] = {
57 	{"fixed 0", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
58 	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
59 	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
60 };
61 
62 /*
63  * Indices into intel_gp_events[]; must be kept consistent with the
64  * order of entries in that array.
65  */
66 enum {
67 	INTEL_INSTRUCTIONS_IDX  = 1,
68 	INTEL_REF_CYCLES_IDX	= 2,
69 	INTEL_BRANCHES_IDX	= 5,
70 };
71 
72 /*
73  * Indices into amd_gp_events[]; must be kept consistent with the
74  * order of entries in that array.
75  */
76 enum {
77 	AMD_INSTRUCTIONS_IDX    = 1,
78 	AMD_BRANCHES_IDX	= 2,
79 };
80 
81 char *buf;
82 
83 static struct pmu_event *gp_events;
84 static unsigned int gp_events_size;
85 static unsigned int fixed_counters_num;
86 
87 
88 static inline void __loop(void)
89 {
90 	unsigned long tmp, tmp2, tmp3;
91 
92 	asm volatile(LOOP_ASM("nop")
93 		     : "=b"(tmp), "=r"(tmp2), "=r"(tmp3)
94 		     : "0"(N), "1"(buf) : "eax", "ecx", "edx", "edi");
95 }
96 
97 /*
98  * Enable and disable the counters inside a single asm blob so that no
99  * extra instructions are counted between enabling the counters and the
100  * start of the LOOP_ASM body.  This lets the instructions and branches
101  * events be verified against precise counts instead of a rough range.
102  * @cntrs is the GLOBAL_CTRL bitmask to enable, split into EAX/EDX.
103  */
104 static inline void __precise_loop(u64 cntrs)
105 {
106 	unsigned long tmp, tmp2, tmp3;
107 	unsigned int global_ctl = pmu.msr_global_ctl;
108 	u32 eax = cntrs & (BIT_ULL(32) - 1);
109 	u32 edx = cntrs >> 32;
110 
111 	asm volatile(LOOP_ASM("wrmsr")
112 		     : "=b"(tmp), "=r"(tmp2), "=r"(tmp3)
113 		     : "a"(eax), "d"(edx), "c"(global_ctl),
114 		       "0"(N), "1"(buf)
115 		     : "edi");
116 }
117 
118 static inline void loop(u64 cntrs)
119 {
120 	if (!this_cpu_has_perf_global_ctrl())
121 		__loop();
122 	else
123 		__precise_loop(cntrs);
124 }
125 
126 volatile uint64_t irq_received;
127 
128 static void cnt_overflow(isr_regs_t *regs)
129 {
130 	irq_received++;
131 	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
132 	apic_write(APIC_EOI, 0);
133 }
134 
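/*
 * Enable interrupts and spin for a bounded number of PAUSE iterations,
 * returning whether the PMI handler above observed an overflow interrupt.
 */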
135 static bool check_irq(void)
136 {
137 	int i;
138 	irq_received = 0;
139 	sti();
140 	for (i = 0; i < 100000 && !irq_received; i++)
141 		asm volatile("pause");
142 	cli();
143 	return irq_received;
144 }
145 
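/*
 * On Intel, fixed counters occupy the MSR range starting at
 * MSR_CORE_PERF_FIXED_CTR0; anything below that range (legacy
 * IA32_PERFCTRx) or at/above MSR_IA32_PMC0 (full-width aliases) is a GP
 * counter.  AMD has no fixed counters, so every counter is GP.
 */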
146 static bool is_gp(pmu_counter_t *evt)
147 {
148 	if (!pmu.is_intel)
149 		return true;
150 
151 	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
152 		evt->ctr >= MSR_IA32_PMC0;
153 }
154 
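/*
 * Map a counter to its bit position in the global control/status MSRs:
 * GP counters occupy the low bits, fixed counters start at FIXED_CNT_INDEX.
 * On AMD, the F15H "core" MSR space interleaves event-select and counter
 * registers, hence the division by two.
 */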
155 static int event_to_global_idx(pmu_counter_t *cnt)
156 {
157 	if (pmu.is_intel)
158 		return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
159 			(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));
160 
161 	if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0)
162 		return (cnt->ctr - pmu.msr_gp_counter_base) / 2;
163 	else
164 		return cnt->ctr - pmu.msr_gp_counter_base;
165 }
166 
167 static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
168 {
169 	if (is_gp(cnt)) {
170 		int i;
171 
172 		for (i = 0; i < gp_events_size; i++)
173 			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
174 				return &gp_events[i];
175 	} else {
176 		unsigned int idx = cnt->ctr - MSR_CORE_PERF_FIXED_CTR0;
177 
178 		if (idx < ARRAY_SIZE(fixed_events))
179 			return &fixed_events[idx];
180 	}
181 
182 	return NULL;
183 }
184 
185 static void global_enable(pmu_counter_t *cnt)
186 {
187 	if (!this_cpu_has_perf_global_ctrl())
188 		return;
189 
190 	cnt->idx = event_to_global_idx(cnt);
191 	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx));
192 }
193 
194 static void global_disable(pmu_counter_t *cnt)
195 {
196 	if (!this_cpu_has_perf_global_ctrl())
197 		return;
198 
199 	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx));
200 }
201 
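/*
 * Program a single counter.  GP counters are armed via their event-select
 * MSR; fixed counters via a 4-bit field per counter in FIXED_CTR_CTRL
 * (bit 0 = OS, bit 1 = USR, bit 3 = PMI on overflow), so fixed counter 1,
 * for example, is controlled by bits [7:4].
 */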
202 static void __start_event(pmu_counter_t *evt, uint64_t count)
203 {
204 	evt->count = count;
205 	wrmsr(evt->ctr, evt->count);
206 	if (is_gp(evt)) {
207 		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
208 		      evt->config | EVNTSEL_EN);
209 	} else {
210 		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
211 		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
212 		uint32_t usrospmi = 0;
213 
214 		if (evt->config & EVNTSEL_OS)
215 			usrospmi |= (1 << 0);
216 		if (evt->config & EVNTSEL_USR)
217 			usrospmi |= (1 << 1);
218 		if (evt->config & EVNTSEL_INT)
219 			usrospmi |= (1 << 3); // PMI on overflow
220 		ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
221 		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
222 	}
223 	apic_write(APIC_LVTPC, PMI_VECTOR);
224 }
225 
226 static void start_event(pmu_counter_t *evt)
227 {
228 	__start_event(evt, 0);
229 	global_enable(evt);
230 }
231 
232 static void __stop_event(pmu_counter_t *evt)
233 {
234 	if (is_gp(evt)) {
235 		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
236 		      evt->config & ~EVNTSEL_EN);
237 	} else {
238 		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
239 		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
240 		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
241 	}
242 	evt->count = rdmsr(evt->ctr);
243 }
244 
245 static void stop_event(pmu_counter_t *evt)
246 {
247 	global_disable(evt);
248 	__stop_event(evt);
249 }
250 
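/*
 * Program @count counters, run the measured loop with all of them armed
 * (enabled through a single GLOBAL_CTRL write when the PMU has one), then
 * stop them and latch the final values into evt[i].count.
 */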
251 static noinline void measure_many(pmu_counter_t *evt, int count)
252 {
253 	int i;
254 	u64 cntrs = 0;
255 
256 	for (i = 0; i < count; i++) {
257 		__start_event(&evt[i], 0);
258 		cntrs |= BIT_ULL(event_to_global_idx(&evt[i]));
259 	}
260 	loop(cntrs);
261 	for (i = 0; i < count; i++)
262 		__stop_event(&evt[i]);
263 }
264 
265 static void measure_one(pmu_counter_t *evt)
266 {
267 	measure_many(evt, 1);
268 }
269 
270 static noinline void __measure(pmu_counter_t *evt, uint64_t count)
271 {
272 	u64 cntrs = BIT_ULL(event_to_global_idx(evt));
273 
274 	__start_event(evt, count);
275 	loop(cntrs);
276 	__stop_event(evt);
277 }
278 
279 static bool verify_event(uint64_t count, struct pmu_event *e)
280 {
281 	bool pass;
282 
283 	if (!e)
284 		return false;
285 
286 	pass = count >= e->min && count <= e->max;
287 	if (!pass)
288 		printf("FAIL: %d <= %"PRIu64" <= %d\n", e->min, count, e->max);
289 
290 	return pass;
291 }
292 
293 static bool verify_counter(pmu_counter_t *cnt)
294 {
295 	return verify_event(cnt->count, get_counter_event(cnt));
296 }
297 
298 static void check_gp_counter(struct pmu_event *evt)
299 {
300 	pmu_counter_t cnt = {
301 		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
302 	};
303 	int i;
304 
305 	for (i = 0; i < pmu.nr_gp_counters; i++) {
306 		cnt.ctr = MSR_GP_COUNTERx(i);
307 		measure_one(&cnt);
308 		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
309 	}
310 }
311 
312 static void check_gp_counters(void)
313 {
314 	int i;
315 
316 	for (i = 0; i < gp_events_size; i++)
317 		if (pmu_gp_counter_is_available(i))
318 			check_gp_counter(&gp_events[i]);
319 		else
320 			printf("GP event '%s' is disabled\n",
321 					gp_events[i].name);
322 }
323 
324 static void check_fixed_counters(void)
325 {
326 	pmu_counter_t cnt = {
327 		.config = EVNTSEL_OS | EVNTSEL_USR,
328 	};
329 	int i;
330 
331 	for (i = 0; i < fixed_counters_num; i++) {
332 		cnt.ctr = fixed_events[i].unit_sel;
333 		measure_one(&cnt);
334 		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
335 	}
336 }
337 
338 static void check_counters_many(void)
339 {
340 	pmu_counter_t cnt[48];
341 	int i, n;
342 
343 	for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) {
344 		if (!pmu_gp_counter_is_available(i))
345 			continue;
346 
347 		cnt[n].ctr = MSR_GP_COUNTERx(n);
348 		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
349 			gp_events[i % gp_events_size].unit_sel;
350 		n++;
351 	}
352 	for (i = 0; i < fixed_counters_num; i++) {
353 		cnt[n].ctr = fixed_events[i].unit_sel;
354 		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
355 		n++;
356 	}
357 
358 	assert(n <= ARRAY_SIZE(cnt));
359 	measure_many(cnt, n);
360 
361 	for (i = 0; i < n; i++)
362 		if (!verify_counter(&cnt[i]))
363 			break;
364 
365 	report(i == n, "all counters");
366 }
367 
368 static uint64_t measure_for_overflow(pmu_counter_t *cnt)
369 {
370 	__measure(cnt, 0);
371 	/*
372 	 * To generate overflow, i.e. roll over to '0', the initial count just
373 	 * needs to be preset to the negative expected count.  However, as per
374 	 * Intel's SDM, the preset count needs to be incremented by 1 to ensure
375 	 * the overflow interrupt is generated immediately instead of possibly
376 	 * waiting for the overflow to propagate through the counter.
377 	 */
378 	assert(cnt->count > 1);
379 	return 1 - cnt->count;
380 }
381 
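/*
 * Preset each counter so that it rolls over to zero during the loop.  Odd
 * iterations also set EVNTSEL_INT to verify PMI delivery, and every pass
 * checks that the counter's GLOBAL_STATUS bit is set and can be cleared.
 * The final, Intel-only iteration exercises fixed counter 0.
 */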
382 static void check_counter_overflow(void)
383 {
384 	int i;
385 	uint64_t overflow_preset;
386 	int instruction_idx = pmu.is_intel ?
387 			      INTEL_INSTRUCTIONS_IDX :
388 			      AMD_INSTRUCTIONS_IDX;
389 
390 	pmu_counter_t cnt = {
391 		.ctr = MSR_GP_COUNTERx(0),
392 		.config = EVNTSEL_OS | EVNTSEL_USR |
393 			  gp_events[instruction_idx].unit_sel /* instructions */,
394 	};
395 	overflow_preset = measure_for_overflow(&cnt);
396 
397 	/* clear status before test */
398 	if (this_cpu_has_perf_global_status())
399 		pmu_clear_global_status();
400 
401 	report_prefix_push("overflow");
402 
403 	for (i = 0; i < pmu.nr_gp_counters + 1; i++) {
404 		uint64_t status;
405 		int idx;
406 
407 		cnt.count = overflow_preset;
408 		if (pmu_use_full_writes())
409 			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
410 
411 		if (i == pmu.nr_gp_counters) {
412 			if (!pmu.is_intel)
413 				break;
414 
415 			cnt.ctr = fixed_events[0].unit_sel;
416 			cnt.count = measure_for_overflow(&cnt);
417 			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
418 		} else {
419 			cnt.ctr = MSR_GP_COUNTERx(i);
420 		}
421 
422 		if (i % 2)
423 			cnt.config |= EVNTSEL_INT;
424 		else
425 			cnt.config &= ~EVNTSEL_INT;
426 		idx = event_to_global_idx(&cnt);
427 		__measure(&cnt, cnt.count);
428 		if (pmu.is_intel)
429 			report(cnt.count == 1, "cntr-%d", i);
430 		else
431 			report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i);
432 
433 		if (!this_cpu_has_perf_global_status())
434 			continue;
435 
436 		status = rdmsr(pmu.msr_global_status);
437 		report(status & (1ull << idx), "status-%d", i);
438 		wrmsr(pmu.msr_global_status_clr, status);
439 		status = rdmsr(pmu.msr_global_status);
440 		report(!(status & (1ull << idx)), "status clear-%d", i);
441 		report(check_irq() == (i % 2), "irq-%d", i);
442 	}
443 
444 	report_prefix_pop();
445 }
446 
447 static void check_gp_counter_cmask(void)
448 {
449 	int instruction_idx = pmu.is_intel ?
450 			      INTEL_INSTRUCTIONS_IDX :
451 			      AMD_INSTRUCTIONS_IDX;
452 
453 	pmu_counter_t cnt = {
454 		.ctr = MSR_GP_COUNTERx(0),
455 		.config = EVNTSEL_OS | EVNTSEL_USR |
456 			  gp_events[instruction_idx].unit_sel /* instructions */,
457 	};
458 	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
459 	measure_one(&cnt);
460 	report(cnt.count < gp_events[instruction_idx].min, "cmask");
461 }
462 
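/*
 * RDPMC with bit 31 set in ECX is the "fast" form, which returns only the
 * low 32 bits of the counter on processors that support it; bit 30 selects
 * the fixed-counter range.  An unsupported encoding raises #GP, which the
 * caller treats as a skip.
 */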
463 static void do_rdpmc_fast(void *ptr)
464 {
465 	pmu_counter_t *cnt = ptr;
466 	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);
467 
468 	if (!is_gp(cnt))
469 		idx |= 1 << 30;
470 
471 	cnt->count = rdpmc(idx);
472 }
473 
474 
475 static void check_rdpmc(void)
476 {
477 	uint64_t val = 0xff0123456789ull;
478 	bool exc;
479 	int i;
480 
481 	report_prefix_push("rdpmc");
482 
483 	for (i = 0; i < pmu.nr_gp_counters; i++) {
484 		uint64_t x;
485 		pmu_counter_t cnt = {
486 			.ctr = MSR_GP_COUNTERx(i),
487 			.idx = i
488 		};
489 
490 		/*
491 		 * Without full-width writes, only the low 32 bits are writable,
492 		 * and the value is sign-extended.
493 		 */
494 		if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0)
495 			x = (uint64_t)(int64_t)(int32_t)val;
496 		else
497 			x = (uint64_t)(int64_t)val;
498 
499 		/* Mask according to the number of supported bits */
500 		x &= (1ull << pmu.gp_counter_width) - 1;
501 
502 		wrmsr(MSR_GP_COUNTERx(i), val);
503 		report(rdpmc(i) == x, "cntr-%d", i);
504 
505 		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
506 		if (exc)
507 			report_skip("fast-%d", i);
508 		else
509 			report(cnt.count == (u32)val, "fast-%d", i);
510 	}
511 	for (i = 0; i < fixed_counters_num; i++) {
512 		uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1);
513 		pmu_counter_t cnt = {
514 			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
515 			.idx = i
516 		};
517 
518 		wrmsr(MSR_PERF_FIXED_CTRx(i), x);
519 		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);
520 
521 		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
522 		if (exc)
523 			report_skip("fixed fast-%d", i);
524 		else
525 			report(cnt.count == (u32)x, "fixed fast-%d", i);
526 	}
527 
528 	report_prefix_pop();
529 }
530 
531 static void check_running_counter_wrmsr(void)
532 {
533 	uint64_t status;
534 	uint64_t count;
535 	unsigned int instruction_idx = pmu.is_intel ?
536 				       INTEL_INSTRUCTIONS_IDX :
537 				       AMD_INSTRUCTIONS_IDX;
538 
539 	pmu_counter_t evt = {
540 		.ctr = MSR_GP_COUNTERx(0),
541 		.config = EVNTSEL_OS | EVNTSEL_USR |
542 			  gp_events[instruction_idx].unit_sel,
543 	};
544 
545 	report_prefix_push("running counter wrmsr");
546 
547 	start_event(&evt);
548 	__loop();
549 	wrmsr(MSR_GP_COUNTERx(0), 0);
550 	stop_event(&evt);
551 	report(evt.count < gp_events[instruction_idx].min, "cntr");
552 
553 	/* clear status before overflow test */
554 	if (this_cpu_has_perf_global_status())
555 		pmu_clear_global_status();
556 
557 	start_event(&evt);
558 
559 	count = -1;
560 	if (pmu_use_full_writes())
561 		count &= (1ull << pmu.gp_counter_width) - 1;
562 
563 	wrmsr(MSR_GP_COUNTERx(0), count);
564 
565 	__loop();
566 	stop_event(&evt);
567 
568 	if (this_cpu_has_perf_global_status()) {
569 		status = rdmsr(pmu.msr_global_status);
570 		report(status & 1, "status msr bit");
571 	}
572 
573 	report_prefix_pop();
574 }
575 
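/*
 * Count instructions and branches retired while executing a block that is
 * forced through the instruction emulator via the KVM_FEP prefix.  Both
 * counters are preset to -EXPECTED_* so that, besides the count checks,
 * they also overflow and set their GLOBAL_STATUS bits.
 */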
576 static void check_emulated_instr(void)
577 {
578 	uint64_t status, instr_start, brnch_start;
579 	uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1;
580 	unsigned int branch_idx = pmu.is_intel ?
581 				  INTEL_BRANCHES_IDX : AMD_BRANCHES_IDX;
582 	unsigned int instruction_idx = pmu.is_intel ?
583 				       INTEL_INSTRUCTIONS_IDX :
584 				       AMD_INSTRUCTIONS_IDX;
585 	pmu_counter_t brnch_cnt = {
586 		.ctr = MSR_GP_COUNTERx(0),
587 		/* branch instructions */
588 		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel,
589 	};
590 	pmu_counter_t instr_cnt = {
591 		.ctr = MSR_GP_COUNTERx(1),
592 		/* instructions */
593 		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[instruction_idx].unit_sel,
594 	};
595 	report_prefix_push("emulated instruction");
596 
597 	if (this_cpu_has_perf_global_status())
598 		pmu_clear_global_status();
599 
600 	start_event(&brnch_cnt);
601 	start_event(&instr_cnt);
602 
603 	brnch_start = -EXPECTED_BRNCH;
604 	instr_start = -EXPECTED_INSTR;
605 	wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width);
606 	wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width);
607 	// KVM_FEP is a magic prefix that forces emulation so
608 	// 'KVM_FEP "jne label\n"' just counts as a single instruction.
609 	asm volatile(
610 		"mov $0x0, %%eax\n"
611 		"cmp $0x0, %%eax\n"
612 		KVM_FEP "jne label\n"
613 		KVM_FEP "jne label\n"
614 		KVM_FEP "jne label\n"
615 		KVM_FEP "jne label\n"
616 		KVM_FEP "jne label\n"
617 		"mov $0xa, %%eax\n"
618 		"cpuid\n"
619 		"mov $0xa, %%eax\n"
620 		"cpuid\n"
621 		"mov $0xa, %%eax\n"
622 		"cpuid\n"
623 		"mov $0xa, %%eax\n"
624 		"cpuid\n"
625 		"mov $0xa, %%eax\n"
626 		"cpuid\n"
627 		"label:\n"
628 		:
629 		:
630 		: "eax", "ebx", "ecx", "edx");
631 
632 	if (this_cpu_has_perf_global_ctrl())
633 		wrmsr(pmu.msr_global_ctl, 0);
634 
635 	stop_event(&brnch_cnt);
636 	stop_event(&instr_cnt);
637 
638 	// Check that the end count - start count is at least the expected
639 	// number of instructions and branches.
640 	report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
641 	       "instruction count");
642 	report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
643 	       "branch count");
644 	if (this_cpu_has_perf_global_status()) {
645 		// Additionally check that those counters overflowed properly.
646 		status = rdmsr(pmu.msr_global_status);
647 		report(status & 1, "branch counter overflow");
648 		report(status & 2, "instruction counter overflow");
649 	}
650 
651 	report_prefix_pop();
652 }
653 
654 #define XBEGIN_STARTED (~0u)
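/*
 * Count core cycles (event 0x3c) only inside transactional regions by
 * setting EVNTSEL bit 32 (IN_TX); counter 2 additionally sets bit 33
 * (IN_TXCP), which is only supported on that counter.  Each iteration
 * opens a transaction with xbegin and lets it abort, so every GP counter
 * should observe at least some transactional cycles.
 */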
655 static void check_tsx_cycles(void)
656 {
657 	pmu_counter_t cnt;
658 	unsigned int i, ret = 0;
659 
660 	if (!this_cpu_has(X86_FEATURE_RTM))
661 		return;
662 
663 	report_prefix_push("TSX cycles");
664 
665 	for (i = 0; i < pmu.nr_gp_counters; i++) {
666 		cnt.ctr = MSR_GP_COUNTERx(i);
667 
668 		if (i == 2) {
669 			/* Transactional cycles committed only on gp counter 2 */
670 			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x30000003c;
671 		} else {
672 			/* Transactional cycles */
673 			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x10000003c;
674 		}
675 
676 		start_event(&cnt);
677 
678 		asm volatile("xbegin 1f\n\t"
679 				"1:\n\t"
680 				: "+a" (ret) :: "memory");
681 
682 		/* Generate a non-canonical #GP to trigger ABORT. */
683 		if (ret == XBEGIN_STARTED)
684 			*(int *)NONCANONICAL = 0;
685 
686 		stop_event(&cnt);
687 
688 		report(cnt.count > 0, "gp cntr-%d with a value of %" PRId64 "", i, cnt.count);
689 	}
690 
691 	report_prefix_pop();
692 }
693 
694 static void warm_up(void)
695 {
696 	int i;
697 
698 	/*
699 	 * The cycles event is always measured first, so a cold cache would
700 	 * inflate the measured cycle count and could push it past the
701 	 * predefined upper boundary, causing a false positive.  Run the loop
702 	 * a few times before the real verification so measurements start
703 	 * from a warm state.
704 	 */
705 	for (i = 0; i < 10; i++)
706 		loop(0);
707 }
708 
709 static void check_counters(void)
710 {
711 	if (is_fep_available())
712 		check_emulated_instr();
713 
714 	warm_up();
715 	check_gp_counters();
716 	check_fixed_counters();
717 	check_rdpmc();
718 	check_counters_many();
719 	check_counter_overflow();
720 	check_gp_counter_cmask();
721 	check_running_counter_wrmsr();
722 	check_tsx_cycles();
723 }
724 
725 static void do_unsupported_width_counter_write(void *index)
726 {
727 	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
728 }
729 
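/*
 * The legacy IA32_PERFCTRx MSRs and the full-width IA32_PMCx MSRs alias
 * the same counters, so every write is read back through both ranges to
 * verify the effective value.
 */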
730 static void check_gp_counters_write_width(void)
731 {
732 	u64 val_64 = 0xffffff0123456789ull;
733 	u64 val_32 = val_64 & ((1ull << 32) - 1);
734 	u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1);
735 	int i;
736 
737 	/*
738 	 * MSR_IA32_PERFCTRn supports 64-bit writes,
739 	 * but only the lowest 32 bits are valid.
740 	 */
741 	for (i = 0; i < pmu.nr_gp_counters; i++) {
742 		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
743 		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
744 		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
745 
746 		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
747 		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
748 		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
749 
750 		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
751 		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
752 		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
753 	}
754 
755 	/*
756 	 * MSR_IA32_PMCn supports full-width writes: values up to the GP
757 	 * counter width are accepted, and wider values raise #GP.
758 	 */
759 	for (i = 0; i < pmu.nr_gp_counters; i++) {
760 		wrmsr(MSR_IA32_PMC0 + i, val_32);
761 		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
762 		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
763 
764 		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
765 		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
766 		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);
767 
768 		report(test_for_exception(GP_VECTOR,
769 			do_unsupported_width_counter_write, &i),
770 		"writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
771 	}
772 }
773 
774 /*
775  * Per the SDM, reference cycles are currently implemented using the
776  * core crystal clock, TSC, or bus clock. Calibrate to the TSC
777  * frequency to set reasonable expectations.
778  */
779 static void set_ref_cycle_expectations(void)
780 {
781 	pmu_counter_t cnt = {
782 		.ctr = MSR_IA32_PERFCTR0,
783 		.config = EVNTSEL_OS | EVNTSEL_USR |
784 			  intel_gp_events[INTEL_REF_CYCLES_IDX].unit_sel,
785 	};
786 	uint64_t tsc_delta;
787 	uint64_t t0, t1, t2, t3;
788 
789 	/* Bit 2 enumerates the availability of reference cycles events. */
790 	if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2))
791 		return;
792 
793 	if (this_cpu_has_perf_global_ctrl())
794 		wrmsr(pmu.msr_global_ctl, 0);
795 
796 	t0 = fenced_rdtsc();
797 	start_event(&cnt);
798 	t1 = fenced_rdtsc();
799 
800 	/*
801 	 * This loop has to run long enough to dominate the VM-exit
802 	 * costs for playing with the PMU MSRs on start and stop.
803 	 *
804 	 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
805 	 * the core crystal clock, this function calculated a guest
806 	 * TSC : ref cycles ratio of around 105 with ECX initialized
807 	 * to one billion.
808 	 */
809 	asm volatile("loop ." : "+c"((int){1000000000ull}));
810 
811 	t2 = fenced_rdtsc();
812 	stop_event(&cnt);
813 	t3 = fenced_rdtsc();
814 
815 	tsc_delta = ((t2 - t1) + (t3 - t0)) / 2;
816 
817 	if (!tsc_delta)
818 		return;
819 
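	/*
	 * Scale the nominal ref-cycles bounds by the measured
	 * ref-cycles : TSC ratio (cnt.count / tsc_delta).
	 */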
820 	intel_gp_events[INTEL_REF_CYCLES_IDX].min =
821 		(intel_gp_events[INTEL_REF_CYCLES_IDX].min * cnt.count) / tsc_delta;
822 	intel_gp_events[INTEL_REF_CYCLES_IDX].max =
823 		(intel_gp_events[INTEL_REF_CYCLES_IDX].max * cnt.count) / tsc_delta;
824 }
825 
826 static void check_invalid_rdpmc_gp(void)
827 {
828 	uint64_t val;
829 
830 	report(rdpmc_safe(64, &val) == GP_VECTOR,
831 	       "Expected #GP on RDPMC(64)");
832 }
833 
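/*
 * Run the full suite with the legacy counter MSRs, then again with
 * full-width writes (IA32_PMCx) when supported, and finally, on AMD, with
 * the K7 counter aliases.
 */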
834 int main(int ac, char **av)
835 {
836 	setup_vm();
837 	handle_irq(PMI_VECTOR, cnt_overflow);
838 	buf = malloc(N*64);
839 
840 	check_invalid_rdpmc_gp();
841 
842 	if (pmu.is_intel) {
843 		if (!pmu.version) {
844 			report_skip("No Intel Arch PMU is detected!");
845 			return report_summary();
846 		}
847 		gp_events = (struct pmu_event *)intel_gp_events;
848 		gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
849 		report_prefix_push("Intel");
850 		set_ref_cycle_expectations();
851 	} else {
852 		gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
853 		gp_events = (struct pmu_event *)amd_gp_events;
854 		report_prefix_push("AMD");
855 	}
856 
857 	printf("PMU version:         %d\n", pmu.version);
858 	printf("GP counters:         %d\n", pmu.nr_gp_counters);
859 	printf("GP counter width:    %d\n", pmu.gp_counter_width);
860 	printf("Mask length:         %d\n", pmu.gp_counter_mask_length);
861 	printf("Fixed counters:      %d\n", pmu.nr_fixed_counters);
862 	printf("Fixed counter width: %d\n", pmu.fixed_counter_width);
863 
864 	fixed_counters_num = MIN(pmu.nr_fixed_counters, ARRAY_SIZE(fixed_events));
865 	if (pmu.nr_fixed_counters > ARRAY_SIZE(fixed_events))
866 		report_info("Fixed counters number %d > defined fixed events %u.  "
867 			    "Please update test case.", pmu.nr_fixed_counters,
868 			    (uint32_t)ARRAY_SIZE(fixed_events));
869 
870 	apic_write(APIC_LVTPC, PMI_VECTOR);
871 
872 	check_counters();
873 
874 	if (pmu_has_full_writes()) {
875 		pmu.msr_gp_counter_base = MSR_IA32_PMC0;
876 
877 		report_prefix_push("full-width writes");
878 		check_counters();
879 		check_gp_counters_write_width();
880 		report_prefix_pop();
881 	}
882 
883 	if (!pmu.is_intel) {
884 		report_prefix_push("K7");
885 		pmu.nr_gp_counters = AMD64_NUM_COUNTERS;
886 		pmu.msr_gp_counter_base = MSR_K7_PERFCTR0;
887 		pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0;
888 		check_counters();
889 		report_prefix_pop();
890 	}
891 
892 	return report_summary();
893 }
894