xref: /kvm-unit-tests/x86/pmu.c (revision 89126fa47d19d98bb486448ae37413a895aef8e8)
1 
2 #include "x86/msr.h"
3 #include "x86/processor.h"
4 #include "x86/pmu.h"
5 #include "x86/apic-defs.h"
6 #include "x86/apic.h"
7 #include "x86/desc.h"
8 #include "x86/isr.h"
9 #include "vmalloc.h"
10 #include "alloc.h"
11 
12 #include "libcflat.h"
13 #include <stdint.h>
14 
/* Iteration count of the measured loop; the expected event ranges scale with it. */
#define N 1000000

/*
 * These values match the number of instructions and branches in the
 * assembly block in check_emulated_instr().
 */
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

/* Enable GLOBAL_CTRL + disable GLOBAL_CTRL instructions */
#define EXTRA_INSNS  (3 + 3)
/* Each loop iteration is 10 instructions (load, add, 7 nops, loop). */
#define LOOP_INSNS   (N * 10 + EXTRA_INSNS)
/* Each loop iteration contains exactly one branch, the "loop" instruction. */
#define LOOP_BRANCHES  (N)

/*
 * Measured workload.  @_wrmsr is pasted verbatim before and after the loop
 * body.  Register usage, as written below:
 *  - EBX supplies the iteration count; it is copied into ECX for "loop".
 *  - ECX is parked in EDI across the loop and restored afterwards.
 *  - EAX and EDX are zeroed before the trailing @_wrmsr.
 *  - %1 is a pointer advanced by 64 bytes per iteration, %2 receives the
 *    value loaded through it.
 */
#define LOOP_ASM(_wrmsr)						\
	_wrmsr "\n\t"							\
	"mov %%ecx, %%edi;"						\
	" mov %%ebx, %%ecx;\n\t"					\
	"1: mov (%1), %2;"						\
	" add $64, %1;\n\t"						\
	"nop; nop; nop; nop; nop; nop; nop;\n\t"			\
	"loop 1b;\n\t"							\
	"mov %%edi, %%ecx;"						\
	" xor %%eax, %%eax;"						\
	" xor %%edx, %%edx;\n\t"					\
	_wrmsr "\n\t"
34 
/* Software state for one counter (general-purpose or fixed) under test. */
typedef struct {
	/* MSR address of the counter register itself. */
	uint32_t ctr;
	/* Bit position in the global control MSR; also the RDPMC index. */
	uint32_t idx;
	/* Event-select style configuration (event, unit mask, EVNTSEL_* flags). */
	uint64_t config;
	/* Value preset before a run, then the value read back when stopped. */
	uint64_t count;
} pmu_counter_t;
41 
42 struct pmu_event {
43 	const char *name;
44 	uint32_t unit_sel;
45 	int min;
46 	int max;
47 } intel_gp_events[] = {
48 	{"core cycles", 0x003c, 1*N, 50*N},
49 	{"instructions", 0x00c0, 10*N, 10.2*N},
50 	{"ref cycles", 0x013c, 1*N, 30*N},
51 	{"llc references", 0x4f2e, 1, 2*N},
52 	{"llc misses", 0x412e, 1, 1*N},
53 	{"branches", 0x00c4, 1*N, 1.1*N},
54 	{"branch misses", 0x00c5, 0, 0.1*N},
55 }, amd_gp_events[] = {
56 	{"core cycles", 0x0076, 1*N, 50*N},
57 	{"instructions", 0x00c0, 10*N, 10.2*N},
58 	{"branches", 0x00c2, 1*N, 1.1*N},
59 	{"branch misses", 0x00c3, 0, 0.1*N},
60 }, fixed_events[] = {
61 	{"fixed 0", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
62 	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
63 	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
64 };
65 
/*
 * Positions of selected events inside intel_gp_events[].  These must be
 * kept in sync with the order of that table.
 */
enum {
	INTEL_INSTRUCTIONS_IDX = 1,
	INTEL_REF_CYCLES_IDX = 2,
	INTEL_BRANCHES_IDX = 5,
};
75 
/*
 * Positions of selected events inside amd_gp_events[].  These must be
 * kept in sync with the order of that table.
 */
enum {
	AMD_INSTRUCTIONS_IDX = 1,
	AMD_BRANCHES_IDX = 2,
};
84 
/* Memory walked by the measured loop in 64-byte strides; allocated in main(). */
char *buf;

/* Event table selected in main() for the running vendor, and its length. */
static struct pmu_event *gp_events;
static unsigned int gp_events_size;

/* Number of fixed counters exercised, capped to the size of fixed_events[]. */
static unsigned int fixed_counters_num;
90 
91 
92 static inline void __loop(void)
93 {
94 	unsigned long tmp, tmp2, tmp3;
95 
96 	asm volatile(LOOP_ASM("nop")
97 		     : "=c"(tmp), "=r"(tmp2), "=r"(tmp3)
98 		     : "0"(N), "1"(buf));
99 }
100 
101 /*
102  * Enable and disable counters in a whole asm blob to ensure
103  * no other instructions are counted in the window between
104  * counters enabling and really LOOP_ASM code executing.
105  * Thus counters can verify instructions and branches events
106  * against precise counts instead of a rough valid count range.
107  */
108 static inline void __precise_loop(u64 cntrs)
109 {
110 	unsigned long tmp, tmp2, tmp3;
111 	unsigned int global_ctl = pmu.msr_global_ctl;
112 	u32 eax = cntrs & (BIT_ULL(32) - 1);
113 	u32 edx = cntrs >> 32;
114 
115 	asm volatile(LOOP_ASM("wrmsr")
116 		     : "=b"(tmp), "=r"(tmp2), "=r"(tmp3)
117 		     : "a"(eax), "d"(edx), "c"(global_ctl),
118 		       "0"(N), "1"(buf)
119 		     : "edi");
120 }
121 
122 static inline void loop(u64 cntrs)
123 {
124 	if (!this_cpu_has_perf_global_ctrl())
125 		__loop();
126 	else
127 		__precise_loop(cntrs);
128 }
129 
130 static void adjust_events_range(struct pmu_event *gp_events,
131 				int instruction_idx, int branch_idx)
132 {
133 	/*
134 	 * If HW supports GLOBAL_CTRL MSR, enabling and disabling PMCs are
135 	 * moved in __precise_loop(). Thus, instructions and branches events
136 	 * can be verified against a precise count instead of a rough range.
137 	 *
138 	 * Skip the precise checks on AMD, as AMD CPUs count VMRUN as a branch
139 	 * instruction in guest context, which* leads to intermittent failures
140 	 * as the counts will vary depending on how many asynchronous VM-Exits
141 	 * occur while running the measured code, e.g. if the host takes IRQs.
142 	 */
143 	if (pmu.is_intel && this_cpu_has_perf_global_ctrl()) {
144 		gp_events[instruction_idx].min = LOOP_INSNS;
145 		gp_events[instruction_idx].max = LOOP_INSNS;
146 		gp_events[branch_idx].min = LOOP_BRANCHES;
147 		gp_events[branch_idx].max = LOOP_BRANCHES;
148 	}
149 }
150 
151 volatile uint64_t irq_received;
152 
153 static void cnt_overflow(isr_regs_t *regs)
154 {
155 	irq_received++;
156 	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
157 	apic_write(APIC_EOI, 0);
158 }
159 
160 static bool check_irq(void)
161 {
162 	int i;
163 	irq_received = 0;
164 	sti();
165 	for (i = 0; i < 100000 && !irq_received; i++)
166 		asm volatile("pause");
167 	cli();
168 	return irq_received;
169 }
170 
171 static bool is_gp(pmu_counter_t *evt)
172 {
173 	if (!pmu.is_intel)
174 		return true;
175 
176 	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
177 		evt->ctr >= MSR_IA32_PMC0;
178 }
179 
180 static int event_to_global_idx(pmu_counter_t *cnt)
181 {
182 	if (pmu.is_intel)
183 		return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
184 			(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));
185 
186 	if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0)
187 		return (cnt->ctr - pmu.msr_gp_counter_base) / 2;
188 	else
189 		return cnt->ctr - pmu.msr_gp_counter_base;
190 }
191 
192 static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
193 {
194 	if (is_gp(cnt)) {
195 		int i;
196 
197 		for (i = 0; i < gp_events_size; i++)
198 			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
199 				return &gp_events[i];
200 	} else {
201 		unsigned int idx = cnt->ctr - MSR_CORE_PERF_FIXED_CTR0;
202 
203 		if (idx < ARRAY_SIZE(fixed_events))
204 			return &fixed_events[idx];
205 	}
206 
207 	return (void*)0;
208 }
209 
210 static void global_enable(pmu_counter_t *cnt)
211 {
212 	if (!this_cpu_has_perf_global_ctrl())
213 		return;
214 
215 	cnt->idx = event_to_global_idx(cnt);
216 	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx));
217 }
218 
219 static void global_disable(pmu_counter_t *cnt)
220 {
221 	if (!this_cpu_has_perf_global_ctrl())
222 		return;
223 
224 	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx));
225 }
226 
227 static void __start_event(pmu_counter_t *evt, uint64_t count)
228 {
229     evt->count = count;
230     wrmsr(evt->ctr, evt->count);
231     if (is_gp(evt)) {
232 	    wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
233 		  evt->config | EVNTSEL_EN);
234     } else {
235 	    uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
236 	    int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
237 	    uint32_t usrospmi = 0;
238 
239 	    if (evt->config & EVNTSEL_OS)
240 		    usrospmi |= (1 << 0);
241 	    if (evt->config & EVNTSEL_USR)
242 		    usrospmi |= (1 << 1);
243 	    if (evt->config & EVNTSEL_INT)
244 		    usrospmi |= (1 << 3); // PMI on overflow
245 	    ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
246 	    wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
247     }
248     apic_write(APIC_LVTPC, PMI_VECTOR);
249 }
250 
251 static void start_event(pmu_counter_t *evt)
252 {
253 	__start_event(evt, 0);
254 	global_enable(evt);
255 }
256 
257 static void __stop_event(pmu_counter_t *evt)
258 {
259 	if (is_gp(evt)) {
260 		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
261 		      evt->config & ~EVNTSEL_EN);
262 	} else {
263 		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
264 		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
265 		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
266 	}
267 	evt->count = rdmsr(evt->ctr);
268 }
269 
270 static void stop_event(pmu_counter_t *evt)
271 {
272 	global_disable(evt);
273 	__stop_event(evt);
274 }
275 
276 static noinline void measure_many(pmu_counter_t *evt, int count)
277 {
278 	int i;
279 	u64 cntrs = 0;
280 
281 	for (i = 0; i < count; i++) {
282 		__start_event(&evt[i], 0);
283 		cntrs |= BIT_ULL(event_to_global_idx(&evt[i]));
284 	}
285 	loop(cntrs);
286 	for (i = 0; i < count; i++)
287 		__stop_event(&evt[i]);
288 }
289 
290 static void measure_one(pmu_counter_t *evt)
291 {
292 	measure_many(evt, 1);
293 }
294 
295 static noinline void __measure(pmu_counter_t *evt, uint64_t count)
296 {
297 	u64 cntrs = BIT_ULL(event_to_global_idx(evt));
298 
299 	__start_event(evt, count);
300 	loop(cntrs);
301 	__stop_event(evt);
302 }
303 
304 static bool verify_event(uint64_t count, struct pmu_event *e)
305 {
306 	bool pass;
307 
308 	if (!e)
309 		return false;
310 
311 	pass = count >= e->min && count <= e->max;
312 	if (!pass)
313 		printf("FAIL: %d <= %"PRId64" <= %d\n", e->min, count, e->max);
314 
315 	return pass;
316 }
317 
318 static bool verify_counter(pmu_counter_t *cnt)
319 {
320 	return verify_event(cnt->count, get_counter_event(cnt));
321 }
322 
323 static void check_gp_counter(struct pmu_event *evt)
324 {
325 	pmu_counter_t cnt = {
326 		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
327 	};
328 	int i;
329 
330 	for (i = 0; i < pmu.nr_gp_counters; i++) {
331 		cnt.ctr = MSR_GP_COUNTERx(i);
332 		measure_one(&cnt);
333 		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
334 	}
335 }
336 
337 static void check_gp_counters(void)
338 {
339 	int i;
340 
341 	for (i = 0; i < gp_events_size; i++)
342 		if (pmu_gp_counter_is_available(i))
343 			check_gp_counter(&gp_events[i]);
344 		else
345 			printf("GP event '%s' is disabled\n",
346 					gp_events[i].name);
347 }
348 
349 static void check_fixed_counters(void)
350 {
351 	pmu_counter_t cnt = {
352 		.config = EVNTSEL_OS | EVNTSEL_USR,
353 	};
354 	int i;
355 
356 	for (i = 0; i < fixed_counters_num; i++) {
357 		cnt.ctr = fixed_events[i].unit_sel;
358 		measure_one(&cnt);
359 		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
360 	}
361 }
362 
363 static void check_counters_many(void)
364 {
365 	pmu_counter_t cnt[48];
366 	int i, n;
367 
368 	for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) {
369 		if (!pmu_gp_counter_is_available(i))
370 			continue;
371 
372 		cnt[n].ctr = MSR_GP_COUNTERx(n);
373 		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
374 			gp_events[i % gp_events_size].unit_sel;
375 		n++;
376 	}
377 	for (i = 0; i < fixed_counters_num; i++) {
378 		cnt[n].ctr = fixed_events[i].unit_sel;
379 		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
380 		n++;
381 	}
382 
383 	assert(n <= ARRAY_SIZE(cnt));
384 	measure_many(cnt, n);
385 
386 	for (i = 0; i < n; i++)
387 		if (!verify_counter(&cnt[i]))
388 			break;
389 
390 	report(i == n, "all counters");
391 }
392 
393 static uint64_t measure_for_overflow(pmu_counter_t *cnt)
394 {
395 	__measure(cnt, 0);
396 	/*
397 	 * To generate overflow, i.e. roll over to '0', the initial count just
398 	 * needs to be preset to the negative expected count.  However, as per
399 	 * Intel's SDM, the preset count needs to be incremented by 1 to ensure
400 	 * the overflow interrupt is generated immediately instead of possibly
401 	 * waiting for the overflow to propagate through the counter.
402 	 */
403 	assert(cnt->count > 1);
404 	return 1 - cnt->count;
405 }
406 
407 static void check_counter_overflow(void)
408 {
409 	int i;
410 	uint64_t overflow_preset;
411 	int instruction_idx = pmu.is_intel ?
412 			      INTEL_INSTRUCTIONS_IDX :
413 			      AMD_INSTRUCTIONS_IDX;
414 
415 	pmu_counter_t cnt = {
416 		.ctr = MSR_GP_COUNTERx(0),
417 		.config = EVNTSEL_OS | EVNTSEL_USR |
418 			  gp_events[instruction_idx].unit_sel /* instructions */,
419 	};
420 	overflow_preset = measure_for_overflow(&cnt);
421 
422 	/* clear status before test */
423 	if (this_cpu_has_perf_global_status())
424 		pmu_clear_global_status();
425 
426 	report_prefix_push("overflow");
427 
428 	for (i = 0; i < pmu.nr_gp_counters + 1; i++) {
429 		uint64_t status;
430 		int idx;
431 
432 		cnt.count = overflow_preset;
433 		if (pmu_use_full_writes())
434 			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
435 
436 		if (i == pmu.nr_gp_counters) {
437 			if (!pmu.is_intel)
438 				break;
439 
440 			cnt.ctr = fixed_events[0].unit_sel;
441 			cnt.count = measure_for_overflow(&cnt);
442 			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
443 		} else {
444 			cnt.ctr = MSR_GP_COUNTERx(i);
445 		}
446 
447 		if (i % 2)
448 			cnt.config |= EVNTSEL_INT;
449 		else
450 			cnt.config &= ~EVNTSEL_INT;
451 		idx = event_to_global_idx(&cnt);
452 		__measure(&cnt, cnt.count);
453 		if (pmu.is_intel)
454 			report(cnt.count == 1, "cntr-%d", i);
455 		else
456 			report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i);
457 
458 		if (!this_cpu_has_perf_global_status())
459 			continue;
460 
461 		status = rdmsr(pmu.msr_global_status);
462 		report(status & (1ull << idx), "status-%d", i);
463 		wrmsr(pmu.msr_global_status_clr, status);
464 		status = rdmsr(pmu.msr_global_status);
465 		report(!(status & (1ull << idx)), "status clear-%d", i);
466 		report(check_irq() == (i % 2), "irq-%d", i);
467 	}
468 
469 	report_prefix_pop();
470 }
471 
472 static void check_gp_counter_cmask(void)
473 {
474 	int instruction_idx = pmu.is_intel ?
475 			      INTEL_INSTRUCTIONS_IDX :
476 			      AMD_INSTRUCTIONS_IDX;
477 
478 	pmu_counter_t cnt = {
479 		.ctr = MSR_GP_COUNTERx(0),
480 		.config = EVNTSEL_OS | EVNTSEL_USR |
481 			  gp_events[instruction_idx].unit_sel /* instructions */,
482 	};
483 	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
484 	measure_one(&cnt);
485 	report(cnt.count < gp_events[instruction_idx].min, "cmask");
486 }
487 
488 static void do_rdpmc_fast(void *ptr)
489 {
490 	pmu_counter_t *cnt = ptr;
491 	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);
492 
493 	if (!is_gp(cnt))
494 		idx |= 1 << 30;
495 
496 	cnt->count = rdpmc(idx);
497 }
498 
499 
500 static void check_rdpmc(void)
501 {
502 	uint64_t val = 0xff0123456789ull;
503 	bool exc;
504 	int i;
505 
506 	report_prefix_push("rdpmc");
507 
508 	for (i = 0; i < pmu.nr_gp_counters; i++) {
509 		uint64_t x;
510 		pmu_counter_t cnt = {
511 			.ctr = MSR_GP_COUNTERx(i),
512 			.idx = i
513 		};
514 
515 	        /*
516 	         * Without full-width writes, only the low 32 bits are writable,
517 	         * and the value is sign-extended.
518 	         */
519 		if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0)
520 			x = (uint64_t)(int64_t)(int32_t)val;
521 		else
522 			x = (uint64_t)(int64_t)val;
523 
524 		/* Mask according to the number of supported bits */
525 		x &= (1ull << pmu.gp_counter_width) - 1;
526 
527 		wrmsr(MSR_GP_COUNTERx(i), val);
528 		report(rdpmc(i) == x, "cntr-%d", i);
529 
530 		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
531 		if (exc)
532 			report_skip("fast-%d", i);
533 		else
534 			report(cnt.count == (u32)val, "fast-%d", i);
535 	}
536 	for (i = 0; i < fixed_counters_num; i++) {
537 		uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1);
538 		pmu_counter_t cnt = {
539 			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
540 			.idx = i
541 		};
542 
543 		wrmsr(MSR_PERF_FIXED_CTRx(i), x);
544 		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);
545 
546 		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
547 		if (exc)
548 			report_skip("fixed fast-%d", i);
549 		else
550 			report(cnt.count == (u32)x, "fixed fast-%d", i);
551 	}
552 
553 	report_prefix_pop();
554 }
555 
556 static void check_running_counter_wrmsr(void)
557 {
558 	uint64_t status;
559 	uint64_t count;
560 	unsigned int instruction_idx = pmu.is_intel ?
561 				       INTEL_INSTRUCTIONS_IDX :
562 				       AMD_INSTRUCTIONS_IDX;
563 
564 	pmu_counter_t evt = {
565 		.ctr = MSR_GP_COUNTERx(0),
566 		.config = EVNTSEL_OS | EVNTSEL_USR |
567 			  gp_events[instruction_idx].unit_sel,
568 	};
569 
570 	report_prefix_push("running counter wrmsr");
571 
572 	start_event(&evt);
573 	__loop();
574 	wrmsr(MSR_GP_COUNTERx(0), 0);
575 	stop_event(&evt);
576 	report(evt.count < gp_events[instruction_idx].min, "cntr");
577 
578 	/* clear status before overflow test */
579 	if (this_cpu_has_perf_global_status())
580 		pmu_clear_global_status();
581 
582 	start_event(&evt);
583 
584 	count = -1;
585 	if (pmu_use_full_writes())
586 		count &= (1ull << pmu.gp_counter_width) - 1;
587 
588 	wrmsr(MSR_GP_COUNTERx(0), count);
589 
590 	__loop();
591 	stop_event(&evt);
592 
593 	if (this_cpu_has_perf_global_status()) {
594 		status = rdmsr(pmu.msr_global_status);
595 		report(status & 1, "status msr bit");
596 	}
597 
598 	report_prefix_pop();
599 }
600 
601 static void check_emulated_instr(void)
602 {
603 	uint64_t status, instr_start, brnch_start;
604 	uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1;
605 	unsigned int branch_idx = pmu.is_intel ?
606 				  INTEL_BRANCHES_IDX : AMD_BRANCHES_IDX;
607 	unsigned int instruction_idx = pmu.is_intel ?
608 				       INTEL_INSTRUCTIONS_IDX :
609 				       AMD_INSTRUCTIONS_IDX;
610 	pmu_counter_t brnch_cnt = {
611 		.ctr = MSR_GP_COUNTERx(0),
612 		/* branch instructions */
613 		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel,
614 	};
615 	pmu_counter_t instr_cnt = {
616 		.ctr = MSR_GP_COUNTERx(1),
617 		/* instructions */
618 		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[instruction_idx].unit_sel,
619 	};
620 	report_prefix_push("emulated instruction");
621 
622 	if (this_cpu_has_perf_global_status())
623 		pmu_clear_global_status();
624 
625 	start_event(&brnch_cnt);
626 	start_event(&instr_cnt);
627 
628 	brnch_start = -EXPECTED_BRNCH;
629 	instr_start = -EXPECTED_INSTR;
630 	wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width);
631 	wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width);
632 	// KVM_FEP is a magic prefix that forces emulation so
633 	// 'KVM_FEP "jne label\n"' just counts as a single instruction.
634 	asm volatile(
635 		"mov $0x0, %%eax\n"
636 		"cmp $0x0, %%eax\n"
637 		KVM_FEP "jne label\n"
638 		KVM_FEP "jne label\n"
639 		KVM_FEP "jne label\n"
640 		KVM_FEP "jne label\n"
641 		KVM_FEP "jne label\n"
642 		"mov $0xa, %%eax\n"
643 		"cpuid\n"
644 		"mov $0xa, %%eax\n"
645 		"cpuid\n"
646 		"mov $0xa, %%eax\n"
647 		"cpuid\n"
648 		"mov $0xa, %%eax\n"
649 		"cpuid\n"
650 		"mov $0xa, %%eax\n"
651 		"cpuid\n"
652 		"label:\n"
653 		:
654 		:
655 		: "eax", "ebx", "ecx", "edx");
656 
657 	if (this_cpu_has_perf_global_ctrl())
658 		wrmsr(pmu.msr_global_ctl, 0);
659 
660 	stop_event(&brnch_cnt);
661 	stop_event(&instr_cnt);
662 
663 	// Check that the end count - start count is at least the expected
664 	// number of instructions and branches.
665 	report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
666 	       "instruction count");
667 	report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
668 	       "branch count");
669 	if (this_cpu_has_perf_global_status()) {
670 		// Additionally check that those counters overflowed properly.
671 		status = rdmsr(pmu.msr_global_status);
672 		report(status & 1, "branch counter overflow");
673 		report(status & 2, "instruction counter overflow");
674 	}
675 
676 	report_prefix_pop();
677 }
678 
679 #define XBEGIN_STARTED (~0u)
680 static void check_tsx_cycles(void)
681 {
682 	pmu_counter_t cnt;
683 	unsigned int i, ret = 0;
684 
685 	if (!this_cpu_has(X86_FEATURE_RTM))
686 		return;
687 
688 	report_prefix_push("TSX cycles");
689 
690 	for (i = 0; i < pmu.nr_gp_counters; i++) {
691 		cnt.ctr = MSR_GP_COUNTERx(i);
692 
693 		if (i == 2) {
694 			/* Transactional cycles committed only on gp counter 2 */
695 			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x30000003c;
696 		} else {
697 			/* Transactional cycles */
698 			cnt.config = EVNTSEL_OS | EVNTSEL_USR | 0x10000003c;
699 		}
700 
701 		start_event(&cnt);
702 
703 		asm volatile("xbegin 1f\n\t"
704 				"1:\n\t"
705 				: "+a" (ret) :: "memory");
706 
707 		/* Generate a non-canonical #GP to trigger ABORT. */
708 		if (ret == XBEGIN_STARTED)
709 			*(int *)NONCANONICAL = 0;
710 
711 		stop_event(&cnt);
712 
713 		report(cnt.count > 0, "gp cntr-%d with a value of %" PRId64 "", i, cnt.count);
714 	}
715 
716 	report_prefix_pop();
717 }
718 
/* Run the measured loop a few times with no counters enabled. */
static void warm_up(void)
{
	int rounds = 10;

	/*
	 * The cycles event is always measured first, so it would otherwise
	 * also pay for warming up the cache.  That can push the measured
	 * cycles past the pre-defined upper bound and produce a spurious
	 * failure, hence this warm-up phase ahead of the real verification.
	 */
	while (rounds-- > 0)
		loop(0);
}
733 
/*
 * Run the full battery of counter tests against the currently configured
 * counter MSR base.  The emulated-instruction test runs only when forced
 * emulation is available.
 */
static void check_counters(void)
{
	if (is_fep_available())
		check_emulated_instr();

	/* Prime the caches before anything is verified. */
	warm_up();

	check_gp_counters();
	check_fixed_counters();
	check_rdpmc();
	check_counters_many();
	check_counter_overflow();
	check_gp_counter_cmask();
	check_running_counter_wrmsr();
	check_tsx_cycles();
}
749 
750 static void do_unsupported_width_counter_write(void *index)
751 {
752 	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
753 }
754 
755 static void check_gp_counters_write_width(void)
756 {
757 	u64 val_64 = 0xffffff0123456789ull;
758 	u64 val_32 = val_64 & ((1ull << 32) - 1);
759 	u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1);
760 	int i;
761 
762 	/*
763 	 * MSR_IA32_PERFCTRn supports 64-bit writes,
764 	 * but only the lowest 32 bits are valid.
765 	 */
766 	for (i = 0; i < pmu.nr_gp_counters; i++) {
767 		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
768 		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
769 		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
770 
771 		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
772 		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
773 		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
774 
775 		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
776 		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
777 		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
778 	}
779 
780 	/*
781 	 * MSR_IA32_PMCn supports writing values up to GP counter width,
782 	 * and only the lowest bits of GP counter width are valid.
783 	 */
784 	for (i = 0; i < pmu.nr_gp_counters; i++) {
785 		wrmsr(MSR_IA32_PMC0 + i, val_32);
786 		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
787 		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
788 
789 		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
790 		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
791 		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);
792 
793 		report(test_for_exception(GP_VECTOR,
794 			do_unsupported_width_counter_write, &i),
795 		"writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
796 	}
797 }
798 
799 /*
800  * Per the SDM, reference cycles are currently implemented using the
801  * core crystal clock, TSC, or bus clock. Calibrate to the TSC
802  * frequency to set reasonable expectations.
803  */
804 static void set_ref_cycle_expectations(void)
805 {
806 	pmu_counter_t cnt = {
807 		.ctr = MSR_IA32_PERFCTR0,
808 		.config = EVNTSEL_OS | EVNTSEL_USR |
809 			  intel_gp_events[INTEL_REF_CYCLES_IDX].unit_sel,
810 	};
811 	uint64_t tsc_delta;
812 	uint64_t t0, t1, t2, t3;
813 
814 	/* Bit 2 enumerates the availability of reference cycles events. */
815 	if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2))
816 		return;
817 
818 	if (this_cpu_has_perf_global_ctrl())
819 		wrmsr(pmu.msr_global_ctl, 0);
820 
821 	t0 = fenced_rdtsc();
822 	start_event(&cnt);
823 	t1 = fenced_rdtsc();
824 
825 	/*
826 	 * This loop has to run long enough to dominate the VM-exit
827 	 * costs for playing with the PMU MSRs on start and stop.
828 	 *
829 	 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
830 	 * the core crystal clock, this function calculated a guest
831 	 * TSC : ref cycles ratio of around 105 with ECX initialized
832 	 * to one billion.
833 	 */
834 	asm volatile("loop ." : "+c"((int){1000000000ull}));
835 
836 	t2 = fenced_rdtsc();
837 	stop_event(&cnt);
838 	t3 = fenced_rdtsc();
839 
840 	tsc_delta = ((t2 - t1) + (t3 - t0)) / 2;
841 
842 	if (!tsc_delta)
843 		return;
844 
845 	intel_gp_events[INTEL_REF_CYCLES_IDX].min =
846 		(intel_gp_events[INTEL_REF_CYCLES_IDX].min * cnt.count) / tsc_delta;
847 	intel_gp_events[INTEL_REF_CYCLES_IDX].max =
848 		(intel_gp_events[INTEL_REF_CYCLES_IDX].max * cnt.count) / tsc_delta;
849 }
850 
851 static void check_invalid_rdpmc_gp(void)
852 {
853 	uint64_t val;
854 
855 	report(rdpmc_safe(64, &val) == GP_VECTOR,
856 	       "Expected #GP on RDPMC(64)");
857 }
858 
859 int main(int ac, char **av)
860 {
861 	int instruction_idx;
862 	int branch_idx;
863 
864 	setup_vm();
865 	handle_irq(PMI_VECTOR, cnt_overflow);
866 	buf = malloc(N*64);
867 
868 	check_invalid_rdpmc_gp();
869 
870 	if (pmu.is_intel) {
871 		if (!pmu.version) {
872 			report_skip("No Intel Arch PMU is detected!");
873 			return report_summary();
874 		}
875 		gp_events = (struct pmu_event *)intel_gp_events;
876 		gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
877 		instruction_idx = INTEL_INSTRUCTIONS_IDX;
878 		branch_idx = INTEL_BRANCHES_IDX;
879 		report_prefix_push("Intel");
880 		set_ref_cycle_expectations();
881 	} else {
882 		gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
883 		gp_events = (struct pmu_event *)amd_gp_events;
884 		instruction_idx = AMD_INSTRUCTIONS_IDX;
885 		branch_idx = AMD_BRANCHES_IDX;
886 		report_prefix_push("AMD");
887 	}
888 	adjust_events_range(gp_events, instruction_idx, branch_idx);
889 
890 	printf("PMU version:         %d\n", pmu.version);
891 	printf("GP counters:         %d\n", pmu.nr_gp_counters);
892 	printf("GP counter width:    %d\n", pmu.gp_counter_width);
893 	printf("Mask length:         %d\n", pmu.gp_counter_mask_length);
894 	printf("Fixed counters:      %d\n", pmu.nr_fixed_counters);
895 	printf("Fixed counter width: %d\n", pmu.fixed_counter_width);
896 
897 	fixed_counters_num = MIN(pmu.nr_fixed_counters, ARRAY_SIZE(fixed_events));
898 	if (pmu.nr_fixed_counters > ARRAY_SIZE(fixed_events))
899 		report_info("Fixed counters number %d > defined fixed events %u.  "
900 			    "Please update test case.", pmu.nr_fixed_counters,
901 			    (uint32_t)ARRAY_SIZE(fixed_events));
902 
903 	apic_write(APIC_LVTPC, PMI_VECTOR);
904 
905 	check_counters();
906 
907 	if (pmu_has_full_writes()) {
908 		pmu.msr_gp_counter_base = MSR_IA32_PMC0;
909 
910 		report_prefix_push("full-width writes");
911 		check_counters();
912 		check_gp_counters_write_width();
913 		report_prefix_pop();
914 	}
915 
916 	if (!pmu.is_intel) {
917 		report_prefix_push("K7");
918 		pmu.nr_gp_counters = AMD64_NUM_COUNTERS;
919 		pmu.msr_gp_counter_base = MSR_K7_PERFCTR0;
920 		pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0;
921 		check_counters();
922 		report_prefix_pop();
923 	}
924 
925 	return report_summary();
926 }
927