#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/apic-defs.h"
#include "x86/apic.h"
#include "x86/desc.h"
#include "x86/isr.h"
#include "alloc.h"

#include "libcflat.h"
#include <stdint.h>

#define FIXED_CNT_INDEX 32
#define PC_VECTOR	32

#define EVNTSEL_EVENT_SHIFT	0
#define EVNTSEL_UMASK_SHIFT	8
#define EVNTSEL_USR_SHIFT	16
#define EVNTSEL_OS_SHIFT	17
#define EVNTSEL_EDGE_SHIFT	18
#define EVNTSEL_PC_SHIFT	19
#define EVNTSEL_INT_SHIFT	20
#define EVNTSEL_EN_SHIFT	22
#define EVNTSEL_INV_SHIFT	23
#define EVNTSEL_CMASK_SHIFT	24

#define EVNTSEL_EN	(1 << EVNTSEL_EN_SHIFT)
#define EVNTSEL_USR	(1 << EVNTSEL_USR_SHIFT)
#define EVNTSEL_OS	(1 << EVNTSEL_OS_SHIFT)
#define EVNTSEL_PC	(1 << EVNTSEL_PC_SHIFT)
#define EVNTSEL_INT	(1 << EVNTSEL_INT_SHIFT)
#define EVNTSEL_INV	(1 << EVNTSEL_INV_SHIFT)

#define N 1000000

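/*
 * One counter under test: its counter MSR, the event-select value used
 * to program it, the count read back after a measurement, and its bit
 * index in the global control/status MSRs.
 */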
typedef struct {
	uint32_t ctr;
	uint32_t config;
	uint64_t count;
	int idx;
} pmu_counter_t;

union cpuid10_eax {
	struct {
		unsigned int version_id:8;
		unsigned int num_counters:8;
		unsigned int bit_width:8;
		unsigned int mask_length:8;
	} split;
	unsigned int full;
} eax;

union cpuid10_ebx {
	struct {
		unsigned int no_unhalted_core_cycles:1;
		unsigned int no_instructions_retired:1;
		unsigned int no_unhalted_reference_cycles:1;
		unsigned int no_llc_reference:1;
		unsigned int no_llc_misses:1;
		unsigned int no_branch_instruction_retired:1;
		unsigned int no_branch_misses_retired:1;
	} split;
	unsigned int full;
} ebx;

union cpuid10_edx {
	struct {
		unsigned int num_counters_fixed:5;
		unsigned int bit_width_fixed:8;
		unsigned int reserved:19;
	} split;
	unsigned int full;
} edx;

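/*
 * Expected count ranges for each event when driven by the N-iteration
 * loop() workload below; unit_sel holds the event-select/umask encoding
 * for GP events and the counter MSR for fixed events.
 */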
struct pmu_event {
	const char *name;
	uint32_t unit_sel;
	int min;
	int max;
} gp_events[] = {
	{"core cycles", 0x003c, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"ref cycles", 0x013c, 0.1*N, 30*N},
	{"llc references", 0x4f2e, 1, 2*N},
	{"llc misses", 0x412e, 1, 1*N},
	{"branches", 0x00c4, 1*N, 1.1*N},
	{"branch misses", 0x00c5, 0, 0.1*N},
}, fixed_events[] = {
	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
	{"fixed 3", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
};

#define PMU_CAP_FW_WRITES	(1ULL << 13)
static u64 gp_counter_base = MSR_IA32_PERFCTR0;

static int num_counters;

char *buf;

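/*
 * The measured workload: N iterations of a ten-instruction loop body
 * (a load, an add, seven nops and the loop branch itself) striding
 * through buf in 64-byte steps. The ranges in gp_events are calibrated
 * against this loop.
 */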
static inline void loop(void)
{
	unsigned long tmp, tmp2, tmp3;

	asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b"
			: "=c"(tmp), "=r"(tmp2), "=r"(tmp3): "0"(N), "1"(buf));
}

volatile uint64_t irq_received;

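/* PMI handler: record the interrupt and acknowledge the local APIC. */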
static void cnt_overflow(isr_regs_t *regs)
{
	irq_received++;
	apic_write(APIC_EOI, 0);
}

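/*
 * Spin with interrupts enabled until the PMI handler fires or the pause
 * loop times out; returns whether an interrupt arrived.
 */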
static bool check_irq(void)
{
	int i;

	irq_received = 0;
	irq_enable();
	for (i = 0; i < 100000 && !irq_received; i++)
		asm volatile("pause");
	irq_disable();
	return irq_received;
}

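/*
 * A counter is general-purpose if its MSR lies below the fixed-counter
 * range or in the full-width MSR_IA32_PMCn alias range.
 */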
static bool is_gp(pmu_counter_t *evt)
{
	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
		evt->ctr >= MSR_IA32_PMC0;
}

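/*
 * Map a counter MSR to its bit position in GLOBAL_CTRL/GLOBAL_STATUS;
 * fixed counters start at bit FIXED_CNT_INDEX (32).
 */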
static int event_to_global_idx(pmu_counter_t *cnt)
{
	return cnt->ctr - (is_gp(cnt) ? gp_counter_base :
		(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));
}

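/* Map a programmed counter back to its entry in the expected-range tables. */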
static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
{
	if (is_gp(cnt)) {
		int i;

		for (i = 0; i < ARRAY_SIZE(gp_events); i++)
			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
				return &gp_events[i];
	} else
		return &fixed_events[cnt->ctr - MSR_CORE_PERF_FIXED_CTR0];

	return NULL;
}

static void global_enable(pmu_counter_t *cnt)
{
	cnt->idx = event_to_global_idx(cnt);

	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) |
			(1ull << cnt->idx));
}

static void global_disable(pmu_counter_t *cnt)
{
	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) &
			~(1ull << cnt->idx));
}

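/*
 * Write the start value, program the event (the GP event-select MSR, or
 * the counter's nibble of FIXED_CTR_CTRL), enable the counter in
 * GLOBAL_CTRL, and point the APIC LVT performance-counter entry at our
 * PMI vector.
 */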
static void start_event(pmu_counter_t *evt)
{
	wrmsr(evt->ctr, evt->count);
	if (is_gp(evt))
		wrmsr(MSR_P6_EVNTSEL0 + event_to_global_idx(evt),
				evt->config | EVNTSEL_EN);
	else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		uint32_t usrospmi = 0;

		if (evt->config & EVNTSEL_OS)
			usrospmi |= (1 << 0);
		if (evt->config & EVNTSEL_USR)
			usrospmi |= (1 << 1);
		if (evt->config & EVNTSEL_INT)
			usrospmi |= (1 << 3); /* PMI on overflow */
		ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
	}
	global_enable(evt);
	apic_write(APIC_LVTPC, PC_VECTOR);
}

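/* Disable the counter globally and locally, then read back its final count. */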
static void stop_event(pmu_counter_t *evt)
{
	global_disable(evt);
	if (is_gp(evt))
		wrmsr(MSR_P6_EVNTSEL0 + event_to_global_idx(evt),
				evt->config & ~EVNTSEL_EN);
	else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
	}
	evt->count = rdmsr(evt->ctr);
}

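/* Start all given counters, run the workload once, then stop and collect them. */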
static void measure(pmu_counter_t *evt, int count)
{
	int i;

	for (i = 0; i < count; i++)
		start_event(&evt[i]);
	loop();
	for (i = 0; i < count; i++)
		stop_event(&evt[i]);
}

static bool verify_event(uint64_t count, struct pmu_event *e)
{
	// printf("%lld >= %lld <= %lld\n", e->min, count, e->max);
	return count >= e->min && count <= e->max;
}

static bool verify_counter(pmu_counter_t *cnt)
{
	return verify_event(cnt->count, get_counter_event(cnt));
}

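/* Run one event on each GP counter in turn and verify the resulting count. */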
static void check_gp_counter(struct pmu_event *evt)
{
	pmu_counter_t cnt = {
		.ctr = gp_counter_base,
		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
	};
	int i;

	for (i = 0; i < num_counters; i++, cnt.ctr++) {
		cnt.count = 0;
		measure(&cnt, 1);
		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
	}
}

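/*
 * CPUID.0AH:EBX flags architectural events the CPU does not support;
 * test every event that is not flagged.
 */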
static void check_gp_counters(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(gp_events); i++)
		if (!(ebx.full & (1 << i)))
			check_gp_counter(&gp_events[i]);
		else
			printf("GP event '%s' is disabled\n",
					gp_events[i].name);
}

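/*
 * Fixed counters 0-2 architecturally count instructions retired, core
 * cycles and reference cycles; check each against its expected range.
 */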
static void check_fixed_counters(void)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR,
	};
	int i;

	for (i = 0; i < edx.split.num_counters_fixed; i++) {
		cnt.count = 0;
		cnt.ctr = fixed_events[i].unit_sel;
		measure(&cnt, 1);
		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d",
		       i);
	}
}

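/* Program every available GP and fixed counter at once and verify them all. */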
static void check_counters_many(void)
{
	pmu_counter_t cnt[10];
	int i, n;

	for (i = 0, n = 0; n < num_counters; i++) {
		if (ebx.full & (1 << i))
			continue;

		cnt[n].count = 0;
		cnt[n].ctr = gp_counter_base + n;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
			gp_events[i % ARRAY_SIZE(gp_events)].unit_sel;
		n++;
	}
	for (i = 0; i < edx.split.num_counters_fixed; i++) {
		cnt[n].count = 0;
		cnt[n].ctr = fixed_events[i].unit_sel;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
		n++;
	}

	measure(cnt, n);

	for (i = 0; i < n; i++)
		if (!verify_counter(&cnt[i]))
			break;

	report(i == n, "all counters");
}

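/*
 * Preload each counter so that the instruction count of one loop() run
 * wraps it around to exactly 1, then check the final count, the
 * GLOBAL_STATUS overflow bit, its clearing via GLOBAL_OVF_CTRL, and PMI
 * delivery on the iterations that set EVNTSEL_INT.
 */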
static void check_counter_overflow(void)
{
	uint64_t count;
	int i;
	pmu_counter_t cnt = {
		.ctr = gp_counter_base,
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
		.count = 0,
	};

	measure(&cnt, 1);
	count = cnt.count;

	/* clear status before test */
	wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));

	report_prefix_push("overflow");

	for (i = 0; i < num_counters + 1; i++, cnt.ctr++) {
		uint64_t status;
		int idx;

		cnt.count = 1 - count;
		if (gp_counter_base == MSR_IA32_PMC0)
			cnt.count &= (1ull << eax.split.bit_width) - 1;

		if (i == num_counters) {
			cnt.ctr = fixed_events[0].unit_sel;
			cnt.count &= (1ull << edx.split.bit_width_fixed) - 1;
		}

		if (i % 2)
			cnt.config |= EVNTSEL_INT;
		else
			cnt.config &= ~EVNTSEL_INT;
		idx = event_to_global_idx(&cnt);
		measure(&cnt, 1);
		report(cnt.count == 1, "cntr-%d", i);
		status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
		report(status & (1ull << idx), "status-%d", i);
		wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, status);
		status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
		report(!(status & (1ull << idx)), "status clear-%d", i);
		report(check_irq() == (i % 2), "irq-%d", i);
	}

	report_prefix_pop();
}

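/*
 * With a counter mask (CMASK) of 2, the counter increments at most once
 * per cycle, and only in cycles where two or more instructions retire,
 * so the result must fall well below the raw instruction count.
 */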
static void check_gp_counter_cmask(void)
{
	pmu_counter_t cnt = {
		.ctr = gp_counter_base,
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
		.count = 0,
	};

	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
	measure(&cnt, 1);
	report(cnt.count < gp_events[1].min, "cmask");
}

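/*
 * RDPMC with ECX bit 31 set requests a "fast" read of the low 32 bits;
 * bit 30 selects the fixed-counter space. CPUs without fast reads raise
 * #GP, which the callers below treat as a skip.
 */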
static void do_rdpmc_fast(void *ptr)
{
	pmu_counter_t *cnt = ptr;
	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);

	if (!is_gp(cnt))
		idx |= 1 << 30;

	cnt->count = rdpmc(idx);
}

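/*
 * Verify RDPMC reads of GP and fixed counters, including the
 * sign-extension of legacy 32-bit counter writes and, where supported,
 * the fast-read mode.
 */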
static void check_rdpmc(void)
{
	uint64_t val = 0xff0123456789ull;
	bool exc;
	int i;

	report_prefix_push("rdpmc");

	for (i = 0; i < num_counters; i++) {
		uint64_t x;
		pmu_counter_t cnt = {
			.ctr = gp_counter_base + i,
			.idx = i
		};

		/*
		 * Without full-width writes, only the low 32 bits are writable,
		 * and the value is sign-extended.
		 */
		if (gp_counter_base == MSR_IA32_PERFCTR0)
			x = (uint64_t)(int64_t)(int32_t)val;
		else
			x = (uint64_t)(int64_t)val;

		/* Mask according to the number of supported bits */
		x &= (1ull << eax.split.bit_width) - 1;

		wrmsr(gp_counter_base + i, val);
		report(rdpmc(i) == x, "cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fast-%d", i);
		else
			report(cnt.count == (u32)val, "fast-%d", i);
	}
	for (i = 0; i < edx.split.num_counters_fixed; i++) {
		uint64_t x = val & ((1ull << edx.split.bit_width_fixed) - 1);
		pmu_counter_t cnt = {
			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
			.idx = i
		};

		wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, x);
		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fixed fast-%d", i);
		else
			report(cnt.count == (u32)x, "fixed fast-%d", i);
	}

	report_prefix_pop();
}

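/*
 * A WRMSR to a running counter must take effect: zeroing it mid-run
 * keeps the final count below the event minimum, and loading it with
 * all ones (up to the counter width) makes the next event overflow it.
 */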
static void check_running_counter_wrmsr(void)
{
	uint64_t status;
	uint64_t count;
	pmu_counter_t evt = {
		.ctr = gp_counter_base,
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
		.count = 0,
	};

	report_prefix_push("running counter wrmsr");

	start_event(&evt);
	loop();
	wrmsr(gp_counter_base, 0);
	stop_event(&evt);
	report(evt.count < gp_events[1].min, "cntr");

	/* clear status before overflow test */
	wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL,
	      rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));

	evt.count = 0;
	start_event(&evt);

	count = -1;
	if (gp_counter_base == MSR_IA32_PMC0)
		count &= (1ull << eax.split.bit_width) - 1;

	wrmsr(gp_counter_base, count);

	loop();
	stop_event(&evt);
	status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
	report(status & 1, "status");

	report_prefix_pop();
}

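/* The full suite; run once with legacy MSRs and again with full-width aliases. */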
static void check_counters(void)
{
	check_gp_counters();
	check_fixed_counters();
	check_rdpmc();
	check_counters_many();
	check_counter_overflow();
	check_gp_counter_cmask();
	check_running_counter_wrmsr();
}

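/*
 * Writing a value wider than the supported counter width to a
 * full-width MSR_IA32_PMCn alias must raise #GP.
 */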
static void do_unsupported_width_counter_write(void *index)
{
	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
}

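/*
 * Check the width semantics of the two counter interfaces: legacy
 * MSR_IA32_PERFCTRn takes 64-bit writes but keeps only the low 32 bits,
 * while full-width MSR_IA32_PMCn accepts values up to the counter width.
 */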
static void check_gp_counters_write_width(void)
{
	u64 val_64 = 0xffffff0123456789ull;
	u64 val_32 = val_64 & ((1ull << 32) - 1);
	u64 val_max_width = val_64 & ((1ull << eax.split.bit_width) - 1);
	int i;

	/*
	 * MSR_IA32_PERFCTRn supports 64-bit writes,
	 * but only the lowest 32 bits are valid.
	 */
	for (i = 0; i < num_counters; i++) {
		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
	}

	/*
	 * MSR_IA32_PMCn supports writing values up to the GP counter width,
	 * and only the lowest bits of that width are valid.
	 */
	for (i = 0; i < num_counters; i++) {
		wrmsr(MSR_IA32_PMC0 + i, val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);

		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);

		report(test_for_exception(GP_VECTOR,
			do_unsupported_width_counter_write, &i),
		"writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
	}
}

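/*
 * Probe the PMU via CPUID leaf 0AH, run the suite with the legacy
 * counter MSRs, then rerun it with the full-width MSR_IA32_PMCn aliases
 * when IA32_PERF_CAPABILITIES advertises full-width writes.
 */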
int main(int ac, char **av)
{
	struct cpuid id = cpuid(10);

	setup_vm();
	handle_irq(PC_VECTOR, cnt_overflow);
	buf = malloc(N*64);

	eax.full = id.a;
	ebx.full = id.b;
	edx.full = id.d;

	if (!eax.split.version_id) {
		printf("No PMU is detected!\n");
		return report_summary();
	}

	if (eax.split.version_id == 1) {
		printf("PMU version 1 is not supported\n");
		return report_summary();
	}

	printf("PMU version:         %d\n", eax.split.version_id);
	printf("GP counters:         %d\n", eax.split.num_counters);
	printf("GP counter width:    %d\n", eax.split.bit_width);
	printf("Mask length:         %d\n", eax.split.mask_length);
	printf("Fixed counters:      %d\n", edx.split.num_counters_fixed);
	printf("Fixed counter width: %d\n", edx.split.bit_width_fixed);

	num_counters = eax.split.num_counters;

	apic_write(APIC_LVTPC, PC_VECTOR);

	check_counters();

	if (rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES) {
		gp_counter_base = MSR_IA32_PMC0;
		report_prefix_push("full-width writes");
		check_counters();
		check_gp_counters_write_width();
	}

	return report_summary();
}