/* x86/pmu.c (kvm-unit-tests, revision 4c8a99ca02252d4a2bee43f4558fe47ce5ab7ec0) */

#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/apic-defs.h"
#include "x86/apic.h"
#include "x86/desc.h"
#include "x86/isr.h"
#include "alloc.h"

#include "libcflat.h"
#include <stdint.h>

#define FIXED_CNT_INDEX 32
#define PC_VECTOR	32

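/*
 * Bit layout of the IA32_PERFEVTSELx MSRs (Intel SDM vol. 3): event
 * select in bits 7:0, unit mask in bits 15:8, then single-bit flags
 * (user mode, OS mode, edge detect, pin control, interrupt on
 * overflow, enable, invert) and the counter mask in bits 31:24.
 */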
#define EVNTSEL_EVENT_SHIFT	0
#define EVNTSEL_UMASK_SHIFT	8
#define EVNTSEL_USR_SHIFT	16
#define EVNTSEL_OS_SHIFT	17
#define EVNTSEL_EDGE_SHIFT	18
#define EVNTSEL_PC_SHIFT	19
#define EVNTSEL_INT_SHIFT	20
#define EVNTSEL_EN_SHIFT	22
#define EVNTSEL_INV_SHIFT	23
#define EVNTSEL_CMASK_SHIFT	24

#define EVNTSEL_EN	(1 << EVNTSEL_EN_SHIFT)
#define EVNTSEL_USR	(1 << EVNTSEL_USR_SHIFT)
#define EVNTSEL_OS	(1 << EVNTSEL_OS_SHIFT)
#define EVNTSEL_PC	(1 << EVNTSEL_PC_SHIFT)
#define EVNTSEL_INT	(1 << EVNTSEL_INT_SHIFT)
#define EVNTSEL_INV	(1 << EVNTSEL_INV_SHIFT)

#define N 1000000

// These values match the number of instructions and branches in the
// assembly block in check_emulated_instr().
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

typedef struct {
	uint32_t ctr;
	uint32_t config;
	uint64_t count;
	int idx;
} pmu_counter_t;

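/*
 * Expected (inclusive) count ranges for each event when loop() runs N
 * iterations. In the fixed_events table, .unit_sel is reused to hold
 * the counter's MSR address, since fixed counters have no event-select
 * encoding; architecturally, fixed counters 0-2 count instructions
 * retired, unhalted core cycles, and unhalted reference cycles.
 */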
struct pmu_event {
	const char *name;
	uint32_t unit_sel;
	int min;
	int max;
} gp_events[] = {
	{"core cycles", 0x003c, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"ref cycles", 0x013c, 1*N, 30*N},
	{"llc references", 0x4f2e, 1, 2*N},
	{"llc misses", 0x412e, 1, 1*N},
	{"branches", 0x00c4, 1*N, 1.1*N},
	{"branch misses", 0x00c5, 0, 0.1*N},
}, fixed_events[] = {
	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
	{"fixed 3", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
};

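/* IA32_PERF_CAPABILITIES bit 13: counters are full-width writable via MSR_IA32_PMC0. */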
#define PMU_CAP_FW_WRITES	(1ULL << 13)
static u64 gp_counter_base = MSR_IA32_PERFCTR0;

char *buf;

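/*
 * The measured workload: each iteration executes ten instructions (a
 * load, an add, seven nops, and the loop branch) and one taken branch,
 * so N iterations yield roughly 10*N instructions and 1*N branches,
 * matching the bounds in gp_events[].
 */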
static inline void loop(void)
{
	unsigned long tmp, tmp2, tmp3;

	asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b"
			: "=c"(tmp), "=r"(tmp2), "=r"(tmp3): "0"(N), "1"(buf));
}

volatile uint64_t irq_received;

static void cnt_overflow(isr_regs_t *regs)
{
	irq_received++;
	apic_write(APIC_EOI, 0);
}

static bool check_irq(void)
{
	int i;
	irq_received = 0;
	irq_enable();
	for (i = 0; i < 100000 && !irq_received; i++)
		asm volatile("pause");
	irq_disable();
	return irq_received;
}

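/*
 * GP counters live at MSR_IA32_PERFCTR0 (0xc1) or, with full-width
 * writes, at the MSR_IA32_PMC0 (0x4c1) alias; fixed counters start at
 * MSR_CORE_PERF_FIXED_CTR0 (0x309). Anything outside the fixed-counter
 * range is therefore a GP counter.
 */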
static bool is_gp(pmu_counter_t *evt)
{
	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
		evt->ctr >= MSR_IA32_PMC0;
}

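/*
 * Map a counter to its bit in the global control/status MSRs: GP
 * counter n is bit n, while fixed counter n is bit 32 + n
 * (FIXED_CNT_INDEX).
 */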
static int event_to_global_idx(pmu_counter_t *cnt)
{
	return cnt->ctr - (is_gp(cnt) ? gp_counter_base :
		(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));
}

static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
{
	if (is_gp(cnt)) {
		int i;

		for (i = 0; i < ARRAY_SIZE(gp_events); i++)
			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
				return &gp_events[i];
	} else
		return &fixed_events[cnt->ctr - MSR_CORE_PERF_FIXED_CTR0];

	return NULL;
}

static void global_enable(pmu_counter_t *cnt)
{
	cnt->idx = event_to_global_idx(cnt);

	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) |
			(1ull << cnt->idx));
}

static void global_disable(pmu_counter_t *cnt)
{
	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) &
			~(1ull << cnt->idx));
}

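/*
 * Fixed counters are configured through IA32_FIXED_CTR_CTRL rather than
 * a per-counter event select: each counter owns a 4-bit field (bit 0
 * enables OS-mode counting, bit 1 user-mode, bit 3 PMI on overflow),
 * which start_event() assembles from the same EVNTSEL_* flags used for
 * GP counters.
 */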
static void start_event(pmu_counter_t *evt)
{
	wrmsr(evt->ctr, evt->count);
	if (is_gp(evt))
		wrmsr(MSR_P6_EVNTSEL0 + event_to_global_idx(evt),
				evt->config | EVNTSEL_EN);
	else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		uint32_t usrospmi = 0;

		if (evt->config & EVNTSEL_OS)
			usrospmi |= (1 << 0);
		if (evt->config & EVNTSEL_USR)
			usrospmi |= (1 << 1);
		if (evt->config & EVNTSEL_INT)
			usrospmi |= (1 << 3); // PMI on overflow
		ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
	}
	global_enable(evt);
	apic_write(APIC_LVTPC, PC_VECTOR);
}

static void stop_event(pmu_counter_t *evt)
{
	global_disable(evt);
	if (is_gp(evt))
		wrmsr(MSR_P6_EVNTSEL0 + event_to_global_idx(evt),
				evt->config & ~EVNTSEL_EN);
	else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
	}
	evt->count = rdmsr(evt->ctr);
}

static void measure(pmu_counter_t *evt, int count)
{
	int i;
	for (i = 0; i < count; i++)
		start_event(&evt[i]);
	loop();
	for (i = 0; i < count; i++)
		stop_event(&evt[i]);
}

static bool verify_event(uint64_t count, struct pmu_event *e)
{
	// printf("%d <= %ld <= %d\n", e->min, count, e->max);
	return count >= e->min && count <= e->max;
}

static bool verify_counter(pmu_counter_t *cnt)
{
	return verify_event(cnt->count, get_counter_event(cnt));
}

static void check_gp_counter(struct pmu_event *evt)
{
	int nr_gp_counters = pmu_nr_gp_counters();
	pmu_counter_t cnt = {
		.ctr = gp_counter_base,
		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
	};
	int i;

	for (i = 0; i < nr_gp_counters; i++, cnt.ctr++) {
		cnt.count = 0;
		measure(&cnt, 1);
		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
	}
}

static void check_gp_counters(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(gp_events); i++)
		if (pmu_gp_counter_is_available(i))
			check_gp_counter(&gp_events[i]);
		else
			printf("GP event '%s' is disabled\n",
					gp_events[i].name);
}

static void check_fixed_counters(void)
{
	int nr_fixed_counters = pmu_nr_fixed_counters();
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR,
	};
	int i;

	for (i = 0; i < nr_fixed_counters; i++) {
		cnt.count = 0;
		cnt.ctr = fixed_events[i].unit_sel;
		measure(&cnt, 1);
		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
	}
}

static void check_counters_many(void)
{
	int nr_fixed_counters = pmu_nr_fixed_counters();
	int nr_gp_counters = pmu_nr_gp_counters();
	pmu_counter_t cnt[10];
	int i, n;

	for (i = 0, n = 0; n < nr_gp_counters; i++) {
		if (!pmu_gp_counter_is_available(i))
			continue;

		cnt[n].count = 0;
		cnt[n].ctr = gp_counter_base + n;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
			gp_events[i % ARRAY_SIZE(gp_events)].unit_sel;
		n++;
	}
	for (i = 0; i < nr_fixed_counters; i++) {
		cnt[n].count = 0;
		cnt[n].ctr = fixed_events[i].unit_sel;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
		n++;
	}

	measure(cnt, n);

	for (i = 0; i < n; i++)
		if (!verify_counter(&cnt[i]))
			break;

	report(i == n, "all counters");
}

static void check_counter_overflow(void)
{
	int nr_gp_counters = pmu_nr_gp_counters();
	uint64_t count;
	int i;
	pmu_counter_t cnt = {
		.ctr = gp_counter_base,
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
		.count = 0,
	};
	measure(&cnt, 1);
	count = cnt.count;

	/* clear status before test */
	wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));

	report_prefix_push("overflow");

	for (i = 0; i < nr_gp_counters + 1; i++, cnt.ctr++) {
		uint64_t status;
		int idx;
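		/*
		 * Preload the counter with (1 - count) so that the same
		 * workload drives it through zero: it wraps once, setting
		 * this counter's overflow bit in GLOBAL_STATUS, and ends
		 * at exactly 1.
		 */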
		cnt.count = 1 - count;
		if (gp_counter_base == MSR_IA32_PMC0)
			cnt.count &= (1ull << pmu_gp_counter_width()) - 1;

		if (i == nr_gp_counters) {
			cnt.ctr = fixed_events[0].unit_sel;
			cnt.count &= (1ull << pmu_fixed_counter_width()) - 1;
		}

		if (i % 2)
			cnt.config |= EVNTSEL_INT;
		else
			cnt.config &= ~EVNTSEL_INT;
		idx = event_to_global_idx(&cnt);
		measure(&cnt, 1);
		report(cnt.count == 1, "cntr-%d", i);
		status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
		report(status & (1ull << idx), "status-%d", i);
		wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, status);
		status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
		report(!(status & (1ull << idx)), "status clear-%d", i);
		report(check_irq() == (i % 2), "irq-%d", i);
	}

	report_prefix_pop();
}

static void check_gp_counter_cmask(void)
{
	pmu_counter_t cnt = {
		.ctr = gp_counter_base,
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
		.count = 0,
	};
	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
	measure(&cnt, 1);
	report(cnt.count < gp_events[1].min, "cmask");
}

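/*
 * RDPMC encodes the counter in ECX: bit 30 selects the fixed-counter
 * set, and bit 31 requests a "fast" read that returns only the low 32
 * bits; on processors without fast-read support the access raises #GP,
 * which check_rdpmc() treats as a skip.
 */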
static void do_rdpmc_fast(void *ptr)
{
	pmu_counter_t *cnt = ptr;
	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);

	if (!is_gp(cnt))
		idx |= 1 << 30;

	cnt->count = rdpmc(idx);
}

static void check_rdpmc(void)
{
	int fixed_counter_width = pmu_fixed_counter_width();
	int nr_fixed_counters = pmu_nr_fixed_counters();
	u8 gp_counter_width = pmu_gp_counter_width();
	int nr_gp_counters = pmu_nr_gp_counters();
	uint64_t val = 0xff0123456789ull;
	bool exc;
	int i;

	report_prefix_push("rdpmc");

	for (i = 0; i < nr_gp_counters; i++) {
		uint64_t x;
		pmu_counter_t cnt = {
			.ctr = gp_counter_base + i,
			.idx = i
		};

		/*
		 * Without full-width writes, only the low 32 bits are writable,
		 * and the value is sign-extended.
		 */
		if (gp_counter_base == MSR_IA32_PERFCTR0)
			x = (uint64_t)(int64_t)(int32_t)val;
		else
			x = (uint64_t)(int64_t)val;

		/* Mask according to the number of supported bits */
		x &= (1ull << gp_counter_width) - 1;

		wrmsr(gp_counter_base + i, val);
		report(rdpmc(i) == x, "cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fast-%d", i);
		else
			report(cnt.count == (u32)val, "fast-%d", i);
	}
	for (i = 0; i < nr_fixed_counters; i++) {
		uint64_t x = val & ((1ull << fixed_counter_width) - 1);
		pmu_counter_t cnt = {
			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
			.idx = i
		};

		wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, x);
		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fixed fast-%d", i);
		else
			report(cnt.count == (u32)x, "fixed fast-%d", i);
	}

	report_prefix_pop();
}

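/*
 * A write to a running counter must take effect immediately: zeroing
 * the counter mid-run keeps the final count below the event's minimum,
 * and loading it with its maximum value must make the next event
 * overflow into GLOBAL_STATUS bit 0.
 */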
static void check_running_counter_wrmsr(void)
{
	uint64_t status;
	uint64_t count;
	pmu_counter_t evt = {
		.ctr = gp_counter_base,
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
		.count = 0,
	};

	report_prefix_push("running counter wrmsr");

	start_event(&evt);
	loop();
	wrmsr(gp_counter_base, 0);
	stop_event(&evt);
	report(evt.count < gp_events[1].min, "cntr");

	/* clear status before overflow test */
	wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL,
	      rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));

	evt.count = 0;
	start_event(&evt);

	count = -1;
	if (gp_counter_base == MSR_IA32_PMC0)
		count &= (1ull << pmu_gp_counter_width()) - 1;

	wrmsr(gp_counter_base, count);

	loop();
	stop_event(&evt);
	status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
	report(status & 1, "status");

	report_prefix_pop();
}
449 
450 static void check_emulated_instr(void)
451 {
452 	uint64_t status, instr_start, brnch_start;
453 	pmu_counter_t brnch_cnt = {
454 		.ctr = MSR_IA32_PERFCTR0,
455 		/* branch instructions */
456 		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[5].unit_sel,
457 		.count = 0,
458 	};
459 	pmu_counter_t instr_cnt = {
460 		.ctr = MSR_IA32_PERFCTR0 + 1,
461 		/* instructions */
462 		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
463 		.count = 0,
464 	};
465 	report_prefix_push("emulated instruction");
466 
467 	wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL,
468 	      rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));
469 
470 	start_event(&brnch_cnt);
471 	start_event(&instr_cnt);
472 
473 	brnch_start = -EXPECTED_BRNCH;
474 	instr_start = -EXPECTED_INSTR;
475 	wrmsr(MSR_IA32_PERFCTR0, brnch_start);
476 	wrmsr(MSR_IA32_PERFCTR0 + 1, instr_start);
477 	// KVM_FEP is a magic prefix that forces emulation so
478 	// 'KVM_FEP "jne label\n"' just counts as a single instruction.
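	// The block below executes 17 instructions (mov + cmp, five
	// emulated jnes, and five mov/cpuid pairs) and 5 branches (the
	// jnes), matching EXPECTED_INSTR and EXPECTED_BRNCH, so the
	// counters preloaded with -EXPECTED_* should overflow.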
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");

	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);

	stop_event(&brnch_cnt);
	stop_event(&instr_cnt);

	// Check that the end count - start count is at least the expected
	// number of instructions and branches.
	report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
	       "instruction count");
	report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
	       "branch count");
	// Additionally check that those counters overflowed properly.
	status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
	report(status & 1, "branch counter overflow");
	report(status & 2, "instruction counter overflow");

	report_prefix_pop();
}

static void check_counters(void)
{
	check_gp_counters();
	check_fixed_counters();
	check_rdpmc();
	check_counters_many();
	check_counter_overflow();
	check_gp_counter_cmask();
	check_running_counter_wrmsr();
}

static void do_unsupported_width_counter_write(void *index)
{
	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
}

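/*
 * Legacy writes through MSR_IA32_PERFCTRn only take the low 32 bits,
 * while the MSR_IA32_PMCn aliases accept any value that fits in the
 * enumerated GP counter width; wider values must raise #GP.
 */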
static void check_gp_counters_write_width(void)
{
	u64 val_64 = 0xffffff0123456789ull;
	u64 val_32 = val_64 & ((1ull << 32) - 1);
	u64 val_max_width = val_64 & ((1ull << pmu_gp_counter_width()) - 1);
	int nr_gp_counters = pmu_nr_gp_counters();
	int i;

	/*
	 * MSR_IA32_PERFCTRn accepts 64-bit writes,
	 * but only the lowest 32 bits are retained.
	 */
	for (i = 0; i < nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
	}

	/*
	 * MSR_IA32_PMCn supports full-width writes: any value up to the
	 * GP counter width is retained, and wider values fault.
	 */
	for (i = 0; i < nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PMC0 + i, val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);

		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);

		report(test_for_exception(GP_VECTOR,
			do_unsupported_width_counter_write, &i),
		"writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
	}
}

/*
 * Per the SDM, reference cycles are currently implemented using the
 * core crystal clock, TSC, or bus clock. Calibrate to the TSC
 * frequency to set reasonable expectations.
 */
static void set_ref_cycle_expectations(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_IA32_PERFCTR0,
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[2].unit_sel,
		.count = 0,
	};
	uint64_t tsc_delta;
	uint64_t t0, t1, t2, t3;

	/* Bit 2 enumerates the availability of reference cycles events. */
	if (!pmu_nr_gp_counters() || !pmu_gp_counter_is_available(2))
		return;

	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);

	t0 = fenced_rdtsc();
	start_event(&cnt);
	t1 = fenced_rdtsc();

	/*
	 * This loop has to run long enough to dominate the VM-exit
	 * costs for playing with the PMU MSRs on start and stop.
	 *
	 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
	 * the core crystal clock, this function calculated a guest
	 * TSC : ref cycles ratio of around 105 with ECX initialized
	 * to one billion.
	 */
	asm volatile("loop ." : "+c"((int){1000000000ull}));

	t2 = fenced_rdtsc();
	stop_event(&cnt);
	t3 = fenced_rdtsc();

	tsc_delta = ((t2 - t1) + (t3 - t0)) / 2;

	if (!tsc_delta)
		return;

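	/* Rescale the expected bounds by the observed ref-cycle : TSC ratio. */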
	gp_events[2].min = (gp_events[2].min * cnt.count) / tsc_delta;
	gp_events[2].max = (gp_events[2].max * cnt.count) / tsc_delta;
}

int main(int ac, char **av)
{
	setup_vm();
	handle_irq(PC_VECTOR, cnt_overflow);
	buf = malloc(N*64);

	if (!pmu_version()) {
		report_skip("No PMU is detected!");
		return report_summary();
	}

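	/*
	 * Architectural PMU version 1 lacks the global control/status
	 * MSRs (MSR_CORE_PERF_GLOBAL_*) that this test relies on.
	 */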
	if (pmu_version() == 1) {
		report_skip("PMU version 1 is not supported.");
		return report_summary();
	}

	set_ref_cycle_expectations();

	printf("PMU version:         %d\n", pmu_version());
	printf("GP counters:         %d\n", pmu_nr_gp_counters());
	printf("GP counter width:    %d\n", pmu_gp_counter_width());
	printf("Mask length:         %d\n", pmu_gp_counter_mask_length());
	printf("Fixed counters:      %d\n", pmu_nr_fixed_counters());
	printf("Fixed counter width: %d\n", pmu_fixed_counter_width());

	apic_write(APIC_LVTPC, PC_VECTOR);

	if (is_fep_available())
		check_emulated_instr();

	check_counters();

	if (rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES) {
		gp_counter_base = MSR_IA32_PMC0;
		report_prefix_push("full-width writes");
		check_counters();
		check_gp_counters_write_width();
	}

	return report_summary();
}