xref: /kvm-unit-tests/x86/pmu.c (revision 2719b92cb09a1ad90e7944ff4e14528ce1646a71)
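/*
 * Test for x86 architectural performance monitoring (Intel PMU version 2+).
 *
 * Exercises the general-purpose and fixed-function counters, counter
 * overflow and PMI delivery, RDPMC (including "fast" 32-bit reads),
 * writes to running counters, counting of instructions emulated by KVM,
 * and full-width counter writes through the MSR_IA32_PMCx aliases.
 */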

#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/apic-defs.h"
#include "x86/apic.h"
#include "x86/desc.h"
#include "x86/isr.h"
#include "alloc.h"

#include "libcflat.h"
#include <stdint.h>

#define FIXED_CNT_INDEX 32
#define PC_VECTOR	32

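/* Bit positions within the architectural IA32_PERFEVTSELx event select MSRs. */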
#define EVNTSEL_EVENT_SHIFT	0
#define EVNTSEL_UMASK_SHIFT	8
#define EVNTSEL_USR_SHIFT	16
#define EVNTSEL_OS_SHIFT	17
#define EVNTSEL_EDGE_SHIFT	18
#define EVNTSEL_PC_SHIFT	19
#define EVNTSEL_INT_SHIFT	20
#define EVNTSEL_EN_SHIFT	22
#define EVNTSEL_INV_SHIFT	23
#define EVNTSEL_CMASK_SHIFT	24

#define EVNTSEL_EN	(1 << EVNTSEL_EN_SHIFT)
#define EVNTSEL_USR	(1 << EVNTSEL_USR_SHIFT)
#define EVNTSEL_OS	(1 << EVNTSEL_OS_SHIFT)
#define EVNTSEL_PC	(1 << EVNTSEL_PC_SHIFT)
#define EVNTSEL_INT	(1 << EVNTSEL_INT_SHIFT)
#define EVNTSEL_INV	(1 << EVNTSEL_INV_SHIFT)

#define N 1000000

#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
// These values match the number of instructions and branches in the
// assembly block in check_emulated_instr().
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

typedef struct {
	uint32_t ctr;
	uint32_t config;
	uint64_t count;
	int idx;
} pmu_counter_t;

struct pmu_event {
	const char *name;
	uint32_t unit_sel;
	int min;
	int max;
} gp_events[] = {
	{"core cycles", 0x003c, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"ref cycles", 0x013c, 1*N, 30*N},
	{"llc references", 0x4f2e, 1, 2*N},
	{"llc misses", 0x412e, 1, 1*N},
	{"branches", 0x00c4, 1*N, 1.1*N},
	{"branch misses", 0x00c5, 0, 0.1*N},
}, fixed_events[] = {
	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
	{"fixed 3", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
};

#define PMU_CAP_FW_WRITES	(1ULL << 13)
static u64 gp_counter_base = MSR_IA32_PERFCTR0;

char *buf;

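/*
 * Measurement workload: walk the global buffer in 64-byte strides, N
 * iterations in total. The min/max bounds in gp_events[] are multiples
 * of N because the counter tests count events across one run of this loop.
 */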
static inline void loop(void)
{
	unsigned long tmp, tmp2, tmp3;

	asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b"
			: "=c"(tmp), "=r"(tmp2), "=r"(tmp3): "0"(N), "1"(buf));
}

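/*
 * PMI plumbing: cnt_overflow() is installed as the handler for PC_VECTOR,
 * and check_irq() busy-waits, with interrupts enabled, for an overflow
 * interrupt to arrive.
 */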
volatile uint64_t irq_received;

static void cnt_overflow(isr_regs_t *regs)
{
	irq_received++;
	apic_write(APIC_EOI, 0);
}

static bool check_irq(void)
{
	int i;
	irq_received = 0;
	irq_enable();
	for (i = 0; i < 100000 && !irq_received; i++)
		asm volatile("pause");
	irq_disable();
	return irq_received;
}

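/*
 * A counter is "general purpose" if its MSR lies outside the fixed-counter
 * range; GP counters may be addressed either through the legacy
 * MSR_IA32_PERFCTRx MSRs or the full-width MSR_IA32_PMCx aliases.
 * event_to_global_idx() maps a counter to its bit position in the
 * GLOBAL_CTRL/GLOBAL_STATUS MSRs, where fixed counters start at bit 32
 * (FIXED_CNT_INDEX).
 */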
static bool is_gp(pmu_counter_t *evt)
{
	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
		evt->ctr >= MSR_IA32_PMC0;
}

static int event_to_global_idx(pmu_counter_t *cnt)
{
	return cnt->ctr - (is_gp(cnt) ? gp_counter_base :
		(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));
}

static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
{
	if (is_gp(cnt)) {
		int i;

		for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++)
			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
				return &gp_events[i];
	} else
		return &fixed_events[cnt->ctr - MSR_CORE_PERF_FIXED_CTR0];

	return (void*)0;
}

static void global_enable(pmu_counter_t *cnt)
{
	cnt->idx = event_to_global_idx(cnt);

	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) |
			(1ull << cnt->idx));
}

static void global_disable(pmu_counter_t *cnt)
{
	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) &
			~(1ull << cnt->idx));
}

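/*
 * start_event() programs a counter: it writes the initial count, sets up
 * either the GP event select MSR or the relevant nibble of
 * FIXED_CTR_CTRL, enables the counter in GLOBAL_CTRL, and re-arms the
 * local APIC LVT performance counter entry. stop_event() undoes the
 * enabling and reads back the final count.
 */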
static void start_event(pmu_counter_t *evt)
{
	wrmsr(evt->ctr, evt->count);
	if (is_gp(evt))
		wrmsr(MSR_P6_EVNTSEL0 + event_to_global_idx(evt),
		      evt->config | EVNTSEL_EN);
	else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		uint32_t usrospmi = 0;

		if (evt->config & EVNTSEL_OS)
			usrospmi |= (1 << 0);
		if (evt->config & EVNTSEL_USR)
			usrospmi |= (1 << 1);
		if (evt->config & EVNTSEL_INT)
			usrospmi |= (1 << 3); // PMI on overflow
		ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
	}
	global_enable(evt);
	apic_write(APIC_LVTPC, PC_VECTOR);
}

static void stop_event(pmu_counter_t *evt)
{
	global_disable(evt);
	if (is_gp(evt))
		wrmsr(MSR_P6_EVNTSEL0 + event_to_global_idx(evt),
		      evt->config & ~EVNTSEL_EN);
	else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
	}
	evt->count = rdmsr(evt->ctr);
}

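/* Start every counter in evt[], run the workload once, then stop them all. */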
static void measure(pmu_counter_t *evt, int count)
{
	int i;
	for (i = 0; i < count; i++)
		start_event(&evt[i]);
	loop();
	for (i = 0; i < count; i++)
		stop_event(&evt[i]);
}

static bool verify_event(uint64_t count, struct pmu_event *e)
{
	// printf("%d <= %ld <= %d\n", e->min, count, e->max);
	return count >= e->min && count <= e->max;
}

static bool verify_counter(pmu_counter_t *cnt)
{
	return verify_event(cnt->count, get_counter_event(cnt));
}

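/*
 * Program each available GP counter, one at a time, with the given event
 * and check that the measured count falls within the event's [min, max]
 * bounds.
 */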
static void check_gp_counter(struct pmu_event *evt)
{
	int nr_gp_counters = pmu_nr_gp_counters();
	pmu_counter_t cnt = {
		.ctr = gp_counter_base,
		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
	};
	int i;

	for (i = 0; i < nr_gp_counters; i++, cnt.ctr++) {
		cnt.count = 0;
		measure(&cnt, 1);
		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
	}
}

static void check_gp_counters(void)
{
	int i;

	for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++)
		if (pmu_gp_counter_is_available(i))
			check_gp_counter(&gp_events[i]);
		else
			printf("GP event '%s' is disabled\n",
					gp_events[i].name);
}

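/* Same as above, but for the fixed-function counters. */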
static void check_fixed_counters(void)
{
	int nr_fixed_counters = pmu_nr_fixed_counters();
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR,
	};
	int i;

	for (i = 0; i < nr_fixed_counters; i++) {
		cnt.count = 0;
		cnt.ctr = fixed_events[i].unit_sel;
		measure(&cnt, 1);
		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
	}
}

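/*
 * Program every available GP and fixed counter at once and verify that
 * they all count within bounds while running concurrently.
 */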
static void check_counters_many(void)
{
	int nr_fixed_counters = pmu_nr_fixed_counters();
	int nr_gp_counters = pmu_nr_gp_counters();
	pmu_counter_t cnt[10];
	int i, n;

	for (i = 0, n = 0; n < nr_gp_counters; i++) {
		if (!pmu_gp_counter_is_available(i))
			continue;

		cnt[n].count = 0;
		cnt[n].ctr = gp_counter_base + n;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
			gp_events[i % ARRAY_SIZE(gp_events)].unit_sel;
		n++;
	}
	for (i = 0; i < nr_fixed_counters; i++) {
		cnt[n].count = 0;
		cnt[n].ctr = fixed_events[i].unit_sel;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
		n++;
	}

	measure(cnt, n);

	for (i = 0; i < n; i++)
		if (!verify_counter(&cnt[i]))
			break;

	report(i == n, "all counters");
}

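/*
 * Preload each counter with (1 - expected event count) so that one run of
 * the workload carries it just past the overflow point (final value 1).
 * Verify the overflow bit in GLOBAL_STATUS, that GLOBAL_OVF_CTRL clears
 * it, and that a PMI is delivered only when EVNTSEL_INT was set. The last
 * iteration repeats the check on fixed counter 0.
 */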
static void check_counter_overflow(void)
{
	int nr_gp_counters = pmu_nr_gp_counters();
	uint64_t count;
	int i;
	pmu_counter_t cnt = {
		.ctr = gp_counter_base,
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
		.count = 0,
	};
	measure(&cnt, 1);
	count = cnt.count;

	/* clear status before test */
	wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));

	report_prefix_push("overflow");

	for (i = 0; i < nr_gp_counters + 1; i++, cnt.ctr++) {
		uint64_t status;
		int idx;

		cnt.count = 1 - count;
		if (gp_counter_base == MSR_IA32_PMC0)
			cnt.count &= (1ull << pmu_gp_counter_width()) - 1;

		if (i == nr_gp_counters) {
			cnt.ctr = fixed_events[0].unit_sel;
			cnt.count &= (1ull << pmu_fixed_counter_width()) - 1;
		}

		if (i % 2)
			cnt.config |= EVNTSEL_INT;
		else
			cnt.config &= ~EVNTSEL_INT;
		idx = event_to_global_idx(&cnt);
		measure(&cnt, 1);
		report(cnt.count == 1, "cntr-%d", i);
		status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
		report(status & (1ull << idx), "status-%d", i);
		wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, status);
		status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
		report(!(status & (1ull << idx)), "status clear-%d", i);
		report(check_irq() == (i % 2), "irq-%d", i);
	}

	report_prefix_pop();
}

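/*
 * With a counter mask (CMASK) of 2, the counter only increments on cycles
 * in which at least two instructions retire, so the result must come in
 * well below the raw instruction count.
 */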
static void check_gp_counter_cmask(void)
{
	pmu_counter_t cnt = {
		.ctr = gp_counter_base,
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
		.count = 0,
	};
	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
	measure(&cnt, 1);
	report(cnt.count < gp_events[1].min, "cmask");
}

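/*
 * RDPMC tests: a counter index with bit 31 set requests a "fast" read
 * that returns only the low 32 bits, and bit 30 selects the fixed-counter
 * range. do_rdpmc_fast() runs under test_for_exception() because fast
 * reads may #GP when unsupported.
 */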
static void do_rdpmc_fast(void *ptr)
{
	pmu_counter_t *cnt = ptr;
	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);

	if (!is_gp(cnt))
		idx |= 1 << 30;

	cnt->count = rdpmc(idx);
}

static void check_rdpmc(void)
{
	int fixed_counter_width = pmu_fixed_counter_width();
	int nr_fixed_counters = pmu_nr_fixed_counters();
	u8 gp_counter_width = pmu_gp_counter_width();
	int nr_gp_counters = pmu_nr_gp_counters();
	uint64_t val = 0xff0123456789ull;
	bool exc;
	int i;

	report_prefix_push("rdpmc");

	for (i = 0; i < nr_gp_counters; i++) {
		uint64_t x;
		pmu_counter_t cnt = {
			.ctr = gp_counter_base + i,
			.idx = i
		};

		/*
		 * Without full-width writes, only the low 32 bits are writable,
		 * and the value is sign-extended.
		 */
		if (gp_counter_base == MSR_IA32_PERFCTR0)
			x = (uint64_t)(int64_t)(int32_t)val;
		else
			x = (uint64_t)(int64_t)val;

		/* Mask according to the number of supported bits */
		x &= (1ull << gp_counter_width) - 1;

		wrmsr(gp_counter_base + i, val);
		report(rdpmc(i) == x, "cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fast-%d", i);
		else
			report(cnt.count == (u32)val, "fast-%d", i);
	}
	for (i = 0; i < nr_fixed_counters; i++) {
		uint64_t x = val & ((1ull << fixed_counter_width) - 1);
		pmu_counter_t cnt = {
			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
			.idx = i
		};

		wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, x);
		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fixed fast-%d", i);
		else
			report(cnt.count == (u32)x, "fixed fast-%d", i);
	}

	report_prefix_pop();
}

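/*
 * Writes to a counter that is actively counting must take effect:
 * zeroing it mid-run keeps the final count small, and writing an
 * all-ones value forces an overflow that shows up in GLOBAL_STATUS.
 */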
static void check_running_counter_wrmsr(void)
{
	uint64_t status;
	uint64_t count;
	pmu_counter_t evt = {
		.ctr = gp_counter_base,
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
		.count = 0,
	};

	report_prefix_push("running counter wrmsr");

	start_event(&evt);
	loop();
	wrmsr(gp_counter_base, 0);
	stop_event(&evt);
	report(evt.count < gp_events[1].min, "cntr");

	/* clear status before overflow test */
	wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL,
	      rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));

	evt.count = 0;
	start_event(&evt);

	count = -1;
	if (gp_counter_base == MSR_IA32_PMC0)
		count &= (1ull << pmu_gp_counter_width()) - 1;

	wrmsr(gp_counter_base, count);

	loop();
	stop_event(&evt);
	status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
	report(status & 1, "status");

	report_prefix_pop();
}

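/*
 * Check that instructions and branches executed through KVM's emulator
 * (forced with the KVM_FEP prefix) are still counted. Both counters are
 * preloaded so that the expected number of events makes them overflow.
 */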
static void check_emulated_instr(void)
{
	uint64_t status, instr_start, brnch_start;
	pmu_counter_t brnch_cnt = {
		.ctr = MSR_IA32_PERFCTR0,
		/* branch instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[5].unit_sel,
		.count = 0,
	};
	pmu_counter_t instr_cnt = {
		.ctr = MSR_IA32_PERFCTR0 + 1,
		/* instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
		.count = 0,
	};
	report_prefix_push("emulated instruction");

	wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL,
	      rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));

	start_event(&brnch_cnt);
	start_event(&instr_cnt);

	brnch_start = -EXPECTED_BRNCH;
	instr_start = -EXPECTED_INSTR;
	wrmsr(MSR_IA32_PERFCTR0, brnch_start);
	wrmsr(MSR_IA32_PERFCTR0 + 1, instr_start);
	// KVM_FEP is a magic prefix that forces emulation so
	// 'KVM_FEP "jne label\n"' just counts as a single instruction.
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");

	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);

	stop_event(&brnch_cnt);
	stop_event(&instr_cnt);

	// Check that the end count - start count is at least the expected
	// number of instructions and branches.
	report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
	       "instruction count");
	report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
	       "branch count");
	// Additionally check that those counters overflowed properly.
	status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
	report(status & 1, "instruction counter overflow");
	report(status & 2, "branch counter overflow");

	report_prefix_pop();
}

static void check_counters(void)
{
	check_gp_counters();
	check_fixed_counters();
	check_rdpmc();
	check_counters_many();
	check_counter_overflow();
	check_gp_counter_cmask();
	check_running_counter_wrmsr();
}

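/*
 * Full-width write semantics: legacy MSR_IA32_PERFCTRx writes only honor
 * the low 32 bits, while the MSR_IA32_PMCx aliases accept values up to
 * the GP counter width and are expected to #GP on anything wider.
 */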
static void do_unsupported_width_counter_write(void *index)
{
	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
}

static void check_gp_counters_write_width(void)
{
	u64 val_64 = 0xffffff0123456789ull;
	u64 val_32 = val_64 & ((1ull << 32) - 1);
	u64 val_max_width = val_64 & ((1ull << pmu_gp_counter_width()) - 1);
	int nr_gp_counters = pmu_nr_gp_counters();
	int i;

	/*
	 * MSR_IA32_PERFCTRn supports 64-bit writes,
	 * but only the lowest 32 bits are valid.
	 */
	for (i = 0; i < nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
	}

	/*
	 * MSR_IA32_PMCn supports writing values up to GP counter width,
	 * and only the lowest bits of GP counter width are valid.
	 */
	for (i = 0; i < nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PMC0 + i, val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);

		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);

		report(test_for_exception(GP_VECTOR,
			do_unsupported_width_counter_write, &i),
		"writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
	}
}

/*
 * Per the SDM, reference cycles are currently implemented using the
 * core crystal clock, TSC, or bus clock. Calibrate to the TSC
 * frequency to set reasonable expectations.
 */
static void set_ref_cycle_expectations(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_IA32_PERFCTR0,
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[2].unit_sel,
		.count = 0,
	};
	uint64_t tsc_delta;
	uint64_t t0, t1, t2, t3;

	/* Bit 2 enumerates the availability of reference cycles events. */
	if (!pmu_nr_gp_counters() || !pmu_gp_counter_is_available(2))
		return;

	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);

	t0 = fenced_rdtsc();
	start_event(&cnt);
	t1 = fenced_rdtsc();

	/*
	 * This loop has to run long enough to dominate the VM-exit
	 * costs for playing with the PMU MSRs on start and stop.
	 *
	 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
	 * the core crystal clock, this function calculated a guest
	 * TSC : ref cycles ratio of around 105 with ECX initialized
	 * to one billion.
	 */
	asm volatile("loop ." : "+c"((int){1000000000ull}));

	t2 = fenced_rdtsc();
	stop_event(&cnt);
	t3 = fenced_rdtsc();

	tsc_delta = ((t2 - t1) + (t3 - t0)) / 2;

	if (!tsc_delta)
		return;

	gp_events[2].min = (gp_events[2].min * cnt.count) / tsc_delta;
	gp_events[2].max = (gp_events[2].max * cnt.count) / tsc_delta;
}

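/*
 * The test requires an architectural PMU of version 2 or later. Counter
 * checks run once with the legacy counter MSRs and, if
 * IA32_PERF_CAPABILITIES reports full-width write support, a second time
 * through MSR_IA32_PMC0, followed by the write-width checks. Passing
 * "emulation" as the first argument runs only the emulated-instruction
 * check.
 */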
int main(int ac, char **av)
{
	setup_vm();
	handle_irq(PC_VECTOR, cnt_overflow);
	buf = malloc(N*64);

	if (!pmu_version()) {
		report_skip("No pmu is detected!");
		return report_summary();
	}

	if (pmu_version() == 1) {
		report_skip("PMU version 1 is not supported.");
		return report_summary();
	}

	set_ref_cycle_expectations();

	printf("PMU version:         %d\n", pmu_version());
	printf("GP counters:         %d\n", pmu_nr_gp_counters());
	printf("GP counter width:    %d\n", pmu_gp_counter_width());
	printf("Mask length:         %d\n", pmu_gp_counter_mask_length());
	printf("Fixed counters:      %d\n", pmu_nr_fixed_counters());
	printf("Fixed counter width: %d\n", pmu_fixed_counter_width());

	apic_write(APIC_LVTPC, PC_VECTOR);

	if (ac > 1 && !strcmp(av[1], "emulation")) {
		check_emulated_instr();
	} else {
		check_counters();

		if (rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES) {
			gp_counter_base = MSR_IA32_PMC0;
			report_prefix_push("full-width writes");
			check_counters();
			check_gp_counters_write_width();
		}
	}

	return report_summary();
}