#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/pmu.h"
#include "x86/apic-defs.h"
#include "x86/apic.h"
#include "x86/desc.h"
#include "x86/isr.h"
#include "alloc.h"

#include "libcflat.h"
#include <stdint.h>

#define N 1000000

// These values match the number of instructions and branches in the
// assembly block in check_emulated_instr().
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

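/*
 * State for one counter under test: the counter's MSR address, its
 * event-select configuration, the count that was last written or read
 * back, and its bit index in the global control/status MSRs.
 */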
typedef struct {
	uint32_t ctr;
	uint32_t config;
	uint64_t count;
	int idx;
} pmu_counter_t;

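/*
 * Events and the [min, max] count range each is expected to produce
 * over one run of loop().  For gp_events, unit_sel is the event
 * select/unit mask encoding; for fixed_events it instead holds the
 * fixed counter's MSR address.  The ranges are loose, as exact counts
 * vary with the host microarchitecture.
 */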
struct pmu_event {
	const char *name;
	uint32_t unit_sel;
	int min;
	int max;
} gp_events[] = {
	{"core cycles", 0x003c, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"ref cycles", 0x013c, 1*N, 30*N},
	{"llc references", 0x4f2e, 1, 2*N},
	{"llc misses", 0x412e, 1, 1*N},
	{"branches", 0x00c4, 1*N, 1.1*N},
	{"branch misses", 0x00c5, 0, 0.1*N},
}, fixed_events[] = {
	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
	{"fixed 3", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
};

char *buf;

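/*
 * Measurement workload: N iterations of a load (striding 64 bytes
 * through buf), seven NOPs, and a LOOP branch -- roughly ten
 * instructions and one branch per iteration, matching the gp_events
 * expectations above.
 */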
static inline void loop(void)
{
	unsigned long tmp, tmp2, tmp3;

	asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b"
		     : "=c"(tmp), "=r"(tmp2), "=r"(tmp3) : "0"(N), "1"(buf));
}

volatile uint64_t irq_received;

static void cnt_overflow(isr_regs_t *regs)
{
	irq_received++;
	apic_write(APIC_EOI, 0);
}

static bool check_irq(void)
{
	int i;

	irq_received = 0;
	irq_enable();
	for (i = 0; i < 100000 && !irq_received; i++)
		asm volatile("pause");
	irq_disable();
	return irq_received;
}

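/*
 * GP counters occupy MSR addresses below the fixed counter block
 * (legacy MSR_IA32_PERFCTRx) or at MSR_IA32_PMC0 and above (the
 * full-width aliases); anything in between is a fixed counter.
 */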
static bool is_gp(pmu_counter_t *evt)
{
	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
		evt->ctr >= MSR_IA32_PMC0;
}

static int event_to_global_idx(pmu_counter_t *cnt)
{
	return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
		(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));
}

static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
{
	if (is_gp(cnt)) {
		int i;

		for (i = 0; i < ARRAY_SIZE(gp_events); i++)
			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
				return &gp_events[i];
	} else
		return &fixed_events[cnt->ctr - MSR_CORE_PERF_FIXED_CTR0];

	return NULL;
}

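/*
 * Set or clear the counter's enable bit in IA32_PERF_GLOBAL_CTRL; the
 * bit index comes from event_to_global_idx() (fixed counters start at
 * bit FIXED_CNT_INDEX).  No-op on version 1 PMUs, which lack the
 * global control MSR.
 */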
static void global_enable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	cnt->idx = event_to_global_idx(cnt);
	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx));
}

static void global_disable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx));
}

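/*
 * Preset the count and start the counter: GP counters are programmed
 * via their event select MSR; fixed counters get their OS/USR/PMI bits
 * merged into the proper 4-bit field of IA32_FIXED_CTR_CTRL.  Finally
 * set the global enable bit (if any) and route PMIs to PMI_VECTOR.
 */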
static void __start_event(pmu_counter_t *evt, uint64_t count)
{
	evt->count = count;
	wrmsr(evt->ctr, evt->count);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config | EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		uint32_t usrospmi = 0;

		if (evt->config & EVNTSEL_OS)
			usrospmi |= (1 << 0);
		if (evt->config & EVNTSEL_USR)
			usrospmi |= (1 << 1);
		if (evt->config & EVNTSEL_INT)
			usrospmi |= (1 << 3); // PMI on overflow
		ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
	}
	global_enable(evt);
	apic_write(APIC_LVTPC, PMI_VECTOR);
}

static void start_event(pmu_counter_t *evt)
{
	__start_event(evt, 0);
}

static void stop_event(pmu_counter_t *evt)
{
	global_disable(evt);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config & ~EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
	}
	evt->count = rdmsr(evt->ctr);
}

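/*
 * Start 'count' counters, run the workload once, and stop them again;
 * noinline, presumably so the measured instruction stream is not
 * perturbed by inlining decisions at the call sites.
 */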
static noinline void measure_many(pmu_counter_t *evt, int count)
{
	int i;

	for (i = 0; i < count; i++)
		start_event(&evt[i]);
	loop();
	for (i = 0; i < count; i++)
		stop_event(&evt[i]);
}

static void measure_one(pmu_counter_t *evt)
{
	measure_many(evt, 1);
}

static noinline void __measure(pmu_counter_t *evt, uint64_t count)
{
	__start_event(evt, count);
	loop();
	stop_event(evt);
}

static bool verify_event(uint64_t count, struct pmu_event *e)
{
	// printf("%d <= %ld <= %d\n", e->min, count, e->max);
	return count >= e->min && count <= e->max;
}

static bool verify_counter(pmu_counter_t *cnt)
{
	return verify_event(cnt->count, get_counter_event(cnt));
}

static void check_gp_counter(struct pmu_event *evt)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
	};
	int i;

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);
		measure_one(&cnt);
		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
	}
}

static void check_gp_counters(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(gp_events); i++)
		if (pmu_gp_counter_is_available(i))
			check_gp_counter(&gp_events[i]);
		else
			printf("GP event '%s' is disabled\n",
			       gp_events[i].name);
}

static void check_fixed_counters(void)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR,
	};
	int i;

	for (i = 0; i < pmu.nr_fixed_counters; i++) {
		cnt.ctr = fixed_events[i].unit_sel;
		measure_one(&cnt);
		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
	}
}

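/*
 * Program all available GP and fixed counters simultaneously and
 * verify that each one still counts within its expected range.
 */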
static void check_counters_many(void)
{
	pmu_counter_t cnt[10];
	int i, n;

	for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) {
		if (!pmu_gp_counter_is_available(i))
			continue;

		cnt[n].ctr = MSR_GP_COUNTERx(n);
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
			gp_events[i % ARRAY_SIZE(gp_events)].unit_sel;
		n++;
	}
	for (i = 0; i < pmu.nr_fixed_counters; i++) {
		cnt[n].ctr = fixed_events[i].unit_sel;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
		n++;
	}

	measure_many(cnt, n);

	for (i = 0; i < n; i++)
		if (!verify_counter(&cnt[i]))
			break;

	report(i == n, "all counters");
}

static uint64_t measure_for_overflow(pmu_counter_t *cnt)
{
	__measure(cnt, 0);
	/*
	 * To generate overflow, i.e. roll over to '0', the initial count just
	 * needs to be preset to the negative expected count.  However, as per
	 * Intel's SDM, the preset count needs to be incremented by 1 to ensure
	 * the overflow interrupt is generated immediately instead of possibly
	 * waiting for the overflow to propagate through the counter.
	 */
	assert(cnt->count > 1);
	return 1 - cnt->count;
}

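/*
 * Preset each counter to overflow during loop() and verify the final
 * count, the per-counter bit in IA32_PERF_GLOBAL_STATUS, clearing that
 * bit via the status-clear MSR, and that a PMI arrives iff EVNTSEL_INT
 * was set (alternating iterations).  The final iteration repeats the
 * test on fixed counter 0.
 */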
static void check_counter_overflow(void)
{
	uint64_t overflow_preset;
	int i;
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};

	overflow_preset = measure_for_overflow(&cnt);

	/* clear status before test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	report_prefix_push("overflow");

	for (i = 0; i < pmu.nr_gp_counters + 1; i++) {
		uint64_t status;
		int idx;

		cnt.count = overflow_preset;
		if (pmu_use_full_writes())
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;

		if (i == pmu.nr_gp_counters) {
			cnt.ctr = fixed_events[0].unit_sel;
			cnt.count = measure_for_overflow(&cnt);
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
		} else {
			cnt.ctr = MSR_GP_COUNTERx(i);
		}

		if (i % 2)
			cnt.config |= EVNTSEL_INT;
		else
			cnt.config &= ~EVNTSEL_INT;
		idx = event_to_global_idx(&cnt);
		__measure(&cnt, cnt.count);
		report(cnt.count == 1, "cntr-%d", i);

		if (!this_cpu_has_perf_global_status())
			continue;

		status = rdmsr(pmu.msr_global_status);
		report(status & (1ull << idx), "status-%d", i);
		wrmsr(pmu.msr_global_status_clr, status);
		status = rdmsr(pmu.msr_global_status);
		report(!(status & (1ull << idx)), "status clear-%d", i);
		report(check_irq() == (i % 2), "irq-%d", i);
	}

	report_prefix_pop();
}

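/*
 * With CMASK = 2, the counter increments only on cycles in which at
 * least two instructions retire, so the result should land well below
 * the normal minimum for the instructions event.
 */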
static void check_gp_counter_cmask(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};

	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
	measure_one(&cnt);
	report(cnt.count < gp_events[1].min, "cmask");
}

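/*
 * RDPMC with bit 31 of the index set requests "fast" mode, which
 * returns only the low 32 bits of the counter; bit 30 selects the
 * fixed counter set.  Fast mode is not supported everywhere, hence
 * the #GP probes in check_rdpmc().
 */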
static void do_rdpmc_fast(void *ptr)
{
	pmu_counter_t *cnt = ptr;
	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);

	if (!is_gp(cnt))
		idx |= 1 << 30;

	cnt->count = rdpmc(idx);
}

static void check_rdpmc(void)
{
	uint64_t val = 0xff0123456789ull;
	bool exc;
	int i;

	report_prefix_push("rdpmc");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		uint64_t x;
		pmu_counter_t cnt = {
			.ctr = MSR_GP_COUNTERx(i),
			.idx = i
		};

		/*
		 * Without full-width writes, only the low 32 bits are writable,
		 * and the value is sign-extended.
		 */
		if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0)
			x = (uint64_t)(int64_t)(int32_t)val;
		else
			x = (uint64_t)(int64_t)val;

		/* Mask according to the number of supported bits */
		x &= (1ull << pmu.gp_counter_width) - 1;

		wrmsr(MSR_GP_COUNTERx(i), val);
		report(rdpmc(i) == x, "cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fast-%d", i);
		else
			report(cnt.count == (u32)val, "fast-%d", i);
	}
	for (i = 0; i < pmu.nr_fixed_counters; i++) {
		uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1);
		pmu_counter_t cnt = {
			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
			.idx = i
		};

		wrmsr(MSR_PERF_FIXED_CTRx(i), x);
		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fixed fast-%d", i);
		else
			report(cnt.count == (u32)x, "fixed fast-%d", i);
	}

	report_prefix_pop();
}

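/*
 * Verify that WRMSR to a running counter takes effect: zeroing it
 * mid-run must keep the final count below the expected minimum, and
 * writing -1 must make the next event overflow into the global status
 * bit for counter 0.
 */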
static void check_running_counter_wrmsr(void)
{
	uint64_t status;
	uint64_t count;
	pmu_counter_t evt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};

	report_prefix_push("running counter wrmsr");

	start_event(&evt);
	loop();
	wrmsr(MSR_GP_COUNTERx(0), 0);
	stop_event(&evt);
	report(evt.count < gp_events[1].min, "cntr");

	/* clear status before overflow test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&evt);

	count = -1;
	if (pmu_use_full_writes())
		count &= (1ull << pmu.gp_counter_width) - 1;

	wrmsr(MSR_GP_COUNTERx(0), count);

	loop();
	stop_event(&evt);

	if (this_cpu_has_perf_global_status()) {
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "status msr bit");
	}

	report_prefix_pop();
}

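/*
 * Run a block that mixes natively executed instructions with ones
 * forced through the emulator via KVM_FEP, to verify that emulated
 * instructions and branches update the PMU.  Both counters are preset
 * so that exactly the expected counts make them overflow.
 */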
static void check_emulated_instr(void)
{
	uint64_t status, instr_start, brnch_start;
	pmu_counter_t brnch_cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		/* branch instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[5].unit_sel,
	};
	pmu_counter_t instr_cnt = {
		.ctr = MSR_GP_COUNTERx(1),
		/* instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};

	report_prefix_push("emulated instruction");

	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&brnch_cnt);
	start_event(&instr_cnt);

	brnch_start = -EXPECTED_BRNCH;
	instr_start = -EXPECTED_INSTR;
	wrmsr(MSR_GP_COUNTERx(0), brnch_start);
	wrmsr(MSR_GP_COUNTERx(1), instr_start);
	// KVM_FEP is a magic prefix that forces emulation, so
	// 'KVM_FEP "jne label\n"' just counts as a single instruction.
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	stop_event(&brnch_cnt);
	stop_event(&instr_cnt);

	// Check that the end count - start count is at least the expected
	// number of instructions and branches.
	report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
	       "instruction count");
	report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
	       "branch count");
	if (this_cpu_has_perf_global_status()) {
		// Additionally check that those counters overflowed properly.
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "branch counter overflow");
		report(status & 2, "instruction counter overflow");
	}

	report_prefix_pop();
}

static void check_counters(void)
{
	if (is_fep_available())
		check_emulated_instr();

	check_gp_counters();
	check_fixed_counters();
	check_rdpmc();
	check_counters_many();
	check_counter_overflow();
	check_gp_counter_cmask();
	check_running_counter_wrmsr();
}

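/* Write a value wider than the GP counter width; should raise #GP. */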
static void do_unsupported_width_counter_write(void *index)
{
	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
}

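/*
 * Check both write interfaces of the GP counters: legacy
 * MSR_IA32_PERFCTRn accepts 64-bit writes but only the low 32 bits
 * stick, whereas full-width MSR_IA32_PMCn retains gp_counter_width
 * bits and rejects anything wider with #GP.
 */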
static void check_gp_counters_write_width(void)
{
	u64 val_64 = 0xffffff0123456789ull;
	u64 val_32 = val_64 & ((1ull << 32) - 1);
	u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1);
	int i;

	/*
	 * MSR_IA32_PERFCTRn supports 64-bit writes,
	 * but only the lowest 32 bits are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
	}

	/*
	 * MSR_IA32_PMCn supports writing values up to GP counter width,
	 * and only the lowest bits of GP counter width are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PMC0 + i, val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);

		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);

		report(test_for_exception(GP_VECTOR,
		       do_unsupported_width_counter_write, &i),
		       "writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
	}
}

/*
 * Per the SDM, reference cycles are currently implemented using the
 * core crystal clock, TSC, or bus clock. Calibrate to the TSC
 * frequency to set reasonable expectations.
 */
static void set_ref_cycle_expectations(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_IA32_PERFCTR0,
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[2].unit_sel,
	};
	uint64_t tsc_delta;
	uint64_t t0, t1, t2, t3;

	/* Bit 2 enumerates the availability of reference cycles events. */
	if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2))
		return;

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	t0 = fenced_rdtsc();
	start_event(&cnt);
	t1 = fenced_rdtsc();

	/*
	 * This loop has to run long enough to dominate the VM-exit
	 * costs for playing with the PMU MSRs on start and stop.
	 *
	 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
	 * the core crystal clock, this function calculated a guest
	 * TSC : ref cycles ratio of around 105 with ECX initialized
	 * to one billion.
	 */
	asm volatile("loop ." : "+c"((int){1000000000ull}));

	t2 = fenced_rdtsc();
	stop_event(&cnt);
	t3 = fenced_rdtsc();

	tsc_delta = ((t2 - t1) + (t3 - t0)) / 2;

	if (!tsc_delta)
		return;

	gp_events[2].min = (gp_events[2].min * cnt.count) / tsc_delta;
	gp_events[2].max = (gp_events[2].max * cnt.count) / tsc_delta;
}

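/* RDPMC with an out-of-range counter index (64) must raise #GP. */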
static void check_invalid_rdpmc_gp(void)
{
	uint64_t val;

	report(rdpmc_safe(64, &val) == GP_VECTOR,
	       "Expected #GP on RDPMC(64)");
}

int main(int ac, char **av)
{
	setup_vm();
	handle_irq(PMI_VECTOR, cnt_overflow);
	buf = malloc(N*64);

	check_invalid_rdpmc_gp();

	if (!pmu.version) {
		report_skip("No Intel Arch PMU was detected!");
		return report_summary();
	}

	set_ref_cycle_expectations();

	printf("PMU version:         %d\n", pmu.version);
	printf("GP counters:         %d\n", pmu.nr_gp_counters);
	printf("GP counter width:    %d\n", pmu.gp_counter_width);
	printf("Mask length:         %d\n", pmu.gp_counter_mask_length);
	printf("Fixed counters:      %d\n", pmu.nr_fixed_counters);
	printf("Fixed counter width: %d\n", pmu.fixed_counter_width);

	apic_write(APIC_LVTPC, PMI_VECTOR);

	check_counters();

	if (pmu_has_full_writes()) {
		pmu.msr_gp_counter_base = MSR_IA32_PMC0;

		report_prefix_push("full-width writes");
		check_counters();
		check_gp_counters_write_width();
		report_prefix_pop();
	}

	return report_summary();
}