xref: /kvm-unit-tests/x86/pmu.c (revision c604fa931a1cb70c3649ac1b7223178fc79eab6a)

#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/apic-defs.h"
#include "x86/apic.h"
#include "x86/desc.h"
#include "x86/isr.h"
#include "alloc.h"

#include "libcflat.h"
#include <stdint.h>

#define FIXED_CNT_INDEX 32
#define PC_VECTOR	32

#define EVNTSEL_EVENT_SHIFT	0
#define EVNTSEL_UMASK_SHIFT	8
#define EVNTSEL_USR_SHIFT	16
#define EVNTSEL_OS_SHIFT	17
#define EVNTSEL_EDGE_SHIFT	18
#define EVNTSEL_PC_SHIFT	19
#define EVNTSEL_INT_SHIFT	20
#define EVNTSEL_EN_SHIFT	22
#define EVNTSEL_INV_SHIFT	23
#define EVNTSEL_CMASK_SHIFT	24

#define EVNTSEL_EN	(1 << EVNTSEL_EN_SHIFT)
#define EVNTSEL_USR	(1 << EVNTSEL_USR_SHIFT)
#define EVNTSEL_OS	(1 << EVNTSEL_OS_SHIFT)
#define EVNTSEL_PC	(1 << EVNTSEL_PC_SHIFT)
#define EVNTSEL_INT	(1 << EVNTSEL_INT_SHIFT)
#define EVNTSEL_INV	(1 << EVNTSEL_INV_SHIFT)

#define N 1000000

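/*
 * KVM_FEP is KVM's force-emulation prefix: an instruction carrying it is
 * handled by KVM's x86 instruction emulator instead of being executed
 * natively (assumed to require force emulation to be enabled on the host).
 * See check_emulated_instr().
 */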
#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
// These values match the number of instructions and branches in the
// assembly block in check_emulated_instr().
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

typedef struct {
	uint32_t ctr;
	uint32_t config;
	uint64_t count;
	int idx;
} pmu_counter_t;

union cpuid10_eax {
	struct {
		unsigned int version_id:8;
		unsigned int num_counters:8;
		unsigned int bit_width:8;
		unsigned int mask_length:8;
	} split;
	unsigned int full;
} eax;

union cpuid10_ebx {
	struct {
		unsigned int no_unhalted_core_cycles:1;
		unsigned int no_instructions_retired:1;
		unsigned int no_unhalted_reference_cycles:1;
		unsigned int no_llc_reference:1;
		unsigned int no_llc_misses:1;
		unsigned int no_branch_instruction_retired:1;
		unsigned int no_branch_misses_retired:1;
	} split;
	unsigned int full;
} ebx;

union cpuid10_edx {
	struct {
		unsigned int num_counters_fixed:5;
		unsigned int bit_width_fixed:8;
		unsigned int reserved:19;
	} split;
	unsigned int full;
} edx;

struct pmu_event {
	const char *name;
	uint32_t unit_sel;
	int min;
	int max;
} gp_events[] = {
	{"core cycles", 0x003c, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"ref cycles", 0x013c, 1*N, 30*N},
	{"llc references", 0x4f2e, 1, 2*N},
	{"llc misses", 0x412e, 1, 1*N},
	{"branches", 0x00c4, 1*N, 1.1*N},
	{"branch misses", 0x00c5, 0, 0.1*N},
}, fixed_events[] = {
	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
	{"fixed 3", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
};

#define PMU_CAP_FW_WRITES	(1ULL << 13)
static u64 gp_counter_base = MSR_IA32_PERFCTR0;

static int num_counters;

char *buf;

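/*
 * Measured workload: N iterations of a small loop that loads one cache
 * line from 'buf' per iteration, giving predictable instruction, branch
 * and LLC-reference counts.
 */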
static inline void loop(void)
{
	unsigned long tmp, tmp2, tmp3;

	asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b"
			: "=c"(tmp), "=r"(tmp2), "=r"(tmp3): "0"(N), "1"(buf));

}

volatile uint64_t irq_received;

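/* PMI handler: count the interrupt and acknowledge the local APIC. */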
static void cnt_overflow(isr_regs_t *regs)
{
	irq_received++;
	apic_write(APIC_EOI, 0);
}

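/* Spin briefly with interrupts enabled and report whether a PMI arrived. */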
static bool check_irq(void)
{
	int i;
	irq_received = 0;
	irq_enable();
	for (i = 0; i < 100000 && !irq_received; i++)
		asm volatile("pause");
	irq_disable();
	return irq_received;
}

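/*
 * A counter is general-purpose if its MSR lies below the fixed-counter
 * range (legacy IA32_PERFCTRn) or at/above IA32_PMC0 (the full-width
 * aliases).
 */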
static bool is_gp(pmu_counter_t *evt)
{
	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
		evt->ctr >= MSR_IA32_PMC0;
}

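/*
 * Map a counter MSR to its bit position in the global control/status
 * MSRs; fixed counters start at bit FIXED_CNT_INDEX (32).
 */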
static int event_to_global_idx(pmu_counter_t *cnt)
{
	return cnt->ctr - (is_gp(cnt) ? gp_counter_base :
		(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));
}

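/* Look up the expected count range for the event programmed on a counter. */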
static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
{
	if (is_gp(cnt)) {
		int i;

		for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++)
			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
				return &gp_events[i];
	} else
		return &fixed_events[cnt->ctr - MSR_CORE_PERF_FIXED_CTR0];

	return (void*)0;
}

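/* Set or clear a counter's enable bit in IA32_PERF_GLOBAL_CTRL. */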
static void global_enable(pmu_counter_t *cnt)
{
	cnt->idx = event_to_global_idx(cnt);

	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) |
			(1ull << cnt->idx));
}

static void global_disable(pmu_counter_t *cnt)
{
	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) &
			~(1ull << cnt->idx));
}

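/*
 * Program and start a counter: load the initial count, configure either
 * the GP event-select MSR or the counter's field in IA32_FIXED_CTR_CTRL,
 * then enable it in the global control MSR. stop_event() reverses this
 * and reads back the final count.
 */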
static void start_event(pmu_counter_t *evt)
{
	wrmsr(evt->ctr, evt->count);
	if (is_gp(evt))
		wrmsr(MSR_P6_EVNTSEL0 + event_to_global_idx(evt),
		      evt->config | EVNTSEL_EN);
	else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		uint32_t usrospmi = 0;

		if (evt->config & EVNTSEL_OS)
			usrospmi |= (1 << 0);
		if (evt->config & EVNTSEL_USR)
			usrospmi |= (1 << 1);
		if (evt->config & EVNTSEL_INT)
			usrospmi |= (1 << 3); // PMI on overflow
		ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
	}
	global_enable(evt);
	apic_write(APIC_LVTPC, PC_VECTOR);
}

static void stop_event(pmu_counter_t *evt)
{
	global_disable(evt);
	if (is_gp(evt))
		wrmsr(MSR_P6_EVNTSEL0 + event_to_global_idx(evt),
		      evt->config & ~EVNTSEL_EN);
	else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
	}
	evt->count = rdmsr(evt->ctr);
}

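/* Start the given counters, run the measured workload once, then stop them. */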
static void measure(pmu_counter_t *evt, int count)
{
	int i;
	for (i = 0; i < count; i++)
		start_event(&evt[i]);
	loop();
	for (i = 0; i < count; i++)
		stop_event(&evt[i]);
}

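/* Check that an observed count falls within the event's expected range. */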
static bool verify_event(uint64_t count, struct pmu_event *e)
{
	// printf("%d <= %ld <= %d\n", e->min, count, e->max);
	return count >= e->min && count <= e->max;
}

static bool verify_counter(pmu_counter_t *cnt)
{
	return verify_event(cnt->count, get_counter_event(cnt));
}

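/* Measure one event on each general-purpose counter in turn. */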
static void check_gp_counter(struct pmu_event *evt)
{
	pmu_counter_t cnt = {
		.ctr = gp_counter_base,
		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
	};
	int i;

	for (i = 0; i < num_counters; i++, cnt.ctr++) {
		cnt.count = 0;
		measure(&cnt, 1);
		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
	}
}

static void check_gp_counters(void)
{
	int i;

	for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++)
		if (!(ebx.full & (1 << i)))
			check_gp_counter(&gp_events[i]);
		else
			printf("GP event '%s' is disabled\n",
					gp_events[i].name);
}

static void check_fixed_counters(void)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR,
	};
	int i;

	for (i = 0; i < edx.split.num_counters_fixed; i++) {
		cnt.count = 0;
		cnt.ctr = fixed_events[i].unit_sel;
		measure(&cnt, 1);
		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d",
		       i);
	}
}

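/* Run as many GP and fixed counters as are available, all at once. */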
static void check_counters_many(void)
{
	pmu_counter_t cnt[10];
	int i, n;

	for (i = 0, n = 0; n < num_counters; i++) {
		if (ebx.full & (1 << i))
			continue;

		cnt[n].count = 0;
		cnt[n].ctr = gp_counter_base + n;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
			gp_events[i % ARRAY_SIZE(gp_events)].unit_sel;
		n++;
	}
	for (i = 0; i < edx.split.num_counters_fixed; i++) {
		cnt[n].count = 0;
		cnt[n].ctr = fixed_events[i].unit_sel;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
		n++;
	}

	measure(cnt, n);

	for (i = 0; i < n; i++)
		if (!verify_counter(&cnt[i]))
			break;

	report(i == n, "all counters");
}

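/*
 * Preload each counter so the measured workload drives it just past
 * overflow (ending at 1), then check the overflow bit in
 * IA32_PERF_GLOBAL_STATUS, its write-1-to-clear behaviour via
 * GLOBAL_OVF_CTRL, and PMI delivery for the counters armed with
 * EVNTSEL_INT.
 */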
static void check_counter_overflow(void)
{
	uint64_t count;
	int i;
	pmu_counter_t cnt = {
		.ctr = gp_counter_base,
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
		.count = 0,
	};
	measure(&cnt, 1);
	count = cnt.count;

	/* clear status before test */
	wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));

	report_prefix_push("overflow");

	for (i = 0; i < num_counters + 1; i++, cnt.ctr++) {
		uint64_t status;
		int idx;

		cnt.count = 1 - count;
		if (gp_counter_base == MSR_IA32_PMC0)
			cnt.count &= (1ull << eax.split.bit_width) - 1;

		if (i == num_counters) {
			cnt.ctr = fixed_events[0].unit_sel;
			cnt.count &= (1ull << edx.split.bit_width_fixed) - 1;
		}

		if (i % 2)
			cnt.config |= EVNTSEL_INT;
		else
			cnt.config &= ~EVNTSEL_INT;
		idx = event_to_global_idx(&cnt);
		measure(&cnt, 1);
		report(cnt.count == 1, "cntr-%d", i);
		status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
		report(status & (1ull << idx), "status-%d", i);
		wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, status);
		status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
		report(!(status & (1ull << idx)), "status clear-%d", i);
		report(check_irq() == (i % 2), "irq-%d", i);
	}

	report_prefix_pop();
}

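/*
 * With a counter mask (CMASK) of 2, the counter should only advance on
 * cycles in which at least two events occur, so the result must come in
 * well below the normal minimum for the instructions event.
 */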
static void check_gp_counter_cmask(void)
{
	pmu_counter_t cnt = {
		.ctr = gp_counter_base,
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
		.count = 0,
	};
	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
	measure(&cnt, 1);
	report(cnt.count < gp_events[1].min, "cmask");
}

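/*
 * RDPMC with bit 31 set in ECX requests a "fast" read that returns only
 * the low 32 bits; bit 30 selects the fixed-counter range. Run via
 * test_for_exception() since fast reads may #GP when not supported.
 */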
static void do_rdpmc_fast(void *ptr)
{
	pmu_counter_t *cnt = ptr;
	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);

	if (!is_gp(cnt))
		idx |= 1 << 30;

	cnt->count = rdpmc(idx);
}

static void check_rdpmc(void)
{
	uint64_t val = 0xff0123456789ull;
	bool exc;
	int i;

	report_prefix_push("rdpmc");

	for (i = 0; i < num_counters; i++) {
		uint64_t x;
		pmu_counter_t cnt = {
			.ctr = gp_counter_base + i,
			.idx = i
		};

		/*
		 * Without full-width writes, only the low 32 bits are writable,
		 * and the value is sign-extended.
		 */
		if (gp_counter_base == MSR_IA32_PERFCTR0)
			x = (uint64_t)(int64_t)(int32_t)val;
		else
			x = (uint64_t)(int64_t)val;

		/* Mask according to the number of supported bits */
		x &= (1ull << eax.split.bit_width) - 1;

		wrmsr(gp_counter_base + i, val);
		report(rdpmc(i) == x, "cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fast-%d", i);
		else
			report(cnt.count == (u32)val, "fast-%d", i);
	}
	for (i = 0; i < edx.split.num_counters_fixed; i++) {
		uint64_t x = val & ((1ull << edx.split.bit_width_fixed) - 1);
		pmu_counter_t cnt = {
			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
			.idx = i
		};

		wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, x);
		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fixed fast-%d", i);
		else
			report(cnt.count == (u32)x, "fixed fast-%d", i);
	}

	report_prefix_pop();
}

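/*
 * WRMSR to a running counter must take effect immediately: zeroing it
 * mid-run keeps the final count small, and preloading it with all ones
 * must set the overflow status bit.
 */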
static void check_running_counter_wrmsr(void)
{
	uint64_t status;
	uint64_t count;
	pmu_counter_t evt = {
		.ctr = gp_counter_base,
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
		.count = 0,
	};

	report_prefix_push("running counter wrmsr");

	start_event(&evt);
	loop();
	wrmsr(gp_counter_base, 0);
	stop_event(&evt);
	report(evt.count < gp_events[1].min, "cntr");

	/* clear status before overflow test */
	wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL,
	      rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));

	evt.count = 0;
	start_event(&evt);

	count = -1;
	if (gp_counter_base == MSR_IA32_PMC0)
		count &= (1ull << eax.split.bit_width) - 1;

	wrmsr(gp_counter_base, count);

	loop();
	stop_event(&evt);
	status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
	report(status & 1, "status");

	report_prefix_pop();
}

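/*
 * Verify that instructions and branches emulated by KVM (via the
 * force-emulation prefix) are reflected in the guest's counters.
 */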
static void check_emulated_instr(void)
{
	uint64_t status, instr_start, brnch_start;
	pmu_counter_t brnch_cnt = {
		.ctr = MSR_IA32_PERFCTR0,
		/* branch instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[5].unit_sel,
		.count = 0,
	};
	pmu_counter_t instr_cnt = {
		.ctr = MSR_IA32_PERFCTR0 + 1,
		/* instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
		.count = 0,
	};
	report_prefix_push("emulated instruction");

	wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL,
	      rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));

	start_event(&brnch_cnt);
	start_event(&instr_cnt);

	brnch_start = -EXPECTED_BRNCH;
	instr_start = -EXPECTED_INSTR;
	wrmsr(MSR_IA32_PERFCTR0, brnch_start);
	wrmsr(MSR_IA32_PERFCTR0 + 1, instr_start);
	// KVM_FEP is a magic prefix that forces emulation so
	// 'KVM_FEP "jne label\n"' just counts as a single instruction.
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");

	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);

	stop_event(&brnch_cnt);
	stop_event(&instr_cnt);

	// Check that the end count - start count is at least the expected
	// number of instructions and branches.
	report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
	       "instruction count");
	report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
	       "branch count");
	// Additionally check that those counters overflowed properly.
	// Counter 0 holds the branch event and counter 1 the instruction
	// event, so status bit 0 is the branch counter's overflow bit and
	// bit 1 the instruction counter's.
	status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
	report(status & 1, "branch counter overflow");
	report(status & 2, "instruction counter overflow");

	report_prefix_pop();
}

static void check_counters(void)
{
	check_gp_counters();
	check_fixed_counters();
	check_rdpmc();
	check_counters_many();
	check_counter_overflow();
	check_gp_counter_cmask();
	check_running_counter_wrmsr();
}

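/*
 * Full-width counter interface: writes to IA32_PERFCTRn are sign-extended
 * from bit 31, while IA32_PMCn accepts values up to the full counter width
 * and #GPs on anything wider.
 */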
static void do_unsupported_width_counter_write(void *index)
{
	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
}

static void check_gp_counters_write_width(void)
{
	u64 val_64 = 0xffffff0123456789ull;
	u64 val_32 = val_64 & ((1ull << 32) - 1);
	u64 val_max_width = val_64 & ((1ull << eax.split.bit_width) - 1);
	int i;

	/*
	 * MSR_IA32_PERFCTRn supports 64-bit writes,
	 * but only the lowest 32 bits are valid.
	 */
	for (i = 0; i < num_counters; i++) {
		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
	}

	/*
	 * MSR_IA32_PMCn supports writing values up to GP counter width,
	 * and only the lowest bits of GP counter width are valid.
	 */
	for (i = 0; i < num_counters; i++) {
		wrmsr(MSR_IA32_PMC0 + i, val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);

		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);

		report(test_for_exception(GP_VECTOR,
			do_unsupported_width_counter_write, &i),
		       "writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
	}
}

/*
 * Per the SDM, reference cycles are currently implemented using the
 * core crystal clock, TSC, or bus clock. Calibrate to the TSC
 * frequency to set reasonable expectations.
 */
static void set_ref_cycle_expectations(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_IA32_PERFCTR0,
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[2].unit_sel,
		.count = 0,
	};
	uint64_t tsc_delta;
	uint64_t t0, t1, t2, t3;

	if (!eax.split.num_counters || (ebx.full & (1 << 2)))
		return;

	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);

	t0 = fenced_rdtsc();
	start_event(&cnt);
	t1 = fenced_rdtsc();

	/*
	 * This loop has to run long enough to dominate the VM-exit
	 * costs for playing with the PMU MSRs on start and stop.
	 *
	 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
	 * the core crystal clock, this function calculated a guest
	 * TSC : ref cycles ratio of around 105 with ECX initialized
	 * to one billion.
	 */
	asm volatile("loop ." : "+c"((int){1000000000ull}));

	t2 = fenced_rdtsc();
	stop_event(&cnt);
	t3 = fenced_rdtsc();

	tsc_delta = ((t2 - t1) + (t3 - t0)) / 2;

	if (!tsc_delta)
		return;

	gp_events[2].min = (gp_events[2].min * cnt.count) / tsc_delta;
	gp_events[2].max = (gp_events[2].max * cnt.count) / tsc_delta;
}

int main(int ac, char **av)
{
	struct cpuid id = cpuid(10);

	setup_vm();
	handle_irq(PC_VECTOR, cnt_overflow);
	buf = malloc(N*64);

	eax.full = id.a;
	ebx.full = id.b;
	edx.full = id.d;

	if (!eax.split.version_id) {
		printf("No pmu is detected!\n");
		return report_summary();
	}

	if (eax.split.version_id == 1) {
		printf("PMU version 1 is not supported\n");
		return report_summary();
	}

	set_ref_cycle_expectations();

	printf("PMU version:         %d\n", eax.split.version_id);
	printf("GP counters:         %d\n", eax.split.num_counters);
	printf("GP counter width:    %d\n", eax.split.bit_width);
	printf("Mask length:         %d\n", eax.split.mask_length);
	printf("Fixed counters:      %d\n", edx.split.num_counters_fixed);
	printf("Fixed counter width: %d\n", edx.split.bit_width_fixed);

	num_counters = eax.split.num_counters;

	apic_write(APIC_LVTPC, PC_VECTOR);

	if (ac > 1 && !strcmp(av[1], "emulation")) {
		check_emulated_instr();
	} else {
		check_counters();

		if (rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES) {
			gp_counter_base = MSR_IA32_PMC0;
			report_prefix_push("full-width writes");
			check_counters();
			check_gp_counters_write_width();
		}
	}

	return report_summary();
}