xref: /kvm-unit-tests/x86/pmu_pebs.c (revision b36f35a82ff4cec5f71a68aa782332e2bc3488f7)
#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/pmu.h"
#include "x86/isr.h"
#include "x86/apic.h"
#include "x86/apic-defs.h"
#include "x86/desc.h"
#include "alloc.h"

#include "vm.h"
#include "types.h"
#include "processor.h"
#include "vmalloc.h"
#include "alloc_page.h"

/* bits [63:48] provide the size of the current record in bytes */
#define	RECORD_SIZE_OFFSET	48

static unsigned int max_nr_gp_events;
static unsigned long *ds_buffer;
static unsigned long *pebs_buffer;
static u64 ctr_start_val;
static bool has_baseline;

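/*
 * DS save area layout per the Intel SDM. Only the PEBS fields are
 * exercised here: hardware appends records at pebs_index and raises a
 * PMI once pebs_index crosses pebs_interrupt_threshold.
 */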
struct debug_store {
	u64	bts_buffer_base;
	u64	bts_index;
	u64	bts_absolute_maximum;
	u64	bts_interrupt_threshold;
	u64	pebs_buffer_base;
	u64	pebs_index;
	u64	pebs_absolute_maximum;
	u64	pebs_interrupt_threshold;
	u64	pebs_event_reset[64];
};

struct pebs_basic {
	u64 format_size;
	u64 ip;
	u64 applicable_counters;
	u64 tsc;
};

struct pebs_meminfo {
	u64 address;
	u64 aux;
	u64 latency;
	u64 tsx_tuning;
};

struct pebs_gprs {
	u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di;
	u64 r8, r9, r10, r11, r12, r13, r14, r15;
};

struct pebs_xmm {
	u64 xmm[16*2];	/* two entries for each register */
};

struct lbr_entry {
	u64 from;
	u64 to;
	u64 info;
};

enum pmc_type {
	GP = 0,
	FIXED,
};

static uint32_t intel_arch_events[] = {
	0x00c4, /* PERF_COUNT_HW_BRANCH_INSTRUCTIONS */
	0x00c5, /* PERF_COUNT_HW_BRANCH_MISSES */
	0x0300, /* PERF_COUNT_HW_REF_CPU_CYCLES */
	0x003c, /* PERF_COUNT_HW_CPU_CYCLES */
	0x00c0, /* PERF_COUNT_HW_INSTRUCTIONS */
	0x013c, /* PERF_COUNT_HW_BUS_CYCLES */
	0x4f2e, /* PERF_COUNT_HW_CACHE_REFERENCES */
	0x412e, /* PERF_COUNT_HW_CACHE_MISSES */
};
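
/*
 * Each entry above encodes the unit mask in bits [15:8] and the event
 * select in bits [7:0], the layout consumed by IA32_PERFEVTSELx.
 */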

static u64 pebs_data_cfgs[] = {
	PEBS_DATACFG_MEMINFO,
	PEBS_DATACFG_GP,
	PEBS_DATACFG_XMMS,
	PEBS_DATACFG_LBRS | ((MAX_NUM_LBR_ENTRY - 1) << PEBS_DATACFG_LBR_SHIFT),
};

/* Iterating over every counter value is a waste of time; pick a few typical values. */
static u64 counter_start_values[] = {
	/* if the PEBS counter doesn't overflow at all */
	0,
	0xfffffffffff0,
	/* normal counter overflow to generate PEBS records */
	0xfffffffffffe,
	/* test whether emulated instructions should trigger PEBS */
	0xffffffffffff,
};
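
/*
 * A worked example, assuming the common 48-bit counter width: starting
 * at 0xfffffffffffe the counter overflows on the second counted event,
 * while 0xffffffffffff overflows on the very first one, including an
 * event triggered by an instruction the hypervisor emulates.
 */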

static unsigned int get_adaptive_pebs_record_size(u64 pebs_data_cfg)
{
	unsigned int sz = sizeof(struct pebs_basic);

	if (!has_baseline)
		return sz;

	if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
		sz += sizeof(struct pebs_meminfo);
	if (pebs_data_cfg & PEBS_DATACFG_GP)
		sz += sizeof(struct pebs_gprs);
	if (pebs_data_cfg & PEBS_DATACFG_XMMS)
		sz += sizeof(struct pebs_xmm);
	if (pebs_data_cfg & PEBS_DATACFG_LBRS)
		sz += MAX_NUM_LBR_ENTRY * sizeof(struct lbr_entry);

	return sz;
}
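
/*
 * For example, pebs_data_cfg == PEBS_DATACFG_GP yields records of
 * sizeof(struct pebs_basic) + sizeof(struct pebs_gprs), i.e.
 * 32 + 144 = 176 bytes, given the struct layouts above.
 */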

static void cnt_overflow(isr_regs_t *regs)
{
	apic_write(APIC_EOI, 0);
}

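/*
 * Branch-heavy workload: the jne chains feed the branch-instruction and
 * branch-miss events, and the CPUID loop exercises an instruction the
 * hypervisor typically intercepts and emulates, matching the last entry
 * in counter_start_values.
 */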
static inline void workload(void)
{
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		"jne label2\n"
		"jne label2\n"
		"jne label2\n"
		"jne label2\n"
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		"jne label2\n"
		"jne label2\n"
		"jne label2\n"
		"jne label2\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label2:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");
}

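/*
 * Identical to workload() except for the asm label, which must be
 * unique; the second copy gives the multi-counter test its own workload.
 */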
static inline void workload2(void)
{
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		"jne label3\n"
		"jne label3\n"
		"jne label3\n"
		"jne label3\n"
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		"jne label3\n"
		"jne label3\n"
		"jne label3\n"
		"jne label3\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label3:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");
}

static void alloc_buffers(void)
{
	ds_buffer = alloc_page();
	force_4k_page(ds_buffer);
	memset(ds_buffer, 0x0, PAGE_SIZE);

	pebs_buffer = alloc_page();
	force_4k_page(pebs_buffer);
	memset(pebs_buffer, 0x0, PAGE_SIZE);
}

static void free_buffers(void)
{
	if (ds_buffer)
		free_page(ds_buffer);

	if (pebs_buffer)
		free_page(pebs_buffer);
}

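/*
 * Program the DS area and the selected counters for PEBS. The interrupt
 * threshold is set one record past the buffer base, so a PMI is raised
 * as soon as the first record is written.
 */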
static void pebs_enable(u64 bitmask, u64 pebs_data_cfg)
{
	static struct debug_store *ds;
	u64 baseline_extra_ctrl = 0, fixed_ctr_ctrl = 0;
	unsigned int idx;

	if (has_baseline)
		wrmsr(MSR_PEBS_DATA_CFG, pebs_data_cfg);

	ds = (struct debug_store *)ds_buffer;
	ds->pebs_index = ds->pebs_buffer_base = (unsigned long)pebs_buffer;
	ds->pebs_absolute_maximum = (unsigned long)pebs_buffer + PAGE_SIZE;
	ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
		get_adaptive_pebs_record_size(pebs_data_cfg);

	for (idx = 0; idx < pmu.nr_fixed_counters; idx++) {
		if (!(BIT_ULL(FIXED_CNT_INDEX + idx) & bitmask))
			continue;
		if (has_baseline)
			baseline_extra_ctrl = BIT(FIXED_CNT_INDEX + idx * 4);
		wrmsr(MSR_PERF_FIXED_CTRx(idx), ctr_start_val);
		fixed_ctr_ctrl |= (0xbULL << (idx * 4) | baseline_extra_ctrl);
	}
	if (fixed_ctr_ctrl)
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, fixed_ctr_ctrl);

	for (idx = 0; idx < max_nr_gp_events; idx++) {
		if (!(BIT_ULL(idx) & bitmask))
			continue;
		if (has_baseline)
			baseline_extra_ctrl = ICL_EVENTSEL_ADAPTIVE;
		wrmsr(MSR_GP_EVENT_SELECTx(idx), EVNTSEL_EN | EVNTSEL_OS | EVNTSEL_USR |
						 intel_arch_events[idx] | baseline_extra_ctrl);
		wrmsr(MSR_GP_COUNTERx(idx), ctr_start_val);
	}

	wrmsr(MSR_IA32_DS_AREA, (unsigned long)ds_buffer);
	wrmsr(MSR_IA32_PEBS_ENABLE, bitmask);
	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, bitmask);
}

static void reset_pebs(void)
{
	memset(ds_buffer, 0x0, PAGE_SIZE);
	memset(pebs_buffer, 0x0, PAGE_SIZE);
	wrmsr(MSR_IA32_PEBS_ENABLE, 0);
	wrmsr(MSR_IA32_DS_AREA, 0);
	if (has_baseline)
		wrmsr(MSR_PEBS_DATA_CFG, 0);

	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
	wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));

	pmu_reset_all_counters();
}

static void pebs_disable(unsigned int idx)
{
	/*
	 * If only the PEBS_ENABLE bit is cleared, the counter keeps counting.
	 * Should it overflow in that tiny window, a normal counter irq is
	 * generated instead of a PEBS record. Exercise both paths: for odd
	 * counters clear PEBS_ENABLE first, for even ones clear only
	 * GLOBAL_CTRL.
	 */
	if (idx % 2)
		wrmsr(MSR_IA32_PEBS_ENABLE, 0);

	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}

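/*
 * Walk the PEBS buffer and check each record against the programmed
 * configuration: the applicable-counters mask, the record size encoded
 * in format_size bits [63:48], and the data cfg in bits [47:0].
 */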
static void check_pebs_records(u64 bitmask, u64 pebs_data_cfg)
{
	struct pebs_basic *pebs_rec = (struct pebs_basic *)pebs_buffer;
	struct debug_store *ds = (struct debug_store *)ds_buffer;
	unsigned int pebs_record_size = get_adaptive_pebs_record_size(pebs_data_cfg);
	unsigned int count = 0;
	bool expected, pebs_idx_match, pebs_size_match, data_cfg_match;
	void *cur_record;

	expected = (ds->pebs_index == ds->pebs_buffer_base) && !pebs_rec->format_size;
	if (!(rdmsr(MSR_CORE_PERF_GLOBAL_STATUS) & GLOBAL_STATUS_BUFFER_OVF)) {
		report(expected, "No OVF irq, no PEBS records.");
		return;
	}

	if (expected) {
		report(!expected, "An OVF irq, but no PEBS records.");
		return;
	}

	expected = ds->pebs_index >= ds->pebs_interrupt_threshold;
	cur_record = (void *)pebs_buffer;
	do {
		pebs_rec = (struct pebs_basic *)cur_record;
		pebs_record_size = pebs_rec->format_size >> RECORD_SIZE_OFFSET;
		pebs_idx_match =
			pebs_rec->applicable_counters & bitmask;
		pebs_size_match =
			pebs_record_size == get_adaptive_pebs_record_size(pebs_data_cfg);
		data_cfg_match =
			(pebs_rec->format_size & GENMASK_ULL(47, 0)) == pebs_data_cfg;
		expected = pebs_idx_match && pebs_size_match && data_cfg_match;
		report(expected,
		       "PEBS record (written seq %d) is verified (including size, counters and cfg).", count);
		cur_record = cur_record + pebs_record_size;
		count++;
	} while (expected && (void *)cur_record < (void *)ds->pebs_index);

	if (!expected) {
		if (!pebs_idx_match)
			printf("FAIL: The applicable_counters (0x%lx) doesn't match with pmc_bitmask (0x%lx).\n",
			       pebs_rec->applicable_counters, bitmask);
		if (!pebs_size_match)
			printf("FAIL: The pebs_record_size (%d) doesn't match with MSR_PEBS_DATA_CFG (%d).\n",
			       pebs_record_size, get_adaptive_pebs_record_size(pebs_data_cfg));
		if (!data_cfg_match)
			printf("FAIL: The pebs_data_cfg (0x%lx) doesn't match with MSR_PEBS_DATA_CFG (0x%lx).\n",
			       pebs_rec->format_size & GENMASK_ULL(47, 0), pebs_data_cfg);
	}
}

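/*
 * One enable/workload/disable/verify cycle for a single fixed or GP
 * counter, using the start value currently held in ctr_start_val.
 */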
static void check_one_counter(enum pmc_type type,
			      unsigned int idx, u64 pebs_data_cfg)
{
	/* u64, not int: BIT_ULL() of a fixed-counter index doesn't fit in an int. */
	u64 pebs_bit = BIT_ULL(type == FIXED ? FIXED_CNT_INDEX + idx : idx);

	report_prefix_pushf("%s counter %d (0x%lx)",
			    type == FIXED ? "Extended Fixed" : "GP", idx, ctr_start_val);
	reset_pebs();
	pebs_enable(pebs_bit, pebs_data_cfg);
	workload();
	pebs_disable(idx);
	check_pebs_records(pebs_bit, pebs_data_cfg);
	report_prefix_pop();
}

/* More than one PEBS record will be generated. */
static void check_multiple_counters(u64 bitmask, u64 pebs_data_cfg)
{
	reset_pebs();
	pebs_enable(bitmask, pebs_data_cfg);
	workload2();
	pebs_disable(0);
	check_pebs_records(bitmask, pebs_data_cfg);
}

static void check_pebs_counters(u64 pebs_data_cfg)
{
	unsigned int idx;
	u64 bitmask = 0;

	for (idx = 0; idx < pmu.nr_fixed_counters; idx++)
		check_one_counter(FIXED, idx, pebs_data_cfg);

	for (idx = 0; idx < max_nr_gp_events; idx++)
		check_one_counter(GP, idx, pebs_data_cfg);

	for (idx = 0; idx < pmu.nr_fixed_counters; idx++)
		bitmask |= BIT_ULL(FIXED_CNT_INDEX + idx);
	for (idx = 0; idx < max_nr_gp_events; idx += 2)
		bitmask |= BIT_ULL(idx);
	report_prefix_pushf("Multiple (0x%lx)", bitmask);
	check_multiple_counters(bitmask, pebs_data_cfg);
	report_prefix_pop();
}

/*
 * Known reasons for no PEBS records:
 *	1. The selected event does not support PEBS;
 *	2. From a core PMU perspective, the vCPU and pCPU models are not the same;
 *	3. The guest counter has not yet overflowed or has been cross-mapped by the host;
 */
int main(int ac, char **av)
{
	unsigned int i, j;

	setup_vm();

	max_nr_gp_events = MIN(pmu.nr_gp_counters, ARRAY_SIZE(intel_arch_events));

	printf("PMU version: %d\n", pmu.version);

	has_baseline = pmu_has_pebs_baseline();
	if (pmu_has_full_writes())
		pmu_activate_full_writes();

	if (!pmu.is_intel) {
		report_skip("PEBS requires Intel ICX or later, non-Intel detected");
		return report_summary();
	} else if (!pmu_has_pebs()) {
		report_skip("PEBS requires PMU version 2, reported version is %d", pmu.version);
		return report_summary();
	} else if (!pmu_pebs_format()) {
		report_skip("PEBS not enumerated in PERF_CAPABILITIES");
		return report_summary();
	} else if (rdmsr(MSR_IA32_MISC_ENABLE) & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL) {
		report_skip("PEBS unavailable according to MISC_ENABLE");
		return report_summary();
	}

	printf("PEBS format: %d\n", pmu_pebs_format());
	printf("PEBS GP counters: %d\n", pmu.nr_gp_counters);
	printf("PEBS Fixed counters: %d\n", pmu.nr_fixed_counters);
	printf("PEBS baseline (Adaptive PEBS): %d\n", has_baseline);

	handle_irq(PMI_VECTOR, cnt_overflow);
	alloc_buffers();

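	/*
	 * Test matrix: every counter start value, first with plain PEBS
	 * (pebs_data_cfg == 0), then with each adaptive configuration when
	 * PEBS baseline is supported.
	 */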
	for (i = 0; i < ARRAY_SIZE(counter_start_values); i++) {
		ctr_start_val = counter_start_values[i];
		check_pebs_counters(0);
		if (!has_baseline)
			continue;

		for (j = 0; j < ARRAY_SIZE(pebs_data_cfgs); j++) {
			report_prefix_pushf("Adaptive (0x%lx)", pebs_data_cfgs[j]);
			check_pebs_counters(pebs_data_cfgs[j]);
			report_prefix_pop();
		}
	}

	free_buffers();

	return report_summary();
}