xref: /kvm-unit-tests/x86/pmu_pebs.c (revision dca3f4c041143c8e8dc70c6890a19a5730310230)
1 #include "x86/msr.h"
2 #include "x86/processor.h"
3 #include "x86/pmu.h"
4 #include "x86/isr.h"
5 #include "x86/apic.h"
6 #include "x86/apic-defs.h"
7 #include "x86/desc.h"
8 #include "alloc.h"
9 
10 #include "vm.h"
11 #include "processor.h"
12 #include "vmalloc.h"
13 #include "alloc_page.h"
14 
/* bits [63:48] provides the size of the current record in bytes */
#define	RECORD_SIZE_OFFSET	48

/* Number of GP events to test: MIN(nr_gp_counters, ARRAY_SIZE(intel_arch_events)). */
static unsigned int max_nr_gp_events;
/* One 4K page used as the DS management area (programmed into MSR_IA32_DS_AREA). */
static unsigned long *ds_bufer;
/* One 4K page that receives the PEBS records written by the PMU. */
static unsigned long *pebs_buffer;
/* Initial value loaded into every enabled counter before running the workload. */
static u64 ctr_start_val;
/* True when the PMU advertises PEBS baseline (adaptive PEBS). */
static bool has_baseline;
23 
/*
 * In-memory layout of the DS save area pointed at by MSR_IA32_DS_AREA:
 * BTS buffer management fields, then PEBS buffer management fields and
 * per-counter reset values (layout per Intel SDM — only the pebs_*
 * fields are used by this test).  Do not reorder or resize fields.
 */
struct debug_store {
	u64	bts_buffer_base;
	u64	bts_index;
	u64	bts_absolute_maximum;
	u64	bts_interrupt_threshold;
	u64	pebs_buffer_base;		/* start of the record buffer */
	u64	pebs_index;			/* next write position (hardware-updated) */
	u64	pebs_absolute_maximum;		/* one past the end of the buffer */
	u64	pebs_interrupt_threshold;	/* PMI raised when pebs_index reaches this */
	u64	pebs_event_reset[64];
};
35 
/*
 * Basic group that starts every PEBS record.  format_size packs the
 * record size in bytes into bits [63:48] (see RECORD_SIZE_OFFSET) and
 * the effective PEBS_DATA_CFG value into bits [47:0].
 */
struct pebs_basic {
	u64 format_size;
	u64 ip;
	u64 applicable_counters;	/* bitmask of counters this record applies to */
	u64 tsc;
};
42 
/* Optional memory-info group, appended when PEBS_DATACFG_MEMINFO is set. */
struct pebs_meminfo {
	u64 address;
	u64 aux;
	u64 latency;
	u64 tsx_tuning;
};
49 
/* Optional general-purpose-register group, appended when PEBS_DATACFG_GPRS is set. */
struct pebs_gprs {
	u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di;
	u64 r8, r9, r10, r11, r12, r13, r14, r15;
};
54 
/* Optional XMM group, appended when PEBS_DATACFG_XMMS is set. */
struct pebs_xmm {
	u64 xmm[16*2];	/* two entries for each register */
};
58 
/*
 * One last-branch-record entry; PEBS_DATACFG_LBRS appends
 * MAX_NUM_LBR_ENTRY of these to the record.
 */
struct lbr_entry {
	u64 from;
	u64 to;
	u64 info;
};
64 
/* Counter flavor under test: general-purpose or fixed-function. */
enum pmc_type {
	GP = 0,
	FIXED,
};
69 
/*
 * Event-select encodings for the Intel architectural events, indexed by
 * GP counter number (low byte: event code, high byte: unit mask).
 * These are ORed into MSR_GP_EVENT_SELECTx() in pebs_enable().
 */
static uint32_t intel_arch_events[] = {
	0x00c4, /* PERF_COUNT_HW_BRANCH_INSTRUCTIONS */
	0x00c5, /* PERF_COUNT_HW_BRANCH_MISSES */
	0x0300, /* PERF_COUNT_HW_REF_CPU_CYCLES */
	0x003c, /* PERF_COUNT_HW_CPU_CYCLES */
	0x00c0, /* PERF_COUNT_HW_INSTRUCTIONS */
	0x013c, /* PERF_COUNT_HW_BUS_CYCLES */
	0x4f2e, /* PERF_COUNT_HW_CACHE_REFERENCES */
	0x412e, /* PERF_COUNT_HW_CACHE_MISSES */
};
80 
/* Iterating each counter value is a waste of time, pick a few typical values. */
static u64 counter_start_values[] = {
	/* if PEBS counter doesn't overflow at all */
	0,
	0xfffffffffff0,
	/* normal counter overflow to have PEBS records */
	0xfffffffffffe,
	/* test whether emulated instructions should trigger PEBS */
	0xffffffffffff,
};
91 
/*
 * Return the size in bytes of a single PEBS record for the given
 * PEBS_DATA_CFG value.  Without adaptive PEBS only the basic group is
 * written; with it, every group selected in @pebs_data_cfg contributes
 * its fixed-size payload (LBRS contributes MAX_NUM_LBR_ENTRY entries).
 */
static unsigned int get_pebs_record_size(u64 pebs_data_cfg, bool use_adaptive)
{
	unsigned int size = sizeof(struct pebs_basic);

	if (use_adaptive) {
		if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
			size += sizeof(struct pebs_meminfo);
		if (pebs_data_cfg & PEBS_DATACFG_GPRS)
			size += sizeof(struct pebs_gprs);
		if (pebs_data_cfg & PEBS_DATACFG_XMMS)
			size += sizeof(struct pebs_xmm);
		if (pebs_data_cfg & PEBS_DATACFG_LBRS)
			size += sizeof(struct lbr_entry) * MAX_NUM_LBR_ENTRY;
	}

	return size;
}
110 
/* PMI handler: re-unmask the PMI LVT entry and acknowledge the interrupt. */
static void cnt_overflow(isr_regs_t *regs)
{
	u32 lvtpc = apic_read(APIC_LVTPC);

	/* The PMI LVT entry is auto-masked on delivery; clear the mask bit. */
	apic_write(APIC_LVTPC, lvtpc & ~APIC_LVT_MASKED);
	apic_write(APIC_EOI, 0);
}
116 
/*
 * Fixed instruction sequence used to advance the enabled counters:
 * never-taken conditional branches (EAX is zeroed before each cmp) mixed
 * with CPUID leaf 0xa reads.  CPUID is typically intercepted and emulated
 * by the hypervisor, which matters for the 0xffffffffffff start value
 * ("test whether emulated instructions should trigger PEBS").
 */
static inline void workload(void)
{
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		"jne label2\n"
		"jne label2\n"
		"jne label2\n"
		"jne label2\n"
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		"jne label2\n"
		"jne label2\n"
		"jne label2\n"
		"jne label2\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label2:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");
}
149 
/*
 * Same instruction mix as workload(), duplicated with a distinct asm
 * label so both can live in one translation unit; used by the
 * multiple-counter test.
 */
static inline void workload2(void)
{
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		"jne label3\n"
		"jne label3\n"
		"jne label3\n"
		"jne label3\n"
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		"jne label3\n"
		"jne label3\n"
		"jne label3\n"
		"jne label3\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label3:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");
}
182 
alloc_buffers(void)183 static void alloc_buffers(void)
184 {
185 	ds_bufer = alloc_page();
186 	force_4k_page(ds_bufer);
187 	memset(ds_bufer, 0x0, PAGE_SIZE);
188 
189 	pebs_buffer = alloc_page();
190 	force_4k_page(pebs_buffer);
191 	memset(pebs_buffer, 0x0, PAGE_SIZE);
192 }
193 
free_buffers(void)194 static void free_buffers(void)
195 {
196 	if (ds_bufer)
197 		free_page(ds_bufer);
198 
199 	if (pebs_buffer)
200 		free_page(pebs_buffer);
201 }
202 
/*
 * Program and start PEBS on every counter selected in @bitmask (GP
 * counters in the low bits, fixed counters from bit FIXED_CNT_INDEX up).
 * Each selected counter is preloaded with ctr_start_val.  When
 * @use_adaptive is set, the per-counter adaptive bit is enabled and
 * @pebs_data_cfg selects the extra record groups.  The interrupt
 * threshold is placed one record past the buffer base, so the very
 * first PEBS record raises a PMI.
 */
static void pebs_enable(u64 bitmask, u64 pebs_data_cfg, bool use_adaptive)
{
	static struct debug_store *ds;
	u64 adaptive_ctrl = 0, fixed_ctr_ctrl = 0;
	unsigned int idx;

	if (has_baseline)
		wrmsr(MSR_PEBS_DATA_CFG, pebs_data_cfg);

	/* Point the DS area's PEBS fields at the empty record buffer. */
	ds = (struct debug_store *)ds_bufer;
	ds->pebs_index = ds->pebs_buffer_base = (unsigned long)pebs_buffer;
	ds->pebs_absolute_maximum = (unsigned long)pebs_buffer + PAGE_SIZE;
	/* PMI as soon as a single record has been written. */
	ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
		get_pebs_record_size(pebs_data_cfg, use_adaptive);

	for (idx = 0; idx < pmu.nr_fixed_counters; idx++) {
		if (!(BIT_ULL(FIXED_CNT_INDEX + idx) & bitmask))
			continue;
		/* Adaptive bit for fixed counter idx: bit FIXED_CNT_INDEX + idx*4 of FIXED_CTR_CTRL. */
		if (use_adaptive)
			adaptive_ctrl = BIT(FIXED_CNT_INDEX + idx * 4);
		wrmsr(MSR_PERF_FIXED_CTRx(idx), ctr_start_val);
		/* 0xb = OS + USR + PMI enable bits for this fixed counter (per SDM layout). */
		fixed_ctr_ctrl |= (0xbULL << (idx * 4) | adaptive_ctrl);
	}
	if (fixed_ctr_ctrl)
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, fixed_ctr_ctrl);

	for (idx = 0; idx < max_nr_gp_events; idx++) {
		if (!(BIT_ULL(idx) & bitmask))
			continue;
		if (use_adaptive)
			adaptive_ctrl = ICL_EVENTSEL_ADAPTIVE;
		wrmsr(MSR_GP_EVENT_SELECTx(idx), EVNTSEL_EN | EVNTSEL_OS | EVNTSEL_USR |
						 intel_arch_events[idx] | adaptive_ctrl);
		wrmsr(MSR_GP_COUNTERx(idx), ctr_start_val);
	}

	/* Publish the DS area, then arm PEBS and globally enable the counters. */
	wrmsr(MSR_IA32_DS_AREA,  (unsigned long)ds_bufer);
	wrmsr(MSR_IA32_PEBS_ENABLE, bitmask);
	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, bitmask);
}
243 
/*
 * Quiesce and clear all PEBS state between sub-tests: zero both pages,
 * disable PEBS and the DS area, clear global control, acknowledge any
 * pending overflow status, and reset every counter to 0.
 */
static void reset_pebs(void)
{
	memset(ds_bufer, 0x0, PAGE_SIZE);
	memset(pebs_buffer, 0x0, PAGE_SIZE);
	wrmsr(MSR_IA32_PEBS_ENABLE, 0);
	wrmsr(MSR_IA32_DS_AREA,  0);
	if (has_baseline)
		wrmsr(MSR_PEBS_DATA_CFG, 0);

	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
	/* Writing the set status bits to OVF_CTRL clears them. */
	wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));

	pmu_reset_all_counters();
}
258 
pebs_disable(unsigned int idx)259 static void pebs_disable(unsigned int idx)
260 {
261 	/*
262 	* If we only clear the PEBS_ENABLE bit, the counter will continue to increment.
263 	* In this very tiny time window, if the counter overflows no pebs record will be generated,
264 	* but a normal counter irq. Test this fully with two ways.
265 	*/
266 	if (idx % 2)
267 		wrmsr(MSR_IA32_PEBS_ENABLE, 0);
268 
269 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
270 }
271 
/*
 * Walk the PEBS buffer and verify every record written for the counters
 * in @bitmask: record size, applicable-counters mask, and the reflected
 * data-cfg bits must all match expectations.  When no buffer-overflow
 * PMI fired, the buffer must be untouched instead.
 *
 * Fix: the first assignment to data_cfg_match was dead code — it was
 * unconditionally overwritten by the use_adaptive-aware assignment on
 * the next statement — so it has been removed.
 */
static void check_pebs_records(u64 bitmask, u64 pebs_data_cfg, bool use_adaptive)
{
	struct pebs_basic *pebs_rec = (struct pebs_basic *)pebs_buffer;
	struct debug_store *ds = (struct debug_store *)ds_bufer;
	unsigned int pebs_record_size;
	unsigned int count = 0;
	bool expected, pebs_idx_match, pebs_size_match, data_cfg_match;
	void *cur_record;

	/* "Buffer untouched": write pointer still at base and no basic group written. */
	expected = (ds->pebs_index == ds->pebs_buffer_base) && !pebs_rec->format_size;
	if (!(rdmsr(MSR_CORE_PERF_GLOBAL_STATUS) & GLOBAL_STATUS_BUFFER_OVF)) {
		report(expected, "No OVF irq, none PEBS records.");
		return;
	}

	if (expected) {
		report(!expected, "A OVF irq, but none PEBS records.");
		return;
	}

	/* The PMI fires once pebs_index crosses the interrupt threshold. */
	expected = ds->pebs_index >= ds->pebs_interrupt_threshold;
	cur_record = (void *)pebs_buffer;
	do {
		pebs_rec = (struct pebs_basic *)cur_record;
		pebs_record_size = pebs_rec->format_size >> RECORD_SIZE_OFFSET;
		pebs_idx_match = pebs_rec->applicable_counters & bitmask;
		pebs_size_match = pebs_record_size == get_pebs_record_size(pebs_data_cfg, use_adaptive);
		/* Bits [47:0] reflect PEBS_DATA_CFG only when adaptive PEBS is in use. */
		data_cfg_match = (pebs_rec->format_size & GENMASK_ULL(47, 0)) ==
				 (use_adaptive ? pebs_data_cfg : 0);
		expected = pebs_idx_match && pebs_size_match && data_cfg_match;
		report(expected,
		       "PEBS record (written seq %d) is verified (including size, counters and cfg).", count);
		if (use_adaptive && (pebs_data_cfg & PEBS_DATACFG_LBRS)) {
			/* LBR group sits after all other selected groups. */
			unsigned int lbrs_offset = get_pebs_record_size(pebs_data_cfg & ~PEBS_DATACFG_LBRS, true);
			struct lbr_entry *pebs_lbrs = cur_record + lbrs_offset;
			int i;

			/* The test workload takes no recorded branches, so entries must be empty. */
			for (i = 0; i < MAX_NUM_LBR_ENTRY; i++) {
				if (!pebs_lbrs[i].from && !pebs_lbrs[i].to)
					continue;

				report_fail("PEBS LBR record %u isn't empty, got from = '%lx', to = '%lx', info = '%lx'",
					    i, pebs_lbrs[i].from, pebs_lbrs[i].to, pebs_lbrs[i].info);
			}
		}
		cur_record = cur_record + pebs_record_size;
		count++;
	} while (expected && (void *)cur_record < (void *)ds->pebs_index);

	/* On first mismatch, dump which of the three checks failed for this record. */
	if (!expected) {
		if (!pebs_idx_match)
			printf("FAIL: The applicable_counters (0x%lx) doesn't match with pmc_bitmask (0x%lx).\n",
			       pebs_rec->applicable_counters, bitmask);
		if (!pebs_size_match)
			printf("FAIL: The pebs_record_size (%d) doesn't match with expected record size (%d).\n",
			       pebs_record_size, get_pebs_record_size(pebs_data_cfg, use_adaptive));
		if (!data_cfg_match)
			printf("FAIL: The pebs_data_cfg (0x%lx) doesn't match with the effective MSR_PEBS_DATA_CFG (0x%lx).\n",
			       pebs_rec->format_size & GENMASK_ULL(47, 0), use_adaptive ? pebs_data_cfg : 0);
	}
}
334 
/*
 * Run the single-counter PEBS test for one GP or fixed counter: reset
 * state, enable PEBS on just that counter, run the workload, stop, and
 * verify the resulting records.
 *
 * Fix: pebs_bit must be u64.  Fixed counters map to bits
 * FIXED_CNT_INDEX (>= 32) and up, so BIT_ULL() of those positions does
 * not fit in an int — the old "int pebs_bit" truncated the mask to 0
 * and silently disabled every fixed-counter sub-test.
 */
static void check_one_counter(enum pmc_type type, unsigned int idx,
			      u64 pebs_data_cfg, bool use_adaptive)
{
	u64 pebs_bit = BIT_ULL(type == FIXED ? FIXED_CNT_INDEX + idx : idx);

	report_prefix_pushf("%s counter %d (0x%lx)",
			    type == FIXED ? "Extended Fixed" : "GP", idx, ctr_start_val);
	reset_pebs();
	pebs_enable(pebs_bit, pebs_data_cfg, use_adaptive);
	workload();
	pebs_disable(idx);
	check_pebs_records(pebs_bit, pebs_data_cfg, use_adaptive);
	report_prefix_pop();
}
349 
/*
 * Multi-counter variant: enable every counter selected in @bitmask at
 * once so more than one PEBS record is generated, run the workload,
 * then verify all records.  pebs_disable(0) takes the even path that
 * clears GLOBAL_CTRL while leaving PEBS_ENABLE set.
 */
static void check_multiple_counters(u64 bitmask, u64 pebs_data_cfg,
				    bool use_adaptive)
{
	reset_pebs();
	pebs_enable(bitmask, pebs_data_cfg, use_adaptive);
	workload2();
	pebs_disable(0);
	check_pebs_records(bitmask, pebs_data_cfg, use_adaptive);
}
360 
check_pebs_counters(u64 pebs_data_cfg,bool use_adaptive)361 static void check_pebs_counters(u64 pebs_data_cfg, bool use_adaptive)
362 {
363 	unsigned int idx;
364 	u64 bitmask = 0;
365 
366 	for (idx = 0; has_baseline && idx < pmu.nr_fixed_counters; idx++)
367 		check_one_counter(FIXED, idx, pebs_data_cfg, use_adaptive);
368 
369 	for (idx = 0; idx < max_nr_gp_events; idx++)
370 		check_one_counter(GP, idx, pebs_data_cfg, use_adaptive);
371 
372 	for (idx = 0; has_baseline && idx < pmu.nr_fixed_counters; idx++)
373 		bitmask |= BIT_ULL(FIXED_CNT_INDEX + idx);
374 	for (idx = 0; idx < max_nr_gp_events; idx += 2)
375 		bitmask |= BIT_ULL(idx);
376 	report_prefix_pushf("Multiple (0x%lx)", bitmask);
377 	check_multiple_counters(bitmask, pebs_data_cfg, use_adaptive);
378 	report_prefix_pop();
379 }
380 
/*
 * Known reasons for an absence of PEBS records:
 *	1. The selected event does not support PEBS;
 *	2. From a core PMU perspective, the vCPU and pCPU models are not the same;
 *	3. The guest counter has not yet overflowed, or it has been cross-mapped by the host;
 */
int main(int ac, char **av)
{
	unsigned int i, j;

	setup_vm();

	/* Only test as many GP events as there are GP counters. */
	max_nr_gp_events = MIN(pmu.nr_gp_counters, ARRAY_SIZE(intel_arch_events));

	printf("PMU version: %d\n", pmu.version);

	has_baseline = pmu_has_pebs_baseline();
	if (pmu_has_full_writes())
		pmu_activate_full_writes();

	/* Skip unless the platform provides a usable PEBS implementation. */
	if (!pmu.is_intel) {
		report_skip("PEBS requires Intel ICX or later, non-Intel detected");
		return report_summary();
	} else if (!pmu_has_pebs()) {
		report_skip("PEBS required PMU version 2, reported version is %d", pmu.version);
		return report_summary();
	} else if (pmu_pebs_format() < 4) {
		report_skip("This test supports PEBS_Record_Format >= 4 only");
		return report_summary();
	} else if (rdmsr(MSR_IA32_MISC_ENABLE) & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL) {
		report_skip("PEBS unavailable according to MISC_ENABLE");
		return report_summary();
	}

	printf("PEBS format: %d\n", pmu_pebs_format());
	printf("PEBS GP counters: %d\n", pmu.nr_gp_counters);
	printf("PEBS Fixed counters: %d\n", pmu.nr_fixed_counters);
	printf("PEBS baseline (Adaptive PEBS): %d\n", has_baseline);

	handle_irq(PMI_VECTOR, cnt_overflow);
	alloc_buffers();

	/* Outer loop: each counter start value; inner loop: each data-cfg combination. */
	for (i = 0; i < ARRAY_SIZE(counter_start_values); i++) {
		ctr_start_val = counter_start_values[i];
		/* Non-adaptive baseline run first. */
		check_pebs_counters(0, false);
		if (!has_baseline)
			continue;

		for (j = 0; j <= PEBS_DATACFG_MASK; j++) {
			u64 pebs_data_cfg = j;

			/* When LBRs are selected, also request the maximum entry count. */
			if (pebs_data_cfg & PEBS_DATACFG_LBRS)
				pebs_data_cfg |= ((MAX_NUM_LBR_ENTRY -1) << PEBS_DATACFG_LBR_SHIFT);

			report_prefix_pushf("Adaptive (0x%lx)", pebs_data_cfg);
			check_pebs_counters(pebs_data_cfg, true);
			report_prefix_pop();

			/* Same cfg but with adaptive off: the cfg must be ignored. */
			report_prefix_pushf("Ignored Adaptive (0x%lx)", pebs_data_cfg);
			check_pebs_counters(pebs_data_cfg, false);
			report_prefix_pop();
		}
	}

	free_buffers();

	return report_summary();
}
449