#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/pmu.h"
#include "x86/isr.h"
#include "x86/apic.h"
#include "x86/apic-defs.h"
#include "x86/desc.h"
#include "alloc.h"

#include "vm.h"
#include "processor.h"
#include "vmalloc.h"
#include "alloc_page.h"

/* bits [63:48] provide the size of the current record in bytes */
#define RECORD_SIZE_OFFSET	48

static unsigned int max_nr_gp_events;
static unsigned long *ds_buffer;
static unsigned long *pebs_buffer;
static u64 ctr_start_val;
static bool has_baseline;

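/*
 * 64-bit Debug Store (DS) management area: the first four fields track the
 * BTS buffer, the rest track the PEBS buffer and the per-counter reset
 * values (see the Intel SDM description of the DS save area).
 */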
struct debug_store {
	u64 bts_buffer_base;
	u64 bts_index;
	u64 bts_absolute_maximum;
	u64 bts_interrupt_threshold;
	u64 pebs_buffer_base;
	u64 pebs_index;
	u64 pebs_absolute_maximum;
	u64 pebs_interrupt_threshold;
	u64 pebs_event_reset[64];
};

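/*
 * Adaptive PEBS record layout: the basic group is always present and its
 * format_size field encodes the record size (bits 63:48) plus the effective
 * data cfg (bits 47:0); the meminfo, GPR, XMM and LBR groups below are
 * appended only when enabled via MSR_PEBS_DATA_CFG.
 */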
struct pebs_basic {
	u64 format_size;
	u64 ip;
	u64 applicable_counters;
	u64 tsc;
};

struct pebs_meminfo {
	u64 address;
	u64 aux;
	u64 latency;
	u64 tsx_tuning;
};

struct pebs_gprs {
	u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di;
	u64 r8, r9, r10, r11, r12, r13, r14, r15;
};

struct pebs_xmm {
	u64 xmm[16*2];	/* two entries for each register */
};

struct lbr_entry {
	u64 from;
	u64 to;
	u64 info;
};

enum pmc_type {
	GP = 0,
	FIXED,
};

static uint32_t intel_arch_events[] = {
	0x00c4,	/* PERF_COUNT_HW_BRANCH_INSTRUCTIONS */
	0x00c5,	/* PERF_COUNT_HW_BRANCH_MISSES */
	0x0300,	/* PERF_COUNT_HW_REF_CPU_CYCLES */
	0x003c,	/* PERF_COUNT_HW_CPU_CYCLES */
	0x00c0,	/* PERF_COUNT_HW_INSTRUCTIONS */
	0x013c,	/* PERF_COUNT_HW_BUS_CYCLES */
	0x4f2e,	/* PERF_COUNT_HW_CACHE_REFERENCES */
	0x412e,	/* PERF_COUNT_HW_CACHE_MISSES */
};

/* Iterating over every counter start value is a waste of time, so pick a few typical values. */
static u64 counter_start_values[] = {
	/* if PEBS counter doesn't overflow at all */
	0,
	0xfffffffffff0,
	/* normal counter overflow to have PEBS records */
	0xfffffffffffe,
	/* test whether emulated instructions should trigger PEBS */
	0xffffffffffff,
};

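/* Size in bytes of a single PEBS record for the given MSR_PEBS_DATA_CFG value. */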
static unsigned int get_pebs_record_size(u64 pebs_data_cfg, bool use_adaptive)
{
	unsigned int sz = sizeof(struct pebs_basic);

	if (!use_adaptive)
		return sz;

	if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
		sz += sizeof(struct pebs_meminfo);
	if (pebs_data_cfg & PEBS_DATACFG_GPRS)
		sz += sizeof(struct pebs_gprs);
	if (pebs_data_cfg & PEBS_DATACFG_XMMS)
		sz += sizeof(struct pebs_xmm);
	if (pebs_data_cfg & PEBS_DATACFG_LBRS)
		sz += MAX_NUM_LBR_ENTRY * sizeof(struct lbr_entry);

	return sz;
}

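/*
 * PMI handler: the APIC LVT performance counter entry is masked by hardware
 * when the interrupt is delivered, so unmask it again and signal EOI.
 */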
static void cnt_overflow(isr_regs_t *regs)
{
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
	apic_write(APIC_EOI, 0);
}

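/*
 * Branch-heavy workload plus a handful of cpuid instructions, used to push
 * the armed counters past their start values; cpuid is also expected to be
 * intercepted/emulated when run inside a guest, covering the "emulated
 * instructions" counter start value above.
 */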
static inline void workload(void)
{
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		"jne label2\n"
		"jne label2\n"
		"jne label2\n"
		"jne label2\n"
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		"jne label2\n"
		"jne label2\n"
		"jne label2\n"
		"jne label2\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label2:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");
}

static inline void workload2(void)
{
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		"jne label3\n"
		"jne label3\n"
		"jne label3\n"
		"jne label3\n"
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		"jne label3\n"
		"jne label3\n"
		"jne label3\n"
		"jne label3\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label3:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");
}

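/* Allocate one zeroed 4K page each for the DS management area and the PEBS output buffer. */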
static void alloc_buffers(void)
{
	ds_buffer = alloc_page();
	force_4k_page(ds_buffer);
	memset(ds_buffer, 0x0, PAGE_SIZE);

	pebs_buffer = alloc_page();
	force_4k_page(pebs_buffer);
	memset(pebs_buffer, 0x0, PAGE_SIZE);
}

static void free_buffers(void)
{
	if (ds_buffer)
		free_page(ds_buffer);

	if (pebs_buffer)
		free_page(pebs_buffer);
}

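/*
 * Set up the DS area so the interrupt threshold is reached after a single
 * PEBS record, arm the selected fixed and GP counters with ctr_start_val,
 * then enable PEBS and the global counter controls.
 */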
static void pebs_enable(u64 bitmask, u64 pebs_data_cfg, bool use_adaptive)
{
	static struct debug_store *ds;
	u64 adaptive_ctrl = 0, fixed_ctr_ctrl = 0;
	unsigned int idx;

	if (has_baseline)
		wrmsr(MSR_PEBS_DATA_CFG, pebs_data_cfg);

	ds = (struct debug_store *)ds_buffer;
	ds->pebs_index = ds->pebs_buffer_base = (unsigned long)pebs_buffer;
	ds->pebs_absolute_maximum = (unsigned long)pebs_buffer + PAGE_SIZE;
	ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
		get_pebs_record_size(pebs_data_cfg, use_adaptive);

	for (idx = 0; idx < pmu.nr_fixed_counters; idx++) {
		if (!(BIT_ULL(FIXED_CNT_INDEX + idx) & bitmask))
			continue;
		if (use_adaptive)
			adaptive_ctrl = BIT(FIXED_CNT_INDEX + idx * 4);
		wrmsr(MSR_PERF_FIXED_CTRx(idx), ctr_start_val);
		fixed_ctr_ctrl |= (0xbULL << (idx * 4) | adaptive_ctrl);
	}
	if (fixed_ctr_ctrl)
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, fixed_ctr_ctrl);

	for (idx = 0; idx < max_nr_gp_events; idx++) {
		if (!(BIT_ULL(idx) & bitmask))
			continue;
		if (use_adaptive)
			adaptive_ctrl = ICL_EVENTSEL_ADAPTIVE;
		wrmsr(MSR_GP_EVENT_SELECTx(idx), EVNTSEL_EN | EVNTSEL_OS | EVNTSEL_USR |
						 intel_arch_events[idx] | adaptive_ctrl);
		wrmsr(MSR_GP_COUNTERx(idx), ctr_start_val);
	}

	wrmsr(MSR_IA32_DS_AREA, (unsigned long)ds_buffer);
	wrmsr(MSR_IA32_PEBS_ENABLE, bitmask);
	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, bitmask);
}

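/* Clear both buffers, all PEBS/DS related MSRs and any pending overflow status. */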
static void reset_pebs(void)
{
	memset(ds_buffer, 0x0, PAGE_SIZE);
	memset(pebs_buffer, 0x0, PAGE_SIZE);
	wrmsr(MSR_IA32_PEBS_ENABLE, 0);
	wrmsr(MSR_IA32_DS_AREA, 0);
	if (has_baseline)
		wrmsr(MSR_PEBS_DATA_CFG, 0);

	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
	wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));

	pmu_reset_all_counters();
}

static void pebs_disable(unsigned int idx)
{
	/*
	 * If we only clear the PEBS_ENABLE bit, the counter will continue to count.
	 * In that tiny time window, if the counter overflows, no PEBS record is
	 * generated, only a normal counter interrupt. Test both ways.
	 */
	if (idx % 2)
		wrmsr(MSR_IA32_PEBS_ENABLE, 0);

	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}

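/*
 * Walk the records between pebs_buffer_base and the final pebs_index and
 * check each record's applicable counters, size and data cfg against what
 * was programmed.
 */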
static void check_pebs_records(u64 bitmask, u64 pebs_data_cfg, bool use_adaptive)
{
	struct pebs_basic *pebs_rec = (struct pebs_basic *)pebs_buffer;
	struct debug_store *ds = (struct debug_store *)ds_buffer;
	unsigned int pebs_record_size;
	unsigned int count = 0;
	bool expected, pebs_idx_match, pebs_size_match, data_cfg_match;
	void *cur_record;

	expected = (ds->pebs_index == ds->pebs_buffer_base) && !pebs_rec->format_size;
	if (!(rdmsr(MSR_CORE_PERF_GLOBAL_STATUS) & GLOBAL_STATUS_BUFFER_OVF)) {
		report(expected, "No OVF irq, no PEBS records.");
		return;
	}

	if (expected) {
		report(!expected, "An OVF irq, but no PEBS records.");
		return;
	}

	expected = ds->pebs_index >= ds->pebs_interrupt_threshold;
	cur_record = (void *)pebs_buffer;
	do {
		pebs_rec = (struct pebs_basic *)cur_record;
		pebs_record_size = pebs_rec->format_size >> RECORD_SIZE_OFFSET;
		pebs_idx_match = pebs_rec->applicable_counters & bitmask;
		pebs_size_match = pebs_record_size == get_pebs_record_size(pebs_data_cfg, use_adaptive);
		data_cfg_match = (pebs_rec->format_size & GENMASK_ULL(47, 0)) ==
				 (use_adaptive ? pebs_data_cfg : 0);
		expected = pebs_idx_match && pebs_size_match && data_cfg_match;
		report(expected,
		       "PEBS record (written seq %d) is verified (including size, counters and cfg).", count);
		if (use_adaptive && (pebs_data_cfg & PEBS_DATACFG_LBRS)) {
			unsigned int lbrs_offset = get_pebs_record_size(pebs_data_cfg & ~PEBS_DATACFG_LBRS, true);
			struct lbr_entry *pebs_lbrs = cur_record + lbrs_offset;
			int i;

			for (i = 0; i < MAX_NUM_LBR_ENTRY; i++) {
				if (!pebs_lbrs[i].from && !pebs_lbrs[i].to)
					continue;

				report_fail("PEBS LBR record %u isn't empty, got from = '%lx', to = '%lx', info = '%lx'",
					    i, pebs_lbrs[i].from, pebs_lbrs[i].to, pebs_lbrs[i].info);
			}
		}
		cur_record = cur_record + pebs_record_size;
		count++;
	} while (expected && (void *)cur_record < (void *)ds->pebs_index);

	if (!expected) {
		if (!pebs_idx_match)
			printf("FAIL: The applicable_counters (0x%lx) doesn't match with pmc_bitmask (0x%lx).\n",
			       pebs_rec->applicable_counters, bitmask);
		if (!pebs_size_match)
			printf("FAIL: The pebs_record_size (%d) doesn't match with expected record size (%d).\n",
			       pebs_record_size, get_pebs_record_size(pebs_data_cfg, use_adaptive));
		if (!data_cfg_match)
			printf("FAIL: The pebs_data_cfg (0x%lx) doesn't match with the effective MSR_PEBS_DATA_CFG (0x%lx).\n",
			       pebs_rec->format_size & GENMASK_ULL(47, 0), use_adaptive ? pebs_data_cfg : 0);
	}
}

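/* Enable PEBS on a single fixed or GP counter, run the workload and verify the records. */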
static void check_one_counter(enum pmc_type type, unsigned int idx,
			      u64 pebs_data_cfg, bool use_adaptive)
{
	int pebs_bit = BIT_ULL(type == FIXED ? FIXED_CNT_INDEX + idx : idx);

	report_prefix_pushf("%s counter %d (0x%lx)",
			    type == FIXED ? "Extended Fixed" : "GP", idx, ctr_start_val);
	reset_pebs();
	pebs_enable(pebs_bit, pebs_data_cfg, use_adaptive);
	workload();
	pebs_disable(idx);
	check_pebs_records(pebs_bit, pebs_data_cfg, use_adaptive);
	report_prefix_pop();
}

/* More than one PEBS record will be generated. */
static void check_multiple_counters(u64 bitmask, u64 pebs_data_cfg,
				    bool use_adaptive)
{
	reset_pebs();
	pebs_enable(bitmask, pebs_data_cfg, use_adaptive);
	workload2();
	pebs_disable(0);
	check_pebs_records(bitmask, pebs_data_cfg, use_adaptive);
}

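/*
 * Test each GP counter individually (plus each fixed counter when PEBS
 * baseline is supported), then a combination of counters at once.
 */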
static void check_pebs_counters(u64 pebs_data_cfg, bool use_adaptive)
{
	unsigned int idx;
	u64 bitmask = 0;

	for (idx = 0; has_baseline && idx < pmu.nr_fixed_counters; idx++)
		check_one_counter(FIXED, idx, pebs_data_cfg, use_adaptive);

	for (idx = 0; idx < max_nr_gp_events; idx++)
		check_one_counter(GP, idx, pebs_data_cfg, use_adaptive);

	for (idx = 0; has_baseline && idx < pmu.nr_fixed_counters; idx++)
		bitmask |= BIT_ULL(FIXED_CNT_INDEX + idx);
	for (idx = 0; idx < max_nr_gp_events; idx += 2)
		bitmask |= BIT_ULL(idx);
	report_prefix_pushf("Multiple (0x%lx)", bitmask);
	check_multiple_counters(bitmask, pebs_data_cfg, use_adaptive);
	report_prefix_pop();
}

/*
 * Known reasons for missing PEBS records:
 * 1. The selected event does not support PEBS;
 * 2. From a core PMU perspective, the vCPU and pCPU models are not the same;
 * 3. The guest counter has not yet overflowed or has been cross-mapped by the host.
 */
int main(int ac, char **av)
{
	unsigned int i, j;

	setup_vm();

	max_nr_gp_events = MIN(pmu.nr_gp_counters, ARRAY_SIZE(intel_arch_events));

	printf("PMU version: %d\n", pmu.version);

	has_baseline = pmu_has_pebs_baseline();
	if (pmu_has_full_writes())
		pmu_activate_full_writes();

	if (!pmu.is_intel) {
		report_skip("PEBS requires Intel ICX or later, non-Intel detected");
		return report_summary();
	} else if (!pmu_has_pebs()) {
		report_skip("PEBS requires PMU version 2 or later, reported version is %d", pmu.version);
		return report_summary();
	} else if (pmu_pebs_format() < 4) {
		report_skip("This test supports PEBS_Record_Format >= 4 only");
		return report_summary();
	} else if (rdmsr(MSR_IA32_MISC_ENABLE) & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL) {
		report_skip("PEBS unavailable according to MISC_ENABLE");
		return report_summary();
	}

	printf("PEBS format: %d\n", pmu_pebs_format());
	printf("PEBS GP counters: %d\n", pmu.nr_gp_counters);
	printf("PEBS Fixed counters: %d\n", pmu.nr_fixed_counters);
	printf("PEBS baseline (Adaptive PEBS): %d\n", has_baseline);

	handle_irq(PMI_VECTOR, cnt_overflow);
	alloc_buffers();

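	/*
	 * For each counter start value: test the non-adaptive case first, then,
	 * if PEBS baseline is supported, walk every PEBS_DATACFG combination
	 * both with adaptive PEBS enabled and with the adaptive bits ignored.
	 */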
	for (i = 0; i < ARRAY_SIZE(counter_start_values); i++) {
		ctr_start_val = counter_start_values[i];
		check_pebs_counters(0, false);
		if (!has_baseline)
			continue;

		for (j = 0; j <= PEBS_DATACFG_MASK; j++) {
			u64 pebs_data_cfg = j;

			if (pebs_data_cfg & PEBS_DATACFG_LBRS)
				pebs_data_cfg |= ((MAX_NUM_LBR_ENTRY - 1) << PEBS_DATACFG_LBR_SHIFT);

			report_prefix_pushf("Adaptive (0x%lx)", pebs_data_cfg);
			check_pebs_counters(pebs_data_cfg, true);
			report_prefix_pop();

			report_prefix_pushf("Ignored Adaptive (0x%lx)", pebs_data_cfg);
			check_pebs_counters(pebs_data_cfg, false);
			report_prefix_pop();
		}
	}

	free_buffers();

	return report_summary();
}