1 #include "x86/msr.h" 2 #include "x86/processor.h" 3 #include "x86/pmu.h" 4 #include "x86/isr.h" 5 #include "x86/apic.h" 6 #include "x86/apic-defs.h" 7 #include "x86/desc.h" 8 #include "alloc.h" 9 10 #include "vm.h" 11 #include "processor.h" 12 #include "vmalloc.h" 13 #include "alloc_page.h" 14 15 /* bits [63:48] provides the size of the current record in bytes */ 16 #define RECORD_SIZE_OFFSET 48 17 18 static unsigned int max_nr_gp_events; 19 static unsigned long *ds_bufer; 20 static unsigned long *pebs_buffer; 21 static u64 ctr_start_val; 22 static bool has_baseline; 23 24 struct debug_store { 25 u64 bts_buffer_base; 26 u64 bts_index; 27 u64 bts_absolute_maximum; 28 u64 bts_interrupt_threshold; 29 u64 pebs_buffer_base; 30 u64 pebs_index; 31 u64 pebs_absolute_maximum; 32 u64 pebs_interrupt_threshold; 33 u64 pebs_event_reset[64]; 34 }; 35 36 struct pebs_basic { 37 u64 format_size; 38 u64 ip; 39 u64 applicable_counters; 40 u64 tsc; 41 }; 42 43 struct pebs_meminfo { 44 u64 address; 45 u64 aux; 46 u64 latency; 47 u64 tsx_tuning; 48 }; 49 50 struct pebs_gprs { 51 u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di; 52 u64 r8, r9, r10, r11, r12, r13, r14, r15; 53 }; 54 55 struct pebs_xmm { 56 u64 xmm[16*2]; /* two entries for each register */ 57 }; 58 59 struct lbr_entry { 60 u64 from; 61 u64 to; 62 u64 info; 63 }; 64 65 enum pmc_type { 66 GP = 0, 67 FIXED, 68 }; 69 70 static uint32_t intel_arch_events[] = { 71 0x00c4, /* PERF_COUNT_HW_BRANCH_INSTRUCTIONS */ 72 0x00c5, /* PERF_COUNT_HW_BRANCH_MISSES */ 73 0x0300, /* PERF_COUNT_HW_REF_CPU_CYCLES */ 74 0x003c, /* PERF_COUNT_HW_CPU_CYCLES */ 75 0x00c0, /* PERF_COUNT_HW_INSTRUCTIONS */ 76 0x013c, /* PERF_COUNT_HW_BUS_CYCLES */ 77 0x4f2e, /* PERF_COUNT_HW_CACHE_REFERENCES */ 78 0x412e, /* PERF_COUNT_HW_CACHE_MISSES */ 79 }; 80 81 /* Iterating each counter value is a waste of time, pick a few typical values. */ 82 static u64 counter_start_values[] = { 83 /* if PEBS counter doesn't overflow at all */ 84 0, 85 0xfffffffffff0, 86 /* normal counter overflow to have PEBS records */ 87 0xfffffffffffe, 88 /* test whether emulated instructions should trigger PEBS */ 89 0xffffffffffff, 90 }; 91 92 static unsigned int get_adaptive_pebs_record_size(u64 pebs_data_cfg) 93 { 94 unsigned int sz = sizeof(struct pebs_basic); 95 96 if (!has_baseline) 97 return sz; 98 99 if (pebs_data_cfg & PEBS_DATACFG_MEMINFO) 100 sz += sizeof(struct pebs_meminfo); 101 if (pebs_data_cfg & PEBS_DATACFG_GPRS) 102 sz += sizeof(struct pebs_gprs); 103 if (pebs_data_cfg & PEBS_DATACFG_XMMS) 104 sz += sizeof(struct pebs_xmm); 105 if (pebs_data_cfg & PEBS_DATACFG_LBRS) 106 sz += MAX_NUM_LBR_ENTRY * sizeof(struct lbr_entry); 107 108 return sz; 109 } 110 111 static void cnt_overflow(isr_regs_t *regs) 112 { 113 apic_write(APIC_EOI, 0); 114 } 115 116 static inline void workload(void) 117 { 118 asm volatile( 119 "mov $0x0, %%eax\n" 120 "cmp $0x0, %%eax\n" 121 "jne label2\n" 122 "jne label2\n" 123 "jne label2\n" 124 "jne label2\n" 125 "mov $0x0, %%eax\n" 126 "cmp $0x0, %%eax\n" 127 "jne label2\n" 128 "jne label2\n" 129 "jne label2\n" 130 "jne label2\n" 131 "mov $0xa, %%eax\n" 132 "cpuid\n" 133 "mov $0xa, %%eax\n" 134 "cpuid\n" 135 "mov $0xa, %%eax\n" 136 "cpuid\n" 137 "mov $0xa, %%eax\n" 138 "cpuid\n" 139 "mov $0xa, %%eax\n" 140 "cpuid\n" 141 "mov $0xa, %%eax\n" 142 "cpuid\n" 143 "label2:\n" 144 : 145 : 146 : "eax", "ebx", "ecx", "edx"); 147 } 148 149 static inline void workload2(void) 150 { 151 asm volatile( 152 "mov $0x0, %%eax\n" 153 "cmp $0x0, %%eax\n" 154 "jne label3\n" 155 "jne label3\n" 156 "jne label3\n" 157 "jne label3\n" 158 "mov $0x0, %%eax\n" 159 "cmp $0x0, %%eax\n" 160 "jne label3\n" 161 "jne label3\n" 162 "jne label3\n" 163 "jne label3\n" 164 "mov $0xa, %%eax\n" 165 "cpuid\n" 166 "mov $0xa, %%eax\n" 167 "cpuid\n" 168 "mov $0xa, %%eax\n" 169 "cpuid\n" 170 "mov $0xa, %%eax\n" 171 "cpuid\n" 172 "mov $0xa, %%eax\n" 173 "cpuid\n" 174 "mov $0xa, %%eax\n" 175 "cpuid\n" 176 "label3:\n" 177 : 178 : 179 : "eax", "ebx", "ecx", "edx"); 180 } 181 182 static void alloc_buffers(void) 183 { 184 ds_bufer = alloc_page(); 185 force_4k_page(ds_bufer); 186 memset(ds_bufer, 0x0, PAGE_SIZE); 187 188 pebs_buffer = alloc_page(); 189 force_4k_page(pebs_buffer); 190 memset(pebs_buffer, 0x0, PAGE_SIZE); 191 } 192 193 static void free_buffers(void) 194 { 195 if (ds_bufer) 196 free_page(ds_bufer); 197 198 if (pebs_buffer) 199 free_page(pebs_buffer); 200 } 201 202 static void pebs_enable(u64 bitmask, u64 pebs_data_cfg) 203 { 204 static struct debug_store *ds; 205 u64 baseline_extra_ctrl = 0, fixed_ctr_ctrl = 0; 206 unsigned int idx; 207 208 if (has_baseline) 209 wrmsr(MSR_PEBS_DATA_CFG, pebs_data_cfg); 210 211 ds = (struct debug_store *)ds_bufer; 212 ds->pebs_index = ds->pebs_buffer_base = (unsigned long)pebs_buffer; 213 ds->pebs_absolute_maximum = (unsigned long)pebs_buffer + PAGE_SIZE; 214 ds->pebs_interrupt_threshold = ds->pebs_buffer_base + 215 get_adaptive_pebs_record_size(pebs_data_cfg); 216 217 for (idx = 0; idx < pmu.nr_fixed_counters; idx++) { 218 if (!(BIT_ULL(FIXED_CNT_INDEX + idx) & bitmask)) 219 continue; 220 if (has_baseline) 221 baseline_extra_ctrl = BIT(FIXED_CNT_INDEX + idx * 4); 222 wrmsr(MSR_PERF_FIXED_CTRx(idx), ctr_start_val); 223 fixed_ctr_ctrl |= (0xbULL << (idx * 4) | baseline_extra_ctrl); 224 } 225 if (fixed_ctr_ctrl) 226 wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, fixed_ctr_ctrl); 227 228 for (idx = 0; idx < max_nr_gp_events; idx++) { 229 if (!(BIT_ULL(idx) & bitmask)) 230 continue; 231 if (has_baseline) 232 baseline_extra_ctrl = ICL_EVENTSEL_ADAPTIVE; 233 wrmsr(MSR_GP_EVENT_SELECTx(idx), EVNTSEL_EN | EVNTSEL_OS | EVNTSEL_USR | 234 intel_arch_events[idx] | baseline_extra_ctrl); 235 wrmsr(MSR_GP_COUNTERx(idx), ctr_start_val); 236 } 237 238 wrmsr(MSR_IA32_DS_AREA, (unsigned long)ds_bufer); 239 wrmsr(MSR_IA32_PEBS_ENABLE, bitmask); 240 wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, bitmask); 241 } 242 243 static void reset_pebs(void) 244 { 245 memset(ds_bufer, 0x0, PAGE_SIZE); 246 memset(pebs_buffer, 0x0, PAGE_SIZE); 247 wrmsr(MSR_IA32_PEBS_ENABLE, 0); 248 wrmsr(MSR_IA32_DS_AREA, 0); 249 if (has_baseline) 250 wrmsr(MSR_PEBS_DATA_CFG, 0); 251 252 wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); 253 wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_STATUS)); 254 255 pmu_reset_all_counters(); 256 } 257 258 static void pebs_disable(unsigned int idx) 259 { 260 /* 261 * If we only clear the PEBS_ENABLE bit, the counter will continue to increment. 262 * In this very tiny time window, if the counter overflows no pebs record will be generated, 263 * but a normal counter irq. Test this fully with two ways. 264 */ 265 if (idx % 2) 266 wrmsr(MSR_IA32_PEBS_ENABLE, 0); 267 268 wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); 269 } 270 271 static void check_pebs_records(u64 bitmask, u64 pebs_data_cfg) 272 { 273 struct pebs_basic *pebs_rec = (struct pebs_basic *)pebs_buffer; 274 struct debug_store *ds = (struct debug_store *)ds_bufer; 275 unsigned int pebs_record_size = get_adaptive_pebs_record_size(pebs_data_cfg); 276 unsigned int count = 0; 277 bool expected, pebs_idx_match, pebs_size_match, data_cfg_match; 278 void *cur_record; 279 280 expected = (ds->pebs_index == ds->pebs_buffer_base) && !pebs_rec->format_size; 281 if (!(rdmsr(MSR_CORE_PERF_GLOBAL_STATUS) & GLOBAL_STATUS_BUFFER_OVF)) { 282 report(expected, "No OVF irq, none PEBS records."); 283 return; 284 } 285 286 if (expected) { 287 report(!expected, "A OVF irq, but none PEBS records."); 288 return; 289 } 290 291 expected = ds->pebs_index >= ds->pebs_interrupt_threshold; 292 cur_record = (void *)pebs_buffer; 293 do { 294 pebs_rec = (struct pebs_basic *)cur_record; 295 pebs_record_size = pebs_rec->format_size >> RECORD_SIZE_OFFSET; 296 pebs_idx_match = 297 pebs_rec->applicable_counters & bitmask; 298 pebs_size_match = 299 pebs_record_size == get_adaptive_pebs_record_size(pebs_data_cfg); 300 data_cfg_match = 301 (pebs_rec->format_size & GENMASK_ULL(47, 0)) == pebs_data_cfg; 302 expected = pebs_idx_match && pebs_size_match && data_cfg_match; 303 report(expected, 304 "PEBS record (written seq %d) is verified (including size, counters and cfg).", count); 305 cur_record = cur_record + pebs_record_size; 306 count++; 307 } while (expected && (void *)cur_record < (void *)ds->pebs_index); 308 309 if (!expected) { 310 if (!pebs_idx_match) 311 printf("FAIL: The applicable_counters (0x%lx) doesn't match with pmc_bitmask (0x%lx).\n", 312 pebs_rec->applicable_counters, bitmask); 313 if (!pebs_size_match) 314 printf("FAIL: The pebs_record_size (%d) doesn't match with MSR_PEBS_DATA_CFG (%d).\n", 315 pebs_record_size, get_adaptive_pebs_record_size(pebs_data_cfg)); 316 if (!data_cfg_match) 317 printf("FAIL: The pebs_data_cfg (0x%lx) doesn't match with MSR_PEBS_DATA_CFG (0x%lx).\n", 318 pebs_rec->format_size & 0xffffffffffff, pebs_data_cfg); 319 } 320 } 321 322 static void check_one_counter(enum pmc_type type, 323 unsigned int idx, u64 pebs_data_cfg) 324 { 325 int pebs_bit = BIT_ULL(type == FIXED ? FIXED_CNT_INDEX + idx : idx); 326 327 report_prefix_pushf("%s counter %d (0x%lx)", 328 type == FIXED ? "Extended Fixed" : "GP", idx, ctr_start_val); 329 reset_pebs(); 330 pebs_enable(pebs_bit, pebs_data_cfg); 331 workload(); 332 pebs_disable(idx); 333 check_pebs_records(pebs_bit, pebs_data_cfg); 334 report_prefix_pop(); 335 } 336 337 /* more than one PEBS records will be generated. */ 338 static void check_multiple_counters(u64 bitmask, u64 pebs_data_cfg) 339 { 340 reset_pebs(); 341 pebs_enable(bitmask, pebs_data_cfg); 342 workload2(); 343 pebs_disable(0); 344 check_pebs_records(bitmask, pebs_data_cfg); 345 } 346 347 static void check_pebs_counters(u64 pebs_data_cfg) 348 { 349 unsigned int idx; 350 u64 bitmask = 0; 351 352 for (idx = 0; has_baseline && idx < pmu.nr_fixed_counters; idx++) 353 check_one_counter(FIXED, idx, pebs_data_cfg); 354 355 for (idx = 0; idx < max_nr_gp_events; idx++) 356 check_one_counter(GP, idx, pebs_data_cfg); 357 358 for (idx = 0; has_baseline && idx < pmu.nr_fixed_counters; idx++) 359 bitmask |= BIT_ULL(FIXED_CNT_INDEX + idx); 360 for (idx = 0; idx < max_nr_gp_events; idx += 2) 361 bitmask |= BIT_ULL(idx); 362 report_prefix_pushf("Multiple (0x%lx)", bitmask); 363 check_multiple_counters(bitmask, pebs_data_cfg); 364 report_prefix_pop(); 365 } 366 367 /* 368 * Known reasons for none PEBS records: 369 * 1. The selected event does not support PEBS; 370 * 2. From a core pmu perspective, the vCPU and pCPU models are not same; 371 * 3. Guest counter has not yet overflowed or been cross-mapped by the host; 372 */ 373 int main(int ac, char **av) 374 { 375 unsigned int i, j; 376 377 setup_vm(); 378 379 max_nr_gp_events = MIN(pmu.nr_gp_counters, ARRAY_SIZE(intel_arch_events)); 380 381 printf("PMU version: %d\n", pmu.version); 382 383 has_baseline = pmu_has_pebs_baseline(); 384 if (pmu_has_full_writes()) 385 pmu_activate_full_writes(); 386 387 if (!pmu.is_intel) { 388 report_skip("PEBS requires Intel ICX or later, non-Intel detected"); 389 return report_summary(); 390 } else if (!pmu_has_pebs()) { 391 report_skip("PEBS required PMU version 2, reported version is %d", pmu.version); 392 return report_summary(); 393 } else if (!pmu_pebs_format()) { 394 report_skip("PEBS not enumerated in PERF_CAPABILITIES"); 395 return report_summary(); 396 } else if (rdmsr(MSR_IA32_MISC_ENABLE) & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL) { 397 report_skip("PEBS unavailable according to MISC_ENABLE"); 398 return report_summary(); 399 } 400 401 printf("PEBS format: %d\n", pmu_pebs_format()); 402 printf("PEBS GP counters: %d\n", pmu.nr_gp_counters); 403 printf("PEBS Fixed counters: %d\n", pmu.nr_fixed_counters); 404 printf("PEBS baseline (Adaptive PEBS): %d\n", has_baseline); 405 406 handle_irq(PMI_VECTOR, cnt_overflow); 407 alloc_buffers(); 408 409 for (i = 0; i < ARRAY_SIZE(counter_start_values); i++) { 410 ctr_start_val = counter_start_values[i]; 411 check_pebs_counters(0); 412 if (!has_baseline) 413 continue; 414 415 for (j = 0; j <= PEBS_DATACFG_MASK; j++) { 416 u64 pebs_data_cfg = j; 417 418 if (pebs_data_cfg & PEBS_DATACFG_LBRS) 419 pebs_data_cfg |= ((MAX_NUM_LBR_ENTRY -1) << PEBS_DATACFG_LBR_SHIFT); 420 421 report_prefix_pushf("Adaptive (0x%lx)", pebs_data_cfg); 422 check_pebs_counters(pebs_data_cfg); 423 report_prefix_pop(); 424 } 425 } 426 427 free_buffers(); 428 429 return report_summary(); 430 } 431