1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * AMD specific. Provide textual annotation for IBS raw sample data. 4 */ 5 6 #include <unistd.h> 7 #include <stdio.h> 8 #include <string.h> 9 #include <inttypes.h> 10 11 #include <linux/string.h> 12 #include "../../arch/x86/include/asm/amd/ibs.h" 13 14 #include "debug.h" 15 #include "session.h" 16 #include "evlist.h" 17 #include "sample-raw.h" 18 #include "util/sample.h" 19 20 static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type; 21 static bool zen4_ibs_extensions; 22 static bool ldlat_cap; 23 static bool dtlb_pgsize_cap; 24 25 static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg) 26 { 27 const char * const ic_miss_strs[] = { 28 " IcMiss 0", 29 " IcMiss 1", 30 }; 31 const char * const l1tlb_pgsz_strs[] = { 32 " L1TlbPgSz 4KB", 33 " L1TlbPgSz 2MB", 34 " L1TlbPgSz 1GB", 35 " L1TlbPgSz RESERVED" 36 }; 37 const char * const l1tlb_pgsz_strs_erratum1347[] = { 38 " L1TlbPgSz 4KB", 39 " L1TlbPgSz 16KB", 40 " L1TlbPgSz 2MB", 41 " L1TlbPgSz 1GB" 42 }; 43 const char *ic_miss_str = NULL; 44 const char *l1tlb_pgsz_str = NULL; 45 char l3_miss_str[sizeof(" L3MissOnly _ FetchOcMiss _ FetchL3Miss _")] = ""; 46 47 if (cpu_family == 0x19 && cpu_model < 0x10) { 48 /* 49 * Erratum #1238 workaround is to ignore MSRC001_1030[IbsIcMiss] 50 * Erratum #1347 workaround is to use table provided in erratum 51 */ 52 if (reg.phy_addr_valid) 53 l1tlb_pgsz_str = l1tlb_pgsz_strs_erratum1347[reg.l1tlb_pgsz]; 54 } else { 55 if (reg.phy_addr_valid) 56 l1tlb_pgsz_str = l1tlb_pgsz_strs[reg.l1tlb_pgsz]; 57 ic_miss_str = ic_miss_strs[reg.ic_miss]; 58 } 59 60 if (zen4_ibs_extensions) { 61 snprintf(l3_miss_str, sizeof(l3_miss_str), 62 " L3MissOnly %d FetchOcMiss %d FetchL3Miss %d", 63 reg.l3_miss_only, reg.fetch_oc_miss, reg.fetch_l3_miss); 64 } 65 66 printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s " 67 "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s%s\n", 68 reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat, 69 reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "", 70 reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss, 71 reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : "", 72 l3_miss_str); 73 } 74 75 static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg) 76 { 77 printf("ic_ibs_ext_ctl:\t%016llx IbsItlbRefillLat %3d\n", reg.val, reg.itlb_refill_lat); 78 } 79 80 static void pr_ibs_op_ctl(union ibs_op_ctl reg) 81 { 82 char l3_miss_only[sizeof(" L3MissOnly _")] = ""; 83 char ldlat[sizeof(" LdLatThrsh __ LdLatEn _")] = ""; 84 85 if (zen4_ibs_extensions) 86 snprintf(l3_miss_only, sizeof(l3_miss_only), " L3MissOnly %d", reg.l3_miss_only); 87 88 if (ldlat_cap) { 89 snprintf(ldlat, sizeof(ldlat), " LdLatThrsh %2d LdLatEn %d", 90 reg.ldlat_thrsh, reg.ldlat_en); 91 } 92 93 printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d%s\n", 94 reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, l3_miss_only, 95 reg.op_en, reg.op_val, reg.cnt_ctl, 96 reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt, ldlat); 97 } 98 99 static void pr_ibs_op_data(union ibs_op_data reg) 100 { 101 printf("ibs_op_data:\t%016llx CompToRetCtr %5d TagToRetCtr %5d%s%s%s BrnRet %d " 102 " RipInvalid %d BrnFuse %d Microcode %d\n", 103 reg.val, reg.comp_to_ret_ctr, reg.tag_to_ret_ctr, 104 reg.op_brn_ret ? (reg.op_return ? " OpReturn 1" : " OpReturn 0") : "", 105 reg.op_brn_ret ? (reg.op_brn_taken ? " OpBrnTaken 1" : " OpBrnTaken 0") : "", 106 reg.op_brn_ret ? (reg.op_brn_misp ? " OpBrnMisp 1" : " OpBrnMisp 0") : "", 107 reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode); 108 } 109 110 static void pr_ibs_op_data2_extended(union ibs_op_data2 reg) 111 { 112 static const char * const data_src_str[] = { 113 "", 114 " DataSrc 1=Local L3 or other L1/L2 in CCX", 115 " DataSrc 2=Another CCX cache in the same NUMA node", 116 " DataSrc 3=DRAM", 117 " DataSrc 4=(reserved)", 118 " DataSrc 5=Another CCX cache in a different NUMA node", 119 " DataSrc 6=Long-latency DIMM", 120 " DataSrc 7=MMIO/Config/PCI/APIC", 121 " DataSrc 8=Extension Memory", 122 " DataSrc 9=(reserved)", 123 " DataSrc 10=(reserved)", 124 " DataSrc 11=(reserved)", 125 " DataSrc 12=Coherent Memory of a different processor type", 126 /* 13 to 31 are reserved. Avoid printing them. */ 127 }; 128 int data_src = (reg.data_src_hi << 3) | reg.data_src_lo; 129 130 printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val, 131 (data_src == 1 || data_src == 2 || data_src == 5) ? 132 (reg.cache_hit_st ? "CacheHitSt 1=O-State " : "CacheHitSt 0=M-state ") : "", 133 reg.rmt_node, 134 data_src < (int)ARRAY_SIZE(data_src_str) ? data_src_str[data_src] : ""); 135 } 136 137 static void pr_ibs_op_data2_default(union ibs_op_data2 reg) 138 { 139 static const char * const data_src_str[] = { 140 "", 141 " DataSrc 1=(reserved)", 142 " DataSrc 2=Local node cache", 143 " DataSrc 3=DRAM", 144 " DataSrc 4=Remote node cache", 145 " DataSrc 5=(reserved)", 146 " DataSrc 6=(reserved)", 147 " DataSrc 7=Other" 148 }; 149 150 printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val, 151 reg.data_src_lo == 2 ? (reg.cache_hit_st ? "CacheHitSt 1=O-State " 152 : "CacheHitSt 0=M-state ") : "", 153 reg.rmt_node, data_src_str[reg.data_src_lo]); 154 } 155 156 static void pr_ibs_op_data2(union ibs_op_data2 reg) 157 { 158 if (zen4_ibs_extensions) 159 return pr_ibs_op_data2_extended(reg); 160 pr_ibs_op_data2_default(reg); 161 } 162 163 static void pr_ibs_op_data3(union ibs_op_data3 reg) 164 { 165 static const char * const dc_page_sizes[] = { 166 " 4K", 167 " 2M", 168 " 1G", 169 " ??", 170 }; 171 char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = ""; 172 char dc_l1_l2tlb_miss_str[sizeof(" DcL1TlbMiss _ DcL2TlbMiss _")] = ""; 173 char dc_l1tlb_hit_str[sizeof(" DcL1TlbHit2M _ DcL1TlbHit1G _")] = ""; 174 char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = ""; 175 char dc_l2tlb_hit_2m_str[sizeof(" DcL2TlbHit2M _")] = ""; 176 char dc_l2tlb_hit_1g_str[sizeof(" DcL2TlbHit1G _")] = ""; 177 char dc_page_size_str[sizeof(" DcPageSize ____")] = ""; 178 char l2_miss_str[sizeof(" L2Miss _")] = ""; 179 180 /* 181 * Erratum #1293 182 * Ignore L2Miss and OpDcMissOpenMemReqs (and opdata2) if DcMissNoMabAlloc or SwPf set 183 */ 184 if (!(cpu_family == 0x19 && cpu_model < 0x10 && (reg.dc_miss_no_mab_alloc || reg.sw_pf))) { 185 snprintf(l2_miss_str, sizeof(l2_miss_str), " L2Miss %d", reg.l2_miss); 186 snprintf(op_dc_miss_open_mem_reqs_str, sizeof(op_dc_miss_open_mem_reqs_str), 187 " OpDcMissOpenMemReqs %2d", reg.op_dc_miss_open_mem_reqs); 188 } 189 190 if (reg.op_mem_width) 191 snprintf(op_mem_width_str, sizeof(op_mem_width_str), 192 " OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1)); 193 194 if (dtlb_pgsize_cap) { 195 if (reg.dc_phy_addr_valid) { 196 int idx = (reg.dc_l1tlb_hit_1g << 1) | reg.dc_l1tlb_hit_2m; 197 198 snprintf(dc_l1_l2tlb_miss_str, sizeof(dc_l1_l2tlb_miss_str), 199 " DcL1TlbMiss %d DcL2TlbMiss %d", 200 reg.dc_l1tlb_miss, reg.dc_l2tlb_miss); 201 snprintf(dc_page_size_str, sizeof(dc_page_size_str), 202 " DcPageSize %4s", dc_page_sizes[idx]); 203 } 204 } else { 205 snprintf(dc_l1_l2tlb_miss_str, sizeof(dc_l1_l2tlb_miss_str), 206 " DcL1TlbMiss %d DcL2TlbMiss %d", 207 reg.dc_l1tlb_miss, reg.dc_l2tlb_miss); 208 snprintf(dc_l1tlb_hit_str, sizeof(dc_l1tlb_hit_str), 209 " DcL1TlbHit2M %d DcL1TlbHit1G %d", 210 reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g); 211 snprintf(dc_l2tlb_hit_2m_str, sizeof(dc_l2tlb_hit_2m_str), 212 " DcL2TlbHit2M %d", reg.dc_l2tlb_hit_2m); 213 snprintf(dc_l2tlb_hit_1g_str, sizeof(dc_l2tlb_hit_1g_str), 214 " DcL2TlbHit1G %d", reg.dc_l2_tlb_hit_1g); 215 } 216 217 printf("ibs_op_data3:\t%016llx LdOp %d StOp %d%s%s%s DcMiss %d DcMisAcc %d " 218 "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d " 219 "DcLinAddrValid %d DcPhyAddrValid %d%s%s SwPf %d%s%s " 220 "DcMissLat %5d TlbRefillLat %5d\n", 221 reg.val, reg.ld_op, reg.st_op, dc_l1_l2tlb_miss_str, 222 dtlb_pgsize_cap ? dc_page_size_str : dc_l1tlb_hit_str, 223 dc_l2tlb_hit_2m_str, reg.dc_miss, reg.dc_mis_acc, reg.dc_wc_mem_acc, 224 reg.dc_uc_mem_acc, reg.dc_locked_op, reg.dc_miss_no_mab_alloc, 225 reg.dc_lin_addr_valid, reg.dc_phy_addr_valid, dc_l2tlb_hit_1g_str, 226 l2_miss_str, reg.sw_pf, op_mem_width_str, op_dc_miss_open_mem_reqs_str, 227 reg.dc_miss_lat, reg.tlb_refill_lat); 228 } 229 230 /* 231 * IBS Op/Execution MSRs always saved, in order, are: 232 * IBS_OP_CTL, IBS_OP_RIP, IBS_OP_DATA, IBS_OP_DATA2, 233 * IBS_OP_DATA3, IBS_DC_LINADDR, IBS_DC_PHYSADDR, BP_IBSTGT_RIP 234 */ 235 static void amd_dump_ibs_op(struct perf_sample *sample) 236 { 237 struct perf_ibs_data *data = sample->raw_data; 238 union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data; 239 __u64 *rip = (__u64 *)op_ctl + 1; 240 union ibs_op_data *op_data = (union ibs_op_data *)(rip + 1); 241 union ibs_op_data3 *op_data3 = (union ibs_op_data3 *)(rip + 3); 242 243 pr_ibs_op_ctl(*op_ctl); 244 if (!op_data->op_rip_invalid) 245 printf("IbsOpRip:\t%016llx\n", *rip); 246 pr_ibs_op_data(*op_data); 247 /* 248 * Erratum #1293: ignore op_data2 if DcMissNoMabAlloc or SwPf are set 249 */ 250 if (!(cpu_family == 0x19 && cpu_model < 0x10 && 251 (op_data3->dc_miss_no_mab_alloc || op_data3->sw_pf))) 252 pr_ibs_op_data2(*(union ibs_op_data2 *)(rip + 2)); 253 pr_ibs_op_data3(*op_data3); 254 if (op_data3->dc_lin_addr_valid) 255 printf("IbsDCLinAd:\t%016llx\n", *(rip + 4)); 256 if (op_data3->dc_phy_addr_valid) 257 printf("IbsDCPhysAd:\t%016llx\n", *(rip + 5)); 258 if (op_data->op_brn_ret && *(rip + 6)) 259 printf("IbsBrTarget:\t%016llx\n", *(rip + 6)); 260 } 261 262 /* 263 * IBS Fetch MSRs always saved, in order, are: 264 * IBS_FETCH_CTL, IBS_FETCH_LINADDR, IBS_FETCH_PHYSADDR, IC_IBS_EXTD_CTL 265 */ 266 static void amd_dump_ibs_fetch(struct perf_sample *sample) 267 { 268 struct perf_ibs_data *data = sample->raw_data; 269 union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data; 270 __u64 *addr = (__u64 *)fetch_ctl + 1; 271 union ic_ibs_extd_ctl *extd_ctl = (union ic_ibs_extd_ctl *)addr + 2; 272 273 pr_ibs_fetch_ctl(*fetch_ctl); 274 printf("IbsFetchLinAd:\t%016llx\n", *addr++); 275 if (fetch_ctl->phy_addr_valid) 276 printf("IbsFetchPhysAd:\t%016llx\n", *addr); 277 pr_ic_ibs_extd_ctl(*extd_ctl); 278 } 279 280 /* 281 * Test for enable and valid bits in captured control MSRs. 282 */ 283 static bool is_valid_ibs_fetch_sample(struct perf_sample *sample) 284 { 285 struct perf_ibs_data *data = sample->raw_data; 286 union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data; 287 288 if (fetch_ctl->fetch_en && fetch_ctl->fetch_val) 289 return true; 290 291 return false; 292 } 293 294 static bool is_valid_ibs_op_sample(struct perf_sample *sample) 295 { 296 struct perf_ibs_data *data = sample->raw_data; 297 union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data; 298 299 if (op_ctl->op_en && op_ctl->op_val) 300 return true; 301 302 return false; 303 } 304 305 /* AMD vendor specific raw sample function. Check for PERF_RECORD_SAMPLE events 306 * and if the event was triggered by IBS, display its raw data with decoded text. 307 * The function is only invoked when the dump flag -D is set. 308 */ 309 void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event, 310 struct perf_sample *sample) 311 { 312 struct evsel *evsel; 313 314 if (event->header.type != PERF_RECORD_SAMPLE || !sample->raw_size) 315 return; 316 317 evsel = evlist__event2evsel(evlist, event); 318 if (!evsel) 319 return; 320 321 if (evsel->core.attr.type == ibs_fetch_type) { 322 if (!is_valid_ibs_fetch_sample(sample)) { 323 pr_debug("Invalid raw IBS Fetch MSR data encountered\n"); 324 return; 325 } 326 amd_dump_ibs_fetch(sample); 327 } else if (evsel->core.attr.type == ibs_op_type) { 328 if (!is_valid_ibs_op_sample(sample)) { 329 pr_debug("Invalid raw IBS Op MSR data encountered\n"); 330 return; 331 } 332 amd_dump_ibs_op(sample); 333 } 334 } 335 336 static void parse_cpuid(struct perf_env *env) 337 { 338 const char *cpuid; 339 int ret; 340 341 cpuid = perf_env__cpuid(env); 342 /* 343 * cpuid = "AuthenticAMD,family,model,stepping" 344 */ 345 ret = sscanf(cpuid, "%*[^,],%u,%u", &cpu_family, &cpu_model); 346 if (ret != 2) 347 pr_debug("problem parsing cpuid\n"); 348 } 349 350 /* 351 * Find and assign the type number used for ibs_op or ibs_fetch samples. 352 * Device names can be large - we are only interested in the first 9 characters, 353 * to match "ibs_fetch". 354 */ 355 bool evlist__has_amd_ibs(struct evlist *evlist) 356 { 357 struct perf_env *env = evlist->env; 358 int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env); 359 const char *pmu_mapping = perf_env__pmu_mappings(env); 360 char name[sizeof("ibs_fetch")]; 361 u32 type; 362 363 while (nr_pmu_mappings--) { 364 ret = sscanf(pmu_mapping, "%u:%9s", &type, name); 365 if (ret == 2) { 366 if (strstarts(name, "ibs_op")) 367 ibs_op_type = type; 368 else if (strstarts(name, "ibs_fetch")) 369 ibs_fetch_type = type; 370 } 371 pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */; 372 } 373 374 if (perf_env__find_pmu_cap(env, "ibs_op", "zen4_ibs_extensions")) 375 zen4_ibs_extensions = 1; 376 377 if (perf_env__find_pmu_cap(env, "ibs_op", "ldlat")) 378 ldlat_cap = 1; 379 380 if (perf_env__find_pmu_cap(env, "ibs_op", "dtlb_pgsize")) 381 dtlb_pgsize_cap = 1; 382 383 if (ibs_fetch_type || ibs_op_type) { 384 if (!cpu_family) 385 parse_cpuid(env); 386 return true; 387 } 388 389 return false; 390 } 391