1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * AMD specific. Provide textual annotation for IBS raw sample data.
4 */
5
6 #include <unistd.h>
7 #include <stdio.h>
8 #include <string.h>
9 #include <inttypes.h>
10
11 #include <linux/string.h>
12 #include "../../arch/x86/include/asm/amd/ibs.h"
13
14 #include "debug.h"
15 #include "session.h"
16 #include "evlist.h"
17 #include "sample-raw.h"
18 #include "util/sample.h"
19
20 static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type;
21 static bool zen4_ibs_extensions;
22 static bool ldlat_cap;
23 static bool dtlb_pgsize_cap;
24
pr_ibs_fetch_ctl(union ibs_fetch_ctl reg)25 static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg)
26 {
27 const char * const ic_miss_strs[] = {
28 " IcMiss 0",
29 " IcMiss 1",
30 };
31 const char * const l1tlb_pgsz_strs[] = {
32 " L1TlbPgSz 4KB",
33 " L1TlbPgSz 2MB",
34 " L1TlbPgSz 1GB",
35 " L1TlbPgSz RESERVED"
36 };
37 const char * const l1tlb_pgsz_strs_erratum1347[] = {
38 " L1TlbPgSz 4KB",
39 " L1TlbPgSz 16KB",
40 " L1TlbPgSz 2MB",
41 " L1TlbPgSz 1GB"
42 };
43 const char *ic_miss_str = NULL;
44 const char *l1tlb_pgsz_str = NULL;
45 char l3_miss_str[sizeof(" L3MissOnly _ FetchOcMiss _ FetchL3Miss _")] = "";
46
47 if (cpu_family == 0x19 && cpu_model < 0x10) {
48 /*
49 * Erratum #1238 workaround is to ignore MSRC001_1030[IbsIcMiss]
50 * Erratum #1347 workaround is to use table provided in erratum
51 */
52 if (reg.phy_addr_valid)
53 l1tlb_pgsz_str = l1tlb_pgsz_strs_erratum1347[reg.l1tlb_pgsz];
54 } else {
55 if (reg.phy_addr_valid)
56 l1tlb_pgsz_str = l1tlb_pgsz_strs[reg.l1tlb_pgsz];
57 ic_miss_str = ic_miss_strs[reg.ic_miss];
58 }
59
60 if (zen4_ibs_extensions) {
61 snprintf(l3_miss_str, sizeof(l3_miss_str),
62 " L3MissOnly %d FetchOcMiss %d FetchL3Miss %d",
63 reg.l3_miss_only, reg.fetch_oc_miss, reg.fetch_l3_miss);
64 }
65
66 printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s "
67 "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s%s\n",
68 reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat,
69 reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "",
70 reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss,
71 reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : "",
72 l3_miss_str);
73 }
74
/*
 * Print the IBS extended fetch control register: the raw value followed by
 * the ITLB refill latency field.
 */
static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg)
{
	const int itlb_refill_lat = reg.itlb_refill_lat;

	printf("ic_ibs_ext_ctl:\t%016llx IbsItlbRefillLat %3d\n",
	       reg.val, itlb_refill_lat);
}
79
pr_ibs_op_ctl(union ibs_op_ctl reg)80 static void pr_ibs_op_ctl(union ibs_op_ctl reg)
81 {
82 char l3_miss_only[sizeof(" L3MissOnly _")] = "";
83 char ldlat[sizeof(" LdLatThrsh __ LdLatEn _")] = "";
84
85 if (zen4_ibs_extensions)
86 snprintf(l3_miss_only, sizeof(l3_miss_only), " L3MissOnly %d", reg.l3_miss_only);
87
88 if (ldlat_cap) {
89 snprintf(ldlat, sizeof(ldlat), " LdLatThrsh %2d LdLatEn %d",
90 reg.ldlat_thrsh, reg.ldlat_en);
91 }
92
93 printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d%s\n",
94 reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, l3_miss_only,
95 reg.op_en, reg.op_val, reg.cnt_ctl,
96 reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt, ldlat);
97 }
98
/*
 * Print IBS_OP_DATA on one line: the raw value followed by its decoded
 * fields. OpReturn, OpBrnTaken and OpBrnMisp are printed only when BrnRet
 * is set.
 */
static void pr_ibs_op_data(union ibs_op_data reg)
{
	const char *op_return = "";
	const char *brn_taken = "";
	const char *brn_misp = "";

	if (reg.op_brn_ret) {
		op_return = reg.op_return ? " OpReturn 1" : " OpReturn 0";
		brn_taken = reg.op_brn_taken ? " OpBrnTaken 1" : " OpBrnTaken 0";
		brn_misp = reg.op_brn_misp ? " OpBrnMisp 1" : " OpBrnMisp 0";
	}

	printf("ibs_op_data:\t%016llx CompToRetCtr %5d TagToRetCtr %5d%s%s%s BrnRet %d "
	       " RipInvalid %d BrnFuse %d Microcode %d\n",
	       reg.val, reg.comp_to_ret_ctr, reg.tag_to_ret_ctr,
	       op_return, brn_taken, brn_misp,
	       reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode);
}
109
/*
 * Print IBS_OP_DATA2 using the extended DataSrc encoding, where the data
 * source is assembled from the data_src_hi and data_src_lo fields.
 *
 * CacheHitSt is printed only for data sources 1, 2 and 5. Data sources with
 * no entry in the table are not printed.
 */
static void pr_ibs_op_data2_extended(union ibs_op_data2 reg)
{
	static const char * const src_names[] = {
		"",
		" DataSrc 1=Local L3 or other L1/L2 in CCX",
		" DataSrc 2=Another CCX cache in the same NUMA node",
		" DataSrc 3=DRAM",
		" DataSrc 4=(reserved)",
		" DataSrc 5=Another CCX cache in a different NUMA node",
		" DataSrc 6=Long-latency DIMM",
		" DataSrc 7=MMIO/Config/PCI/APIC",
		" DataSrc 8=Extension Memory",
		" DataSrc 9=(reserved)",
		" DataSrc 10=(reserved)",
		" DataSrc 11=(reserved)",
		" DataSrc 12=Coherent Memory of a different processor type",
		/* 13 to 31 are reserved. Avoid printing them. */
	};
	const int src = (reg.data_src_hi << 3) | reg.data_src_lo;
	const char *hit_state = "";
	const char *src_name = "";

	switch (src) {
	case 1:
	case 2:
	case 5:
		hit_state = reg.cache_hit_st ? "CacheHitSt 1=O-State "
					     : "CacheHitSt 0=M-state ";
		break;
	default:
		break;
	}

	if (src < (int)ARRAY_SIZE(src_names))
		src_name = src_names[src];

	printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val,
	       hit_state, reg.rmt_node, src_name);
}
136
/*
 * Print IBS_OP_DATA2 using the default DataSrc encoding, which is taken from
 * the data_src_lo field alone. CacheHitSt is printed only for data source 2.
 */
static void pr_ibs_op_data2_default(union ibs_op_data2 reg)
{
	static const char * const src_names[] = {
		"",
		" DataSrc 1=(reserved)",
		" DataSrc 2=Local node cache",
		" DataSrc 3=DRAM",
		" DataSrc 4=Remote node cache",
		" DataSrc 5=(reserved)",
		" DataSrc 6=(reserved)",
		" DataSrc 7=Other"
	};
	const char *hit_state = "";

	if (reg.data_src_lo == 2) {
		hit_state = reg.cache_hit_st ? "CacheHitSt 1=O-State "
					     : "CacheHitSt 0=M-state ";
	}

	printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val,
	       hit_state, reg.rmt_node, src_names[reg.data_src_lo]);
}
155
pr_ibs_op_data2(union ibs_op_data2 reg)156 static void pr_ibs_op_data2(union ibs_op_data2 reg)
157 {
158 if (zen4_ibs_extensions)
159 return pr_ibs_op_data2_extended(reg);
160 pr_ibs_op_data2_default(reg);
161 }
162
pr_ibs_op_data3(union ibs_op_data3 reg)163 static void pr_ibs_op_data3(union ibs_op_data3 reg)
164 {
165 static const char * const dc_page_sizes[] = {
166 " 4K",
167 " 2M",
168 " 1G",
169 " ??",
170 };
171 char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = "";
172 char dc_l1_l2tlb_miss_str[sizeof(" DcL1TlbMiss _ DcL2TlbMiss _")] = "";
173 char dc_l1tlb_hit_str[sizeof(" DcL1TlbHit2M _ DcL1TlbHit1G _")] = "";
174 char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = "";
175 char dc_l2tlb_hit_2m_str[sizeof(" DcL2TlbHit2M _")] = "";
176 char dc_l2tlb_hit_1g_str[sizeof(" DcL2TlbHit1G _")] = "";
177 char dc_page_size_str[sizeof(" DcPageSize ____")] = "";
178 char l2_miss_str[sizeof(" L2Miss _")] = "";
179
180 /*
181 * Erratum #1293
182 * Ignore L2Miss and OpDcMissOpenMemReqs (and opdata2) if DcMissNoMabAlloc or SwPf set
183 */
184 if (!(cpu_family == 0x19 && cpu_model < 0x10 && (reg.dc_miss_no_mab_alloc || reg.sw_pf))) {
185 snprintf(l2_miss_str, sizeof(l2_miss_str), " L2Miss %d", reg.l2_miss);
186 snprintf(op_dc_miss_open_mem_reqs_str, sizeof(op_dc_miss_open_mem_reqs_str),
187 " OpDcMissOpenMemReqs %2d", reg.op_dc_miss_open_mem_reqs);
188 }
189
190 if (reg.op_mem_width)
191 snprintf(op_mem_width_str, sizeof(op_mem_width_str),
192 " OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1));
193
194 if (dtlb_pgsize_cap) {
195 if (reg.dc_phy_addr_valid) {
196 int idx = (reg.dc_l1tlb_hit_1g << 1) | reg.dc_l1tlb_hit_2m;
197
198 snprintf(dc_l1_l2tlb_miss_str, sizeof(dc_l1_l2tlb_miss_str),
199 " DcL1TlbMiss %d DcL2TlbMiss %d",
200 reg.dc_l1tlb_miss, reg.dc_l2tlb_miss);
201 snprintf(dc_page_size_str, sizeof(dc_page_size_str),
202 " DcPageSize %4s", dc_page_sizes[idx]);
203 }
204 } else {
205 snprintf(dc_l1_l2tlb_miss_str, sizeof(dc_l1_l2tlb_miss_str),
206 " DcL1TlbMiss %d DcL2TlbMiss %d",
207 reg.dc_l1tlb_miss, reg.dc_l2tlb_miss);
208 snprintf(dc_l1tlb_hit_str, sizeof(dc_l1tlb_hit_str),
209 " DcL1TlbHit2M %d DcL1TlbHit1G %d",
210 reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g);
211 snprintf(dc_l2tlb_hit_2m_str, sizeof(dc_l2tlb_hit_2m_str),
212 " DcL2TlbHit2M %d", reg.dc_l2tlb_hit_2m);
213 snprintf(dc_l2tlb_hit_1g_str, sizeof(dc_l2tlb_hit_1g_str),
214 " DcL2TlbHit1G %d", reg.dc_l2_tlb_hit_1g);
215 }
216
217 printf("ibs_op_data3:\t%016llx LdOp %d StOp %d%s%s%s DcMiss %d DcMisAcc %d "
218 "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d "
219 "DcLinAddrValid %d DcPhyAddrValid %d%s%s SwPf %d%s%s "
220 "DcMissLat %5d TlbRefillLat %5d\n",
221 reg.val, reg.ld_op, reg.st_op, dc_l1_l2tlb_miss_str,
222 dtlb_pgsize_cap ? dc_page_size_str : dc_l1tlb_hit_str,
223 dc_l2tlb_hit_2m_str, reg.dc_miss, reg.dc_mis_acc, reg.dc_wc_mem_acc,
224 reg.dc_uc_mem_acc, reg.dc_locked_op, reg.dc_miss_no_mab_alloc,
225 reg.dc_lin_addr_valid, reg.dc_phy_addr_valid, dc_l2tlb_hit_1g_str,
226 l2_miss_str, reg.sw_pf, op_mem_width_str, op_dc_miss_open_mem_reqs_str,
227 reg.dc_miss_lat, reg.tlb_refill_lat);
228 }
229
230 /*
231 * IBS Op/Execution MSRs always saved, in order, are:
232 * IBS_OP_CTL, IBS_OP_RIP, IBS_OP_DATA, IBS_OP_DATA2,
233 * IBS_OP_DATA3, IBS_DC_LINADDR, IBS_DC_PHYSADDR, BP_IBSTGT_RIP
234 */
amd_dump_ibs_op(struct perf_sample * sample)235 static void amd_dump_ibs_op(struct perf_sample *sample)
236 {
237 struct perf_ibs_data *data = sample->raw_data;
238 union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
239 __u64 *rip = (__u64 *)op_ctl + 1;
240 union ibs_op_data *op_data = (union ibs_op_data *)(rip + 1);
241 union ibs_op_data3 *op_data3 = (union ibs_op_data3 *)(rip + 3);
242
243 pr_ibs_op_ctl(*op_ctl);
244 if (!op_data->op_rip_invalid)
245 printf("IbsOpRip:\t%016llx\n", *rip);
246 pr_ibs_op_data(*op_data);
247 /*
248 * Erratum #1293: ignore op_data2 if DcMissNoMabAlloc or SwPf are set
249 */
250 if (!(cpu_family == 0x19 && cpu_model < 0x10 &&
251 (op_data3->dc_miss_no_mab_alloc || op_data3->sw_pf)))
252 pr_ibs_op_data2(*(union ibs_op_data2 *)(rip + 2));
253 pr_ibs_op_data3(*op_data3);
254 if (op_data3->dc_lin_addr_valid)
255 printf("IbsDCLinAd:\t%016llx\n", *(rip + 4));
256 if (op_data3->dc_phy_addr_valid)
257 printf("IbsDCPhysAd:\t%016llx\n", *(rip + 5));
258 if (op_data->op_brn_ret && *(rip + 6))
259 printf("IbsBrTarget:\t%016llx\n", *(rip + 6));
260 }
261
262 /*
263 * IBS Fetch MSRs always saved, in order, are:
264 * IBS_FETCH_CTL, IBS_FETCH_LINADDR, IBS_FETCH_PHYSADDR, IC_IBS_EXTD_CTL
265 */
amd_dump_ibs_fetch(struct perf_sample * sample)266 static void amd_dump_ibs_fetch(struct perf_sample *sample)
267 {
268 struct perf_ibs_data *data = sample->raw_data;
269 union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
270 __u64 *addr = (__u64 *)fetch_ctl + 1;
271 union ic_ibs_extd_ctl *extd_ctl = (union ic_ibs_extd_ctl *)addr + 2;
272
273 pr_ibs_fetch_ctl(*fetch_ctl);
274 printf("IbsFetchLinAd:\t%016llx\n", *addr++);
275 if (fetch_ctl->phy_addr_valid)
276 printf("IbsFetchPhysAd:\t%016llx\n", *addr);
277 pr_ic_ibs_extd_ctl(*extd_ctl);
278 }
279
280 /*
281 * Test for enable and valid bits in captured control MSRs.
282 */
is_valid_ibs_fetch_sample(struct perf_sample * sample)283 static bool is_valid_ibs_fetch_sample(struct perf_sample *sample)
284 {
285 struct perf_ibs_data *data = sample->raw_data;
286 union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
287
288 if (fetch_ctl->fetch_en && fetch_ctl->fetch_val)
289 return true;
290
291 return false;
292 }
293
is_valid_ibs_op_sample(struct perf_sample * sample)294 static bool is_valid_ibs_op_sample(struct perf_sample *sample)
295 {
296 struct perf_ibs_data *data = sample->raw_data;
297 union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
298
299 if (op_ctl->op_en && op_ctl->op_val)
300 return true;
301
302 return false;
303 }
304
305 /* AMD vendor specific raw sample function. Check for PERF_RECORD_SAMPLE events
306 * and if the event was triggered by IBS, display its raw data with decoded text.
307 * The function is only invoked when the dump flag -D is set.
308 */
evlist__amd_sample_raw(struct evlist * evlist,union perf_event * event,struct perf_sample * sample)309 void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event,
310 struct perf_sample *sample)
311 {
312 struct evsel *evsel;
313
314 if (event->header.type != PERF_RECORD_SAMPLE || !sample->raw_size)
315 return;
316
317 evsel = evlist__event2evsel(evlist, event);
318 if (!evsel)
319 return;
320
321 if (evsel->core.attr.type == ibs_fetch_type) {
322 if (!is_valid_ibs_fetch_sample(sample)) {
323 pr_debug("Invalid raw IBS Fetch MSR data encountered\n");
324 return;
325 }
326 amd_dump_ibs_fetch(sample);
327 } else if (evsel->core.attr.type == ibs_op_type) {
328 if (!is_valid_ibs_op_sample(sample)) {
329 pr_debug("Invalid raw IBS Op MSR data encountered\n");
330 return;
331 }
332 amd_dump_ibs_op(sample);
333 }
334 }
335
parse_cpuid(struct perf_env * env)336 static void parse_cpuid(struct perf_env *env)
337 {
338 const char *cpuid;
339 int ret;
340
341 cpuid = perf_env__cpuid(env);
342 /*
343 * cpuid = "AuthenticAMD,family,model,stepping"
344 */
345 ret = sscanf(cpuid, "%*[^,],%u,%u", &cpu_family, &cpu_model);
346 if (ret != 2)
347 pr_debug("problem parsing cpuid\n");
348 }
349
350 /*
351 * Find and assign the type number used for ibs_op or ibs_fetch samples.
352 * Device names can be large - we are only interested in the first 9 characters,
353 * to match "ibs_fetch".
354 */
evlist__has_amd_ibs(struct evlist * evlist)355 bool evlist__has_amd_ibs(struct evlist *evlist)
356 {
357 struct perf_env *env = perf_session__env(evlist->session);
358 int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env);
359 const char *pmu_mapping = perf_env__pmu_mappings(env);
360 char name[sizeof("ibs_fetch")];
361 u32 type;
362
363 while (nr_pmu_mappings--) {
364 ret = sscanf(pmu_mapping, "%u:%9s", &type, name);
365 if (ret == 2) {
366 if (strstarts(name, "ibs_op"))
367 ibs_op_type = type;
368 else if (strstarts(name, "ibs_fetch"))
369 ibs_fetch_type = type;
370 }
371 pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */;
372 }
373
374 if (perf_env__find_pmu_cap(env, "ibs_op", "zen4_ibs_extensions"))
375 zen4_ibs_extensions = 1;
376
377 if (perf_env__find_pmu_cap(env, "ibs_op", "ldlat"))
378 ldlat_cap = 1;
379
380 if (perf_env__find_pmu_cap(env, "ibs_op", "dtlb_pgsize"))
381 dtlb_pgsize_cap = 1;
382
383 if (ibs_fetch_type || ibs_op_type) {
384 if (!cpu_family)
385 parse_cpuid(env);
386 return true;
387 }
388
389 return false;
390 }
391