// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
#include <asm/msr.h>
#include <asm/perf_event.h>

#include "../perf_event.h"

/* LBR Branch Select valid bits */
#define LBR_SELECT_MASK			0x1ff

/*
 * LBR Branch Select filter bits which, when set, ensure that the
 * corresponding type of branch is not recorded
 */
#define LBR_SELECT_KERNEL		0	/* Branches ending in CPL = 0 */
#define LBR_SELECT_USER			1	/* Branches ending in CPL > 0 */
#define LBR_SELECT_JCC			2	/* Conditional branches */
#define LBR_SELECT_CALL_NEAR_REL	3	/* Near relative calls */
#define LBR_SELECT_CALL_NEAR_IND	4	/* Near indirect calls */
#define LBR_SELECT_RET_NEAR		5	/* Near returns */
#define LBR_SELECT_JMP_NEAR_IND		6	/* Near indirect jumps (excl. calls and returns) */
#define LBR_SELECT_JMP_NEAR_REL		7	/* Near relative jumps (excl. calls) */
#define LBR_SELECT_FAR_BRANCH		8	/* Far branches */

#define LBR_KERNEL	BIT(LBR_SELECT_KERNEL)
#define LBR_USER	BIT(LBR_SELECT_USER)
#define LBR_JCC		BIT(LBR_SELECT_JCC)
#define LBR_REL_CALL	BIT(LBR_SELECT_CALL_NEAR_REL)
#define LBR_IND_CALL	BIT(LBR_SELECT_CALL_NEAR_IND)
#define LBR_RETURN	BIT(LBR_SELECT_RET_NEAR)
#define LBR_REL_JMP	BIT(LBR_SELECT_JMP_NEAR_REL)
#define LBR_IND_JMP	BIT(LBR_SELECT_JMP_NEAR_IND)
#define LBR_FAR		BIT(LBR_SELECT_FAR_BRANCH)
#define LBR_NOT_SUPP	-1	/* unsupported filter */
#define LBR_IGNORE	0

#define LBR_ANY		\
	(LBR_JCC | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN |	\
	 LBR_REL_JMP | LBR_IND_JMP | LBR_FAR)

struct branch_entry {
	union {
		struct {
			u64	ip:58;
			u64	ip_sign_ext:5;
			u64	mispredict:1;
		} split;
		u64	full;
	} from;

	union {
		struct {
			u64	ip:58;
			u64	ip_sign_ext:3;
			u64	reserved:1;
			u64	spec:1;
			u64	valid:1;
		} split;
		u64	full;
	} to;
};

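/*
 * Branch records are stored as interleaved From/To MSR pairs starting at
 * MSR_AMD_SAMP_BR_FROM: record idx has its "from" address at
 * MSR_AMD_SAMP_BR_FROM + idx * 2 and its "to" address at the next MSR.
 */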
static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val)
{
	wrmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
}

static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val)
{
	wrmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
}

static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx)
{
	u64 val;

	rdmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2, val);

	return val;
}

static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx)
{
	u64 val;

	rdmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);

	return val;
}

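/*
 * The raw ip fields are not canonical; sign-extend them based on the
 * number of implemented virtual address bits. For example, with 48
 * virtual address bits the shift below is 16 and bit 47 gets replicated
 * into bits 63:48.
 */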
static __always_inline u64 sign_ext_branch_ip(u64 ip)
{
	u32 shift = 64 - boot_cpu_data.x86_virt_bits;

	return (u64)(((s64)ip << shift) >> shift);
}

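/*
 * Software-side filtering of the recorded branches: fix up the "from"
 * addresses of fused branches, invalidate entries that do not match the
 * branch types requested in cpuc->br_sel and compress out the invalidated
 * entries.
 */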
static void amd_pmu_lbr_filter(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int br_sel = cpuc->br_sel, offset, type, i, j;
	bool compress = false;
	bool fused_only = false;
	u64 from, to;

	/*
	 * If sampling all branches and branch type info is not requested,
	 * there is nothing to filter
	 */
	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
		fused_only = true;

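	/* First pass: classify entries and mark those to be dropped */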
	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
		from = cpuc->lbr_entries[i].from;
		to = cpuc->lbr_entries[i].to;
		type = branch_type_fused(from, to, 0, &offset);

		/*
		 * Adjust the branch from address in case of instruction
		 * fusion where it points to an instruction preceding the
		 * actual branch
		 */
		if (offset) {
			cpuc->lbr_entries[i].from += offset;
			if (fused_only)
				continue;
		}

		/* If type does not correspond, then discard */
		if (type == X86_BR_NONE || (br_sel & type) != type) {
			cpuc->lbr_entries[i].from = 0;	/* mark invalid */
			compress = true;
		}

		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
			cpuc->lbr_entries[i].type = common_branch_type(type);
	}

	if (!compress)
		return;

	/* Remove all invalid entries */
	for (i = 0; i < cpuc->lbr_stack.nr; ) {
		if (!cpuc->lbr_entries[i].from) {
			j = i;
			while (++j < cpuc->lbr_stack.nr)
				cpuc->lbr_entries[j - 1] = cpuc->lbr_entries[j];
			cpuc->lbr_stack.nr--;
			if (!cpuc->lbr_entries[i].from)
				continue;
		}
		i++;
	}
}

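/*
 * Map the (valid, spec) bit pair of a branch record to the perf branch
 * speculation outcome; indexed by (valid << 1) | spec as done in
 * amd_pmu_lbr_read().
 */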
static const int lbr_spec_map[PERF_BR_SPEC_MAX] = {
	PERF_BR_SPEC_NA,
	PERF_BR_SPEC_WRONG_PATH,
	PERF_BR_NON_SPEC_CORRECT_PATH,
	PERF_BR_SPEC_CORRECT_PATH,
};

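/*
 * Read the branch records out of the LBR MSRs into cpuc->lbr_entries,
 * skipping empty and erroneous records, then apply software filtering.
 */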
void amd_pmu_lbr_read(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct perf_branch_entry *br = cpuc->lbr_entries;
	struct branch_entry entry;
	int out = 0, idx, i;

	if (!cpuc->lbr_users)
		return;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		entry.from.full = amd_pmu_lbr_get_from(i);
		entry.to.full = amd_pmu_lbr_get_to(i);

		/*
		 * Check if a branch has been logged; if valid = 0, spec = 0
		 * then no branch was recorded; if reserved = 1 then an
		 * erroneous branch was recorded (see Erratum 1452)
		 */
		if ((!entry.to.split.valid && !entry.to.split.spec) ||
		    entry.to.split.reserved)
			continue;

		perf_clear_branch_entry_bitfields(br + out);

		br[out].from = sign_ext_branch_ip(entry.from.split.ip);
		br[out].to = sign_ext_branch_ip(entry.to.split.ip);
		br[out].mispred = entry.from.split.mispredict;
		br[out].predicted = !br[out].mispred;

		/*
		 * Set branch speculation information using the status of
		 * the valid and spec bits.
		 *
		 * When valid = 0, spec = 0, no branch was recorded and the
		 * entry is discarded as seen above.
		 *
		 * When valid = 0, spec = 1, the recorded branch was
		 * speculative but took the wrong path.
		 *
		 * When valid = 1, spec = 0, the recorded branch was
		 * non-speculative but took the correct path.
		 *
		 * When valid = 1, spec = 1, the recorded branch was
		 * speculative and took the correct path.
		 */
		idx = (entry.to.split.valid << 1) | entry.to.split.spec;
		br[out].spec = lbr_spec_map[idx];
		out++;
	}

	cpuc->lbr_stack.nr = out;

	/*
	 * Internal register renaming ensures that LBR From[0] and LBR To[0]
	 * always represent the TOS
	 */
	cpuc->lbr_stack.hw_idx = 0;

	/* Perform further software filtering */
	amd_pmu_lbr_filter();
}

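/*
 * Map perf branch_sample_type bits to LBR Branch Select bits. Entries
 * marked LBR_NOT_SUPP cause amd_pmu_lbr_setup_filter() to fail, while
 * LBR_IGNORE entries are accepted but set no hardware filter bit.
 */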
static const int lbr_select_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGNORE,

	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_IN_TX_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_NO_TX_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,

	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,

	[PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT]	= LBR_NOT_SUPP,
};

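/*
 * Build both levels of branch filtering for an event: the software branch
 * type mask kept in event->hw.branch_reg.reg (consumed by
 * amd_pmu_lbr_filter()) and the hardware LBR Branch Select value kept in
 * event->hw.branch_reg.config.
 */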
static int amd_pmu_lbr_setup_filter(struct perf_event *event)
{
	struct hw_perf_event_extra *reg = &event->hw.branch_reg;
	u64 br_type = event->attr.branch_sample_type;
	u64 mask = 0, v;
	int i;

	/* No LBR support */
	if (!x86_pmu.lbr_nr)
		return -EOPNOTSUPP;

	if (br_type & PERF_SAMPLE_BRANCH_USER)
		mask |= X86_BR_USER;

	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
		mask |= X86_BR_KERNEL;

	/* Ignore BRANCH_HV here */

	if (br_type & PERF_SAMPLE_BRANCH_ANY)
		mask |= X86_BR_ANY;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
		mask |= X86_BR_ANY_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
		mask |= X86_BR_IND_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_COND)
		mask |= X86_BR_JCC;

	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
		mask |= X86_BR_IND_JMP;

	if (br_type & PERF_SAMPLE_BRANCH_CALL)
		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
		mask |= X86_BR_TYPE_SAVE;

	reg->reg = mask;
	mask = 0;

	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
		if (!(br_type & BIT_ULL(i)))
			continue;

		v = lbr_select_map[i];
		if (v == LBR_NOT_SUPP)
			return -EOPNOTSUPP;

		if (v != LBR_IGNORE)
			mask |= v;
	}

	/* Filter bits operate in suppress mode */
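	/*
	 * Setting a bit in LBR_SELECT suppresses that branch class, so the
	 * "keep" mask accumulated above is inverted here. For example,
	 * requesting only PERF_SAMPLE_BRANCH_USER and PERF_SAMPLE_BRANCH_COND
	 * yields mask = LBR_USER | LBR_JCC = 0x006 and hence
	 * config = 0x006 ^ 0x1ff = 0x1f9, which leaves only the LBR_USER and
	 * LBR_JCC classes unsuppressed.
	 */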
	reg->config = mask ^ LBR_SELECT_MASK;

	return 0;
}

int amd_pmu_lbr_hw_config(struct perf_event *event)
{
	int ret = 0;

	ret = amd_pmu_lbr_setup_filter(event);
	if (!ret)
		event->attach_state |= PERF_ATTACH_SCHED_CB;

	return ret;
}

void amd_pmu_lbr_reset(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int i;

	if (!x86_pmu.lbr_nr)
		return;

	/* Reset all branch records individually */
	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		amd_pmu_lbr_set_from(i, 0);
		amd_pmu_lbr_set_to(i, 0);
	}

	cpuc->last_task_ctx = NULL;
	cpuc->last_log_id = 0;
	wrmsrq(MSR_AMD64_LBR_SELECT, 0);
}

void amd_pmu_lbr_add(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event_extra *reg = &event->hw.branch_reg;

	if (!x86_pmu.lbr_nr)
		return;

	if (has_branch_stack(event)) {
		cpuc->lbr_select = 1;
		cpuc->lbr_sel->config = reg->config;
		cpuc->br_sel = reg->reg;
	}

	perf_sched_cb_inc(event->pmu);

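	/*
	 * Only the first LBR user triggers a reset, and only if the event
	 * has not been running yet; presumably this keeps existing records
	 * intact when an already-running event is re-added.
	 */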
	if (!cpuc->lbr_users++ && !event->total_time_running)
		amd_pmu_lbr_reset();
}

void amd_pmu_lbr_del(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	if (has_branch_stack(event))
		cpuc->lbr_select = 0;

	cpuc->lbr_users--;
	WARN_ON_ONCE(cpuc->lbr_users < 0);
	perf_sched_cb_dec(event->pmu);
}

void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx,
			    struct task_struct *task, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/*
	 * A context switch can flip the address space and LBR entries are
	 * not tagged with an identifier. Hence, branches cannot be resolved
	 * from the old address space and the LBR records should be wiped.
	 */
	if (cpuc->lbr_users && sched_in)
		amd_pmu_lbr_reset();
}

void amd_pmu_lbr_enable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	u64 lbr_select, dbg_ctl, dbg_extn_cfg;

	if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
		return;

	/* Set hardware branch filter */
	if (cpuc->lbr_select) {
		lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK;
		wrmsrq(MSR_AMD64_LBR_SELECT, lbr_select);
	}

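	/*
	 * Optionally freeze the LBR stack on a PMI so that the records are
	 * not overwritten by branches taken after the counter overflow
	 * (assumed intent of DEBUGCTLMSR_FREEZE_LBRS_ON_PMI).
	 */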
	if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
		rdmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
		wrmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
	}

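	/* Enable LBR v2 branch record generation */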
	rdmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
	wrmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}

void amd_pmu_lbr_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
		return;

	__amd_pmu_lbr_disable();
}

__init int amd_pmu_lbr_init(void)
{
	union cpuid_0x80000022_ebx ebx;

	if (x86_pmu.version < 2 || !boot_cpu_has(X86_FEATURE_AMD_LBR_V2))
		return -EOPNOTSUPP;

	/* Set number of entries */
	ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
	x86_pmu.lbr_nr = ebx.split.lbr_v2_stack_sz;

	pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);

	return 0;
}