xref: /linux/arch/x86/events/amd/lbr.c (revision ab93e0dd72c37d378dd936f031ffb83ff2bd87ce)
// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
#include <asm/msr.h>
#include <asm/perf_event.h>

#include "../perf_event.h"

/* LBR Branch Select valid bits */
#define LBR_SELECT_MASK		0x1ff

/*
 * LBR Branch Select filter bits which, when set, ensure that the
 * corresponding types of branches are not recorded
 */
#define LBR_SELECT_KERNEL		0	/* Branches ending in CPL = 0 */
#define LBR_SELECT_USER			1	/* Branches ending in CPL > 0 */
#define LBR_SELECT_JCC			2	/* Conditional branches */
#define LBR_SELECT_CALL_NEAR_REL	3	/* Near relative calls */
#define LBR_SELECT_CALL_NEAR_IND	4	/* Near indirect calls */
#define LBR_SELECT_RET_NEAR		5	/* Near returns */
#define LBR_SELECT_JMP_NEAR_IND		6	/* Near indirect jumps (excl. calls and returns) */
#define LBR_SELECT_JMP_NEAR_REL		7	/* Near relative jumps (excl. calls) */
#define LBR_SELECT_FAR_BRANCH		8	/* Far branches */

#define LBR_KERNEL	BIT(LBR_SELECT_KERNEL)
#define LBR_USER	BIT(LBR_SELECT_USER)
#define LBR_JCC		BIT(LBR_SELECT_JCC)
#define LBR_REL_CALL	BIT(LBR_SELECT_CALL_NEAR_REL)
#define LBR_IND_CALL	BIT(LBR_SELECT_CALL_NEAR_IND)
#define LBR_RETURN	BIT(LBR_SELECT_RET_NEAR)
#define LBR_REL_JMP	BIT(LBR_SELECT_JMP_NEAR_REL)
#define LBR_IND_JMP	BIT(LBR_SELECT_JMP_NEAR_IND)
#define LBR_FAR		BIT(LBR_SELECT_FAR_BRANCH)
#define LBR_NOT_SUPP	-1	/* unsupported filter */
#define LBR_IGNORE	0

#define LBR_ANY		\
	(LBR_JCC | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN |	\
	 LBR_REL_JMP | LBR_IND_JMP | LBR_FAR)

struct branch_entry {
	union {
		struct {
			u64	ip:58;
			u64	ip_sign_ext:5;
			u64	mispredict:1;
		} split;
		u64		full;
	} from;

	union {
		struct {
			u64	ip:58;
			u64	ip_sign_ext:3;
			u64	reserved:1;
			u64	spec:1;
			u64	valid:1;
		} split;
		u64		full;
	} to;
};
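
/*
 * For illustration, derived purely from the bitfield layout above: the raw
 * 64-bit branch record MSRs decode as
 *
 *   LBR From: bits [57:0]  branch source IP
 *             bits [62:58] ip_sign_ext (sign-extension bits)
 *             bit  63      mispredict
 *
 *   LBR To:   bits [57:0]  branch target IP
 *             bits [60:58] ip_sign_ext (sign-extension bits)
 *             bit  61      reserved (see the Erratum 1452 note in
 *                          amd_pmu_lbr_read())
 *             bit  62      spec
 *             bit  63      valid
 */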

static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val)
{
	wrmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
}

static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val)
{
	wrmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
}

static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx)
{
	u64 val;

	rdmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2, val);

	return val;
}

static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx)
{
	u64 val;

	rdmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);

	return val;
}

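/*
 * As the accessors above assume, the branch record MSRs are laid out as
 * interleaved from/to pairs starting at MSR_AMD_SAMP_BR_FROM, hence the
 * stride of two: for example, entry 1 lives at MSR_AMD_SAMP_BR_FROM + 2
 * (from) and MSR_AMD_SAMP_BR_FROM + 3 (to).
 */
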
static __always_inline u64 sign_ext_branch_ip(u64 ip)
{
	u32 shift = 64 - boot_cpu_data.x86_virt_bits;

	return (u64)(((s64)ip << shift) >> shift);
}

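/*
 * Worked example (values chosen for illustration): on a CPU with 48 virtual
 * address bits, shift = 16, so a recorded IP of 0x0000800012345678 (bit 47
 * set) is sign-extended to the canonical address 0xffff800012345678, while
 * 0x0000700012345678 is returned unchanged.
 */
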
static void amd_pmu_lbr_filter(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int br_sel = cpuc->br_sel, offset, type, i, j;
	bool compress = false;
	bool fused_only = false;
	u64 from, to;

	/*
	 * If all branch types are sampled and the branch type need not be
	 * saved, only the fused-branch from-address fixup below is needed
	 */
	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
		fused_only = true;

	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
		from = cpuc->lbr_entries[i].from;
		to = cpuc->lbr_entries[i].to;
		type = branch_type_fused(from, to, 0, &offset);

		/*
		 * Adjust the branch from address in case of instruction
		 * fusion where it points to an instruction preceding the
		 * actual branch.
		 */
		if (offset) {
			cpuc->lbr_entries[i].from += offset;
			if (fused_only)
				continue;
		}

		/* If type does not correspond, then discard */
		if (type == X86_BR_NONE || (br_sel & type) != type) {
			cpuc->lbr_entries[i].from = 0;	/* mark invalid */
			compress = true;
		}

		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
			cpuc->lbr_entries[i].type = common_branch_type(type);
	}

	if (!compress)
		return;

	/* Remove all invalid entries */
	for (i = 0; i < cpuc->lbr_stack.nr; ) {
		if (!cpuc->lbr_entries[i].from) {
			j = i;
			while (++j < cpuc->lbr_stack.nr)
				cpuc->lbr_entries[j - 1] = cpuc->lbr_entries[j];
			cpuc->lbr_stack.nr--;
			if (!cpuc->lbr_entries[i].from)
				continue;
		}
		i++;
	}
}
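
/*
 * Note on the fusion fixup above: a flag-setting instruction such as CMP or
 * TEST can be macro-fused with the Jcc that follows it, in which case the
 * hardware may record the address of the first instruction of the fused pair
 * as the branch source. branch_type_fused() reports the distance to the
 * actual branch instruction via @offset so the from address can be corrected.
 */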

static const int lbr_spec_map[PERF_BR_SPEC_MAX] = {
	PERF_BR_SPEC_NA,
	PERF_BR_SPEC_WRONG_PATH,
	PERF_BR_NON_SPEC_CORRECT_PATH,
	PERF_BR_SPEC_CORRECT_PATH,
};

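/*
 * lbr_spec_map is indexed with (valid << 1) | spec taken from the LBR To
 * record (see amd_pmu_lbr_read() below):
 *
 *   valid spec  idx  reported speculation outcome
 *     0     0    0   PERF_BR_SPEC_NA (entry is discarded anyway)
 *     0     1    1   PERF_BR_SPEC_WRONG_PATH
 *     1     0    2   PERF_BR_NON_SPEC_CORRECT_PATH
 *     1     1    3   PERF_BR_SPEC_CORRECT_PATH
 */
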
void amd_pmu_lbr_read(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct perf_branch_entry *br = cpuc->lbr_entries;
	struct branch_entry entry;
	int out = 0, idx, i;

	if (!cpuc->lbr_users)
		return;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		entry.from.full	= amd_pmu_lbr_get_from(i);
		entry.to.full	= amd_pmu_lbr_get_to(i);

		/*
		 * Check if a branch has been logged; if valid = 0, spec = 0
		 * then no branch was recorded; if reserved = 1 then an
		 * erroneous branch was recorded (see Erratum 1452)
		 */
		if ((!entry.to.split.valid && !entry.to.split.spec) ||
		    entry.to.split.reserved)
			continue;

		perf_clear_branch_entry_bitfields(br + out);

		br[out].from	= sign_ext_branch_ip(entry.from.split.ip);
		br[out].to	= sign_ext_branch_ip(entry.to.split.ip);
		br[out].mispred	= entry.from.split.mispredict;
		br[out].predicted = !br[out].mispred;

		/*
		 * Set branch speculation information using the status of
		 * the valid and spec bits.
		 *
		 * When valid = 0, spec = 0, no branch was recorded and the
		 * entry is discarded as seen above.
		 *
		 * When valid = 0, spec = 1, the recorded branch was
		 * speculative but took the wrong path.
		 *
		 * When valid = 1, spec = 0, the recorded branch was
		 * non-speculative and took the correct path.
		 *
		 * When valid = 1, spec = 1, the recorded branch was
		 * speculative and took the correct path.
		 */
		idx = (entry.to.split.valid << 1) | entry.to.split.spec;
		br[out].spec = lbr_spec_map[idx];
		out++;
	}

	cpuc->lbr_stack.nr = out;

	/*
	 * Internal register renaming ensures that LBR From[0] and LBR To[0]
	 * always represent the TOS
	 */
	cpuc->lbr_stack.hw_idx = 0;

	/* Perform further software filtering */
	amd_pmu_lbr_filter();
}

static const int lbr_select_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGNORE,

	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_IN_TX_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_NO_TX_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,

	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,

	[PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT]	= LBR_NOT_SUPP,
};

static int amd_pmu_lbr_setup_filter(struct perf_event *event)
{
	struct hw_perf_event_extra *reg = &event->hw.branch_reg;
	u64 br_type = event->attr.branch_sample_type;
	u64 mask = 0, v;
	int i;

	/* No LBR support */
	if (!x86_pmu.lbr_nr)
		return -EOPNOTSUPP;

	if (br_type & PERF_SAMPLE_BRANCH_USER)
		mask |= X86_BR_USER;

	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
		mask |= X86_BR_KERNEL;

	/* Ignore BRANCH_HV here */

	if (br_type & PERF_SAMPLE_BRANCH_ANY)
		mask |= X86_BR_ANY;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
		mask |= X86_BR_ANY_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
		mask |= X86_BR_IND_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_COND)
		mask |= X86_BR_JCC;

	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
		mask |= X86_BR_IND_JMP;

	if (br_type & PERF_SAMPLE_BRANCH_CALL)
		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
		mask |= X86_BR_TYPE_SAVE;

	reg->reg = mask;
	mask = 0;

	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
		if (!(br_type & BIT_ULL(i)))
			continue;

		v = lbr_select_map[i];
		if (v == LBR_NOT_SUPP)
			return -EOPNOTSUPP;

		if (v != LBR_IGNORE)
			mask |= v;
	}

	/* Filter bits operate in suppress mode */
	reg->config = mask ^ LBR_SELECT_MASK;

	return 0;
}
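
/*
 * Illustration of the suppress-mode inversion above (hypothetical request,
 * privilege filter bits left out for brevity): sampling only
 * PERF_SAMPLE_BRANCH_ANY_RETURN maps to LBR_RETURN | LBR_FAR = 0x120, so
 * reg->config = 0x120 ^ LBR_SELECT_MASK = 0x0df, i.e. every filter bit is
 * set (suppressing that branch type) except those for near returns and far
 * branches.
 */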

int amd_pmu_lbr_hw_config(struct perf_event *event)
{
	int ret = 0;

	ret = amd_pmu_lbr_setup_filter(event);
	if (!ret)
		event->attach_state |= PERF_ATTACH_SCHED_CB;

	return ret;
}

void amd_pmu_lbr_reset(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int i;

	if (!x86_pmu.lbr_nr)
		return;

	/* Reset all branch records individually */
	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		amd_pmu_lbr_set_from(i, 0);
		amd_pmu_lbr_set_to(i, 0);
	}

	cpuc->last_task_ctx = NULL;
	cpuc->last_log_id = 0;
	wrmsrq(MSR_AMD64_LBR_SELECT, 0);
}

void amd_pmu_lbr_add(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event_extra *reg = &event->hw.branch_reg;

	if (!x86_pmu.lbr_nr)
		return;

	if (has_branch_stack(event)) {
		cpuc->lbr_select = 1;
		cpuc->lbr_sel->config = reg->config;
		cpuc->br_sel = reg->reg;
	}

	perf_sched_cb_inc(event->pmu);

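	/*
	 * Wipe any stale records when the first LBR user comes online, but
	 * not when an already-running event is merely being rescheduled
	 */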
	if (!cpuc->lbr_users++ && !event->total_time_running)
		amd_pmu_lbr_reset();
}

void amd_pmu_lbr_del(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	if (has_branch_stack(event))
		cpuc->lbr_select = 0;

	cpuc->lbr_users--;
	WARN_ON_ONCE(cpuc->lbr_users < 0);
	perf_sched_cb_dec(event->pmu);
}

void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx,
			    struct task_struct *task, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/*
	 * A context switch can flip the address space and LBR entries are
	 * not tagged with an identifier. Hence, branches cannot be resolved
	 * from the old address space and the LBR records should be wiped.
	 */
	if (cpuc->lbr_users && sched_in)
		amd_pmu_lbr_reset();
}

void amd_pmu_lbr_enable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	u64 lbr_select, dbg_ctl, dbg_extn_cfg;

	if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
		return;

	/* Set hardware branch filter */
	if (cpuc->lbr_select) {
		lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK;
		wrmsrq(MSR_AMD64_LBR_SELECT, lbr_select);
	}

	if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
		rdmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
		wrmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
	}

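	/* Finally, turn on branch recording via the LBRV2EN bit in DBG_EXTN_CFG */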
	rdmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
	wrmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}

void amd_pmu_lbr_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
		return;

	__amd_pmu_lbr_disable();
}

__init int amd_pmu_lbr_init(void)
{
	union cpuid_0x80000022_ebx ebx;

	if (x86_pmu.version < 2 || !boot_cpu_has(X86_FEATURE_AMD_LBR_V2))
		return -EOPNOTSUPP;

	/* Set number of entries */
	ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
	x86_pmu.lbr_nr = ebx.split.lbr_v2_stack_sz;

	pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);

	return 0;
}