// SPDX-License-Identifier: GPL-2.0-only

#define pr_fmt(fmt) "callthunks: " fmt

#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/moduleloader.h>
#include <linux/static_call.h>

#include <asm/alternative.h>
#include <asm/asm-offsets.h>
#include <asm/cpu.h>
#include <asm/ftrace.h>
#include <asm/insn.h>
#include <asm/kexec.h>
#include <asm/nospec-branch.h>
#include <asm/paravirt.h>
#include <asm/sections.h>
#include <asm/switch_to.h>
#include <asm/sync_core.h>
#include <asm/text-patching.h>
#include <asm/xen/hypercall.h>

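/*
 * Call depth tracking mitigates RSB underflow speculation on Skylake
 * generation CPUs.  Each patched function is preceded by a padding area
 * into which an accounting thunk is installed; the thunk increments the
 * per-CPU call depth so the return thunk can stuff the RSB before it
 * underflows.  This file redirects the call sites recorded in the
 * .call_sites section to those thunks.
 */
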
static int __initdata_or_module debug_callthunks;

#define MAX_PATCH_LEN (255-1)

#define prdbg(fmt, args...)					\
do {								\
	if (debug_callthunks)					\
		printk(KERN_DEBUG pr_fmt(fmt), ##args);		\
} while(0)

static int __init debug_thunks(char *str)
{
	debug_callthunks = 1;
	return 1;
}
__setup("debug-callthunks", debug_thunks);

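/*
 * Per-CPU debug counters, exposed via debugfs below: calls, returns,
 * RSB stuffs and context switches accounted by the thunks.
 */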
#ifdef CONFIG_CALL_THUNKS_DEBUG
DEFINE_PER_CPU(u64, __x86_call_count);
DEFINE_PER_CPU(u64, __x86_ret_count);
DEFINE_PER_CPU(u64, __x86_stuffs_count);
DEFINE_PER_CPU(u64, __x86_ctxsw_count);
EXPORT_PER_CPU_SYMBOL_GPL(__x86_ctxsw_count);
EXPORT_PER_CPU_SYMBOL_GPL(__x86_call_count);
#endif

extern s32 __call_sites[], __call_sites_end[];

struct core_text {
	unsigned long base;
	unsigned long end;
	const char *name;
};

static bool thunks_initialized __ro_after_init;

static const struct core_text builtin_coretext = {
	.base = (unsigned long)_text,
	.end  = (unsigned long)_etext,
	.name = "builtin",
};

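/*
 * The accounting thunk template.  It is kept in .rodata and copied
 * (with relocations applied) into the padding area in front of each
 * function that gets patched.
 */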
asm (
	".pushsection .rodata			\n"
	".global skl_call_thunk_template	\n"
	"skl_call_thunk_template:		\n"
		__stringify(INCREMENT_CALL_DEPTH)"	\n"
	".global skl_call_thunk_tail		\n"
	"skl_call_thunk_tail:			\n"
	".popsection				\n"
);

extern u8 skl_call_thunk_template[];
extern u8 skl_call_thunk_tail[];

#define SKL_TMPL_SIZE \
	((unsigned int)(skl_call_thunk_tail - skl_call_thunk_template))

extern void error_entry(void);
extern void xen_error_entry(void);
extern void paranoid_entry(void);

static inline bool within_coretext(const struct core_text *ct, void *addr)
{
	unsigned long p = (unsigned long)addr;

	return ct->base <= p && p < ct->end;
}

static inline bool within_module_coretext(void *addr)
{
	bool ret = false;

#ifdef CONFIG_MODULES
	struct module *mod;

	guard(rcu)();
	mod = __module_address((unsigned long)addr);
	if (mod && within_module_core((unsigned long)addr, mod))
		ret = true;
#endif
	return ret;
}

static bool is_coretext(const struct core_text *ct, void *addr)
{
	if (ct && within_coretext(ct, addr))
		return true;
	if (within_coretext(&builtin_coretext, addr))
		return true;
	return within_module_coretext(addr);
}

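/*
 * Call destinations which must not be redirected to an accounting
 * thunk: low level entry code, functions which handle the RSB or the
 * accounting themselves, CPU soft restart, the kexec relocation code
 * and the ftrace entry point.
 */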
static bool skip_addr(void *dest)
{
	if (dest == error_entry)
		return true;
	if (dest == paranoid_entry)
		return true;
	if (dest == xen_error_entry)
		return true;
	/* Does FILL_RSB... */
	if (dest == __switch_to_asm)
		return true;
	/* Accounts directly */
	if (dest == ret_from_fork)
		return true;
#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_AMD_MEM_ENCRYPT)
	if (dest == soft_restart_cpu)
		return true;
#endif
#ifdef CONFIG_FUNCTION_TRACER
	if (dest == __fentry__)
		return true;
#endif
#ifdef CONFIG_KEXEC_CORE
# ifdef CONFIG_X86_64
	if (dest >= (void *)__relocate_kernel_start &&
	    dest < (void *)__relocate_kernel_end)
		return true;
# else
	if (dest >= (void *)relocate_kernel &&
	    dest < (void *)relocate_kernel + KEXEC_CONTROL_CODE_MAX_SIZE)
		return true;
# endif
#endif
	return false;
}

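/*
 * Decode the instruction at a call site and return its destination.
 * Returns NULL if the call has been patched out or targets an excluded
 * address, or an ERR_PTR() if the instruction cannot be decoded.
 */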
static __init_or_module void *call_get_dest(void *addr)
{
	struct insn insn;
	void *dest;
	int ret;

	ret = insn_decode_kernel(&insn, addr);
	if (ret)
		return ERR_PTR(ret);

	/* Patched out call? */
	if (insn.opcode.bytes[0] != CALL_INSN_OPCODE)
		return NULL;

	dest = addr + insn.length + insn.immediate.value;
	if (skip_addr(dest))
		return NULL;
	return dest;
}

static const u8 nops[] = {
	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
};

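/*
 * Install the accounting thunk into the padding area in front of @dest.
 * The padding is expected to contain NOPs; if it already holds the
 * relocated template the destination has been patched before.  @direct
 * selects a plain memcpy (early boot patching) versus text_poke on live
 * kernel text.  Returns the thunk address or NULL on failure.
 */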
static void *patch_dest(void *dest, bool direct)
{
	unsigned int tsize = SKL_TMPL_SIZE;
	u8 insn_buff[MAX_PATCH_LEN];
	u8 *pad = dest - tsize;

	memcpy(insn_buff, skl_call_thunk_template, tsize);
	apply_relocation(insn_buff, pad, tsize, skl_call_thunk_template, tsize);

	/* Already patched? */
	if (!bcmp(pad, insn_buff, tsize))
		return pad;

	/* Ensure there are nops */
	if (bcmp(pad, nops, tsize)) {
		pr_warn_once("Invalid padding area for %pS\n", dest);
		return NULL;
	}

	if (direct)
		memcpy(pad, insn_buff, tsize);
	else
		text_poke_copy_locked(pad, insn_buff, tsize, true);
	return pad;
}

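/*
 * Patch a single call site: if both the site and the destination are
 * core (kernel or module) text, install the thunk in the destination's
 * padding and rewrite the call to target the thunk instead of the
 * function itself.
 */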
static __init_or_module void patch_call(void *addr, const struct core_text *ct)
{
	void *pad, *dest;
	u8 bytes[8];

	if (!within_coretext(ct, addr))
		return;

	dest = call_get_dest(addr);
	if (!dest || WARN_ON_ONCE(IS_ERR(dest)))
		return;

	if (!is_coretext(ct, dest))
		return;

	pad = patch_dest(dest, within_coretext(ct, dest));
	if (!pad)
		return;

	prdbg("Patch call at: %pS %px to %pS %px -> %px \n", addr, addr,
		dest, dest, pad);
	__text_gen_insn(bytes, CALL_INSN_OPCODE, addr, pad, CALL_INSN_SIZE);
	text_poke_early(addr, bytes, CALL_INSN_SIZE);
}

static __init_or_module void
patch_call_sites(s32 *start, s32 *end, const struct core_text *ct)
{
	s32 *s;

	for (s = start; s < end; s++)
		patch_call((void *)s + *s, ct);
}

static __init_or_module void
callthunks_setup(struct callthunk_sites *cs, const struct core_text *ct)
{
	prdbg("Patching call sites %s\n", ct->name);
	patch_call_sites(cs->call_start, cs->call_end, ct);
	prdbg("Patching call sites done%s\n", ct->name);
}

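/*
 * Patch all call sites recorded in the built-in .call_sites section at
 * boot, once the CPU has been flagged as needing call depth tracking.
 */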
void __init callthunks_patch_builtin_calls(void)
{
	struct callthunk_sites cs = {
		.call_start = __call_sites,
		.call_end   = __call_sites_end,
	};

	if (!cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
		return;

	pr_info("Setting up call depth tracking\n");
	mutex_lock(&text_mutex);
	callthunks_setup(&cs, &builtin_coretext);
	thunks_initialized = true;
	mutex_unlock(&text_mutex);
}

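/*
 * Translate a call destination to the address of its accounting thunk,
 * so that other text patching callers account the call as well.  Falls
 * back to the original destination if no thunk can be installed.
 */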
void *callthunks_translate_call_dest(void *dest)
{
	void *target;

	lockdep_assert_held(&text_mutex);

	if (!thunks_initialized || skip_addr(dest))
		return dest;

	if (!is_coretext(NULL, dest))
		return dest;

	target = patch_dest(dest, false);
	return target ? : dest;
}

#ifdef CONFIG_BPF_JIT
static bool is_callthunk(void *addr)
{
	unsigned int tmpl_size = SKL_TMPL_SIZE;
	u8 insn_buff[MAX_PATCH_LEN];
	unsigned long dest;
	u8 *pad;

	dest = roundup((unsigned long)addr, CONFIG_FUNCTION_ALIGNMENT);
	if (!thunks_initialized || skip_addr((void *)dest))
		return false;

	pad = (void *)(dest - tmpl_size);

	memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
	apply_relocation(insn_buff, pad, tmpl_size, skl_call_thunk_template, tmpl_size);

	return !bcmp(pad, insn_buff, tmpl_size);
}

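/*
 * Emit the accounting template into a BPF JIT buffer at *pprog and
 * advance the program pointer.  Nothing is emitted when thunks are not
 * initialized or when the call target is already an accounting thunk.
 * Returns the number of bytes emitted.
 */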
int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip)
{
	unsigned int tmpl_size = SKL_TMPL_SIZE;
	u8 insn_buff[MAX_PATCH_LEN];

	if (!thunks_initialized)
		return 0;

	/* Is function call target a thunk? */
	if (func && is_callthunk(func))
		return 0;

	memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
	apply_relocation(insn_buff, ip, tmpl_size, skl_call_thunk_template, tmpl_size);

	memcpy(*pprog, insn_buff, tmpl_size);
	*pprog += tmpl_size;
	return tmpl_size;
}
#endif

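/*
 * Module support: patch the call sites of a freshly loaded module,
 * using the module's own text range as the core text to patch against.
 */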
#ifdef CONFIG_MODULES
void noinline callthunks_patch_module_calls(struct callthunk_sites *cs,
					    struct module *mod)
{
	struct core_text ct = {
		.base = (unsigned long)mod->mem[MOD_TEXT].base,
		.end  = (unsigned long)mod->mem[MOD_TEXT].base + mod->mem[MOD_TEXT].size,
		.name = mod->name,
	};

	if (!thunks_initialized)
		return;

	mutex_lock(&text_mutex);
	callthunks_setup(cs, &ct);
	mutex_unlock(&text_mutex);
}
#endif /* CONFIG_MODULES */

#if defined(CONFIG_CALL_THUNKS_DEBUG) && defined(CONFIG_DEBUG_FS)
static int callthunks_debug_show(struct seq_file *m, void *p)
{
	unsigned long cpu = (unsigned long)m->private;

	seq_printf(m, "C: %16llu R: %16llu S: %16llu X: %16llu\n",
		   per_cpu(__x86_call_count, cpu),
		   per_cpu(__x86_ret_count, cpu),
		   per_cpu(__x86_stuffs_count, cpu),
		   per_cpu(__x86_ctxsw_count, cpu));
	return 0;
}

static int callthunks_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, callthunks_debug_show, inode->i_private);
}

static const struct file_operations dfs_ops = {
	.open = callthunks_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int __init callthunks_debugfs_init(void)
{
	struct dentry *dir;
	unsigned long cpu;

	dir = debugfs_create_dir("callthunks", NULL);
	for_each_possible_cpu(cpu) {
		void *arg = (void *)cpu;
		char name[10];

		sprintf(name, "cpu%lu", cpu);
		debugfs_create_file(name, 0644, dir, arg, &dfs_ops);
	}
	return 0;
}
__initcall(callthunks_debugfs_init);
#endif