1 // SPDX-License-Identifier: GPL-2.0
2
3 #define pr_fmt(fmt) "x86/split lock detection: " fmt
4
5 #include <linux/semaphore.h>
6 #include <linux/workqueue.h>
7 #include <linux/delay.h>
8 #include <linux/cpuhotplug.h>
9 #include <linux/kvm_types.h>
10 #include <asm/cpu_device_id.h>
11 #include <asm/cmdline.h>
12 #include <asm/traps.h>
13 #include <asm/cpu.h>
14 #include <asm/msr.h>
15
/*
 * Software policy for split/bus lock events, selected via the
 * "split_lock_detect=" command line option and stored in sld_state.
 */
enum split_lock_detect_state {
	sld_off = 0,	/* No handling; detection left disabled */
	sld_warn,	/* Warn, then briefly disable detection on the offending CPU */
	sld_fatal,	/* Send SIGBUS to the offending user-space task */
	sld_ratelimit,	/* Rate limit bus locks system wide via #DB */
};
22
23 /*
24 * Default to sld_off because most systems do not support split lock detection.
25 * sld_state_setup() will switch this to sld_warn on systems that support
26 * split lock/bus lock detect, unless there is a command line override.
27 */
28 static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
29 static u64 msr_test_ctrl_cache __ro_after_init;
30
31 /*
32 * With a name like MSR_TEST_CTL it should go without saying, but don't touch
33 * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it
34 * on CPUs that do not support SLD can cause fireworks, even when writing '0'.
35 */
36 static bool cpu_model_supports_sld __ro_after_init;
37
38 static const struct {
39 const char *option;
40 enum split_lock_detect_state state;
41 } sld_options[] __initconst = {
42 { "off", sld_off },
43 { "warn", sld_warn },
44 { "fatal", sld_fatal },
45 { "ratelimit:", sld_ratelimit },
46 };
47
48 static struct ratelimit_state bld_ratelimit;
49
50 static unsigned int sysctl_sld_mitigate = 1;
51 static DEFINE_SEMAPHORE(buslock_sem, 1);
52
#ifdef CONFIG_PROC_SYSCTL
/* Expose kernel.split_lock_mitigate as a 0/1 knob under /proc/sys. */
static const struct ctl_table sld_sysctls[] = {
	{
		.procname = "split_lock_mitigate",
		.data = &sysctl_sld_mitigate,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_douintvec_minmax,
		.extra1 = SYSCTL_ZERO,	/* clamp value to [0, 1] */
		.extra2 = SYSCTL_ONE,
	},
};

/* Register the sysctl once the core sysctl infrastructure is up. */
static int __init sld_mitigate_sysctl_init(void)
{
	register_sysctl_init("kernel", sld_sysctls);
	return 0;
}

late_initcall(sld_mitigate_sysctl_init);
#endif
74
match_option(const char * arg,int arglen,const char * opt)75 static inline bool match_option(const char *arg, int arglen, const char *opt)
76 {
77 int len = strlen(opt), ratelimit;
78
79 if (strncmp(arg, opt, len))
80 return false;
81
82 /*
83 * Min ratelimit is 1 bus lock/sec.
84 * Max ratelimit is 1000 bus locks/sec.
85 */
86 if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 &&
87 ratelimit > 0 && ratelimit <= 1000) {
88 ratelimit_state_init(&bld_ratelimit, HZ, ratelimit);
89 ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE);
90 return true;
91 }
92
93 return len == arglen;
94 }
95
/*
 * Set or clear the SLD enable bit in MSR_TEST_CTRL and verify the write
 * stuck. The _safe accessors report a faulting MSR access as failure
 * instead of crashing, so this doubles as a support probe.
 */
static bool split_lock_verify_msr(bool on)
{
	u64 ctrl, tmp;

	if (rdmsrq_safe(MSR_TEST_CTRL, &ctrl))
		return false;
	if (on)
		ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
	else
		ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
	if (wrmsrq_safe(MSR_TEST_CTRL, ctrl))
		return false;
	/* Read back to confirm the new value actually took effect. */
	rdmsrq(MSR_TEST_CTRL, tmp);
	return ctrl == tmp;
}
111
/*
 * Choose the split lock policy: default to sld_warn on hardware that
 * supports split or bus lock detection, overridden by any recognized
 * "split_lock_detect=" command line option.
 */
static void __init sld_state_setup(void)
{
	enum split_lock_detect_state state = sld_warn;
	char arg[20];
	int i, ret;

	/* Leave sld_state at sld_off when the hardware supports neither. */
	if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
	    !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
		return;

	ret = cmdline_find_option(boot_command_line, "split_lock_detect",
				  arg, sizeof(arg));
	if (ret >= 0) {
		/* First matching option wins; unknown strings keep sld_warn. */
		for (i = 0; i < ARRAY_SIZE(sld_options); i++) {
			if (match_option(arg, ret, sld_options[i].option)) {
				state = sld_options[i].state;
				break;
			}
		}
	}
	sld_state = state;
}
134
/*
 * The option value is actually parsed by sld_state_setup() via
 * cmdline_find_option(); this handler exists only so that
 * "split_lock_detect=" is recognized as a valid kernel parameter.
 */
static __init int setup_split_lock_detect(char *arg)
{
	return 1;	/* parameter consumed */
}
__setup("split_lock_detect=", setup_split_lock_detect);
140
/*
 * Probe that the SLD bit in MSR_TEST_CTRL can be both cleared and set,
 * cache the MSR's boot value, and on success force-enable the
 * X86_FEATURE_SPLIT_LOCK_DETECT capability.
 */
static void __init __split_lock_setup(void)
{
	/* Verify the SLD bit can be cleared... */
	if (!split_lock_verify_msr(false)) {
		pr_info("MSR access failed: Disabled\n");
		return;
	}

	/* Cache the boot value (SLD cleared) for sld_update_msr(). */
	rdmsrq(MSR_TEST_CTRL, msr_test_ctrl_cache);

	/* ...and that it can be set. */
	if (!split_lock_verify_msr(true)) {
		pr_info("MSR access failed: Disabled\n");
		return;
	}

	/* Restore the MSR to its cached value. */
	wrmsrq(MSR_TEST_CTRL, msr_test_ctrl_cache);

	setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
}
160
161 /*
162 * MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking
163 * is not implemented as one thread could undo the setting of the other
164 * thread immediately after dropping the lock anyway.
165 */
sld_update_msr(bool on)166 static void sld_update_msr(bool on)
167 {
168 u64 test_ctrl_val = msr_test_ctrl_cache;
169
170 if (on)
171 test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
172
173 wrmsrq(MSR_TEST_CTRL, test_ctrl_val);
174 }
175
/*
 * Apply the current sld_state to this CPU's MSR_TEST_CTRL: SLD disabled
 * for ratelimit mode (bus locks are handled via #DB instead), otherwise
 * enabled unless the policy is off.
 */
void split_lock_init(void)
{
	/*
	 * #DB for bus lock handles ratelimit and #AC for split lock is
	 * disabled.
	 */
	if (sld_state == sld_ratelimit) {
		split_lock_verify_msr(false);
		return;
	}

	/* Never touch MSR_TEST_CTRL on non-whitelisted models; see above. */
	if (cpu_model_supports_sld)
		split_lock_verify_msr(sld_state != sld_off);
}
190
/*
 * Delayed-work handler: re-enable SLD after the cooldown and release the
 * semaphore so another core may take its turn with SLD disabled.
 */
static void __split_lock_reenable_unlock(struct work_struct *work)
{
	sld_update_msr(true);
	up(&buslock_sem);
}

/* Single shared instance; the semaphore guarantees at most one user. */
static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock);
198
/* Delayed-work handler: re-enable SLD (no semaphore, mitigation off path). */
static void __split_lock_reenable(struct work_struct *work)
{
	sld_update_msr(true);
}
/*
 * In order for each CPU to schedule its delayed work independently of the
 * others, delayed work struct must be per-CPU. This is not required when
 * sysctl_sld_mitigate is enabled because of the semaphore that limits
 * the number of simultaneously scheduled delayed works to 1.
 */
static DEFINE_PER_CPU(struct delayed_work, sl_reenable);
210
211 /*
212 * Per-CPU delayed_work can't be statically initialized properly because
213 * the struct address is unknown. Thus per-CPU delayed_work structures
214 * have to be initialized during kernel initialization and after calling
215 * setup_per_cpu_areas().
216 */
setup_split_lock_delayed_work(void)217 static int __init setup_split_lock_delayed_work(void)
218 {
219 unsigned int cpu;
220
221 for_each_possible_cpu(cpu) {
222 struct delayed_work *work = per_cpu_ptr(&sl_reenable, cpu);
223
224 INIT_DELAYED_WORK(work, __split_lock_reenable);
225 }
226
227 return 0;
228 }
229 pure_initcall(setup_split_lock_delayed_work);
230
231 /*
232 * If a CPU goes offline with pending delayed work to re-enable split lock
233 * detection then the delayed work will be executed on some other CPU. That
234 * handles releasing the buslock_sem, but because it executes on a
235 * different CPU probably won't re-enable split lock detection. This is a
236 * problem on HT systems since the sibling CPU on the same core may then be
237 * left running with split lock detection disabled.
238 *
239 * Unconditionally re-enable detection here.
240 */
splitlock_cpu_offline(unsigned int cpu)241 static int splitlock_cpu_offline(unsigned int cpu)
242 {
243 sld_update_msr(true);
244
245 return 0;
246 }
247
/*
 * Handle a split lock trap at @ip in warn mode: log once per task, then
 * temporarily disable SLD on this CPU so the faulting instruction can
 * make progress. A delayed work re-enables detection two jiffies later.
 * With kernel.split_lock_mitigate set, additionally sleep 10ms and
 * serialize offenders so at most one core has SLD disabled at a time.
 */
static void split_lock_warn(unsigned long ip)
{
	struct delayed_work *work;
	int cpu;
	/* Snapshot so the work choice and semaphore use stay consistent. */
	unsigned int saved_sld_mitigate = READ_ONCE(sysctl_sld_mitigate);

	/* Warn only on the task's first split lock. */
	if (!current->reported_split_lock)
		pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
				    current->comm, current->pid, ip);
	current->reported_split_lock = 1;

	if (saved_sld_mitigate) {
		/*
		 * misery factor #1:
		 * sleep 10ms before trying to execute split lock.
		 */
		if (msleep_interruptible(10) > 0)
			return;
		/*
		 * Misery factor #2:
		 * only allow one buslocked disabled core at a time.
		 */
		if (down_interruptible(&buslock_sem) == -EINTR)
			return;
	}

	/* Pin to this CPU so the delayed work targets the right core. */
	cpu = get_cpu();
	work = saved_sld_mitigate ? &sl_reenable_unlock : per_cpu_ptr(&sl_reenable, cpu);
	schedule_delayed_work_on(cpu, work, 2);

	/* Disable split lock detection on this CPU to make progress */
	sld_update_msr(false);
	put_cpu();
}
282
/*
 * KVM entry point for a split lock #AC taken by a guest. In warn mode the
 * event is absorbed like a user split lock (returns true). Otherwise log
 * once, queue SIGBUS for the vCPU task and return false so the caller
 * knows the event was not handled.
 */
bool handle_guest_split_lock(unsigned long ip)
{
	if (sld_state == sld_warn) {
		split_lock_warn(ip);
		return true;
	}

	/* sld_off here means a split lock that should not have trapped. */
	pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n",
		     current->comm, current->pid,
		     sld_state == sld_fatal ? "fatal" : "bogus", ip);

	current->thread.error_code = 0;
	current->thread.trap_nr = X86_TRAP_AC;
	force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
	return false;
}
EXPORT_SYMBOL_FOR_KVM(handle_guest_split_lock);
300
/*
 * Program the bus lock detect bit in IA32_DEBUGCTL for this CPU:
 * disabled when #AC split lock detection already covers warn/fatal (or
 * the policy is off), enabled otherwise (ratelimit, or warn/fatal on
 * CPUs without #AC split lock support).
 */
void bus_lock_init(void)
{
	u64 val;

	if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
		return;

	rdmsrq(MSR_IA32_DEBUGCTLMSR, val);

	if ((boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
	    (sld_state == sld_warn || sld_state == sld_fatal)) ||
	    sld_state == sld_off) {
		/*
		 * Warn and fatal are handled by #AC for split lock if #AC for
		 * split lock is supported.
		 */
		val &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
	} else {
		val |= DEBUGCTLMSR_BUS_LOCK_DETECT;
	}

	wrmsrq(MSR_IA32_DEBUGCTLMSR, val);
}
324
handle_user_split_lock(struct pt_regs * regs,long error_code)325 bool handle_user_split_lock(struct pt_regs *regs, long error_code)
326 {
327 if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
328 return false;
329 split_lock_warn(regs->ip);
330 return true;
331 }
332
/*
 * #DB handler for a detected bus lock: depending on policy, ignore,
 * throttle-and-warn, warn, or send SIGBUS to the offending task.
 */
void handle_bus_lock(struct pt_regs *regs)
{
	switch (sld_state) {
	case sld_off:
		break;
	case sld_ratelimit:
		/* Enforce no more than bld_ratelimit bus locks/sec. */
		while (!__ratelimit(&bld_ratelimit))
			msleep(20);
		/* Warn on the bus lock. */
		fallthrough;
	case sld_warn:
		pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n",
				    current->comm, current->pid, regs->ip);
		break;
	case sld_fatal:
		force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
		break;
	}
}
353
354 /*
355 * CPU models that are known to have the per-core split-lock detection
356 * feature even though they do not enumerate IA32_CORE_CAPABILITIES.
357 */
358 static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
359 X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
360 X86_MATCH_VFM(INTEL_ICELAKE_L, 0),
361 X86_MATCH_VFM(INTEL_ICELAKE_D, 0),
362 {}
363 };
364
/*
 * Decide whether this CPU supports split lock detection: either it is on
 * the known-model list above, or it sets the architectural split lock bit
 * in MSR_IA32_CORE_CAPS. Skipped entirely when running under a
 * hypervisor.
 */
static void __init split_lock_setup(struct cpuinfo_x86 *c)
{
	const struct x86_cpu_id *m;
	u64 ia32_core_caps;

	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return;

	/* Check for CPUs that have support but do not enumerate it: */
	m = x86_match_cpu(split_lock_cpu_ids);
	if (m)
		goto supported;

	if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES))
		return;

	/*
	 * Not all bits in MSR_IA32_CORE_CAPS are architectural, but
	 * MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT is. All CPUs that set
	 * it have split lock detection.
	 */
	rdmsrq(MSR_IA32_CORE_CAPS, ia32_core_caps);
	if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)
		goto supported;

	/* CPU is not in the model list and does not have the MSR bit: */
	return;

supported:
	cpu_model_supports_sld = true;
	__split_lock_setup();
}
397
/*
 * Report the chosen split/bus lock mode at boot and, on the #AC
 * warn/fatal path, register the CPU offline callback that keeps SLD
 * enabled on surviving siblings.
 */
static void sld_state_show(void)
{
	const char *action = "warning";

	/* Nothing to report without hardware support or with policy off. */
	if ((!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
	    !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) ||
	    (sld_state == sld_off))
		return;

	if (sld_state == sld_ratelimit) {
		if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
			pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst);
		return;
	} else if (sld_state == sld_fatal) {
		action = "sending SIGBUS";
	}

	if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
		pr_info("#AC: crashing the kernel on kernel split_locks and %s on user-space split_locks\n", action);

		/*
		 * This is handling the case where a CPU goes offline at the
		 * moment where split lock detection is disabled in the warn
		 * setting, see split_lock_warn(). It doesn't have any effect
		 * in the fatal case.
		 */
		if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/splitlock", NULL, splitlock_cpu_offline) < 0)
			pr_warn("No splitlock CPU offline handler\n");

	} else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
		pr_info("#DB: %s on user-space bus_locks\n", action);
	}
}
431
/*
 * Boot-time entry point: detect hardware support, pick the policy, then
 * report it. Order matters: split_lock_setup() may force the
 * X86_FEATURE_SPLIT_LOCK_DETECT flag that sld_state_setup() checks.
 */
void __init sld_setup(struct cpuinfo_x86 *c)
{
	split_lock_setup(c);
	sld_state_setup();
	sld_state_show();
}
438