// SPDX-License-Identifier: GPL-2.0

#define pr_fmt(fmt) "x86/split lock detection: " fmt

#include <linux/semaphore.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
#include <linux/cpuhotplug.h>
#include <linux/kvm_types.h>
#include <asm/cpu_device_id.h>
#include <asm/cmdline.h>
#include <asm/traps.h>
#include <asm/cpu.h>
#include <asm/msr.h>

enum split_lock_detect_state {
	sld_off = 0,
	sld_warn,
	sld_fatal,
	sld_ratelimit,
};

/*
 * Default to sld_off because most systems do not support split lock detection.
 * sld_state_setup() will switch this to sld_warn on systems that support
 * split lock/bus lock detect, unless there is a command line override.
 */
static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
static u64 msr_test_ctrl_cache __ro_after_init;

/*
 * With a name like MSR_TEST_CTRL it should go without saying, but don't touch
 * MSR_TEST_CTRL unless the CPU is one of the whitelisted models. Writing it
 * on CPUs that do not support SLD can cause fireworks, even when writing '0'.
 */
static bool cpu_model_supports_sld __ro_after_init;

static const struct {
	const char			*option;
	enum split_lock_detect_state	state;
} sld_options[] __initconst = {
	{ "off",	sld_off   },
	{ "warn",	sld_warn  },
	{ "fatal",	sld_fatal },
	{ "ratelimit:",	sld_ratelimit },
};

static struct ratelimit_state bld_ratelimit;

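/*
 * Exposed as the kernel.split_lock_mitigate sysctl. When 1 (the default),
 * split_lock_warn() applies the extra "misery" mitigations; when 0 it only
 * warns and re-enables detection via the per-CPU delayed work. For example:
 *
 *	sysctl -w kernel.split_lock_mitigate=0
 */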
static unsigned int sysctl_sld_mitigate = 1;
static DEFINE_SEMAPHORE(buslock_sem, 1);

#ifdef CONFIG_PROC_SYSCTL
static const struct ctl_table sld_sysctls[] = {
	{
		.procname	= "split_lock_mitigate",
		.data		= &sysctl_sld_mitigate,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_douintvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
};

static int __init sld_mitigate_sysctl_init(void)
{
	register_sysctl_init("kernel", sld_sysctls);
	return 0;
}

late_initcall(sld_mitigate_sysctl_init);
#endif

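/*
 * Match one split_lock_detect= option. "ratelimit:" carries a numeric
 * argument (e.g. split_lock_detect=ratelimit:5 allows 5 bus locks/sec), so
 * it is matched on its prefix and the rate is parsed and validated here.
 */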
static inline bool match_option(const char *arg, int arglen, const char *opt)
{
	int len = strlen(opt), ratelimit;

	if (strncmp(arg, opt, len))
		return false;

	/*
	 * Min ratelimit is 1 bus lock/sec.
	 * Max ratelimit is 1000 bus locks/sec.
	 */
	if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 &&
	    ratelimit > 0 && ratelimit <= 1000) {
		ratelimit_state_init(&bld_ratelimit, HZ, ratelimit);
		ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE);
		return true;
	}

	return len == arglen;
}

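/*
 * Set or clear the split lock detect bit in MSR_TEST_CTRL and read the MSR
 * back to confirm the write took effect. Returns false if the MSR is not
 * accessible, which doubles as the support probe in __split_lock_setup().
 */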
static bool split_lock_verify_msr(bool on)
{
	u64 ctrl, tmp;

	if (rdmsrq_safe(MSR_TEST_CTRL, &ctrl))
		return false;
	if (on)
		ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
	else
		ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
	if (wrmsrq_safe(MSR_TEST_CTRL, ctrl))
		return false;
	rdmsrq(MSR_TEST_CTRL, tmp);
	return ctrl == tmp;
}

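/*
 * Select the detection state: sld_warn by default on hardware with either
 * split lock or bus lock detect, overridden by split_lock_detect= on the
 * command line.
 */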
static void __init sld_state_setup(void)
{
	enum split_lock_detect_state state = sld_warn;
	char arg[20];
	int i, ret;

	if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
	    !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
		return;

	ret = cmdline_find_option(boot_command_line, "split_lock_detect",
				  arg, sizeof(arg));
	if (ret >= 0) {
		for (i = 0; i < ARRAY_SIZE(sld_options); i++) {
			if (match_option(arg, ret, sld_options[i].option)) {
				state = sld_options[i].state;
				break;
			}
		}
	}
	sld_state = state;
}

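/*
 * Verify that the split lock detect bit in MSR_TEST_CTRL can be toggled,
 * cache the MSR value (with the bit cleared) as the base for
 * sld_update_msr(), and force-set X86_FEATURE_SPLIT_LOCK_DETECT.
 */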
static void __init __split_lock_setup(void)
{
	if (!split_lock_verify_msr(false)) {
		pr_info("MSR access failed: Disabled\n");
		return;
	}

	rdmsrq(MSR_TEST_CTRL, msr_test_ctrl_cache);

	if (!split_lock_verify_msr(true)) {
		pr_info("MSR access failed: Disabled\n");
		return;
	}

	/* Restore the MSR to its cached value. */
	wrmsrq(MSR_TEST_CTRL, msr_test_ctrl_cache);

	setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
}

/*
 * MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking
 * is not implemented as one thread could undo the setting of the other
 * thread immediately after dropping the lock anyway.
 */
static void sld_update_msr(bool on)
{
	u64 test_ctrl_val = msr_test_ctrl_cache;

	if (on)
		test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;

	wrmsrq(MSR_TEST_CTRL, test_ctrl_val);
}

void split_lock_init(void)
{
	/*
	 * In sld_ratelimit mode, bus locks are rate limited via #DB and
	 * #AC for split lock is left disabled.
	 */
	if (sld_state == sld_ratelimit) {
		split_lock_verify_msr(false);
		return;
	}

	if (cpu_model_supports_sld)
		split_lock_verify_msr(sld_state != sld_off);
}

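/*
 * Delayed work callbacks that turn split lock detection back on after
 * split_lock_warn() disabled it so the faulting instruction could make
 * progress. The _unlock variant also releases buslock_sem, which serializes
 * cores in the sysctl_sld_mitigate case.
 */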
static void __split_lock_reenable_unlock(struct work_struct *work)
{
	sld_update_msr(true);
	up(&buslock_sem);
}

static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock);

static void __split_lock_reenable(struct work_struct *work)
{
	sld_update_msr(true);
}
/*
 * In order for each CPU to schedule its delayed work independently of the
 * others, delayed work struct must be per-CPU. This is not required when
 * sysctl_sld_mitigate is enabled because of the semaphore that limits
 * the number of simultaneously scheduled delayed works to 1.
 */
static DEFINE_PER_CPU(struct delayed_work, sl_reenable);

/*
 * Per-CPU delayed_work can't be statically initialized properly because
 * the struct address is unknown. Thus per-CPU delayed_work structures
 * have to be initialized during kernel initialization and after calling
 * setup_per_cpu_areas().
 */
static int __init setup_split_lock_delayed_work(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		struct delayed_work *work = per_cpu_ptr(&sl_reenable, cpu);

		INIT_DELAYED_WORK(work, __split_lock_reenable);
	}

	return 0;
}
pure_initcall(setup_split_lock_delayed_work);

/*
 * If a CPU goes offline with pending delayed work to re-enable split lock
 * detection then the delayed work will be executed on some other CPU. That
 * handles releasing the buslock_sem, but because it executes on a
 * different CPU probably won't re-enable split lock detection. This is a
 * problem on HT systems since the sibling CPU on the same core may then be
 * left running with split lock detection disabled.
 *
 * Unconditionally re-enable detection here.
 */
static int splitlock_cpu_offline(unsigned int cpu)
{
	sld_update_msr(true);

	return 0;
}

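/*
 * Warn (once per task) about a split lock, then let the task make progress:
 * optionally apply the "misery" mitigations below, disable detection on this
 * CPU, and schedule delayed work to re-enable it.
 */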
static void split_lock_warn(unsigned long ip)
{
	struct delayed_work *work;
	int cpu;
	unsigned int saved_sld_mitigate = READ_ONCE(sysctl_sld_mitigate);

	if (!current->reported_split_lock)
		pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
				    current->comm, current->pid, ip);
	current->reported_split_lock = 1;

	if (saved_sld_mitigate) {
		/*
		 * Misery factor #1:
		 * sleep 10ms before trying to execute split lock.
		 */
		if (msleep_interruptible(10) > 0)
			return;
		/*
		 * Misery factor #2:
		 * only allow one core at a time to run with split lock
		 * detection disabled.
		 */
		if (down_interruptible(&buslock_sem) == -EINTR)
			return;
	}

	cpu = get_cpu();
	work = saved_sld_mitigate ? &sl_reenable_unlock : per_cpu_ptr(&sl_reenable, cpu);
	schedule_delayed_work_on(cpu, work, 2);

	/* Disable split lock detection on this CPU to make progress. */
	sld_update_msr(false);
	put_cpu();
}

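/*
 * Called by KVM when a guest triggers a split lock #AC. Returns true if the
 * event was handled by warning; otherwise SIGBUS is forced on the current
 * task and false tells the caller the event was fatal or unexpected.
 */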
bool handle_guest_split_lock(unsigned long ip)
{
	if (sld_state == sld_warn) {
		split_lock_warn(ip);
		return true;
	}

	pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n",
		     current->comm, current->pid,
		     sld_state == sld_fatal ? "fatal" : "bogus", ip);

	current->thread.error_code = 0;
	current->thread.trap_nr = X86_TRAP_AC;
	force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
	return false;
}
EXPORT_SYMBOL_FOR_KVM(handle_guest_split_lock);

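/*
 * Enable #DB for bus lock detection, unless the warn/fatal cases are already
 * covered by #AC for split lock or detection is off altogether.
 */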
void bus_lock_init(void)
{
	u64 val;

	if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
		return;

	rdmsrq(MSR_IA32_DEBUGCTLMSR, val);

	if ((boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
	    (sld_state == sld_warn || sld_state == sld_fatal)) ||
	    sld_state == sld_off) {
		/*
		 * Warn and fatal are handled by #AC for split lock if #AC for
		 * split lock is supported.
		 */
		val &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
	} else {
		val |= DEBUGCTLMSR_BUS_LOCK_DETECT;
	}

	wrmsrq(MSR_IA32_DEBUGCTLMSR, val);
}

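/*
 * #AC handler for a user-space split lock. Returns false, leading to SIGBUS,
 * in fatal mode or when the task had legacy alignment checking (EFLAGS.AC)
 * enabled, i.e. when the #AC was not caused by split lock detection.
 */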
bool handle_user_split_lock(struct pt_regs *regs, long error_code)
{
	if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
		return false;
	split_lock_warn(regs->ip);
	return true;
}

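/*
 * #DB handler for a user-space bus lock: rate limit, warn or send SIGBUS
 * depending on sld_state.
 */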
void handle_bus_lock(struct pt_regs *regs)
{
	switch (sld_state) {
	case sld_off:
		break;
	case sld_ratelimit:
		/* Enforce no more than bld_ratelimit bus locks/sec. */
		while (!__ratelimit(&bld_ratelimit))
			msleep(20);
		/* Warn on the bus lock. */
		fallthrough;
	case sld_warn:
		pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n",
				    current->comm, current->pid, regs->ip);
		break;
	case sld_fatal:
		force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
		break;
	}
}

/*
 * CPU models that are known to have the per-core split-lock detection
 * feature even though they do not enumerate IA32_CORE_CAPABILITIES.
 */
static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
	X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
	X86_MATCH_VFM(INTEL_ICELAKE_L, 0),
	X86_MATCH_VFM(INTEL_ICELAKE_D, 0),
	{}
};

static void __init split_lock_setup(struct cpuinfo_x86 *c)
{
	const struct x86_cpu_id *m;
	u64 ia32_core_caps;

	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return;

	/* Check for CPUs that have support but do not enumerate it: */
	m = x86_match_cpu(split_lock_cpu_ids);
	if (m)
		goto supported;

	if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES))
		return;

	/*
	 * Not all bits in MSR_IA32_CORE_CAPS are architectural, but
	 * MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT is. All CPUs that set
	 * it have split lock detection.
	 */
	rdmsrq(MSR_IA32_CORE_CAPS, ia32_core_caps);
	if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)
		goto supported;

	/* CPU is not in the model list and does not have the MSR bit: */
	return;

supported:
	cpu_model_supports_sld = true;
	__split_lock_setup();
}

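/*
 * Report the selected mode at boot. In warn mode with #AC support this also
 * registers the CPU hotplug callback that re-enables detection when a CPU
 * goes offline.
 */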
static void sld_state_show(void)
{
	if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
	    !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
		return;

	switch (sld_state) {
	case sld_off:
		pr_info("disabled\n");
		break;
	case sld_warn:
		if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
			pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
			if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
					      "x86/splitlock", NULL,
					      splitlock_cpu_offline) < 0)
				pr_warn("No splitlock CPU offline handler\n");
		} else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
			pr_info("#DB: warning on user-space bus_locks\n");
		}
		break;
	case sld_fatal:
		if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
			pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n");
		else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
			pr_info("#DB: sending SIGBUS on user-space bus_locks\n");
		break;
	case sld_ratelimit:
		if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
			pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst);
		break;
	}
}

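/*
 * Main entry point during boot CPU setup: probe for hardware support, parse
 * the command line override and report the resulting state.
 */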
void __init sld_setup(struct cpuinfo_x86 *c)
{
	split_lock_setup(c);
	sld_state_setup();
	sld_state_show();
}