1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Machine check injection support.
4  * Copyright 2008 Intel Corporation.
5  *
6  * Authors:
7  * Andi Kleen
8  * Ying Huang
9  *
10  * The AMD part (from mce_amd_inj.c): a simple MCE injection facility
11  * for testing different aspects of the RAS code. This driver should be
12  * built as module so that it can be loaded on production kernels for
13  * testing purposes.
14  *
15  * Copyright (c) 2010-17:  Borislav Petkov <bp@alien8.de>
16  *			   Advanced Micro Devices Inc.
17  */
18 
19 #include <linux/cpu.h>
20 #include <linux/debugfs.h>
21 #include <linux/kernel.h>
22 #include <linux/module.h>
23 #include <linux/notifier.h>
24 #include <linux/pci.h>
25 #include <linux/uaccess.h>
26 
27 #include <asm/amd_nb.h>
28 #include <asm/apic.h>
29 #include <asm/irq_vectors.h>
30 #include <asm/mce.h>
31 #include <asm/nmi.h>
32 #include <asm/smp.h>
33 
34 #include "internal.h"
35 
/* Cleared by check_hw_inj_possible() if the platform ignores MCA_STATUS writes. */
static bool hw_injection_possible = true;

/*
 * Collect all the MCi_XXX settings
 */
static struct mce i_mce;
static struct dentry *dfs_inj;	/* debugfs root directory ("mce-inject") */

/* Max bytes accepted by a single write to the "flags" debugfs file. */
#define MAX_FLAG_OPT_SIZE	4
/* PCI config space offset of D18F3x44 (NB Configuration) on AMD. */
#define NBCFG			0x44

enum injection_type {
	SW_INJ = 0,	/* SW injection, simply decode the error */
	HW_INJ,		/* Trigger a #MC */
	DFR_INT_INJ,    /* Trigger Deferred error interrupt */
	THR_INT_INJ,    /* Trigger threshold interrupt */
	N_INJ_TYPES,
};

/* Strings accepted by the "flags" debugfs file, indexed by injection_type. */
static const char * const flags_options[] = {
	[SW_INJ] = "sw",
	[HW_INJ] = "hw",
	[DFR_INT_INJ] = "df",
	[THR_INT_INJ] = "th",
	NULL
};

/* Set default injection to SW_INJ */
static enum injection_type inj_type = SW_INJ;
65 
/*
 * Generate a debugfs "set" handler for MCA field @reg: store the
 * user-supplied value into the matching member of the injection record.
 */
#define MCE_INJECT_SET(reg)						\
static int inj_##reg##_set(void *data, u64 val)				\
{									\
	struct mce *m = (struct mce *)data;				\
									\
	m->reg = val;							\
	return 0;							\
}

MCE_INJECT_SET(status);
MCE_INJECT_SET(misc);
MCE_INJECT_SET(addr);
MCE_INJECT_SET(synd);
79 
/*
 * Generate a debugfs "get" handler for MCA field @reg: report the value
 * currently staged in the injection record.
 */
#define MCE_INJECT_GET(reg)						\
static int inj_##reg##_get(void *data, u64 *val)			\
{									\
	struct mce *m = (struct mce *)data;				\
									\
	*val = m->reg;							\
	return 0;							\
}

MCE_INJECT_GET(status);
MCE_INJECT_GET(misc);
MCE_INJECT_GET(addr);
MCE_INJECT_GET(synd);
MCE_INJECT_GET(ipid);

DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
99 
100 /* Use the user provided IPID value on a sw injection. */
inj_ipid_set(void * data,u64 val)101 static int inj_ipid_set(void *data, u64 val)
102 {
103 	struct mce *m = (struct mce *)data;
104 
105 	if (cpu_feature_enabled(X86_FEATURE_SMCA)) {
106 		if (inj_type == SW_INJ)
107 			m->ipid = val;
108 	}
109 
110 	return 0;
111 }
112 
113 DEFINE_SIMPLE_ATTRIBUTE(ipid_fops, inj_ipid_get, inj_ipid_set, "%llx\n");
114 
/*
 * (Re)initialize the injection record: clear everything, then fill in
 * identification data taken from the boot CPU.
 */
static void setup_inj_struct(struct mce *m)
{
	/* memset() also clears struct padding, unlike an initializer. */
	memset(m, 0, sizeof(struct mce));

	m->cpuvendor = boot_cpu_data.x86_vendor;
	m->time	     = ktime_get_real_seconds();
	m->cpuid     = cpuid_eax(1);
	m->microcode = boot_cpu_data.microcode;
}
124 
/*
 * Update fake mce registers on current CPU: copy @m into the target
 * CPU's per-CPU injectm slot. The finished flag is toggled around the
 * copy with full barriers so concurrent readers never see a partially
 * written record.
 */
static void inject_mce(struct mce *m)
{
	struct mce *i = &per_cpu(injectm, m->extcpu);

	/* Make sure no one reads partially written injectm */
	i->finished = 0;
	mb();
	/* Source record is copied with finished == 0 as well. */
	m->finished = 0;
	/* First set the fields after finished */
	i->extcpu = m->extcpu;
	mb();
	/* Now write record in order, finished last (except above) */
	memcpy(i, m, sizeof(struct mce));
	/* Finally activate it */
	mb();
	i->finished = 1;
}
143 
/*
 * Raise the staged error via the polling path: scan all banks with
 * interrupts disabled, then mark the per-CPU record consumed.
 */
static void raise_poll(struct mce *m)
{
	unsigned long flags;
	mce_banks_t b;

	/* All-ones bank mask: poll every bank. */
	memset(&b, 0xff, sizeof(mce_banks_t));
	local_irq_save(flags);
	machine_check_poll(0, &b);
	local_irq_restore(flags);
	/* Record consumed. */
	m->finished = 0;
}
155 
/*
 * Call the #MC handler directly for the staged error. If no register
 * state is supplied (@pregs == NULL), fabricate a minimal one from the
 * injection record's ip/cs.
 */
static void raise_exception(struct mce *m, struct pt_regs *pregs)
{
	struct pt_regs regs;
	unsigned long flags;

	if (!pregs) {
		memset(&regs, 0, sizeof(struct pt_regs));
		regs.ip = m->ip;
		regs.cs = m->cs;
		pregs = &regs;
	}
	/* do_machine_check() expects interrupts disabled -- at least */
	local_irq_save(flags);
	do_machine_check(pregs);
	local_irq_restore(flags);
	/* Record consumed. */
	m->finished = 0;
}
173 
174 static cpumask_var_t mce_inject_cpumask;
175 static DEFINE_MUTEX(mce_inject_mutex);
176 
/*
 * NMI handler: raise the staged error on this CPU if it was targeted by
 * an MCJ_NMI_BROADCAST injection (i.e. it is still set in
 * mce_inject_cpumask). Runs in NMI context.
 */
static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
{
	int cpu = smp_processor_id();
	struct mce *m = this_cpu_ptr(&injectm);
	if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
		return NMI_DONE;
	/* Clearing our bit signals completion to the waiter in raise_mce(). */
	cpumask_clear_cpu(cpu, mce_inject_cpumask);
	if (m->inject_flags & MCJ_EXCEPTION)
		raise_exception(m, regs);
	else if (m->status)
		raise_poll(m);
	return NMI_HANDLED;
}
190 
mce_irq_ipi(void * info)191 static void mce_irq_ipi(void *info)
192 {
193 	int cpu = smp_processor_id();
194 	struct mce *m = this_cpu_ptr(&injectm);
195 
196 	if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
197 			m->inject_flags & MCJ_EXCEPTION) {
198 		cpumask_clear_cpu(cpu, mce_inject_cpumask);
199 		raise_exception(m, NULL);
200 	}
201 }
202 
/*
 * Inject mce on current CPU. Raises either a #MC exception or a poll
 * depending on the staged inject_flags/status. Returns 0 on success or
 * -EINVAL for an unsupported exception context.
 */
static int raise_local(void)
{
	struct mce *m = this_cpu_ptr(&injectm);
	int context = MCJ_CTX(m->inject_flags);
	int ret = 0;
	int cpu = m->extcpu;

	if (m->inject_flags & MCJ_EXCEPTION) {
		pr_info("Triggering MCE exception on CPU %d\n", cpu);
		switch (context) {
		case MCJ_CTX_IRQ:
			/*
			 * Could do more to fake interrupts like
			 * calling irq_enter, but the necessary
			 * machinery isn't exported currently.
			 */
			fallthrough;
		case MCJ_CTX_PROCESS:
			raise_exception(m, NULL);
			break;
		default:
			pr_info("Invalid MCE context\n");
			ret = -EINVAL;
		}
		pr_info("MCE exception done on CPU %d\n", cpu);
	} else if (m->status) {
		/* No exception requested but a status value staged: poll. */
		pr_info("Starting machine check poll CPU %d\n", cpu);
		raise_poll(m);
		pr_info("Machine check poll done on CPU %d\n", cpu);
	} else
		/* Nothing to raise: just mark the record consumed. */
		m->finished = 0;

	return ret;
}
238 
/*
 * Raise the staged error: optionally broadcast it to the other online
 * CPUs via IPI or NMI, wait (up to 2s) for them to consume it, then
 * raise it locally. Called with mce_inject_mutex held (see
 * mce_inject_raise()).
 */
static void __maybe_unused raise_mce(struct mce *m)
{
	int context = MCJ_CTX(m->inject_flags);

	/* Stage the record into the target CPU's per-CPU injectm slot. */
	inject_mce(m);

	/* MCJ_CTX_RANDOM records are only staged here, not raised now. */
	if (context == MCJ_CTX_RANDOM)
		return;

	if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
		unsigned long start;
		int cpu;

		cpus_read_lock();
		cpumask_copy(mce_inject_cpumask, cpu_online_mask);
		cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
		/*
		 * Only broadcast to CPUs with a finished MCJ_CTX_RANDOM
		 * record staged; drop everyone else from the mask.
		 */
		for_each_online_cpu(cpu) {
			struct mce *mcpu = &per_cpu(injectm, cpu);
			if (!mcpu->finished ||
			    MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
				cpumask_clear_cpu(cpu, mce_inject_cpumask);
		}
		if (!cpumask_empty(mce_inject_cpumask)) {
			if (m->inject_flags & MCJ_IRQ_BROADCAST) {
				/*
				 * don't wait because mce_irq_ipi is necessary
				 * to be sync with following raise_local
				 */
				preempt_disable();
				smp_call_function_many(mce_inject_cpumask,
					mce_irq_ipi, NULL, 0);
				preempt_enable();
			} else if (m->inject_flags & MCJ_NMI_BROADCAST)
				__apic_send_IPI_mask(mce_inject_cpumask, NMI_VECTOR);
		}
		/* Targeted CPUs clear their bit when done; wait up to 2s. */
		start = jiffies;
		while (!cpumask_empty(mce_inject_cpumask)) {
			if (!time_before(jiffies, start + 2*HZ)) {
				pr_err("Timeout waiting for mce inject %lx\n",
					*cpumask_bits(mce_inject_cpumask));
				break;
			}
			cpu_relax();
		}
		raise_local();
		put_cpu();
		cpus_read_unlock();
	} else {
		preempt_disable();
		raise_local();
		preempt_enable();
	}
}
292 
mce_inject_raise(struct notifier_block * nb,unsigned long val,void * data)293 static int mce_inject_raise(struct notifier_block *nb, unsigned long val,
294 			    void *data)
295 {
296 	struct mce *m = (struct mce *)data;
297 
298 	if (!m)
299 		return NOTIFY_DONE;
300 
301 	mutex_lock(&mce_inject_mutex);
302 	raise_mce(m);
303 	mutex_unlock(&mce_inject_mutex);
304 
305 	return NOTIFY_DONE;
306 }
307 
308 static struct notifier_block inject_nb = {
309 	.notifier_call  = mce_inject_raise,
310 };
311 
312 /*
313  * Caller needs to be make sure this cpu doesn't disappear
314  * from under us, i.e.: get_cpu/put_cpu.
315  */
toggle_hw_mce_inject(unsigned int cpu,bool enable)316 static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
317 {
318 	u32 l, h;
319 	int err;
320 
321 	err = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &l, &h);
322 	if (err) {
323 		pr_err("%s: error reading HWCR\n", __func__);
324 		return err;
325 	}
326 
327 	enable ? (l |= BIT(18)) : (l &= ~BIT(18));
328 
329 	err = wrmsr_on_cpu(cpu, MSR_K7_HWCR, l, h);
330 	if (err)
331 		pr_err("%s: error writing HWCR\n", __func__);
332 
333 	return err;
334 }
335 
__set_inj(const char * buf)336 static int __set_inj(const char *buf)
337 {
338 	int i;
339 
340 	for (i = 0; i < N_INJ_TYPES; i++) {
341 		if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
342 			if (i > SW_INJ && !hw_injection_possible)
343 				continue;
344 			inj_type = i;
345 			return 0;
346 		}
347 	}
348 	return -EINVAL;
349 }
350 
flags_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)351 static ssize_t flags_read(struct file *filp, char __user *ubuf,
352 			  size_t cnt, loff_t *ppos)
353 {
354 	char buf[MAX_FLAG_OPT_SIZE];
355 	int n;
356 
357 	n = sprintf(buf, "%s\n", flags_options[inj_type]);
358 
359 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
360 }
361 
/*
 * Select the injection type. Accepts one of the flags_options[] strings,
 * optionally followed by whitespace/newline; writes longer than
 * MAX_FLAG_OPT_SIZE bytes are rejected outright.
 */
static ssize_t flags_write(struct file *filp, const char __user *ubuf,
			   size_t cnt, loff_t *ppos)
{
	char buf[MAX_FLAG_OPT_SIZE], *__buf;
	int err;

	if (!cnt || cnt > MAX_FLAG_OPT_SIZE)
		return -EINVAL;

	if (copy_from_user(&buf, ubuf, cnt))
		return -EFAULT;

	/* Terminate over the last copied byte (usually the '\n'). */
	buf[cnt - 1] = 0;

	/* strip whitespace */
	__buf = strstrip(buf);

	err = __set_inj(__buf);
	if (err) {
		pr_err("%s: Invalid flags value: %s\n", __func__, __buf);
		return err;
	}

	*ppos += cnt;

	return cnt;
}

static const struct file_operations flags_fops = {
	.read           = flags_read,
	.write          = flags_write,
	.llseek         = generic_file_llseek,
};
395 
396 /*
397  * On which CPU to inject?
398  */
399 MCE_INJECT_GET(extcpu);
400 
inj_extcpu_set(void * data,u64 val)401 static int inj_extcpu_set(void *data, u64 val)
402 {
403 	struct mce *m = (struct mce *)data;
404 
405 	if (val >= nr_cpu_ids || !cpu_online(val)) {
406 		pr_err("%s: Invalid CPU: %llu\n", __func__, val);
407 		return -EINVAL;
408 	}
409 	m->extcpu = val;
410 	return 0;
411 }
412 
413 DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n");
414 
/* Raise a machine check exception (vector 18) on the current CPU. */
static void trigger_mce(void *info)
{
	asm volatile("int $18");
}

/* Raise the deferred error APIC interrupt on the current CPU. */
static void trigger_dfr_int(void *info)
{
	asm volatile("int %0" :: "i" (DEFERRED_ERROR_VECTOR));
}

/* Raise the threshold APIC interrupt on the current CPU. */
static void trigger_thr_int(void *info)
{
	asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
}
429 
get_nbc_for_node(int node_id)430 static u32 get_nbc_for_node(int node_id)
431 {
432 	u32 cores_per_node;
433 
434 	cores_per_node = topology_num_threads_per_package() / topology_amd_nodes_per_pkg();
435 	return cores_per_node * node_id;
436 }
437 
/*
 * Make sure D18F3x44[NbMcaToMstCpuEn] (bit 27) is set on node @nid so
 * that bank 4 errors are reported on the node base core. Sets the bit
 * if the BIOS left it clear; logs and bails on any PCI access error.
 */
static void toggle_nb_mca_mst_cpu(u16 nid)
{
	struct amd_northbridge *nb;
	struct pci_dev *F3;
	u32 val;
	int err;

	nb = node_to_amd_nb(nid);
	if (!nb)
		return;

	F3 = nb->misc;
	if (!F3)
		return;

	err = pci_read_config_dword(F3, NBCFG, &val);
	if (err) {
		pr_err("%s: Error reading F%dx%03x.\n",
		       __func__, PCI_FUNC(F3->devfn), NBCFG);
		return;
	}

	/* Already enabled: nothing to do. */
	if (val & BIT(27))
		return;

	pr_err("%s: Set D18F3x44[NbMcaToMstCpuEn] which BIOS hasn't done.\n",
	       __func__);

	val |= BIT(27);
	err = pci_write_config_dword(F3, NBCFG, val);
	if (err)
		pr_err("%s: Error writing F%dx%03x.\n",
		       __func__, PCI_FUNC(F3->devfn), NBCFG);
}
472 
/*
 * Seed the MCA MSRs of the current CPU with the staged error values.
 * Runs on the target CPU via smp_call_function_single(); @info points
 * at the staged struct mce, copied by value so writes use a snapshot.
 */
static void prepare_msrs(void *info)
{
	struct mce m = *(struct mce *)info;
	u8 b = m.bank;

	wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);

	if (boot_cpu_has(X86_FEATURE_SMCA)) {
		/* Deferred errors go into the SMCA DE status/addr registers. */
		if (m.inject_flags == DFR_INT_INJ) {
			wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
			wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
		} else {
			wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
			wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
		}

		wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);

		if (m.misc)
			wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
	} else {
		/* Legacy (non-SMCA) MCA register layout. */
		wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
		wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);

		if (m.misc)
			wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
	}
}
501 
/*
 * Perform the injection staged in i_mce using the type selected via the
 * "flags" file: either log the record for software decoding (SW_INJ) or
 * seed the MSRs on i_mce.extcpu and trigger the matching exception or
 * APIC interrupt there.
 */
static void do_inject(void)
{
	unsigned int cpu = i_mce.extcpu;
	struct mce_hw_err err;
	u64 mcg_status = 0;
	u8 b = i_mce.bank;

	i_mce.tsc = rdtsc_ordered();

	/* An injected error is valid by definition. */
	i_mce.status |= MCI_STATUS_VAL;

	if (i_mce.misc)
		i_mce.status |= MCI_STATUS_MISCV;

	if (i_mce.synd)
		i_mce.status |= MCI_STATUS_SYNDV;

	/* sw injection: log the record only, no MSRs are touched. */
	if (inj_type == SW_INJ) {
		err.m = i_mce;
		mce_log(&err);
		return;
	}

	/* prep MCE global settings for the injection */
	mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;

	/* RIPV only when the error is not "processor context corrupt". */
	if (!(i_mce.status & MCI_STATUS_PCC))
		mcg_status |= MCG_STATUS_RIPV;

	/*
	 * Ensure necessary status bits for deferred errors:
	 * - MCx_STATUS[Deferred]: make sure it is a deferred error
	 * - MCx_STATUS[UC] cleared: deferred errors are _not_ UC
	 */
	if (inj_type == DFR_INT_INJ) {
		i_mce.status |= MCI_STATUS_DEFERRED;
		i_mce.status &= ~MCI_STATUS_UC;
	}

	/*
	 * For multi node CPUs, logging and reporting of bank 4 errors happens
	 * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
	 * Fam10h and later BKDGs.
	 */
	if (boot_cpu_has(X86_FEATURE_AMD_DCM) &&
	    b == 4 &&
	    boot_cpu_data.x86 < 0x17) {
		toggle_nb_mca_mst_cpu(topology_amd_node_id(cpu));
		cpu = get_nbc_for_node(topology_amd_node_id(cpu));
	}

	cpus_read_lock();
	if (!cpu_online(cpu))
		goto err;

	/* Allow MCA register writes (HWCR bit 18) while seeding the MSRs. */
	toggle_hw_mce_inject(cpu, true);

	i_mce.mcgstatus = mcg_status;
	i_mce.inject_flags = inj_type;
	smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);

	toggle_hw_mce_inject(cpu, false);

	/* Trigger the matching exception/interrupt on the target CPU. */
	switch (inj_type) {
	case DFR_INT_INJ:
		smp_call_function_single(cpu, trigger_dfr_int, NULL, 0);
		break;
	case THR_INT_INJ:
		smp_call_function_single(cpu, trigger_thr_int, NULL, 0);
		break;
	default:
		smp_call_function_single(cpu, trigger_mce, NULL, 0);
	}

err:
	cpus_read_unlock();

}
580 
/*
 * This denotes into which bank we're injecting and triggers
 * the injection, at the same time.
 */
static int inj_bank_set(void *data, u64 val)
{
	struct mce *m = (struct mce *)data;
	u8 n_banks;
	u64 cap;

	/* Get bank count on target CPU so we can handle non-uniform values. */
	rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap);
	n_banks = cap & MCG_BANKCNT_MASK;

	if (val >= n_banks) {
		pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu);
		return -EINVAL;
	}

	m->bank = val;

	/*
	 * sw-only injection allows to write arbitrary values into the MCA
	 * registers because it tests only the decoding paths.
	 */
	if (inj_type == SW_INJ)
		goto inject;

	/*
	 * Read IPID value to determine if a bank is populated on the target
	 * CPU.
	 */
	if (cpu_feature_enabled(X86_FEATURE_SMCA)) {
		u64 ipid;

		if (rdmsrl_on_cpu(m->extcpu, MSR_AMD64_SMCA_MCx_IPID(val), &ipid)) {
			pr_err("Error reading IPID on CPU%d\n", m->extcpu);
			return -EINVAL;
		}

		/* IPID of zero means the bank is not populated. */
		if (!ipid) {
			pr_err("Cannot inject into unpopulated bank %llu\n", val);
			return -ENODEV;
		}
	}

inject:
	do_inject();

	/* Reset injection struct */
	setup_inj_struct(&i_mce);

	return 0;
}

MCE_INJECT_GET(bank);

DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n");
639 
/* Contents of the "README" debugfs file. */
static const char readme_msg[] =
"Description of the files and their usages:\n"
"\n"
"Note1: i refers to the bank number below.\n"
"Note2: See respective BKDGs for the exact bit definitions of the files below\n"
"as they mirror the hardware registers.\n"
"\n"
"status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n"
"\t attributes of the error which caused the MCE.\n"
"\n"
"misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n"
"\t used for error thresholding purposes and its validity is indicated by\n"
"\t MCi_STATUS[MiscV].\n"
"\n"
"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
"\n"
"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
"\t associated with the error.\n"
"\n"
"cpu:\t The CPU to inject the error on.\n"
"\n"
"bank:\t Specify the bank you want to inject the error into: the number of\n"
"\t banks in a processor varies and is family/model-specific, therefore, the\n"
"\t supplied value is sanity-checked. Setting the bank value also triggers the\n"
"\t injection.\n"
"\n"
"flags:\t Injection type to be performed. Writing to this file will trigger a\n"
"\t real machine check, an APIC interrupt or invoke the error decoder routines\n"
"\t for AMD processors.\n"
"\n"
"\t Allowed error injection types:\n"
"\t  - \"sw\": Software error injection. Decode error to a human-readable \n"
"\t    format only. Safe to use.\n"
"\t  - \"hw\": Hardware error injection. Causes the #MC exception handler to \n"
"\t    handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
"\t    is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
"\t    before injecting.\n"
/* Fixed doubled "is" across the literal boundary ("is\n is present"). */
"\t  - \"df\": Trigger APIC interrupt for Deferred error. Causes deferred \n"
"\t    error APIC interrupt handler to handle the error if the feature is \n"
"\t    present in hardware. \n"
"\t  - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold \n"
"\t    APIC interrupt handler to handle the error. \n"
"\n"
"ipid:\t IPID (AMD-specific)\n"
"\n";
686 
687 static ssize_t
inj_readme_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)688 inj_readme_read(struct file *filp, char __user *ubuf,
689 		       size_t cnt, loff_t *ppos)
690 {
691 	return simple_read_from_buffer(ubuf, cnt, ppos,
692 					readme_msg, strlen(readme_msg));
693 }
694 
695 static const struct file_operations readme_fops = {
696 	.read		= inj_readme_read,
697 };
698 
/* Table of files created under the <debugfs>/mce-inject/ directory. */
static struct dfs_node {
	char *name;				/* file name */
	const struct file_operations *fops;	/* file operations */
	umode_t perm;				/* file permissions */
} dfs_fls[] = {
	{ .name = "status",	.fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
	{ .name = "misc",	.fops = &misc_fops,   .perm = S_IRUSR | S_IWUSR },
	{ .name = "addr",	.fops = &addr_fops,   .perm = S_IRUSR | S_IWUSR },
	{ .name = "synd",	.fops = &synd_fops,   .perm = S_IRUSR | S_IWUSR },
	{ .name = "ipid",	.fops = &ipid_fops,   .perm = S_IRUSR | S_IWUSR },
	{ .name = "bank",	.fops = &bank_fops,   .perm = S_IRUSR | S_IWUSR },
	{ .name = "flags",	.fops = &flags_fops,  .perm = S_IRUSR | S_IWUSR },
	{ .name = "cpu",	.fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
	{ .name = "README",	.fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
};
714 
debugfs_init(void)715 static void __init debugfs_init(void)
716 {
717 	unsigned int i;
718 
719 	dfs_inj = debugfs_create_dir("mce-inject", NULL);
720 
721 	for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
722 		debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj,
723 				    &i_mce, dfs_fls[i].fops);
724 }
725 
check_hw_inj_possible(void)726 static void check_hw_inj_possible(void)
727 {
728 	int cpu;
729 	u8 bank;
730 
731 	/*
732 	 * This behavior exists only on SMCA systems though its not directly
733 	 * related to SMCA.
734 	 */
735 	if (!cpu_feature_enabled(X86_FEATURE_SMCA))
736 		return;
737 
738 	cpu = get_cpu();
739 
740 	for (bank = 0; bank < MAX_NR_BANKS; ++bank) {
741 		u64 status = MCI_STATUS_VAL, ipid;
742 
743 		/* Check whether bank is populated */
744 		rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), ipid);
745 		if (!ipid)
746 			continue;
747 
748 		toggle_hw_mce_inject(cpu, true);
749 
750 		wrmsrl_safe(mca_msr_reg(bank, MCA_STATUS), status);
751 		rdmsrl_safe(mca_msr_reg(bank, MCA_STATUS), &status);
752 		wrmsrl_safe(mca_msr_reg(bank, MCA_STATUS), 0);
753 
754 		if (!status) {
755 			hw_injection_possible = false;
756 			pr_warn("Platform does not allow *hardware* error injection."
757 				"Try using APEI EINJ instead.\n");
758 		}
759 
760 		toggle_hw_mce_inject(cpu, false);
761 
762 		break;
763 	}
764 
765 	put_cpu();
766 }
767 
/* Module init: allocate state, create debugfs files and hook notifiers. */
static int __init inject_init(void)
{
	if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
		return -ENOMEM;

	/* Probe real-injection support; gates the hw/df/th flags. */
	check_hw_inj_possible();

	debugfs_init();

	/* NMI handler used by MCJ_NMI_BROADCAST injections. */
	register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
	mce_register_injector_chain(&inject_nb);

	setup_inj_struct(&i_mce);

	pr_info("Machine check injector initialized\n");

	return 0;
}
786 
/* Module exit: unhook notifiers and tear down debugfs and cpumask state. */
static void __exit inject_exit(void)
{

	mce_unregister_injector_chain(&inject_nb);
	unregister_nmi_handler(NMI_LOCAL, "mce_notify");

	debugfs_remove_recursive(dfs_inj);
	dfs_inj = NULL;

	memset(&dfs_fls, 0, sizeof(dfs_fls));

	free_cpumask_var(mce_inject_cpumask);
}
800 
801 module_init(inject_init);
802 module_exit(inject_exit);
803 MODULE_DESCRIPTION("Machine check injection support");
804 MODULE_LICENSE("GPL");
805