1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * APEI Generic Hardware Error Source support
4  *
 * Generic Hardware Error Source provides a way to report platform
 * hardware errors (such as those from the chipset). It works in the
 * so-called "Firmware First" mode: hardware errors are reported to
 * firmware first, and firmware then reports them to Linux. This way,
 * firmware can inspect non-standard hardware error registers or
 * non-standard hardware links and provide richer error information
 * to Linux.
12  *
13  * For more information about Generic Hardware Error Source, please
14  * refer to ACPI Specification version 4.0, section 17.3.2.6
15  *
16  * Copyright 2010,2011 Intel Corp.
17  *   Author: Huang Ying <ying.huang@intel.com>
18  */
19 
20 #include <linux/arm_sdei.h>
21 #include <linux/kernel.h>
22 #include <linux/moduleparam.h>
23 #include <linux/init.h>
24 #include <linux/acpi.h>
25 #include <linux/io.h>
26 #include <linux/interrupt.h>
27 #include <linux/timer.h>
28 #include <linux/cper.h>
29 #include <linux/cleanup.h>
30 #include <linux/platform_device.h>
31 #include <linux/mutex.h>
32 #include <linux/ratelimit.h>
33 #include <linux/vmalloc.h>
34 #include <linux/irq_work.h>
35 #include <linux/llist.h>
36 #include <linux/genalloc.h>
37 #include <linux/kfifo.h>
38 #include <linux/pci.h>
39 #include <linux/pfn.h>
40 #include <linux/aer.h>
41 #include <linux/nmi.h>
42 #include <linux/sched/clock.h>
43 #include <linux/uuid.h>
44 #include <linux/ras.h>
45 #include <linux/task_work.h>
46 
47 #include <acpi/actbl1.h>
48 #include <acpi/ghes.h>
49 #include <acpi/apei.h>
50 #include <asm/fixmap.h>
51 #include <asm/tlbflush.h>
52 #include <cxl/event.h>
53 #include <ras/ras_event.h>
54 
55 #include "apei-internal.h"
56 
57 #define GHES_PFX	"GHES: "
58 
59 #define GHES_ESTATUS_MAX_SIZE		65536
60 #define GHES_ESOURCE_PREALLOC_MAX_SIZE	65536
61 
62 #define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3
63 
/* This is just an estimate for memory pool allocation */
65 #define GHES_ESTATUS_CACHE_AVG_SIZE	512
66 
67 #define GHES_ESTATUS_CACHES_SIZE	4
68 
69 #define GHES_ESTATUS_IN_CACHE_MAX_NSEC	10000000000ULL
/* Prevent too many caches from being allocated because of RCU */
71 #define GHES_ESTATUS_CACHE_ALLOCED_MAX	(GHES_ESTATUS_CACHES_SIZE * 3 / 2)
72 
73 #define GHES_ESTATUS_CACHE_LEN(estatus_len)			\
74 	(sizeof(struct ghes_estatus_cache) + (estatus_len))
75 #define GHES_ESTATUS_FROM_CACHE(estatus_cache)			\
76 	((struct acpi_hest_generic_status *)				\
77 	 ((struct ghes_estatus_cache *)(estatus_cache) + 1))
78 
79 #define GHES_ESTATUS_NODE_LEN(estatus_len)			\
80 	(sizeof(struct ghes_estatus_node) + (estatus_len))
81 #define GHES_ESTATUS_FROM_NODE(estatus_node)			\
82 	((struct acpi_hest_generic_status *)				\
83 	 ((struct ghes_estatus_node *)(estatus_node) + 1))
84 
85 #define GHES_VENDOR_ENTRY_LEN(gdata_len)                               \
86 	(sizeof(struct ghes_vendor_record_entry) + (gdata_len))
87 #define GHES_GDATA_FROM_VENDOR_ENTRY(vendor_entry)                     \
88 	((struct acpi_hest_generic_data *)                              \
89 	((struct ghes_vendor_record_entry *)(vendor_entry) + 1))
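
/*
 * A minimal sketch of the layout the helper macros above assume: each cache,
 * node, or vendor-record allocation is a header structure immediately
 * followed by its payload in one contiguous block, e.g.:
 *
 *   [struct ghes_estatus_node][struct acpi_hest_generic_status + CPER data]
 *   ^-- gen_pool allocation of GHES_ESTATUS_NODE_LEN(estatus_len) bytes
 *
 * The *_LEN() macros size the whole block and the *_FROM_*() macros skip
 * over the header to reach the payload ("header + 1" pointer arithmetic).
 */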
90 
/*
 * NMI-like notifications vary by architecture. Before the compiler can prune
 * unused static functions, it needs a value for these enum constants.
 */
95 #ifndef CONFIG_ARM_SDE_INTERFACE
96 #define FIX_APEI_GHES_SDEI_NORMAL	__end_of_fixed_addresses
97 #define FIX_APEI_GHES_SDEI_CRITICAL	__end_of_fixed_addresses
98 #endif
99 
100 static ATOMIC_NOTIFIER_HEAD(ghes_report_chain);
101 
102 static inline bool is_hest_type_generic_v2(struct ghes *ghes)
103 {
104 	return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
105 }
106 
107 /*
108  * A platform may describe one error source for the handling of synchronous
109  * errors (e.g. MCE or SEA), or for handling asynchronous errors (e.g. SCI
110  * or External Interrupt). On x86, the HEST notifications are always
111  * asynchronous, so only SEA on ARM is delivered as a synchronous
112  * notification.
113  */
114 static inline bool is_hest_sync_notify(struct ghes *ghes)
115 {
116 	u8 notify_type = ghes->generic->notify.type;
117 
118 	return notify_type == ACPI_HEST_NOTIFY_SEA;
119 }
120 
121 /*
122  * This driver isn't really modular, however for the time being,
123  * continuing to use module_param is the easiest way to remain
124  * compatible with existing boot arg use cases.
125  */
126 bool ghes_disable;
127 module_param_named(disable, ghes_disable, bool, 0);
128 
129 /*
130  * "ghes.edac_force_enable" forcibly enables ghes_edac and skips the platform
131  * check.
132  */
133 static bool ghes_edac_force_enable;
134 module_param_named(edac_force_enable, ghes_edac_force_enable, bool, 0);
135 
136 /*
137  * All error sources notified with HED (Hardware Error Device) share a
138  * single notifier callback, so they need to be linked and checked one
139  * by one. This holds true for NMI too.
140  *
 * RCU is used for these lists, so ghes_list_mutex is only used for
 * modifying the lists, not for traversing them.
143  */
144 static LIST_HEAD(ghes_hed);
145 static DEFINE_MUTEX(ghes_list_mutex);
146 
147 /*
148  * A list of GHES devices which are given to the corresponding EDAC driver
149  * ghes_edac for further use.
150  */
151 static LIST_HEAD(ghes_devs);
152 static DEFINE_MUTEX(ghes_devs_mutex);
153 
/*
 * The memory area used to transfer hardware error information from
 * BIOS to Linux can be determined only in an NMI, IRQ or timer
 * handler, where the regular ioremap cannot be used (atomic context),
 * so the fixmap is used instead.
 *
 * This spinlock prevents the fixmap entry from being used by more
 * than one context at a time.
 */
163 static DEFINE_SPINLOCK(ghes_notify_lock_irq);
164 
165 struct ghes_vendor_record_entry {
166 	struct work_struct work;
167 	int error_severity;
168 	char vendor_record[];
169 };
170 
171 static struct gen_pool *ghes_estatus_pool;
172 
173 static struct ghes_estatus_cache __rcu *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
174 static atomic_t ghes_estatus_cache_alloced;
175 
176 static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx)
177 {
178 	phys_addr_t paddr;
179 	pgprot_t prot;
180 
181 	paddr = PFN_PHYS(pfn);
182 	prot = arch_apei_get_mem_attribute(paddr);
183 	__set_fixmap(fixmap_idx, paddr, prot);
184 
185 	return (void __iomem *) __fix_to_virt(fixmap_idx);
186 }
187 
188 static void ghes_unmap(void __iomem *vaddr, enum fixed_addresses fixmap_idx)
189 {
190 	int _idx = virt_to_fix((unsigned long)vaddr);
191 
192 	WARN_ON_ONCE(fixmap_idx != _idx);
193 	clear_fixmap(fixmap_idx);
194 }
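
/*
 * Illustrative use of the fixmap helpers above (a sketch, not a new code
 * path): callers pick the fixmap slot that matches their context and keep
 * the mapping strictly within one map/unmap pair, e.g.:
 *
 *	void __iomem *vaddr = ghes_map(pfn, FIX_APEI_GHES_IRQ);
 *	memcpy_fromio(buf, vaddr + offset, chunk);
 *	ghes_unmap(vaddr, FIX_APEI_GHES_IRQ);
 *
 * ghes_copy_tofrom_phys() below is the real user of this pattern.
 */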
195 
196 int ghes_estatus_pool_init(unsigned int num_ghes)
197 {
198 	unsigned long addr, len;
199 	int rc;
200 
201 	ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
202 	if (!ghes_estatus_pool)
203 		return -ENOMEM;
204 
205 	len = GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX;
206 	len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE);
207 
208 	addr = (unsigned long)vmalloc(PAGE_ALIGN(len));
209 	if (!addr)
210 		goto err_pool_alloc;
211 
212 	rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1);
213 	if (rc)
214 		goto err_pool_add;
215 
216 	return 0;
217 
218 err_pool_add:
219 	vfree((void *)addr);
220 
221 err_pool_alloc:
222 	gen_pool_destroy(ghes_estatus_pool);
223 
224 	return -ENOMEM;
225 }
226 
227 /**
228  * ghes_estatus_pool_region_free - free previously allocated memory
229  *				   from the ghes_estatus_pool.
230  * @addr: address of memory to free.
231  * @size: size of memory to free.
232  *
233  * Returns none.
234  */
235 void ghes_estatus_pool_region_free(unsigned long addr, u32 size)
236 {
237 	gen_pool_free(ghes_estatus_pool, addr, size);
238 }
239 EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free);
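
/*
 * Pairing sketch (the consumer-side call is outside this file and described
 * here only as an expectation): ghes_handle_aer() below allocates an
 * aer_info buffer from this pool and hands it to aer_recover_queue(); the
 * consumer is then expected to return the memory once it has processed the
 * record, e.g.:
 *
 *	ghes_estatus_pool_region_free((unsigned long)aer_info,
 *				      sizeof(struct aer_capability_regs));
 */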
240 
241 static int map_gen_v2(struct ghes *ghes)
242 {
243 	return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
244 }
245 
246 static void unmap_gen_v2(struct ghes *ghes)
247 {
248 	apei_unmap_generic_address(&ghes->generic_v2->read_ack_register);
249 }
250 
251 static void ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
252 {
253 	int rc;
254 	u64 val = 0;
255 
256 	rc = apei_read(&val, &gv2->read_ack_register);
257 	if (rc)
258 		return;
259 
260 	val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset;
261 	val |= gv2->read_ack_write    << gv2->read_ack_register.bit_offset;
262 
263 	apei_write(val, &gv2->read_ack_register);
264 }
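
/*
 * Worked example (illustrative values): with read_ack_register.bit_offset
 * == 0, read_ack_preserve == 0xfffffffe and read_ack_write == 0x1, the
 * sequence above keeps every bit except bit 0 from the value just read and
 * then sets bit 0, telling the firmware that the error status block has
 * been consumed and may be reused.
 */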
265 
266 static struct ghes *ghes_new(struct acpi_hest_generic *generic)
267 {
268 	struct ghes *ghes;
269 	unsigned int error_block_length;
270 	int rc;
271 
272 	ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
273 	if (!ghes)
274 		return ERR_PTR(-ENOMEM);
275 
276 	ghes->generic = generic;
277 	if (is_hest_type_generic_v2(ghes)) {
278 		rc = map_gen_v2(ghes);
279 		if (rc)
280 			goto err_free;
281 	}
282 
283 	rc = apei_map_generic_address(&generic->error_status_address);
284 	if (rc)
285 		goto err_unmap_read_ack_addr;
286 	error_block_length = generic->error_block_length;
287 	if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
288 		pr_warn(FW_WARN GHES_PFX
289 			"Error status block length is too long: %u for "
290 			"generic hardware error source: %d.\n",
291 			error_block_length, generic->header.source_id);
292 		error_block_length = GHES_ESTATUS_MAX_SIZE;
293 	}
294 	ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
295 	if (!ghes->estatus) {
296 		rc = -ENOMEM;
297 		goto err_unmap_status_addr;
298 	}
299 
300 	return ghes;
301 
302 err_unmap_status_addr:
303 	apei_unmap_generic_address(&generic->error_status_address);
304 err_unmap_read_ack_addr:
305 	if (is_hest_type_generic_v2(ghes))
306 		unmap_gen_v2(ghes);
307 err_free:
308 	kfree(ghes);
309 	return ERR_PTR(rc);
310 }
311 
312 static void ghes_fini(struct ghes *ghes)
313 {
314 	kfree(ghes->estatus);
315 	apei_unmap_generic_address(&ghes->generic->error_status_address);
316 	if (is_hest_type_generic_v2(ghes))
317 		unmap_gen_v2(ghes);
318 }
319 
320 static inline int ghes_severity(int severity)
321 {
322 	switch (severity) {
323 	case CPER_SEV_INFORMATIONAL:
324 		return GHES_SEV_NO;
325 	case CPER_SEV_CORRECTED:
326 		return GHES_SEV_CORRECTED;
327 	case CPER_SEV_RECOVERABLE:
328 		return GHES_SEV_RECOVERABLE;
329 	case CPER_SEV_FATAL:
330 		return GHES_SEV_PANIC;
331 	default:
332 		/* Unknown, go panic */
333 		return GHES_SEV_PANIC;
334 	}
335 }
336 
337 static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
338 				  int from_phys,
339 				  enum fixed_addresses fixmap_idx)
340 {
341 	void __iomem *vaddr;
342 	u64 offset;
343 	u32 trunk;
344 
345 	while (len > 0) {
346 		offset = paddr - (paddr & PAGE_MASK);
347 		vaddr = ghes_map(PHYS_PFN(paddr), fixmap_idx);
348 		trunk = PAGE_SIZE - offset;
349 		trunk = min(trunk, len);
350 		if (from_phys)
351 			memcpy_fromio(buffer, vaddr + offset, trunk);
352 		else
353 			memcpy_toio(vaddr + offset, buffer, trunk);
354 		len -= trunk;
355 		paddr += trunk;
356 		buffer += trunk;
357 		ghes_unmap(vaddr, fixmap_idx);
358 	}
359 }
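
/*
 * Worked example (PAGE_SIZE == 4096, illustrative numbers): copying
 * len == 5000 bytes from paddr == 0x1000bb8 starts at offset 3000 into the
 * first page, so the first chunk is 4096 - 3000 = 1096 bytes; the remaining
 * 3904 bytes fit in the next page and are copied in a second iteration,
 * each chunk under its own temporary fixmap mapping.
 */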
360 
/* Check that the top-level record header has an appropriate size. */
362 static int __ghes_check_estatus(struct ghes *ghes,
363 				struct acpi_hest_generic_status *estatus)
364 {
365 	u32 len = cper_estatus_len(estatus);
366 
367 	if (len < sizeof(*estatus)) {
368 		pr_warn_ratelimited(FW_WARN GHES_PFX "Truncated error status block!\n");
369 		return -EIO;
370 	}
371 
372 	if (len > ghes->generic->error_block_length) {
373 		pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid error status block length!\n");
374 		return -EIO;
375 	}
376 
377 	if (cper_estatus_check_header(estatus)) {
378 		pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid CPER header!\n");
379 		return -EIO;
380 	}
381 
382 	return 0;
383 }
384 
/* Read the CPER block header into estatus and return its physical address in buf_paddr. */
386 static int __ghes_peek_estatus(struct ghes *ghes,
387 			       struct acpi_hest_generic_status *estatus,
388 			       u64 *buf_paddr, enum fixed_addresses fixmap_idx)
389 {
390 	struct acpi_hest_generic *g = ghes->generic;
391 	int rc;
392 
393 	rc = apei_read(buf_paddr, &g->error_status_address);
394 	if (rc) {
395 		*buf_paddr = 0;
396 		pr_warn_ratelimited(FW_WARN GHES_PFX
397 "Failed to read error status block address for hardware error source: %d.\n",
398 				   g->header.source_id);
399 		return -EIO;
400 	}
401 	if (!*buf_paddr)
402 		return -ENOENT;
403 
404 	ghes_copy_tofrom_phys(estatus, *buf_paddr, sizeof(*estatus), 1,
405 			      fixmap_idx);
406 	if (!estatus->block_status) {
407 		*buf_paddr = 0;
408 		return -ENOENT;
409 	}
410 
411 	return 0;
412 }
413 
414 static int __ghes_read_estatus(struct acpi_hest_generic_status *estatus,
415 			       u64 buf_paddr, enum fixed_addresses fixmap_idx,
416 			       size_t buf_len)
417 {
418 	ghes_copy_tofrom_phys(estatus, buf_paddr, buf_len, 1, fixmap_idx);
419 	if (cper_estatus_check(estatus)) {
420 		pr_warn_ratelimited(FW_WARN GHES_PFX
421 				    "Failed to read error status block!\n");
422 		return -EIO;
423 	}
424 
425 	return 0;
426 }
427 
428 static int ghes_read_estatus(struct ghes *ghes,
429 			     struct acpi_hest_generic_status *estatus,
430 			     u64 *buf_paddr, enum fixed_addresses fixmap_idx)
431 {
432 	int rc;
433 
434 	rc = __ghes_peek_estatus(ghes, estatus, buf_paddr, fixmap_idx);
435 	if (rc)
436 		return rc;
437 
438 	rc = __ghes_check_estatus(ghes, estatus);
439 	if (rc)
440 		return rc;
441 
442 	return __ghes_read_estatus(estatus, *buf_paddr, fixmap_idx,
443 				   cper_estatus_len(estatus));
444 }
445 
446 static void ghes_clear_estatus(struct ghes *ghes,
447 			       struct acpi_hest_generic_status *estatus,
448 			       u64 buf_paddr, enum fixed_addresses fixmap_idx)
449 {
450 	estatus->block_status = 0;
451 
452 	if (!buf_paddr)
453 		return;
454 
455 	ghes_copy_tofrom_phys(estatus, buf_paddr,
456 			      sizeof(estatus->block_status), 0,
457 			      fixmap_idx);
458 
459 	/*
460 	 * GHESv2 type HEST entries introduce support for error acknowledgment,
461 	 * so only acknowledge the error if this support is present.
462 	 */
463 	if (is_hest_type_generic_v2(ghes))
464 		ghes_ack_error(ghes->generic_v2);
465 }
466 
467 /*
468  * Called as task_work before returning to user-space.
469  * Ensure any queued work has been done before we return to the context that
470  * triggered the notification.
471  */
472 static void ghes_kick_task_work(struct callback_head *head)
473 {
474 	struct acpi_hest_generic_status *estatus;
475 	struct ghes_estatus_node *estatus_node;
476 	u32 node_len;
477 
478 	estatus_node = container_of(head, struct ghes_estatus_node, task_work);
479 	if (IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
480 		memory_failure_queue_kick(estatus_node->task_work_cpu);
481 
482 	estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
483 	node_len = GHES_ESTATUS_NODE_LEN(cper_estatus_len(estatus));
484 	gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len);
485 }
486 
487 static bool ghes_do_memory_failure(u64 physical_addr, int flags)
488 {
489 	unsigned long pfn;
490 
491 	if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
492 		return false;
493 
494 	pfn = PHYS_PFN(physical_addr);
495 	if (!pfn_valid(pfn) && !arch_is_platform_page(physical_addr)) {
496 		pr_warn_ratelimited(FW_WARN GHES_PFX
497 		"Invalid address in generic error data: %#llx\n",
498 		physical_addr);
499 		return false;
500 	}
501 
502 	memory_failure_queue(pfn, flags);
503 	return true;
504 }
505 
506 static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
507 				       int sev, bool sync)
508 {
509 	int flags = -1;
510 	int sec_sev = ghes_severity(gdata->error_severity);
511 	struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
512 
513 	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
514 		return false;
515 
	/* Only the following two cases can be handled properly for now */
517 	if (sec_sev == GHES_SEV_CORRECTED &&
518 	    (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
519 		flags = MF_SOFT_OFFLINE;
520 	if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
521 		flags = sync ? MF_ACTION_REQUIRED : 0;
522 
523 	if (flags != -1)
524 		return ghes_do_memory_failure(mem_err->physical_addr, flags);
525 
526 	return false;
527 }
528 
529 static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
530 				       int sev, bool sync)
531 {
532 	struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
533 	int flags = sync ? MF_ACTION_REQUIRED : 0;
534 	bool queued = false;
535 	int sec_sev, i;
536 	char *p;
537 
538 	log_arm_hw_error(err);
539 
540 	sec_sev = ghes_severity(gdata->error_severity);
541 	if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE)
542 		return false;
543 
544 	p = (char *)(err + 1);
545 	for (i = 0; i < err->err_info_num; i++) {
546 		struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p;
547 		bool is_cache = (err_info->type == CPER_ARM_CACHE_ERROR);
548 		bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
549 		const char *error_type = "unknown error";
550 
		/*
		 * The field (err_info->error_info & BIT(26)) is erroneously
		 * fixed to 1 in some old HiSilicon Kunpeng920 firmware. Assume
		 * that firmware won't mix corrected errors into an uncorrected
		 * section, and don't filter out 'corrected' errors here.
		 */
557 		if (is_cache && has_pa) {
558 			queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags);
559 			p += err_info->length;
560 			continue;
561 		}
562 
563 		if (err_info->type < ARRAY_SIZE(cper_proc_error_type_strs))
564 			error_type = cper_proc_error_type_strs[err_info->type];
565 
566 		pr_warn_ratelimited(FW_WARN GHES_PFX
567 				    "Unhandled processor error type: %s\n",
568 				    error_type);
569 		p += err_info->length;
570 	}
571 
572 	return queued;
573 }
574 
575 /*
576  * PCIe AER errors need to be sent to the AER driver for reporting and
577  * recovery. The GHES severities map to the following AER severities and
578  * require the following handling:
579  *
 * GHES_SEV_CORRECTED -> AER_CORRECTABLE
581  *     These need to be reported by the AER driver but no recovery is
582  *     necessary.
583  * GHES_SEV_RECOVERABLE -> AER_NONFATAL
584  * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL
585  *     These both need to be reported and recovered from by the AER driver.
586  * GHES_SEV_PANIC does not make it to this handling since the kernel must
587  *     panic.
588  */
589 static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
590 {
591 #ifdef CONFIG_ACPI_APEI_PCIEAER
592 	struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
593 
594 	if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
595 	    pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
596 		unsigned int devfn;
597 		int aer_severity;
598 		u8 *aer_info;
599 
600 		devfn = PCI_DEVFN(pcie_err->device_id.device,
601 				  pcie_err->device_id.function);
602 		aer_severity = cper_severity_to_aer(gdata->error_severity);
603 
604 		/*
605 		 * If firmware reset the component to contain
606 		 * the error, we must reinitialize it before
607 		 * use, so treat it as a fatal AER error.
608 		 */
609 		if (gdata->flags & CPER_SEC_RESET)
610 			aer_severity = AER_FATAL;
611 
612 		aer_info = (void *)gen_pool_alloc(ghes_estatus_pool,
613 						  sizeof(struct aer_capability_regs));
614 		if (!aer_info)
615 			return;
616 		memcpy(aer_info, pcie_err->aer_info, sizeof(struct aer_capability_regs));
617 
618 		aer_recover_queue(pcie_err->device_id.segment,
619 				  pcie_err->device_id.bus,
620 				  devfn, aer_severity,
621 				  (struct aer_capability_regs *)
622 				  aer_info);
623 	}
624 #endif
625 }
626 
627 static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list);
628 
629 int ghes_register_vendor_record_notifier(struct notifier_block *nb)
630 {
631 	return blocking_notifier_chain_register(&vendor_record_notify_list, nb);
632 }
633 EXPORT_SYMBOL_GPL(ghes_register_vendor_record_notifier);
634 
635 void ghes_unregister_vendor_record_notifier(struct notifier_block *nb)
636 {
637 	blocking_notifier_chain_unregister(&vendor_record_notify_list, nb);
638 }
639 EXPORT_SYMBOL_GPL(ghes_unregister_vendor_record_notifier);
640 
641 static void ghes_vendor_record_work_func(struct work_struct *work)
642 {
643 	struct ghes_vendor_record_entry *entry;
644 	struct acpi_hest_generic_data *gdata;
645 	u32 len;
646 
647 	entry = container_of(work, struct ghes_vendor_record_entry, work);
648 	gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
649 
650 	blocking_notifier_call_chain(&vendor_record_notify_list,
651 				     entry->error_severity, gdata);
652 
653 	len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
654 	gen_pool_free(ghes_estatus_pool, (unsigned long)entry, len);
655 }
656 
657 static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
658 					  int sev)
659 {
660 	struct acpi_hest_generic_data *copied_gdata;
661 	struct ghes_vendor_record_entry *entry;
662 	u32 len;
663 
664 	len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
665 	entry = (void *)gen_pool_alloc(ghes_estatus_pool, len);
666 	if (!entry)
667 		return;
668 
669 	copied_gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
670 	memcpy(copied_gdata, gdata, acpi_hest_get_record_size(gdata));
671 	entry->error_severity = sev;
672 
673 	INIT_WORK(&entry->work, ghes_vendor_record_work_func);
674 	schedule_work(&entry->work);
675 }
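
/*
 * A hedged consumer sketch (names are hypothetical, not part of this file):
 * a driver interested in vendor-defined CPER sections registers a notifier
 * and receives the deferred generic data entry in process context:
 *
 *	static int foo_vendor_record_cb(struct notifier_block *nb,
 *					unsigned long severity, void *data)
 *	{
 *		struct acpi_hest_generic_data *gdata = data;
 *
 *		if (guid_equal((guid_t *)gdata->section_type, &foo_sec_guid))
 *			foo_handle_record(gdata, severity);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block foo_nb = { .notifier_call = foo_vendor_record_cb };
 *	ghes_register_vendor_record_notifier(&foo_nb);
 */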
676 
677 /* Room for 8 entries */
678 #define CXL_CPER_PROT_ERR_FIFO_DEPTH 8
679 static DEFINE_KFIFO(cxl_cper_prot_err_fifo, struct cxl_cper_prot_err_work_data,
680 		    CXL_CPER_PROT_ERR_FIFO_DEPTH);
681 
682 /* Synchronize schedule_work() with cxl_cper_prot_err_work changes */
683 static DEFINE_SPINLOCK(cxl_cper_prot_err_work_lock);
684 struct work_struct *cxl_cper_prot_err_work;
685 
686 static void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err,
687 				   int severity)
688 {
689 #ifdef CONFIG_ACPI_APEI_PCIEAER
690 	struct cxl_cper_prot_err_work_data wd;
691 	u8 *dvsec_start, *cap_start;
692 
693 	if (!(prot_err->valid_bits & PROT_ERR_VALID_AGENT_ADDRESS)) {
694 		pr_err_ratelimited("CXL CPER invalid agent type\n");
695 		return;
696 	}
697 
698 	if (!(prot_err->valid_bits & PROT_ERR_VALID_ERROR_LOG)) {
699 		pr_err_ratelimited("CXL CPER invalid protocol error log\n");
700 		return;
701 	}
702 
703 	if (prot_err->err_len != sizeof(struct cxl_ras_capability_regs)) {
704 		pr_err_ratelimited("CXL CPER invalid RAS Cap size (%u)\n",
705 				   prot_err->err_len);
706 		return;
707 	}
708 
709 	if (!(prot_err->valid_bits & PROT_ERR_VALID_SERIAL_NUMBER))
710 		pr_warn(FW_WARN "CXL CPER no device serial number\n");
711 
712 	guard(spinlock_irqsave)(&cxl_cper_prot_err_work_lock);
713 
714 	if (!cxl_cper_prot_err_work)
715 		return;
716 
717 	switch (prot_err->agent_type) {
718 	case RCD:
719 	case DEVICE:
720 	case LD:
721 	case FMLD:
722 	case RP:
723 	case DSP:
724 	case USP:
725 		memcpy(&wd.prot_err, prot_err, sizeof(wd.prot_err));
726 
727 		dvsec_start = (u8 *)(prot_err + 1);
728 		cap_start = dvsec_start + prot_err->dvsec_len;
729 
730 		memcpy(&wd.ras_cap, cap_start, sizeof(wd.ras_cap));
731 		wd.severity = cper_severity_to_aer(severity);
732 		break;
733 	default:
734 		pr_err_ratelimited("CXL CPER invalid agent type: %d\n",
735 				   prot_err->agent_type);
736 		return;
737 	}
738 
739 	if (!kfifo_put(&cxl_cper_prot_err_fifo, wd)) {
740 		pr_err_ratelimited("CXL CPER kfifo overflow\n");
741 		return;
742 	}
743 
744 	schedule_work(cxl_cper_prot_err_work);
745 #endif
746 }
747 
748 int cxl_cper_register_prot_err_work(struct work_struct *work)
749 {
750 	if (cxl_cper_prot_err_work)
751 		return -EINVAL;
752 
753 	guard(spinlock)(&cxl_cper_prot_err_work_lock);
754 	cxl_cper_prot_err_work = work;
755 	return 0;
756 }
757 EXPORT_SYMBOL_NS_GPL(cxl_cper_register_prot_err_work, "CXL");
758 
759 int cxl_cper_unregister_prot_err_work(struct work_struct *work)
760 {
761 	if (cxl_cper_prot_err_work != work)
762 		return -EINVAL;
763 
764 	guard(spinlock)(&cxl_cper_prot_err_work_lock);
765 	cxl_cper_prot_err_work = NULL;
766 	return 0;
767 }
768 EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_prot_err_work, "CXL");
769 
770 int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd)
771 {
772 	return kfifo_get(&cxl_cper_prot_err_fifo, wd);
773 }
774 EXPORT_SYMBOL_NS_GPL(cxl_cper_prot_err_kfifo_get, "CXL");
775 
776 /* Room for 8 entries for each of the 4 event log queues */
777 #define CXL_CPER_FIFO_DEPTH 32
778 DEFINE_KFIFO(cxl_cper_fifo, struct cxl_cper_work_data, CXL_CPER_FIFO_DEPTH);
779 
780 /* Synchronize schedule_work() with cxl_cper_work changes */
781 static DEFINE_SPINLOCK(cxl_cper_work_lock);
782 struct work_struct *cxl_cper_work;
783 
784 static void cxl_cper_post_event(enum cxl_event_type event_type,
785 				struct cxl_cper_event_rec *rec)
786 {
787 	struct cxl_cper_work_data wd;
788 
789 	if (rec->hdr.length <= sizeof(rec->hdr) ||
790 	    rec->hdr.length > sizeof(*rec)) {
791 		pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n",
792 		       rec->hdr.length);
793 		return;
794 	}
795 
796 	if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) {
797 		pr_err(FW_WARN "CXL CPER invalid event\n");
798 		return;
799 	}
800 
801 	guard(spinlock_irqsave)(&cxl_cper_work_lock);
802 
803 	if (!cxl_cper_work)
804 		return;
805 
806 	wd.event_type = event_type;
807 	memcpy(&wd.rec, rec, sizeof(wd.rec));
808 
809 	if (!kfifo_put(&cxl_cper_fifo, wd)) {
810 		pr_err_ratelimited("CXL CPER kfifo overflow\n");
811 		return;
812 	}
813 
814 	schedule_work(cxl_cper_work);
815 }
816 
817 int cxl_cper_register_work(struct work_struct *work)
818 {
819 	if (cxl_cper_work)
820 		return -EINVAL;
821 
822 	guard(spinlock)(&cxl_cper_work_lock);
823 	cxl_cper_work = work;
824 	return 0;
825 }
826 EXPORT_SYMBOL_NS_GPL(cxl_cper_register_work, "CXL");
827 
828 int cxl_cper_unregister_work(struct work_struct *work)
829 {
830 	if (cxl_cper_work != work)
831 		return -EINVAL;
832 
833 	guard(spinlock)(&cxl_cper_work_lock);
834 	cxl_cper_work = NULL;
835 	return 0;
836 }
837 EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_work, "CXL");
838 
839 int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd)
840 {
841 	return kfifo_get(&cxl_cper_fifo, wd);
842 }
843 EXPORT_SYMBOL_NS_GPL(cxl_cper_kfifo_get, "CXL");
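
/*
 * A hedged consumer sketch (function and variable names are hypothetical):
 * the CXL driver registers a work item with cxl_cper_register_work() and,
 * when that work runs, drains the kfifo filled by cxl_cper_post_event()
 * above:
 *
 *	static void foo_cper_work_fn(struct work_struct *work)
 *	{
 *		struct cxl_cper_work_data wd;
 *
 *		while (cxl_cper_kfifo_get(&wd))
 *			foo_handle_event(wd.event_type, &wd.rec);
 *	}
 *
 *	static DECLARE_WORK(foo_cper_work, foo_cper_work_fn);
 *	cxl_cper_register_work(&foo_cper_work);
 */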
844 
845 static bool ghes_do_proc(struct ghes *ghes,
846 			 const struct acpi_hest_generic_status *estatus)
847 {
848 	int sev, sec_sev;
849 	struct acpi_hest_generic_data *gdata;
850 	guid_t *sec_type;
851 	const guid_t *fru_id = &guid_null;
852 	char *fru_text = "";
853 	bool queued = false;
854 	bool sync = is_hest_sync_notify(ghes);
855 
856 	sev = ghes_severity(estatus->error_severity);
857 	apei_estatus_for_each_section(estatus, gdata) {
858 		sec_type = (guid_t *)gdata->section_type;
859 		sec_sev = ghes_severity(gdata->error_severity);
860 		if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
861 			fru_id = (guid_t *)gdata->fru_id;
862 
863 		if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
864 			fru_text = gdata->fru_text;
865 
866 		if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
867 			struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
868 
869 			atomic_notifier_call_chain(&ghes_report_chain, sev, mem_err);
870 
871 			arch_apei_report_mem_error(sev, mem_err);
872 			queued = ghes_handle_memory_failure(gdata, sev, sync);
873 		}
874 		else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
875 			ghes_handle_aer(gdata);
876 		}
877 		else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
878 			queued = ghes_handle_arm_hw_error(gdata, sev, sync);
879 		} else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) {
880 			struct cxl_cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata);
881 
882 			cxl_cper_post_prot_err(prot_err, gdata->error_severity);
883 		} else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) {
884 			struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata);
885 
886 			cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec);
887 		} else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) {
888 			struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata);
889 
890 			cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec);
891 		} else if (guid_equal(sec_type, &CPER_SEC_CXL_MEM_MODULE_GUID)) {
892 			struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata);
893 
894 			cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec);
895 		} else {
896 			void *err = acpi_hest_get_payload(gdata);
897 
898 			ghes_defer_non_standard_event(gdata, sev);
899 			log_non_standard_event(sec_type, fru_id, fru_text,
900 					       sec_sev, err,
901 					       gdata->error_data_length);
902 		}
903 	}
904 
905 	return queued;
906 }
907 
908 static void __ghes_print_estatus(const char *pfx,
909 				 const struct acpi_hest_generic *generic,
910 				 const struct acpi_hest_generic_status *estatus)
911 {
912 	static atomic_t seqno;
913 	unsigned int curr_seqno;
914 	char pfx_seq[64];
915 
916 	if (pfx == NULL) {
917 		if (ghes_severity(estatus->error_severity) <=
918 		    GHES_SEV_CORRECTED)
919 			pfx = KERN_WARNING;
920 		else
921 			pfx = KERN_ERR;
922 	}
923 	curr_seqno = atomic_inc_return(&seqno);
924 	snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno);
925 	printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
926 	       pfx_seq, generic->header.source_id);
927 	cper_estatus_print(pfx_seq, estatus);
928 }
929 
930 static int ghes_print_estatus(const char *pfx,
931 			      const struct acpi_hest_generic *generic,
932 			      const struct acpi_hest_generic_status *estatus)
933 {
934 	/* Not more than 2 messages every 5 seconds */
935 	static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
936 	static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
937 	struct ratelimit_state *ratelimit;
938 
939 	if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
940 		ratelimit = &ratelimit_corrected;
941 	else
942 		ratelimit = &ratelimit_uncorrected;
943 	if (__ratelimit(ratelimit)) {
944 		__ghes_print_estatus(pfx, generic, estatus);
945 		return 1;
946 	}
947 	return 0;
948 }
949 
/*
 * GHES error status reporting throttle: report more kinds of errors
 * instead of just the most frequently occurring ones.
 */
954 static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
955 {
956 	u32 len;
957 	int i, cached = 0;
958 	unsigned long long now;
959 	struct ghes_estatus_cache *cache;
960 	struct acpi_hest_generic_status *cache_estatus;
961 
962 	len = cper_estatus_len(estatus);
963 	rcu_read_lock();
964 	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
965 		cache = rcu_dereference(ghes_estatus_caches[i]);
966 		if (cache == NULL)
967 			continue;
968 		if (len != cache->estatus_len)
969 			continue;
970 		cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
971 		if (memcmp(estatus, cache_estatus, len))
972 			continue;
973 		atomic_inc(&cache->count);
974 		now = sched_clock();
975 		if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
976 			cached = 1;
977 		break;
978 	}
979 	rcu_read_unlock();
980 	return cached;
981 }
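
/*
 * Throttle behaviour sketch: if a byte-identical estatus record was cached
 * less than GHES_ESTATUS_IN_CACHE_MAX_NSEC (10 seconds) ago, the caller
 * treats it as "cached" and skips printing it again; the per-cache hit
 * counter is still incremented, so the eviction heuristic below keeps
 * frequently repeated records cached (and therefore throttled) for longer.
 */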
982 
983 static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
984 	struct acpi_hest_generic *generic,
985 	struct acpi_hest_generic_status *estatus)
986 {
987 	int alloced;
988 	u32 len, cache_len;
989 	struct ghes_estatus_cache *cache;
990 	struct acpi_hest_generic_status *cache_estatus;
991 
992 	alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
993 	if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
994 		atomic_dec(&ghes_estatus_cache_alloced);
995 		return NULL;
996 	}
997 	len = cper_estatus_len(estatus);
998 	cache_len = GHES_ESTATUS_CACHE_LEN(len);
999 	cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
1000 	if (!cache) {
1001 		atomic_dec(&ghes_estatus_cache_alloced);
1002 		return NULL;
1003 	}
1004 	cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
1005 	memcpy(cache_estatus, estatus, len);
1006 	cache->estatus_len = len;
1007 	atomic_set(&cache->count, 0);
1008 	cache->generic = generic;
1009 	cache->time_in = sched_clock();
1010 	return cache;
1011 }
1012 
1013 static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
1014 {
1015 	struct ghes_estatus_cache *cache;
1016 	u32 len;
1017 
1018 	cache = container_of(head, struct ghes_estatus_cache, rcu);
1019 	len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
1020 	len = GHES_ESTATUS_CACHE_LEN(len);
1021 	gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
1022 	atomic_dec(&ghes_estatus_cache_alloced);
1023 }
1024 
1025 static void
1026 ghes_estatus_cache_add(struct acpi_hest_generic *generic,
1027 		       struct acpi_hest_generic_status *estatus)
1028 {
1029 	unsigned long long now, duration, period, max_period = 0;
1030 	struct ghes_estatus_cache *cache, *new_cache;
1031 	struct ghes_estatus_cache __rcu *victim;
1032 	int i, slot = -1, count;
1033 
1034 	new_cache = ghes_estatus_cache_alloc(generic, estatus);
1035 	if (!new_cache)
1036 		return;
1037 
1038 	rcu_read_lock();
1039 	now = sched_clock();
1040 	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
1041 		cache = rcu_dereference(ghes_estatus_caches[i]);
1042 		if (cache == NULL) {
1043 			slot = i;
1044 			break;
1045 		}
1046 		duration = now - cache->time_in;
1047 		if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
1048 			slot = i;
1049 			break;
1050 		}
1051 		count = atomic_read(&cache->count);
1052 		period = duration;
1053 		do_div(period, (count + 1));
1054 		if (period > max_period) {
1055 			max_period = period;
1056 			slot = i;
1057 		}
1058 	}
1059 	rcu_read_unlock();
1060 
1061 	if (slot != -1) {
1062 		/*
1063 		 * Use release semantics to ensure that ghes_estatus_cached()
1064 		 * running on another CPU will see the updated cache fields if
1065 		 * it can see the new value of the pointer.
1066 		 */
1067 		victim = xchg_release(&ghes_estatus_caches[slot],
1068 				      RCU_INITIALIZER(new_cache));
1069 
1070 		/*
1071 		 * At this point, victim may point to a cached item different
1072 		 * from the one based on which we selected the slot. Instead of
1073 		 * going to the loop again to pick another slot, let's just
1074 		 * drop the other item anyway: this may cause a false cache
1075 		 * miss later on, but that won't cause any problems.
1076 		 */
1077 		if (victim)
1078 			call_rcu(&unrcu_pointer(victim)->rcu,
1079 				 ghes_estatus_cache_rcu_free);
1080 	}
1081 }
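
/*
 * Eviction heuristic, worked example (illustrative numbers): with two
 * occupied slots, one seen 9 times over the last 1 second (period =
 * 1s / 10 = 0.1s) and one seen once over the last 0.9 seconds (period =
 * 0.9s / 2 = 0.45s), the second slot has the larger period and is evicted,
 * so the cache keeps suppressing the record that repeats most often.
 */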
1082 
1083 static void __ghes_panic(struct ghes *ghes,
1084 			 struct acpi_hest_generic_status *estatus,
1085 			 u64 buf_paddr, enum fixed_addresses fixmap_idx)
1086 {
1087 	const char *msg = GHES_PFX "Fatal hardware error";
1088 
1089 	__ghes_print_estatus(KERN_EMERG, ghes->generic, estatus);
1090 
1091 	ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx);
1092 
1093 	if (!panic_timeout)
1094 		pr_emerg("%s but panic disabled\n", msg);
1095 
1096 	panic(msg);
1097 }
1098 
1099 static int ghes_proc(struct ghes *ghes)
1100 {
1101 	struct acpi_hest_generic_status *estatus = ghes->estatus;
1102 	u64 buf_paddr;
1103 	int rc;
1104 
1105 	rc = ghes_read_estatus(ghes, estatus, &buf_paddr, FIX_APEI_GHES_IRQ);
1106 	if (rc)
1107 		goto out;
1108 
1109 	if (ghes_severity(estatus->error_severity) >= GHES_SEV_PANIC)
1110 		__ghes_panic(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ);
1111 
1112 	if (!ghes_estatus_cached(estatus)) {
1113 		if (ghes_print_estatus(NULL, ghes->generic, estatus))
1114 			ghes_estatus_cache_add(ghes->generic, estatus);
1115 	}
1116 	ghes_do_proc(ghes, estatus);
1117 
1118 out:
1119 	ghes_clear_estatus(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ);
1120 
1121 	return rc;
1122 }
1123 
1124 static void ghes_add_timer(struct ghes *ghes)
1125 {
1126 	struct acpi_hest_generic *g = ghes->generic;
1127 	unsigned long expire;
1128 
1129 	if (!g->notify.poll_interval) {
1130 		pr_warn(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
1131 			g->header.source_id);
1132 		return;
1133 	}
1134 	expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
1135 	ghes->timer.expires = round_jiffies_relative(expire);
1136 	add_timer(&ghes->timer);
1137 }
1138 
1139 static void ghes_poll_func(struct timer_list *t)
1140 {
1141 	struct ghes *ghes = timer_container_of(ghes, t, timer);
1142 	unsigned long flags;
1143 
1144 	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
1145 	ghes_proc(ghes);
1146 	spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
1147 	if (!(ghes->flags & GHES_EXITING))
1148 		ghes_add_timer(ghes);
1149 }
1150 
1151 static irqreturn_t ghes_irq_func(int irq, void *data)
1152 {
1153 	struct ghes *ghes = data;
1154 	unsigned long flags;
1155 	int rc;
1156 
1157 	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
1158 	rc = ghes_proc(ghes);
1159 	spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
1160 	if (rc)
1161 		return IRQ_NONE;
1162 
1163 	return IRQ_HANDLED;
1164 }
1165 
1166 static int ghes_notify_hed(struct notifier_block *this, unsigned long event,
1167 			   void *data)
1168 {
1169 	struct ghes *ghes;
1170 	unsigned long flags;
1171 	int ret = NOTIFY_DONE;
1172 
1173 	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
1174 	rcu_read_lock();
1175 	list_for_each_entry_rcu(ghes, &ghes_hed, list) {
1176 		if (!ghes_proc(ghes))
1177 			ret = NOTIFY_OK;
1178 	}
1179 	rcu_read_unlock();
1180 	spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
1181 
1182 	return ret;
1183 }
1184 
1185 static struct notifier_block ghes_notifier_hed = {
1186 	.notifier_call = ghes_notify_hed,
1187 };
1188 
1189 /*
1190  * Handlers for CPER records may not be NMI safe. For example,
1191  * memory_failure_queue() takes spinlocks and calls schedule_work_on().
1192  * In any NMI-like handler, memory from ghes_estatus_pool is used to save
1193  * estatus, and added to the ghes_estatus_llist. irq_work_queue() causes
1194  * ghes_proc_in_irq() to run in IRQ context where each estatus in
1195  * ghes_estatus_llist is processed.
1196  *
1197  * Memory from the ghes_estatus_pool is also used with the ghes_estatus_cache
1198  * to suppress frequent messages.
1199  */
1200 static struct llist_head ghes_estatus_llist;
1201 static struct irq_work ghes_proc_irq_work;
1202 
1203 static void ghes_proc_in_irq(struct irq_work *irq_work)
1204 {
1205 	struct llist_node *llnode, *next;
1206 	struct ghes_estatus_node *estatus_node;
1207 	struct acpi_hest_generic *generic;
1208 	struct acpi_hest_generic_status *estatus;
1209 	bool task_work_pending;
1210 	u32 len, node_len;
1211 	int ret;
1212 
1213 	llnode = llist_del_all(&ghes_estatus_llist);
	/*
	 * The estatus entries were added to the llist in reverse
	 * chronological order; restore the proper order.
	 */
1218 	llnode = llist_reverse_order(llnode);
1219 	while (llnode) {
1220 		next = llnode->next;
1221 		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
1222 					   llnode);
1223 		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
1224 		len = cper_estatus_len(estatus);
1225 		node_len = GHES_ESTATUS_NODE_LEN(len);
1226 		task_work_pending = ghes_do_proc(estatus_node->ghes, estatus);
1227 		if (!ghes_estatus_cached(estatus)) {
1228 			generic = estatus_node->generic;
1229 			if (ghes_print_estatus(NULL, generic, estatus))
1230 				ghes_estatus_cache_add(generic, estatus);
1231 		}
1232 
1233 		if (task_work_pending && current->mm) {
1234 			estatus_node->task_work.func = ghes_kick_task_work;
1235 			estatus_node->task_work_cpu = smp_processor_id();
1236 			ret = task_work_add(current, &estatus_node->task_work,
1237 					    TWA_RESUME);
1238 			if (ret)
1239 				estatus_node->task_work.func = NULL;
1240 		}
1241 
1242 		if (!estatus_node->task_work.func)
1243 			gen_pool_free(ghes_estatus_pool,
1244 				      (unsigned long)estatus_node, node_len);
1245 
1246 		llnode = next;
1247 	}
1248 }
1249 
1250 static void ghes_print_queued_estatus(void)
1251 {
1252 	struct llist_node *llnode;
1253 	struct ghes_estatus_node *estatus_node;
1254 	struct acpi_hest_generic *generic;
1255 	struct acpi_hest_generic_status *estatus;
1256 
1257 	llnode = llist_del_all(&ghes_estatus_llist);
	/*
	 * The estatus entries were added to the llist in reverse
	 * chronological order; restore the proper order.
	 */
1262 	llnode = llist_reverse_order(llnode);
1263 	while (llnode) {
1264 		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
1265 					   llnode);
1266 		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
1267 		generic = estatus_node->generic;
1268 		ghes_print_estatus(NULL, generic, estatus);
1269 		llnode = llnode->next;
1270 	}
1271 }
1272 
1273 static int ghes_in_nmi_queue_one_entry(struct ghes *ghes,
1274 				       enum fixed_addresses fixmap_idx)
1275 {
1276 	struct acpi_hest_generic_status *estatus, tmp_header;
1277 	struct ghes_estatus_node *estatus_node;
1278 	u32 len, node_len;
1279 	u64 buf_paddr;
1280 	int sev, rc;
1281 
1282 	if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG))
1283 		return -EOPNOTSUPP;
1284 
1285 	rc = __ghes_peek_estatus(ghes, &tmp_header, &buf_paddr, fixmap_idx);
1286 	if (rc) {
1287 		ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);
1288 		return rc;
1289 	}
1290 
1291 	rc = __ghes_check_estatus(ghes, &tmp_header);
1292 	if (rc) {
1293 		ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);
1294 		return rc;
1295 	}
1296 
1297 	len = cper_estatus_len(&tmp_header);
1298 	node_len = GHES_ESTATUS_NODE_LEN(len);
1299 	estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len);
1300 	if (!estatus_node)
1301 		return -ENOMEM;
1302 
1303 	estatus_node->ghes = ghes;
1304 	estatus_node->generic = ghes->generic;
1305 	estatus_node->task_work.func = NULL;
1306 	estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
1307 
1308 	if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) {
1309 		ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx);
1310 		rc = -ENOENT;
1311 		goto no_work;
1312 	}
1313 
1314 	sev = ghes_severity(estatus->error_severity);
1315 	if (sev >= GHES_SEV_PANIC) {
1316 		ghes_print_queued_estatus();
1317 		__ghes_panic(ghes, estatus, buf_paddr, fixmap_idx);
1318 	}
1319 
1320 	ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);
1321 
1322 	/* This error has been reported before, don't process it again. */
1323 	if (ghes_estatus_cached(estatus))
1324 		goto no_work;
1325 
1326 	llist_add(&estatus_node->llnode, &ghes_estatus_llist);
1327 
1328 	return rc;
1329 
1330 no_work:
1331 	gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
1332 		      node_len);
1333 
1334 	return rc;
1335 }
1336 
1337 static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list,
1338 				       enum fixed_addresses fixmap_idx)
1339 {
1340 	int ret = -ENOENT;
1341 	struct ghes *ghes;
1342 
1343 	rcu_read_lock();
1344 	list_for_each_entry_rcu(ghes, rcu_list, list) {
1345 		if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx))
1346 			ret = 0;
1347 	}
1348 	rcu_read_unlock();
1349 
1350 	if (IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && !ret)
1351 		irq_work_queue(&ghes_proc_irq_work);
1352 
1353 	return ret;
1354 }
1355 
1356 #ifdef CONFIG_ACPI_APEI_SEA
1357 static LIST_HEAD(ghes_sea);
1358 
1359 /*
1360  * Return 0 only if one of the SEA error sources successfully reported an error
1361  * record sent from the firmware.
1362  */
1363 int ghes_notify_sea(void)
1364 {
1365 	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sea);
1366 	int rv;
1367 
1368 	raw_spin_lock(&ghes_notify_lock_sea);
1369 	rv = ghes_in_nmi_spool_from_list(&ghes_sea, FIX_APEI_GHES_SEA);
1370 	raw_spin_unlock(&ghes_notify_lock_sea);
1371 
1372 	return rv;
1373 }
1374 
1375 static void ghes_sea_add(struct ghes *ghes)
1376 {
1377 	mutex_lock(&ghes_list_mutex);
1378 	list_add_rcu(&ghes->list, &ghes_sea);
1379 	mutex_unlock(&ghes_list_mutex);
1380 }
1381 
1382 static void ghes_sea_remove(struct ghes *ghes)
1383 {
1384 	mutex_lock(&ghes_list_mutex);
1385 	list_del_rcu(&ghes->list);
1386 	mutex_unlock(&ghes_list_mutex);
1387 	synchronize_rcu();
1388 }
1389 #else /* CONFIG_ACPI_APEI_SEA */
1390 static inline void ghes_sea_add(struct ghes *ghes) { }
1391 static inline void ghes_sea_remove(struct ghes *ghes) { }
1392 #endif /* CONFIG_ACPI_APEI_SEA */
1393 
1394 #ifdef CONFIG_HAVE_ACPI_APEI_NMI
/*
 * An NMI may be triggered on any CPU, so ghes_in_nmi is used to
 * ensure that only one reader runs at a time.
 */
1399 static atomic_t ghes_in_nmi = ATOMIC_INIT(0);
1400 
1401 static LIST_HEAD(ghes_nmi);
1402 
1403 static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
1404 {
1405 	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_nmi);
1406 	int ret = NMI_DONE;
1407 
1408 	if (!atomic_add_unless(&ghes_in_nmi, 1, 1))
1409 		return ret;
1410 
1411 	raw_spin_lock(&ghes_notify_lock_nmi);
1412 	if (!ghes_in_nmi_spool_from_list(&ghes_nmi, FIX_APEI_GHES_NMI))
1413 		ret = NMI_HANDLED;
1414 	raw_spin_unlock(&ghes_notify_lock_nmi);
1415 
1416 	atomic_dec(&ghes_in_nmi);
1417 	return ret;
1418 }
1419 
1420 static void ghes_nmi_add(struct ghes *ghes)
1421 {
1422 	mutex_lock(&ghes_list_mutex);
1423 	if (list_empty(&ghes_nmi))
1424 		register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
1425 	list_add_rcu(&ghes->list, &ghes_nmi);
1426 	mutex_unlock(&ghes_list_mutex);
1427 }
1428 
1429 static void ghes_nmi_remove(struct ghes *ghes)
1430 {
1431 	mutex_lock(&ghes_list_mutex);
1432 	list_del_rcu(&ghes->list);
1433 	if (list_empty(&ghes_nmi))
1434 		unregister_nmi_handler(NMI_LOCAL, "ghes");
1435 	mutex_unlock(&ghes_list_mutex);
	/*
	 * To synchronize with the NMI handler, the ghes structure can
	 * only be freed after the NMI handler has finished.
	 */
1440 	synchronize_rcu();
1441 }
1442 #else /* CONFIG_HAVE_ACPI_APEI_NMI */
1443 static inline void ghes_nmi_add(struct ghes *ghes) { }
1444 static inline void ghes_nmi_remove(struct ghes *ghes) { }
1445 #endif /* CONFIG_HAVE_ACPI_APEI_NMI */
1446 
1447 static void ghes_nmi_init_cxt(void)
1448 {
1449 	init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
1450 }
1451 
1452 static int __ghes_sdei_callback(struct ghes *ghes,
1453 				enum fixed_addresses fixmap_idx)
1454 {
1455 	if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) {
1456 		irq_work_queue(&ghes_proc_irq_work);
1457 
1458 		return 0;
1459 	}
1460 
1461 	return -ENOENT;
1462 }
1463 
1464 static int ghes_sdei_normal_callback(u32 event_num, struct pt_regs *regs,
1465 				      void *arg)
1466 {
1467 	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_normal);
1468 	struct ghes *ghes = arg;
1469 	int err;
1470 
1471 	raw_spin_lock(&ghes_notify_lock_sdei_normal);
1472 	err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_NORMAL);
1473 	raw_spin_unlock(&ghes_notify_lock_sdei_normal);
1474 
1475 	return err;
1476 }
1477 
1478 static int ghes_sdei_critical_callback(u32 event_num, struct pt_regs *regs,
1479 				       void *arg)
1480 {
1481 	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_critical);
1482 	struct ghes *ghes = arg;
1483 	int err;
1484 
1485 	raw_spin_lock(&ghes_notify_lock_sdei_critical);
1486 	err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_CRITICAL);
1487 	raw_spin_unlock(&ghes_notify_lock_sdei_critical);
1488 
1489 	return err;
1490 }
1491 
1492 static int apei_sdei_register_ghes(struct ghes *ghes)
1493 {
1494 	if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
1495 		return -EOPNOTSUPP;
1496 
1497 	return sdei_register_ghes(ghes, ghes_sdei_normal_callback,
1498 				 ghes_sdei_critical_callback);
1499 }
1500 
1501 static int apei_sdei_unregister_ghes(struct ghes *ghes)
1502 {
1503 	if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
1504 		return -EOPNOTSUPP;
1505 
1506 	return sdei_unregister_ghes(ghes);
1507 }
1508 
1509 static int ghes_probe(struct platform_device *ghes_dev)
1510 {
1511 	struct acpi_hest_generic *generic;
1512 	struct ghes *ghes = NULL;
1513 	unsigned long flags;
1514 
1515 	int rc = -EINVAL;
1516 
1517 	generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
1518 	if (!generic->enabled)
1519 		return -ENODEV;
1520 
1521 	switch (generic->notify.type) {
1522 	case ACPI_HEST_NOTIFY_POLLED:
1523 	case ACPI_HEST_NOTIFY_EXTERNAL:
1524 	case ACPI_HEST_NOTIFY_SCI:
1525 	case ACPI_HEST_NOTIFY_GSIV:
1526 	case ACPI_HEST_NOTIFY_GPIO:
1527 		break;
1528 
1529 	case ACPI_HEST_NOTIFY_SEA:
1530 		if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
1531 			pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n",
1532 				generic->header.source_id);
1533 			rc = -ENOTSUPP;
1534 			goto err;
1535 		}
1536 		break;
1537 	case ACPI_HEST_NOTIFY_NMI:
1538 		if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
1539 			pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
1540 				generic->header.source_id);
1541 			goto err;
1542 		}
1543 		break;
1544 	case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
1545 		if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) {
1546 			pr_warn(GHES_PFX "Generic hardware error source: %d notified via SDE Interface is not supported!\n",
1547 				generic->header.source_id);
1548 			goto err;
1549 		}
1550 		break;
1551 	case ACPI_HEST_NOTIFY_LOCAL:
1552 		pr_warn(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
1553 			generic->header.source_id);
1554 		goto err;
1555 	default:
1556 		pr_warn(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
1557 			generic->notify.type, generic->header.source_id);
1558 		goto err;
1559 	}
1560 
1561 	rc = -EIO;
1562 	if (generic->error_block_length <
1563 	    sizeof(struct acpi_hest_generic_status)) {
1564 		pr_warn(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
1565 			generic->error_block_length, generic->header.source_id);
1566 		goto err;
1567 	}
1568 	ghes = ghes_new(generic);
1569 	if (IS_ERR(ghes)) {
1570 		rc = PTR_ERR(ghes);
1571 		ghes = NULL;
1572 		goto err;
1573 	}
1574 
1575 	switch (generic->notify.type) {
1576 	case ACPI_HEST_NOTIFY_POLLED:
1577 		timer_setup(&ghes->timer, ghes_poll_func, 0);
1578 		ghes_add_timer(ghes);
1579 		break;
1580 	case ACPI_HEST_NOTIFY_EXTERNAL:
1581 		/* External interrupt vector is GSI */
1582 		rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq);
1583 		if (rc) {
1584 			pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
1585 			       generic->header.source_id);
1586 			goto err;
1587 		}
1588 		rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED,
1589 				 "GHES IRQ", ghes);
1590 		if (rc) {
1591 			pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
1592 			       generic->header.source_id);
1593 			goto err;
1594 		}
1595 		break;
1596 
1597 	case ACPI_HEST_NOTIFY_SCI:
1598 	case ACPI_HEST_NOTIFY_GSIV:
1599 	case ACPI_HEST_NOTIFY_GPIO:
1600 		mutex_lock(&ghes_list_mutex);
1601 		if (list_empty(&ghes_hed))
1602 			register_acpi_hed_notifier(&ghes_notifier_hed);
1603 		list_add_rcu(&ghes->list, &ghes_hed);
1604 		mutex_unlock(&ghes_list_mutex);
1605 		break;
1606 
1607 	case ACPI_HEST_NOTIFY_SEA:
1608 		ghes_sea_add(ghes);
1609 		break;
1610 	case ACPI_HEST_NOTIFY_NMI:
1611 		ghes_nmi_add(ghes);
1612 		break;
1613 	case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
1614 		rc = apei_sdei_register_ghes(ghes);
1615 		if (rc)
1616 			goto err;
1617 		break;
1618 	default:
1619 		BUG();
1620 	}
1621 
1622 	platform_set_drvdata(ghes_dev, ghes);
1623 
1624 	ghes->dev = &ghes_dev->dev;
1625 
1626 	mutex_lock(&ghes_devs_mutex);
1627 	list_add_tail(&ghes->elist, &ghes_devs);
1628 	mutex_unlock(&ghes_devs_mutex);
1629 
1630 	/* Handle any pending errors right away */
1631 	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
1632 	ghes_proc(ghes);
1633 	spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
1634 
1635 	return 0;
1636 
1637 err:
1638 	if (ghes) {
1639 		ghes_fini(ghes);
1640 		kfree(ghes);
1641 	}
1642 	return rc;
1643 }
1644 
1645 static void ghes_remove(struct platform_device *ghes_dev)
1646 {
1647 	int rc;
1648 	struct ghes *ghes;
1649 	struct acpi_hest_generic *generic;
1650 
1651 	ghes = platform_get_drvdata(ghes_dev);
1652 	generic = ghes->generic;
1653 
1654 	ghes->flags |= GHES_EXITING;
1655 	switch (generic->notify.type) {
1656 	case ACPI_HEST_NOTIFY_POLLED:
1657 		timer_shutdown_sync(&ghes->timer);
1658 		break;
1659 	case ACPI_HEST_NOTIFY_EXTERNAL:
1660 		free_irq(ghes->irq, ghes);
1661 		break;
1662 
1663 	case ACPI_HEST_NOTIFY_SCI:
1664 	case ACPI_HEST_NOTIFY_GSIV:
1665 	case ACPI_HEST_NOTIFY_GPIO:
1666 		mutex_lock(&ghes_list_mutex);
1667 		list_del_rcu(&ghes->list);
1668 		if (list_empty(&ghes_hed))
1669 			unregister_acpi_hed_notifier(&ghes_notifier_hed);
1670 		mutex_unlock(&ghes_list_mutex);
1671 		synchronize_rcu();
1672 		break;
1673 
1674 	case ACPI_HEST_NOTIFY_SEA:
1675 		ghes_sea_remove(ghes);
1676 		break;
1677 	case ACPI_HEST_NOTIFY_NMI:
1678 		ghes_nmi_remove(ghes);
1679 		break;
1680 	case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
1681 		rc = apei_sdei_unregister_ghes(ghes);
1682 		if (rc) {
1683 			/*
1684 			 * Returning early results in a resource leak, but we're
1685 			 * only here if stopping the hardware failed.
1686 			 */
1687 			dev_err(&ghes_dev->dev, "Failed to unregister ghes (%pe)\n",
1688 				ERR_PTR(rc));
1689 			return;
1690 		}
1691 		break;
1692 	default:
1693 		BUG();
1694 		break;
1695 	}
1696 
1697 	ghes_fini(ghes);
1698 
1699 	mutex_lock(&ghes_devs_mutex);
1700 	list_del(&ghes->elist);
1701 	mutex_unlock(&ghes_devs_mutex);
1702 
1703 	kfree(ghes);
1704 }
1705 
1706 static struct platform_driver ghes_platform_driver = {
1707 	.driver		= {
1708 		.name	= "GHES",
1709 	},
1710 	.probe		= ghes_probe,
1711 	.remove		= ghes_remove,
1712 };
1713 
1714 void __init acpi_ghes_init(void)
1715 {
1716 	int rc;
1717 
1718 	acpi_sdei_init();
1719 
1720 	if (acpi_disabled)
1721 		return;
1722 
1723 	switch (hest_disable) {
1724 	case HEST_NOT_FOUND:
1725 		return;
1726 	case HEST_DISABLED:
1727 		pr_info(GHES_PFX "HEST is not enabled!\n");
1728 		return;
1729 	default:
1730 		break;
1731 	}
1732 
1733 	if (ghes_disable) {
1734 		pr_info(GHES_PFX "GHES is not enabled!\n");
1735 		return;
1736 	}
1737 
1738 	ghes_nmi_init_cxt();
1739 
1740 	rc = platform_driver_register(&ghes_platform_driver);
1741 	if (rc)
1742 		return;
1743 
1744 	rc = apei_osc_setup();
1745 	if (rc == 0 && osc_sb_apei_support_acked)
1746 		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
1747 	else if (rc == 0 && !osc_sb_apei_support_acked)
1748 		pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
1749 	else if (rc && osc_sb_apei_support_acked)
1750 		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
1751 	else
1752 		pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
1753 }
1754 
1755 /*
1756  * Known x86 systems that prefer GHES error reporting:
1757  */
1758 static struct acpi_platform_list plat_list[] = {
1759 	{"HPE   ", "Server  ", 0, ACPI_SIG_FADT, all_versions},
1760 	{ } /* End */
1761 };
1762 
1763 struct list_head *ghes_get_devices(void)
1764 {
1765 	int idx = -1;
1766 
1767 	if (IS_ENABLED(CONFIG_X86)) {
1768 		idx = acpi_match_platform_list(plat_list);
1769 		if (idx < 0) {
1770 			if (!ghes_edac_force_enable)
1771 				return NULL;
1772 
1773 			pr_warn_once("Force-loading ghes_edac on an unsupported platform. You're on your own!\n");
1774 		}
1775 	} else if (list_empty(&ghes_devs)) {
1776 		return NULL;
1777 	}
1778 
1779 	return &ghes_devs;
1780 }
1781 EXPORT_SYMBOL_GPL(ghes_get_devices);
1782 
1783 void ghes_register_report_chain(struct notifier_block *nb)
1784 {
1785 	atomic_notifier_chain_register(&ghes_report_chain, nb);
1786 }
1787 EXPORT_SYMBOL_GPL(ghes_register_report_chain);
1788 
1789 void ghes_unregister_report_chain(struct notifier_block *nb)
1790 {
1791 	atomic_notifier_chain_unregister(&ghes_report_chain, nb);
1792 }
1793 EXPORT_SYMBOL_GPL(ghes_unregister_report_chain);
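
/*
 * A hedged consumer sketch (names are hypothetical): code that wants to see
 * memory error records as they are handled registers a notifier on the
 * report chain and receives the severity plus the CPER memory section that
 * ghes_do_proc() passes to atomic_notifier_call_chain(). Note the chain is
 * atomic, so the callback must not sleep:
 *
 *	static int foo_mem_err_cb(struct notifier_block *nb,
 *				  unsigned long sev, void *data)
 *	{
 *		struct cper_sec_mem_err *mem_err = data;
 *
 *		foo_record_mem_error(sev, mem_err);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block foo_nb = { .notifier_call = foo_mem_err_cb };
 *	ghes_register_report_chain(&foo_nb);
 */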
1794