1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * APM X-Gene SoC EDAC (error detection and correction)
4  *
5  * Copyright (c) 2015, Applied Micro Circuits Corporation
6  * Author: Feng Kan <fkan@apm.com>
7  *         Loc Ho <lho@apm.com>
8  */
9 
10 #include <linux/ctype.h>
11 #include <linux/edac.h>
12 #include <linux/interrupt.h>
13 #include <linux/mfd/syscon.h>
14 #include <linux/module.h>
15 #include <linux/of.h>
16 #include <linux/of_address.h>
17 #include <linux/regmap.h>
18 #include <linux/string_choices.h>
19 
20 #include "edac_module.h"
21 
22 #define EDAC_MOD_STR			"xgene_edac"
23 
24 /* Global error configuration status registers (CSR) */
25 #define PCPHPERRINTSTS			0x0000
26 #define PCPHPERRINTMSK			0x0004
27 #define  MCU_CTL_ERR_MASK		BIT(12)
28 #define  IOB_PA_ERR_MASK		BIT(11)
29 #define  IOB_BA_ERR_MASK		BIT(10)
30 #define  IOB_XGIC_ERR_MASK		BIT(9)
31 #define  IOB_RB_ERR_MASK		BIT(8)
32 #define  L3C_UNCORR_ERR_MASK		BIT(5)
33 #define  MCU_UNCORR_ERR_MASK		BIT(4)
34 #define  PMD3_MERR_MASK			BIT(3)
35 #define  PMD2_MERR_MASK			BIT(2)
36 #define  PMD1_MERR_MASK			BIT(1)
37 #define  PMD0_MERR_MASK			BIT(0)
38 #define PCPLPERRINTSTS			0x0008
39 #define PCPLPERRINTMSK			0x000C
40 #define  CSW_SWITCH_TRACE_ERR_MASK	BIT(2)
41 #define  L3C_CORR_ERR_MASK		BIT(1)
42 #define  MCU_CORR_ERR_MASK		BIT(0)
43 #define MEMERRINTSTS			0x0010
44 #define MEMERRINTMSK			0x0014
45 
/*
 * Top-level driver context shared by all X-Gene error-reporting sub-blocks
 * (memory controllers, PMDs, L3 caches and SoC blocks).
 */
struct xgene_edac {
	struct device		*dev;
	struct regmap		*csw_map;	/* CPU switch fabric CSR */
	struct regmap		*mcba_map;	/* memory controller bridge A CSR */
	struct regmap		*mcbb_map;	/* memory controller bridge B CSR */
	struct regmap		*efuse_map;	/* efuse CSR (PMD disable bits) */
	struct regmap		*rb_map;
	void __iomem		*pcp_csr;	/* PCP global error CSR base */
	spinlock_t		lock;		/* protects pcp_csr RMW accesses */
	struct dentry           *dfs;		/* debugfs root for inject nodes */

	/* Registered sub-block contexts, one list per block type */
	struct list_head	mcus;
	struct list_head	pmds;
	struct list_head	l3s;
	struct list_head	socs;

	/* Protects the MCU active/registered bookkeeping below */
	struct mutex		mc_lock;
	int			mc_active_mask;		/* MCUs present in HW config */
	int			mc_registered_mask;	/* MCUs registered with EDAC */
};
66 
/* Read one PCP global error CSR register into *val. */
static void xgene_edac_pcp_rd(struct xgene_edac *edac, u32 reg, u32 *val)
{
	void __iomem *addr = edac->pcp_csr + reg;

	*val = readl(addr);
}
71 
/*
 * Clear bits_mask in a PCP global error CSR register. The read-modify-write
 * is serialized with edac->lock since several sub-blocks share these CSRs.
 */
static void xgene_edac_pcp_clrbits(struct xgene_edac *edac, u32 reg,
				   u32 bits_mask)
{
	void __iomem *addr = edac->pcp_csr + reg;
	u32 tmp;

	spin_lock(&edac->lock);
	tmp = readl(addr) & ~bits_mask;
	writel(tmp, addr);
	spin_unlock(&edac->lock);
}
83 
/*
 * Set bits_mask in a PCP global error CSR register. The read-modify-write
 * is serialized with edac->lock since several sub-blocks share these CSRs.
 */
static void xgene_edac_pcp_setbits(struct xgene_edac *edac, u32 reg,
				   u32 bits_mask)
{
	void __iomem *addr = edac->pcp_csr + reg;
	u32 tmp;

	spin_lock(&edac->lock);
	tmp = readl(addr) | bits_mask;
	writel(tmp, addr);
	spin_unlock(&edac->lock);
}
95 
96 /* Memory controller error CSR */
97 #define MCU_MAX_RANK			8
98 #define MCU_RANK_STRIDE			0x40
99 
100 #define MCUGECR				0x0110
101 #define  MCU_GECR_DEMANDUCINTREN_MASK	BIT(0)
102 #define  MCU_GECR_BACKUCINTREN_MASK	BIT(1)
103 #define  MCU_GECR_CINTREN_MASK		BIT(2)
104 #define  MUC_GECR_MCUADDRERREN_MASK	BIT(9)
105 #define MCUGESR				0x0114
106 #define  MCU_GESR_ADDRNOMATCH_ERR_MASK	BIT(7)
107 #define  MCU_GESR_ADDRMULTIMATCH_ERR_MASK	BIT(6)
108 #define  MCU_GESR_PHYP_ERR_MASK		BIT(3)
109 #define MCUESRR0			0x0314
110 #define  MCU_ESRR_MULTUCERR_MASK	BIT(3)
111 #define  MCU_ESRR_BACKUCERR_MASK	BIT(2)
112 #define  MCU_ESRR_DEMANDUCERR_MASK	BIT(1)
113 #define  MCU_ESRR_CERR_MASK		BIT(0)
114 #define MCUESRRA0			0x0318
115 #define MCUEBLRR0			0x031c
116 #define  MCU_EBLRR_ERRBANK_RD(src)	(((src) & 0x00000007) >> 0)
117 #define MCUERCRR0			0x0320
118 #define  MCU_ERCRR_ERRROW_RD(src)	(((src) & 0xFFFF0000) >> 16)
119 #define  MCU_ERCRR_ERRCOL_RD(src)	((src) & 0x00000FFF)
120 #define MCUSBECNT0			0x0324
121 #define MCU_SBECNT_COUNT(src)		((src) & 0xFFFF)
122 
123 #define CSW_CSWCR			0x0000
124 #define  CSW_CSWCR_DUALMCB_MASK		BIT(0)
125 
126 #define MCBADDRMR			0x0000
127 #define  MCBADDRMR_MCU_INTLV_MODE_MASK	BIT(3)
128 #define  MCBADDRMR_DUALMCU_MODE_MASK	BIT(2)
129 #define  MCBADDRMR_MCB_INTLV_MODE_MASK	BIT(1)
130 #define  MCBADDRMR_ADDRESS_MODE_MASK	BIT(0)
131 
/* Per-memory-controller (MCU) context, one per active MCU. */
struct xgene_edac_mc_ctx {
	struct list_head	next;		/* link on xgene_edac->mcus */
	char			*name;
	struct mem_ctl_info	*mci;		/* EDAC core MC handle */
	struct xgene_edac	*edac;		/* parent driver context */
	void __iomem		*mcu_csr;	/* this MCU's CSR base */
	u32			mcu_id;		/* index from "memory-controller" DT prop */
};
140 
xgene_edac_mc_err_inject_write(struct file * file,const char __user * data,size_t count,loff_t * ppos)141 static ssize_t xgene_edac_mc_err_inject_write(struct file *file,
142 					      const char __user *data,
143 					      size_t count, loff_t *ppos)
144 {
145 	struct mem_ctl_info *mci = file->private_data;
146 	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
147 	int i;
148 
149 	for (i = 0; i < MCU_MAX_RANK; i++) {
150 		writel(MCU_ESRR_MULTUCERR_MASK | MCU_ESRR_BACKUCERR_MASK |
151 		       MCU_ESRR_DEMANDUCERR_MASK | MCU_ESRR_CERR_MASK,
152 		       ctx->mcu_csr + MCUESRRA0 + i * MCU_RANK_STRIDE);
153 	}
154 	return count;
155 }
156 
/* debugfs file_operations for the MCU "inject_ctrl" node (write-only). */
static const struct file_operations xgene_edac_mc_debug_inject_fops = {
	.open = simple_open,
	.write = xgene_edac_mc_err_inject_write,
	.llseek = generic_file_llseek,
};
162 
xgene_edac_mc_create_debugfs_node(struct mem_ctl_info * mci)163 static void xgene_edac_mc_create_debugfs_node(struct mem_ctl_info *mci)
164 {
165 	if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
166 		return;
167 
168 	if (!mci->debugfs)
169 		return;
170 
171 	edac_debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci,
172 				 &xgene_edac_mc_debug_inject_fops);
173 }
174 
/*
 * Poll/check handler for one MCU. Bails out early unless the top-level PCP
 * status shows a pending MCU error, then walks every rank: reports
 * uncorrectable and correctable errors to the EDAC core and clears the
 * per-rank error CSRs. Finally reports and clears MCU-global (address
 * match) errors.
 */
static void xgene_edac_mc_check(struct mem_ctl_info *mci)
{
	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
	unsigned int pcp_hp_stat;
	unsigned int pcp_lp_stat;
	u32 reg;
	u32 rank;
	u32 bank;
	u32 count;
	u32 col_row;

	/* Nothing to do unless this check could be for an MCU error */
	xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
	xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
	if (!((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
	      (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
	      (MCU_CORR_ERR_MASK & pcp_lp_stat)))
		return;

	for (rank = 0; rank < MCU_MAX_RANK; rank++) {
		reg = readl(ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);

		/* Detect uncorrectable memory error */
		if (reg & (MCU_ESRR_DEMANDUCERR_MASK |
			   MCU_ESRR_BACKUCERR_MASK)) {
			/* Detected uncorrectable memory error */
			edac_mc_chipset_printk(mci, KERN_ERR, "X-Gene",
				"MCU uncorrectable error at rank %d\n", rank);

			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
				1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
		}

		/* Detect correctable memory error */
		if (reg & MCU_ESRR_CERR_MASK) {
			/* Bank/row/column and single-bit count identify the error */
			bank = readl(ctx->mcu_csr + MCUEBLRR0 +
				     rank * MCU_RANK_STRIDE);
			col_row = readl(ctx->mcu_csr + MCUERCRR0 +
					rank * MCU_RANK_STRIDE);
			count = readl(ctx->mcu_csr + MCUSBECNT0 +
				      rank * MCU_RANK_STRIDE);
			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
				"MCU correctable error at rank %d bank %d column %d row %d count %d\n",
				rank, MCU_EBLRR_ERRBANK_RD(bank),
				MCU_ERCRR_ERRCOL_RD(col_row),
				MCU_ERCRR_ERRROW_RD(col_row),
				MCU_SBECNT_COUNT(count));

			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
				1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
		}

		/* Clear all error registers */
		writel(0x0, ctx->mcu_csr + MCUEBLRR0 + rank * MCU_RANK_STRIDE);
		writel(0x0, ctx->mcu_csr + MCUERCRR0 + rank * MCU_RANK_STRIDE);
		writel(0x0, ctx->mcu_csr + MCUSBECNT0 +
		       rank * MCU_RANK_STRIDE);
		/* Write back the status value to clear the latched bits */
		writel(reg, ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);
	}

	/* Detect memory controller error */
	reg = readl(ctx->mcu_csr + MCUGESR);
	if (reg) {
		if (reg & MCU_GESR_ADDRNOMATCH_ERR_MASK)
			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
				"MCU address miss-match error\n");
		if (reg & MCU_GESR_ADDRMULTIMATCH_ERR_MASK)
			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
				"MCU address multi-match error\n");

		writel(reg, ctx->mcu_csr + MCUGESR);
	}
}
247 
/*
 * Enable or disable interrupt-driven error reporting for one MCU.
 * No-op in polling mode. The top-level PCP interrupt is only unmasked
 * once every active MCU has registered (see comment below), and is
 * masked again whenever any MCU unregisters.
 */
static void xgene_edac_mc_irq_ctl(struct mem_ctl_info *mci, bool enable)
{
	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
	unsigned int val;

	if (edac_op_state != EDAC_OPSTATE_INT)
		return;

	/* mc_lock protects mc_registered_mask against concurrent add/remove */
	mutex_lock(&ctx->edac->mc_lock);

	/*
	 * As there is only single bit for enable error and interrupt mask,
	 * we must only enable top level interrupt after all MCUs are
	 * registered. Otherwise, if there is an error and the corresponding
	 * MCU has not registered, the interrupt will never get cleared. To
	 * determine all MCU have registered, we will keep track of active
	 * MCUs and registered MCUs.
	 */
	if (enable) {
		/* Set registered MCU bit */
		ctx->edac->mc_registered_mask |= 1 << ctx->mcu_id;

		/* Enable interrupt after all active MCU registered */
		if (ctx->edac->mc_registered_mask ==
		    ctx->edac->mc_active_mask) {
			/* Enable memory controller top level interrupt */
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       MCU_UNCORR_ERR_MASK |
					       MCU_CTL_ERR_MASK);
			xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
					       MCU_CORR_ERR_MASK);
		}

		/* Enable MCU interrupt and error reporting */
		val = readl(ctx->mcu_csr + MCUGECR);
		val |= MCU_GECR_DEMANDUCINTREN_MASK |
		       MCU_GECR_BACKUCINTREN_MASK |
		       MCU_GECR_CINTREN_MASK |
		       MUC_GECR_MCUADDRERREN_MASK;
		writel(val, ctx->mcu_csr + MCUGECR);
	} else {
		/* Disable MCU interrupt */
		val = readl(ctx->mcu_csr + MCUGECR);
		val &= ~(MCU_GECR_DEMANDUCINTREN_MASK |
			 MCU_GECR_BACKUCINTREN_MASK |
			 MCU_GECR_CINTREN_MASK |
			 MUC_GECR_MCUADDRERREN_MASK);
		writel(val, ctx->mcu_csr + MCUGECR);

		/* Disable memory controller top level interrupt */
		xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
				       MCU_UNCORR_ERR_MASK | MCU_CTL_ERR_MASK);
		xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
				       MCU_CORR_ERR_MASK);

		/* Clear registered MCU bit */
		ctx->edac->mc_registered_mask &= ~(1 << ctx->mcu_id);
	}

	mutex_unlock(&ctx->edac->mc_lock);
}
309 
xgene_edac_mc_is_active(struct xgene_edac_mc_ctx * ctx,int mc_idx)310 static int xgene_edac_mc_is_active(struct xgene_edac_mc_ctx *ctx, int mc_idx)
311 {
312 	unsigned int reg;
313 	u32 mcu_mask;
314 
315 	if (regmap_read(ctx->edac->csw_map, CSW_CSWCR, &reg))
316 		return 0;
317 
318 	if (reg & CSW_CSWCR_DUALMCB_MASK) {
319 		/*
320 		 * Dual MCB active - Determine if all 4 active or just MCU0
321 		 * and MCU2 active
322 		 */
323 		if (regmap_read(ctx->edac->mcbb_map, MCBADDRMR, &reg))
324 			return 0;
325 		mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
326 	} else {
327 		/*
328 		 * Single MCB active - Determine if MCU0/MCU1 or just MCU0
329 		 * active
330 		 */
331 		if (regmap_read(ctx->edac->mcba_map, MCBADDRMR, &reg))
332 			return 0;
333 		mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
334 	}
335 
336 	/* Save active MC mask if hasn't set already */
337 	if (!ctx->edac->mc_active_mask)
338 		ctx->edac->mc_active_mask = mcu_mask;
339 
340 	return (mcu_mask & (1 << mc_idx)) ? 1 : 0;
341 }
342 
/*
 * Probe and register one MCU described by device-tree node np.
 * Uses a devres group keyed on this function so that all devm
 * allocations are released together on any failure path; on success
 * the group is dissolved into the parent device's resources.
 * Returns 0 on success or a negative errno (-ENODEV for an inactive
 * or undescribed MCU, which the caller may treat as non-fatal).
 */
static int xgene_edac_mc_add(struct xgene_edac *edac, struct device_node *np)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer layers[2];
	struct xgene_edac_mc_ctx tmp_ctx;
	struct xgene_edac_mc_ctx *ctx;
	struct resource res;
	int rc;

	/* Build up the context in a temporary until edac_mc_alloc() succeeds */
	memset(&tmp_ctx, 0, sizeof(tmp_ctx));
	tmp_ctx.edac = edac;

	if (!devres_open_group(edac->dev, xgene_edac_mc_add, GFP_KERNEL))
		return -ENOMEM;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no MCU resource address\n");
		goto err_group;
	}
	tmp_ctx.mcu_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(tmp_ctx.mcu_csr)) {
		dev_err(edac->dev, "unable to map MCU resource\n");
		rc = PTR_ERR(tmp_ctx.mcu_csr);
		goto err_group;
	}

	/* Ignore non-active MCU */
	if (of_property_read_u32(np, "memory-controller", &tmp_ctx.mcu_id)) {
		dev_err(edac->dev, "no memory-controller property\n");
		rc = -ENODEV;
		goto err_group;
	}
	if (!xgene_edac_mc_is_active(&tmp_ctx, tmp_ctx.mcu_id)) {
		rc = -ENODEV;
		goto err_group;
	}

	/* 4 chip-select rows x 2 channels per MCU */
	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
	layers[0].size = 4;
	layers[0].is_virt_csrow = true;
	layers[1].type = EDAC_MC_LAYER_CHANNEL;
	layers[1].size = 2;
	layers[1].is_virt_csrow = false;
	mci = edac_mc_alloc(tmp_ctx.mcu_id, ARRAY_SIZE(layers), layers,
			    sizeof(*ctx));
	if (!mci) {
		rc = -ENOMEM;
		goto err_group;
	}

	ctx = mci->pvt_info;
	*ctx = tmp_ctx;		/* Copy over resource value */
	ctx->name = "xgene_edac_mc_err";
	ctx->mci = mci;
	mci->pdev = &mci->dev;
	mci->ctl_name = ctx->name;
	mci->dev_name = ctx->name;

	mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_RDDR2 | MEM_FLAG_RDDR3 |
			 MEM_FLAG_DDR | MEM_FLAG_DDR2 | MEM_FLAG_DDR3;
	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
	mci->edac_cap = EDAC_FLAG_SECDED;
	mci->mod_name = EDAC_MOD_STR;
	mci->ctl_page_to_phys = NULL;
	mci->scrub_cap = SCRUB_FLAG_HW_SRC;
	mci->scrub_mode = SCRUB_HW_SRC;

	/* In interrupt mode the check runs from the ISR path instead */
	if (edac_op_state == EDAC_OPSTATE_POLL)
		mci->edac_check = xgene_edac_mc_check;

	if (edac_mc_add_mc(mci)) {
		dev_err(edac->dev, "edac_mc_add_mc failed\n");
		rc = -EINVAL;
		goto err_free;
	}

	xgene_edac_mc_create_debugfs_node(mci);

	list_add(&ctx->next, &edac->mcus);

	xgene_edac_mc_irq_ctl(mci, true);

	/* Keep the devm resources; dissolve the group into the device */
	devres_remove_group(edac->dev, xgene_edac_mc_add);

	dev_info(edac->dev, "X-Gene EDAC MC registered\n");
	return 0;

err_free:
	edac_mc_free(mci);
err_group:
	devres_release_group(edac->dev, xgene_edac_mc_add);
	return rc;
}
437 
/*
 * Tear down one registered MCU: mask its interrupts, unregister from the
 * EDAC core, then free the mem_ctl_info. Always returns 0.
 */
static int xgene_edac_mc_remove(struct xgene_edac_mc_ctx *mcu)
{
	xgene_edac_mc_irq_ctl(mcu->mci, false);
	edac_mc_del_mc(&mcu->mci->dev);
	edac_mc_free(mcu->mci);
	return 0;
}
445 
446 /* CPU L1/L2 error CSR */
447 #define MAX_CPU_PER_PMD				2
448 #define CPU_CSR_STRIDE				0x00100000
449 #define CPU_L2C_PAGE				0x000D0000
450 #define CPU_MEMERR_L2C_PAGE			0x000E0000
451 #define CPU_MEMERR_CPU_PAGE			0x000F0000
452 
453 #define MEMERR_CPU_ICFECR_PAGE_OFFSET		0x0000
454 #define MEMERR_CPU_ICFESR_PAGE_OFFSET		0x0004
455 #define  MEMERR_CPU_ICFESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
456 #define  MEMERR_CPU_ICFESR_ERRINDEX_RD(src)	(((src) & 0x003F0000) >> 16)
457 #define  MEMERR_CPU_ICFESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
458 #define  MEMERR_CPU_ICFESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
459 #define  MEMERR_CPU_ICFESR_MULTCERR_MASK	BIT(2)
460 #define  MEMERR_CPU_ICFESR_CERR_MASK		BIT(0)
461 #define MEMERR_CPU_LSUESR_PAGE_OFFSET		0x000c
462 #define  MEMERR_CPU_LSUESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
463 #define  MEMERR_CPU_LSUESR_ERRINDEX_RD(src)	(((src) & 0x003F0000) >> 16)
464 #define  MEMERR_CPU_LSUESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
465 #define  MEMERR_CPU_LSUESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
466 #define  MEMERR_CPU_LSUESR_MULTCERR_MASK	BIT(2)
467 #define  MEMERR_CPU_LSUESR_CERR_MASK		BIT(0)
468 #define MEMERR_CPU_LSUECR_PAGE_OFFSET		0x0008
469 #define MEMERR_CPU_MMUECR_PAGE_OFFSET		0x0010
470 #define MEMERR_CPU_MMUESR_PAGE_OFFSET		0x0014
471 #define  MEMERR_CPU_MMUESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
472 #define  MEMERR_CPU_MMUESR_ERRINDEX_RD(src)	(((src) & 0x007F0000) >> 16)
473 #define  MEMERR_CPU_MMUESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
474 #define  MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK	BIT(7)
475 #define  MEMERR_CPU_MMUESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
476 #define  MEMERR_CPU_MMUESR_MULTCERR_MASK	BIT(2)
477 #define  MEMERR_CPU_MMUESR_CERR_MASK		BIT(0)
478 #define MEMERR_CPU_ICFESRA_PAGE_OFFSET		0x0804
479 #define MEMERR_CPU_LSUESRA_PAGE_OFFSET		0x080c
480 #define MEMERR_CPU_MMUESRA_PAGE_OFFSET		0x0814
481 
482 #define MEMERR_L2C_L2ECR_PAGE_OFFSET		0x0000
483 #define MEMERR_L2C_L2ESR_PAGE_OFFSET		0x0004
484 #define  MEMERR_L2C_L2ESR_ERRSYN_RD(src)	(((src) & 0xFF000000) >> 24)
485 #define  MEMERR_L2C_L2ESR_ERRWAY_RD(src)	(((src) & 0x00FC0000) >> 18)
486 #define  MEMERR_L2C_L2ESR_ERRCPU_RD(src)	(((src) & 0x00020000) >> 17)
487 #define  MEMERR_L2C_L2ESR_ERRGROUP_RD(src)	(((src) & 0x0000E000) >> 13)
488 #define  MEMERR_L2C_L2ESR_ERRACTION_RD(src)	(((src) & 0x00001C00) >> 10)
489 #define  MEMERR_L2C_L2ESR_ERRTYPE_RD(src)	(((src) & 0x00000300) >> 8)
490 #define  MEMERR_L2C_L2ESR_MULTUCERR_MASK	BIT(3)
491 #define  MEMERR_L2C_L2ESR_MULTICERR_MASK	BIT(2)
492 #define  MEMERR_L2C_L2ESR_UCERR_MASK		BIT(1)
493 #define  MEMERR_L2C_L2ESR_ERR_MASK		BIT(0)
494 #define MEMERR_L2C_L2EALR_PAGE_OFFSET		0x0008
495 #define CPUX_L2C_L2RTOCR_PAGE_OFFSET		0x0010
496 #define MEMERR_L2C_L2EAHR_PAGE_OFFSET		0x000c
497 #define CPUX_L2C_L2RTOSR_PAGE_OFFSET		0x0014
498 #define  MEMERR_L2C_L2RTOSR_MULTERR_MASK	BIT(1)
499 #define  MEMERR_L2C_L2RTOSR_ERR_MASK		BIT(0)
500 #define CPUX_L2C_L2RTOALR_PAGE_OFFSET		0x0018
501 #define CPUX_L2C_L2RTOAHR_PAGE_OFFSET		0x001c
502 #define MEMERR_L2C_L2ESRA_PAGE_OFFSET		0x0804
503 
504 /*
505  * Processor Module Domain (PMD) context - Context for a pair of processors.
506  * Each PMD consists of 2 CPUs and a shared L2 cache. Each CPU consists of
507  * its own L1 cache.
508  */
/*
 * Processor Module Domain (PMD) context - Context for a pair of processors.
 * Each PMD consists of 2 CPUs and a shared L2 cache. Each CPU consists of
 * its own L1 cache.
 */
struct xgene_edac_pmd_ctx {
	struct list_head	next;		/* link on xgene_edac->pmds */
	struct device		ddev;		/* local device copy used by edac_dev->dev */
	char			*name;
	struct xgene_edac	*edac;		/* parent driver context */
	struct edac_device_ctl_info *edac_dev;
	void __iomem		*pmd_csr;	/* this PMD's CSR base */
	u32			pmd;		/* PMD index from DT */
	int			version;	/* SoC version; >1 has L2 RTO feature */
};
519 
/*
 * Check and report L1 (per-CPU) memory errors for CPU cpu_idx of this PMD.
 * Three error sources are examined in turn - instruction cache fetch (ICF),
 * load/store unit (LSU) and MMU - each with the same pattern: read status,
 * decode and log, write the value back to clear the latched error, then
 * report a correctable event to the EDAC core.
 */
static void xgene_edac_pmd_l1_check(struct edac_device_ctl_info *edac_dev,
				    int cpu_idx)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_f;
	u32 val;

	/* Per-CPU memory error CSR page */
	pg_f = ctx->pmd_csr + cpu_idx * CPU_CSR_STRIDE + CPU_MEMERR_CPU_PAGE;

	/* 1) Instruction cache fetch (ICF) errors */
	val = readl(pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);
	if (!val)
		goto chk_lsu;
	dev_err(edac_dev->dev,
		"CPU%d L1 memory error ICF 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
		MEMERR_CPU_ICFESR_ERRWAY_RD(val),
		MEMERR_CPU_ICFESR_ERRINDEX_RD(val),
		MEMERR_CPU_ICFESR_ERRINFO_RD(val));
	if (val & MEMERR_CPU_ICFESR_CERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable error\n");
	if (val & MEMERR_CPU_ICFESR_MULTCERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable error\n");
	switch (MEMERR_CPU_ICFESR_ERRTYPE_RD(val)) {
	case 1:
		dev_err(edac_dev->dev, "L1 TLB multiple hit\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "Way select multiple hit\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "Physical tag parity error\n");
		break;
	case 4:
	case 5:
		dev_err(edac_dev->dev, "L1 data parity error\n");
		break;
	case 6:
		dev_err(edac_dev->dev, "L1 pre-decode parity error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);

	if (val & (MEMERR_CPU_ICFESR_CERR_MASK |
		   MEMERR_CPU_ICFESR_MULTCERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);

chk_lsu:
	/* 2) Load/store unit (LSU) errors */
	val = readl(pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);
	if (!val)
		goto chk_mmu;
	dev_err(edac_dev->dev,
		"CPU%d memory error LSU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
		MEMERR_CPU_LSUESR_ERRWAY_RD(val),
		MEMERR_CPU_LSUESR_ERRINDEX_RD(val),
		MEMERR_CPU_LSUESR_ERRINFO_RD(val));
	if (val & MEMERR_CPU_LSUESR_CERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable error\n");
	if (val & MEMERR_CPU_LSUESR_MULTCERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable error\n");
	switch (MEMERR_CPU_LSUESR_ERRTYPE_RD(val)) {
	case 0:
		dev_err(edac_dev->dev, "Load tag error\n");
		break;
	case 1:
		dev_err(edac_dev->dev, "Load data error\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "WSL multihit error\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "Store tag error\n");
		break;
	case 4:
		dev_err(edac_dev->dev,
			"DTB multihit from load pipeline error\n");
		break;
	case 5:
		dev_err(edac_dev->dev,
			"DTB multihit from store pipeline error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);

	if (val & (MEMERR_CPU_LSUESR_CERR_MASK |
		   MEMERR_CPU_LSUESR_MULTCERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);

chk_mmu:
	/* 3) MMU errors */
	val = readl(pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);
	if (!val)
		return;
	dev_err(edac_dev->dev,
		"CPU%d memory error MMU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X %s\n",
		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
		MEMERR_CPU_MMUESR_ERRWAY_RD(val),
		MEMERR_CPU_MMUESR_ERRINDEX_RD(val),
		MEMERR_CPU_MMUESR_ERRINFO_RD(val),
		val & MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK ? "LSU" : "ICF");
	if (val & MEMERR_CPU_MMUESR_CERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable error\n");
	if (val & MEMERR_CPU_MMUESR_MULTCERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable error\n");
	switch (MEMERR_CPU_MMUESR_ERRTYPE_RD(val)) {
	case 0:
		dev_err(edac_dev->dev, "Stage 1 UTB hit error\n");
		break;
	case 1:
		dev_err(edac_dev->dev, "Stage 1 UTB miss error\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "Stage 1 UTB allocate error\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "TMO operation single bank error\n");
		break;
	case 4:
		dev_err(edac_dev->dev, "Stage 2 UTB error\n");
		break;
	case 5:
		dev_err(edac_dev->dev, "Stage 2 UTB miss error\n");
		break;
	case 6:
		dev_err(edac_dev->dev, "Stage 2 UTB allocate error\n");
		break;
	case 7:
		dev_err(edac_dev->dev, "TMO operation multiple bank error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);

	/* MMU errors are always reported as correctable here */
	edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
}
659 
/*
 * Check and report shared-L2 errors for this PMD: first the L2C memory
 * error status (with error address, syndrome and type decoding), then any
 * L2 request timeout. In both cases the status value is written back to
 * clear the latched error.
 */
static void xgene_edac_pmd_l2_check(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_d;
	void __iomem *pg_e;
	u32 val_hi;
	u32 val_lo;
	u32 val;

	/* Check L2 */
	pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
	val = readl(pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);
	if (!val)
		goto chk_l2c;
	/* Error address is split across low/high registers */
	val_lo = readl(pg_e + MEMERR_L2C_L2EALR_PAGE_OFFSET);
	val_hi = readl(pg_e + MEMERR_L2C_L2EAHR_PAGE_OFFSET);
	dev_err(edac_dev->dev,
		"PMD%d memory error L2C L2ESR 0x%08X @ 0x%08X.%08X\n",
		ctx->pmd, val, val_hi, val_lo);
	dev_err(edac_dev->dev,
		"ErrSyndrome 0x%02X ErrWay 0x%02X ErrCpu %d ErrGroup 0x%02X ErrAction 0x%02X\n",
		MEMERR_L2C_L2ESR_ERRSYN_RD(val),
		MEMERR_L2C_L2ESR_ERRWAY_RD(val),
		MEMERR_L2C_L2ESR_ERRCPU_RD(val),
		MEMERR_L2C_L2ESR_ERRGROUP_RD(val),
		MEMERR_L2C_L2ESR_ERRACTION_RD(val));

	if (val & MEMERR_L2C_L2ESR_ERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable error\n");
	if (val & MEMERR_L2C_L2ESR_MULTICERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable error\n");
	if (val & MEMERR_L2C_L2ESR_UCERR_MASK)
		dev_err(edac_dev->dev, "One or more uncorrectable error\n");
	if (val & MEMERR_L2C_L2ESR_MULTUCERR_MASK)
		dev_err(edac_dev->dev, "Multiple uncorrectable error\n");

	switch (MEMERR_L2C_L2ESR_ERRTYPE_RD(val)) {
	case 0:
		dev_err(edac_dev->dev, "Outbound SDB parity error\n");
		break;
	case 1:
		dev_err(edac_dev->dev, "Inbound SDB parity error\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "Tag ECC error\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "Data ECC error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);

	if (val & (MEMERR_L2C_L2ESR_ERR_MASK |
		   MEMERR_L2C_L2ESR_MULTICERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
	if (val & (MEMERR_L2C_L2ESR_UCERR_MASK |
		   MEMERR_L2C_L2ESR_MULTUCERR_MASK))
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);

chk_l2c:
	/* Check if any memory request timed out on L2 cache */
	pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
	val = readl(pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
	if (val) {
		val_lo = readl(pg_d + CPUX_L2C_L2RTOALR_PAGE_OFFSET);
		val_hi = readl(pg_d + CPUX_L2C_L2RTOAHR_PAGE_OFFSET);
		dev_err(edac_dev->dev,
			"PMD%d L2C error L2C RTOSR 0x%08X @ 0x%08X.%08X\n",
			ctx->pmd, val, val_hi, val_lo);
		writel(val, pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
	}
}
734 
xgene_edac_pmd_check(struct edac_device_ctl_info * edac_dev)735 static void xgene_edac_pmd_check(struct edac_device_ctl_info *edac_dev)
736 {
737 	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
738 	unsigned int pcp_hp_stat;
739 	int i;
740 
741 	xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
742 	if (!((PMD0_MERR_MASK << ctx->pmd) & pcp_hp_stat))
743 		return;
744 
745 	/* Check CPU L1 error */
746 	for (i = 0; i < MAX_CPU_PER_PMD; i++)
747 		xgene_edac_pmd_l1_check(edac_dev, i);
748 
749 	/* Check CPU L2 error */
750 	xgene_edac_pmd_l2_check(edac_dev);
751 }
752 
/*
 * Enable L1 memory error detection for one CPU of this PMD by writing the
 * ICF, LSU and MMU error control registers.
 * NOTE(review): the enable values 0x301/0x101 are taken as-is from the
 * original driver; their per-bit meaning is not documented here - verify
 * against the X-Gene register reference before changing.
 */
static void xgene_edac_pmd_cpu_hw_cfg(struct edac_device_ctl_info *edac_dev,
				      int cpu)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_f = ctx->pmd_csr + cpu * CPU_CSR_STRIDE +
			     CPU_MEMERR_CPU_PAGE;

	/*
	 * Enable CPU memory error:
	 *  MEMERR_CPU_ICFESRA, MEMERR_CPU_LSUESRA, and MEMERR_CPU_MMUESRA
	 */
	writel(0x00000301, pg_f + MEMERR_CPU_ICFECR_PAGE_OFFSET);
	writel(0x00000301, pg_f + MEMERR_CPU_LSUECR_PAGE_OFFSET);
	writel(0x00000101, pg_f + MEMERR_CPU_MMUECR_PAGE_OFFSET);
}
768 
/*
 * Enable shared-L2 error detection for this PMD and, on newer SoC
 * revisions, the L2 request-timeout feature.
 * NOTE(review): the 0x703/0x119 enable values are carried over verbatim;
 * per-bit meaning should be confirmed against the X-Gene register spec.
 */
static void xgene_edac_pmd_hw_cfg(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
	void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;

	/* Enable PMD memory error - MEMERR_L2C_L2ECR and L2C_L2RTOCR */
	writel(0x00000703, pg_e + MEMERR_L2C_L2ECR_PAGE_OFFSET);
	/* Configure L2C HW request time out feature if supported */
	if (ctx->version > 1)
		writel(0x00000119, pg_d + CPUX_L2C_L2RTOCR_PAGE_OFFSET);
}
781 
/*
 * Enable or disable error reporting for this PMD. In interrupt mode the
 * PMD's top-level interrupt mask bit is updated; on enable, the L2 and
 * per-CPU L1 error-detection hardware is configured as well.
 */
static void xgene_edac_pmd_hw_ctl(struct edac_device_ctl_info *edac_dev,
				  bool enable)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	u32 pmd_mask = PMD0_MERR_MASK << ctx->pmd;
	int cpu;

	/* Mask/unmask this PMD's top-level error interrupt */
	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		if (enable)
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       pmd_mask);
		else
			xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
					       pmd_mask);
	}

	if (!enable)
		return;

	xgene_edac_pmd_hw_cfg(edac_dev);

	/* Two CPUs per a PMD */
	for (cpu = 0; cpu < MAX_CPU_PER_PMD; cpu++)
		xgene_edac_pmd_cpu_hw_cfg(edac_dev, cpu);
}
806 
xgene_edac_pmd_l1_inject_ctrl_write(struct file * file,const char __user * data,size_t count,loff_t * ppos)807 static ssize_t xgene_edac_pmd_l1_inject_ctrl_write(struct file *file,
808 						   const char __user *data,
809 						   size_t count, loff_t *ppos)
810 {
811 	struct edac_device_ctl_info *edac_dev = file->private_data;
812 	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
813 	void __iomem *cpux_pg_f;
814 	int i;
815 
816 	for (i = 0; i < MAX_CPU_PER_PMD; i++) {
817 		cpux_pg_f = ctx->pmd_csr + i * CPU_CSR_STRIDE +
818 			    CPU_MEMERR_CPU_PAGE;
819 
820 		writel(MEMERR_CPU_ICFESR_MULTCERR_MASK |
821 		       MEMERR_CPU_ICFESR_CERR_MASK,
822 		       cpux_pg_f + MEMERR_CPU_ICFESRA_PAGE_OFFSET);
823 		writel(MEMERR_CPU_LSUESR_MULTCERR_MASK |
824 		       MEMERR_CPU_LSUESR_CERR_MASK,
825 		       cpux_pg_f + MEMERR_CPU_LSUESRA_PAGE_OFFSET);
826 		writel(MEMERR_CPU_MMUESR_MULTCERR_MASK |
827 		       MEMERR_CPU_MMUESR_CERR_MASK,
828 		       cpux_pg_f + MEMERR_CPU_MMUESRA_PAGE_OFFSET);
829 	}
830 	return count;
831 }
832 
xgene_edac_pmd_l2_inject_ctrl_write(struct file * file,const char __user * data,size_t count,loff_t * ppos)833 static ssize_t xgene_edac_pmd_l2_inject_ctrl_write(struct file *file,
834 						   const char __user *data,
835 						   size_t count, loff_t *ppos)
836 {
837 	struct edac_device_ctl_info *edac_dev = file->private_data;
838 	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
839 	void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
840 
841 	writel(MEMERR_L2C_L2ESR_MULTUCERR_MASK |
842 	       MEMERR_L2C_L2ESR_MULTICERR_MASK |
843 	       MEMERR_L2C_L2ESR_UCERR_MASK |
844 	       MEMERR_L2C_L2ESR_ERR_MASK,
845 	       pg_e + MEMERR_L2C_L2ESRA_PAGE_OFFSET);
846 	return count;
847 }
848 
/*
 * debugfs file_operations for the PMD injection nodes:
 * [0] = "l1_inject_ctrl", [1] = "l2_inject_ctrl".
 */
static const struct file_operations xgene_edac_pmd_debug_inject_fops[] = {
	{
	.open = simple_open,
	.write = xgene_edac_pmd_l1_inject_ctrl_write,
	.llseek = generic_file_llseek, },
	{
	.open = simple_open,
	.write = xgene_edac_pmd_l2_inject_ctrl_write,
	.llseek = generic_file_llseek, },
	{ }
};
860 
861 static void
xgene_edac_pmd_create_debugfs_nodes(struct edac_device_ctl_info * edac_dev)862 xgene_edac_pmd_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
863 {
864 	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
865 	struct dentry *dbgfs_dir;
866 	char name[10];
867 
868 	if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
869 		return;
870 
871 	snprintf(name, sizeof(name), "PMD%d", ctx->pmd);
872 	dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
873 	if (!dbgfs_dir)
874 		return;
875 
876 	edac_debugfs_create_file("l1_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
877 				 &xgene_edac_pmd_debug_inject_fops[0]);
878 	edac_debugfs_create_file("l2_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
879 				 &xgene_edac_pmd_debug_inject_fops[1]);
880 }
881 
/*
 * A PMD is available when its efuse "disabled" bit is clear.
 * Returns 1 if available, 0 if fused off.
 */
static int xgene_edac_pmd_available(u32 efuse, int pmd)
{
	return !(efuse & (1U << pmd));
}
886 
/*
 * Register one PMD (processor module) cache error device described by @np.
 * Skips PMDs that are fused off in the efuse word. Returns 0 on success or
 * a negative errno on failure.
 */
static int xgene_edac_pmd_add(struct xgene_edac *edac, struct device_node *np,
			      int version)
{
	struct edac_device_ctl_info *edac_dev;
	struct xgene_edac_pmd_ctx *ctx;
	struct resource res;
	char edac_name[10];
	u32 pmd;
	int rc;
	u32 val;

	/* Group devm allocations so a failed add releases them all at once */
	if (!devres_open_group(edac->dev, xgene_edac_pmd_add, GFP_KERNEL))
		return -ENOMEM;

	/* Determine if this PMD is disabled */
	if (of_property_read_u32(np, "pmd-controller", &pmd)) {
		dev_err(edac->dev, "no pmd-controller property\n");
		rc = -ENODEV;
		goto err_group;
	}
	/* Efuse word 0 flags PMDs that are fused off on this part */
	rc = regmap_read(edac->efuse_map, 0, &val);
	if (rc)
		goto err_group;
	if (!xgene_edac_pmd_available(val, pmd)) {
		rc = -ENODEV;
		goto err_group;
	}

	snprintf(edac_name, sizeof(edac_name), "l2c%d", pmd);
	edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
					      edac_name, 1, "l2c", 1, 2,
					      edac_device_alloc_index());
	if (!edac_dev) {
		rc = -ENOMEM;
		goto err_group;
	}

	ctx = edac_dev->pvt_info;
	ctx->name = "xgene_pmd_err";
	ctx->pmd = pmd;
	ctx->edac = edac;
	ctx->edac_dev = edac_dev;
	/* Local copy of the parent device; edac_dev->dev points at it */
	ctx->ddev = *edac->dev;
	ctx->version = version;
	edac_dev->dev = &ctx->ddev;
	edac_dev->ctl_name = ctx->name;
	edac_dev->dev_name = ctx->name;
	edac_dev->mod_name = EDAC_MOD_STR;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no PMD resource address\n");
		goto err_free;
	}
	ctx->pmd_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(ctx->pmd_csr)) {
		dev_err(edac->dev,
			"devm_ioremap_resource failed for PMD resource address\n");
		rc = PTR_ERR(ctx->pmd_csr);
		goto err_free;
	}

	if (edac_op_state == EDAC_OPSTATE_POLL)
		edac_dev->edac_check = xgene_edac_pmd_check;

	xgene_edac_pmd_create_debugfs_nodes(edac_dev);

	/* The code treats a positive return as registration failure */
	rc = edac_device_add_device(edac_dev);
	if (rc > 0) {
		dev_err(edac->dev, "edac_device_add_device failed\n");
		rc = -ENOMEM;
		goto err_free;
	}

	if (edac_op_state == EDAC_OPSTATE_INT)
		edac_dev->op_state = OP_RUNNING_INTERRUPT;

	list_add(&ctx->next, &edac->pmds);

	/* Turn on hardware error detection for this PMD */
	xgene_edac_pmd_hw_ctl(edac_dev, 1);

	/* Success: keep the devm allocations but drop the group marker */
	devres_remove_group(edac->dev, xgene_edac_pmd_add);

	dev_info(edac->dev, "X-Gene EDAC PMD%d registered\n", ctx->pmd);
	return 0;

err_free:
	edac_device_free_ctl_info(edac_dev);
err_group:
	devres_release_group(edac->dev, xgene_edac_pmd_add);
	return rc;
}
979 
/*
 * Tear down one PMD EDAC device: quiesce the hardware first, then
 * unregister from the EDAC core and free the control structure.
 */
static int xgene_edac_pmd_remove(struct xgene_edac_pmd_ctx *pmd)
{
	struct edac_device_ctl_info *edac_dev = pmd->edac_dev;

	/* Disable hardware error reporting before unregistering */
	xgene_edac_pmd_hw_ctl(edac_dev, 0);
	edac_device_del_device(edac_dev->dev);
	edac_device_free_ctl_info(edac_dev);
	return 0;
}
989 
990 /* L3 Error device */
991 #define L3C_ESR				(0x0A * 4)
992 #define  L3C_ESR_DATATAG_MASK		BIT(9)
993 #define  L3C_ESR_MULTIHIT_MASK		BIT(8)
994 #define  L3C_ESR_UCEVICT_MASK		BIT(6)
995 #define  L3C_ESR_MULTIUCERR_MASK	BIT(5)
996 #define  L3C_ESR_MULTICERR_MASK		BIT(4)
997 #define  L3C_ESR_UCERR_MASK		BIT(3)
998 #define  L3C_ESR_CERR_MASK		BIT(2)
999 #define  L3C_ESR_UCERRINTR_MASK		BIT(1)
1000 #define  L3C_ESR_CERRINTR_MASK		BIT(0)
1001 #define L3C_ECR				(0x0B * 4)
1002 #define  L3C_ECR_UCINTREN		BIT(3)
1003 #define  L3C_ECR_CINTREN		BIT(2)
1004 #define  L3C_UCERREN			BIT(1)
1005 #define  L3C_CERREN			BIT(0)
1006 #define L3C_ELR				(0x0C * 4)
1007 #define  L3C_ELR_ERRSYN(src)		((src & 0xFF800000) >> 23)
1008 #define  L3C_ELR_ERRWAY(src)		((src & 0x007E0000) >> 17)
1009 #define  L3C_ELR_AGENTID(src)		((src & 0x0001E000) >> 13)
1010 #define  L3C_ELR_ERRGRP(src)		((src & 0x00000F00) >> 8)
1011 #define  L3C_ELR_OPTYPE(src)		((src & 0x000000F0) >> 4)
1012 #define  L3C_ELR_PADDRHIGH(src)		(src & 0x0000000F)
1013 #define L3C_AELR			(0x0D * 4)
1014 #define L3C_BELR			(0x0E * 4)
1015 #define  L3C_BELR_BANK(src)		(src & 0x0000000F)
1016 
/* Per-device context shared by the L3 and SoC EDAC error devices */
struct xgene_edac_dev_ctx {
	struct list_head	next;		/* node on edac->l3s or ->socs */
	struct device		ddev;		/* local copy of the parent device */
	char			*name;		/* ctl/dev name string */
	struct xgene_edac	*edac;		/* parent driver context */
	struct edac_device_ctl_info *edac_dev;	/* registered EDAC device */
	int			edac_idx;	/* EDAC device instance index */
	void __iomem		*dev_csr;	/* mapped device CSR base */
	int			version;	/* hardware revision */
};
1027 
1028 /*
1029  * Version 1 of the L3 controller has broken single bit correctable logic for
1030  * certain error syndromes. Log them as uncorrectable in that case.
1031  */
xgene_edac_l3_promote_to_uc_err(u32 l3cesr,u32 l3celr)1032 static bool xgene_edac_l3_promote_to_uc_err(u32 l3cesr, u32 l3celr)
1033 {
1034 	if (l3cesr & L3C_ESR_DATATAG_MASK) {
1035 		switch (L3C_ELR_ERRSYN(l3celr)) {
1036 		case 0x13C:
1037 		case 0x0B4:
1038 		case 0x007:
1039 		case 0x00D:
1040 		case 0x00E:
1041 		case 0x019:
1042 		case 0x01A:
1043 		case 0x01C:
1044 		case 0x04E:
1045 		case 0x041:
1046 			return true;
1047 		}
1048 	} else if (L3C_ELR_ERRWAY(l3celr) == 9)
1049 		return true;
1050 
1051 	return false;
1052 }
1053 
/*
 * Service the L3 cache error status registers: log the fault details,
 * clear the latched status, and report CE/UE counts to the EDAC core.
 */
static void xgene_edac_l3_check(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 l3cesr;
	u32 l3celr;
	u32 l3caelr;
	u32 l3cbelr;

	l3cesr = readl(ctx->dev_csr + L3C_ESR);
	/* Fast exit when neither correctable nor uncorrectable is pending */
	if (!(l3cesr & (L3C_ESR_UCERR_MASK | L3C_ESR_CERR_MASK)))
		return;

	if (l3cesr & L3C_ESR_UCERR_MASK)
		dev_err(edac_dev->dev, "L3C uncorrectable error\n");
	if (l3cesr & L3C_ESR_CERR_MASK)
		dev_warn(edac_dev->dev, "L3C correctable error\n");

	/* Latch the error location registers before clearing the status */
	l3celr = readl(ctx->dev_csr + L3C_ELR);
	l3caelr = readl(ctx->dev_csr + L3C_AELR);
	l3cbelr = readl(ctx->dev_csr + L3C_BELR);
	if (l3cesr & L3C_ESR_MULTIHIT_MASK)
		dev_err(edac_dev->dev, "L3C multiple hit error\n");
	if (l3cesr & L3C_ESR_UCEVICT_MASK)
		dev_err(edac_dev->dev,
			"L3C dropped eviction of line with error\n");
	if (l3cesr & L3C_ESR_MULTIUCERR_MASK)
		dev_err(edac_dev->dev, "L3C multiple uncorrectable error\n");
	if (l3cesr & L3C_ESR_DATATAG_MASK)
		dev_err(edac_dev->dev,
			"L3C data error syndrome 0x%X group 0x%X\n",
			L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRGRP(l3celr));
	else
		dev_err(edac_dev->dev,
			"L3C tag error syndrome 0x%X Way of Tag 0x%X Agent ID 0x%X Operation type 0x%X\n",
			L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRWAY(l3celr),
			L3C_ELR_AGENTID(l3celr), L3C_ELR_OPTYPE(l3celr));
	/*
	 * NOTE: Address [41:38] in L3C_ELR_PADDRHIGH(l3celr).
	 *       Address [37:6] in l3caelr. Lower 6 bits are zero.
	 */
	dev_err(edac_dev->dev, "L3C error address 0x%08X.%08X bank %d\n",
		L3C_ELR_PADDRHIGH(l3celr) << 6 | (l3caelr >> 26),
		(l3caelr & 0x3FFFFFFF) << 6, L3C_BELR_BANK(l3cbelr));
	dev_err(edac_dev->dev,
		"L3C error status register value 0x%X\n", l3cesr);

	/* Clear L3C error interrupt */
	writel(0, ctx->dev_csr + L3C_ESR);

	/* v1 cannot correct certain syndromes; account them as UE instead */
	if (ctx->version <= 1 &&
	    xgene_edac_l3_promote_to_uc_err(l3cesr, l3celr)) {
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
		return;
	}
	if (l3cesr & L3C_ESR_CERR_MASK)
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
	if (l3cesr & L3C_ESR_UCERR_MASK)
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
}
1113 
/*
 * Enable or disable L3 error interrupt delivery. Error *detection* is
 * always left enabled; only interrupt generation is gated, and only when
 * the device is operating in interrupt mode.
 */
static void xgene_edac_l3_hw_init(struct edac_device_ctl_info *edac_dev,
				  bool enable)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 val;

	val = readl(ctx->dev_csr + L3C_ECR);
	/* Keep correctable/uncorrectable error detection on unconditionally */
	val |= L3C_UCERREN | L3C_CERREN;
	/* On disable, we just disable interrupt but keep error enabled */
	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		if (enable)
			val |= L3C_ECR_UCINTREN | L3C_ECR_CINTREN;
		else
			val &= ~(L3C_ECR_UCINTREN | L3C_ECR_CINTREN);
	}
	writel(val, ctx->dev_csr + L3C_ECR);

	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		/* Enable/disable L3 error top level interrupt */
		if (enable) {
			/* Clearing the PCP mask bit unmasks the interrupt */
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       L3C_UNCORR_ERR_MASK);
			xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
					       L3C_CORR_ERR_MASK);
		} else {
			xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
					       L3C_UNCORR_ERR_MASK);
			xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
					       L3C_CORR_ERR_MASK);
		}
	}
}
1146 
xgene_edac_l3_inject_ctrl_write(struct file * file,const char __user * data,size_t count,loff_t * ppos)1147 static ssize_t xgene_edac_l3_inject_ctrl_write(struct file *file,
1148 					       const char __user *data,
1149 					       size_t count, loff_t *ppos)
1150 {
1151 	struct edac_device_ctl_info *edac_dev = file->private_data;
1152 	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1153 
1154 	/* Generate all errors */
1155 	writel(0xFFFFFFFF, ctx->dev_csr + L3C_ESR);
1156 	return count;
1157 }
1158 
/* debugfs ops for the L3 error-injection control file (write-only trigger) */
static const struct file_operations xgene_edac_l3_debug_inject_fops = {
	.open = simple_open,
	.write = xgene_edac_l3_inject_ctrl_write,
	.llseek = generic_file_llseek
};
1164 
1165 static void
xgene_edac_l3_create_debugfs_nodes(struct edac_device_ctl_info * edac_dev)1166 xgene_edac_l3_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
1167 {
1168 	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1169 	struct dentry *dbgfs_dir;
1170 	char name[10];
1171 
1172 	if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
1173 		return;
1174 
1175 	snprintf(name, sizeof(name), "l3c%d", ctx->edac_idx);
1176 	dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
1177 	if (!dbgfs_dir)
1178 		return;
1179 
1180 	debugfs_create_file("l3_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
1181 			    &xgene_edac_l3_debug_inject_fops);
1182 }
1183 
/*
 * Register the L3 cache error device described by @np. Returns 0 on
 * success or a negative errno on failure.
 */
static int xgene_edac_l3_add(struct xgene_edac *edac, struct device_node *np,
			     int version)
{
	struct edac_device_ctl_info *edac_dev;
	struct xgene_edac_dev_ctx *ctx;
	struct resource res;
	void __iomem *dev_csr;
	int edac_idx;
	int rc = 0;

	/* Group devm allocations so a failed add releases them all at once */
	if (!devres_open_group(edac->dev, xgene_edac_l3_add, GFP_KERNEL))
		return -ENOMEM;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no L3 resource address\n");
		goto err_release_group;
	}
	dev_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(dev_csr)) {
		dev_err(edac->dev,
			"devm_ioremap_resource failed for L3 resource address\n");
		rc = PTR_ERR(dev_csr);
		goto err_release_group;
	}

	edac_idx = edac_device_alloc_index();
	edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
					      "l3c", 1, "l3c", 1, 0, edac_idx);
	if (!edac_dev) {
		rc = -ENOMEM;
		goto err_release_group;
	}

	ctx = edac_dev->pvt_info;
	ctx->dev_csr = dev_csr;
	ctx->name = "xgene_l3_err";
	ctx->edac_idx = edac_idx;
	ctx->edac = edac;
	ctx->edac_dev = edac_dev;
	/* Local copy of the parent device; edac_dev->dev points at it */
	ctx->ddev = *edac->dev;
	ctx->version = version;
	edac_dev->dev = &ctx->ddev;
	edac_dev->ctl_name = ctx->name;
	edac_dev->dev_name = ctx->name;
	edac_dev->mod_name = EDAC_MOD_STR;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		edac_dev->edac_check = xgene_edac_l3_check;

	xgene_edac_l3_create_debugfs_nodes(edac_dev);

	/* The code treats a positive return as registration failure */
	rc = edac_device_add_device(edac_dev);
	if (rc > 0) {
		dev_err(edac->dev, "failed edac_device_add_device()\n");
		rc = -ENOMEM;
		goto err_ctl_free;
	}

	if (edac_op_state == EDAC_OPSTATE_INT)
		edac_dev->op_state = OP_RUNNING_INTERRUPT;

	list_add(&ctx->next, &edac->l3s);

	/* Turn on hardware error detection/interrupts */
	xgene_edac_l3_hw_init(edac_dev, 1);

	/* Success: keep the devm allocations but drop the group marker */
	devres_remove_group(edac->dev, xgene_edac_l3_add);

	dev_info(edac->dev, "X-Gene EDAC L3 registered\n");
	return 0;

err_ctl_free:
	edac_device_free_ctl_info(edac_dev);
err_release_group:
	devres_release_group(edac->dev, xgene_edac_l3_add);
	return rc;
}
1261 
xgene_edac_l3_remove(struct xgene_edac_dev_ctx * l3)1262 static int xgene_edac_l3_remove(struct xgene_edac_dev_ctx *l3)
1263 {
1264 	struct edac_device_ctl_info *edac_dev = l3->edac_dev;
1265 
1266 	xgene_edac_l3_hw_init(edac_dev, 0);
1267 	edac_device_del_device(l3->edac->dev);
1268 	edac_device_free_ctl_info(edac_dev);
1269 	return 0;
1270 }
1271 
1272 /* SoC error device */
1273 #define IOBAXIS0TRANSERRINTSTS		0x0000
1274 #define  IOBAXIS0_M_ILLEGAL_ACCESS_MASK	BIT(1)
1275 #define  IOBAXIS0_ILLEGAL_ACCESS_MASK	BIT(0)
1276 #define IOBAXIS0TRANSERRINTMSK		0x0004
1277 #define IOBAXIS0TRANSERRREQINFOL	0x0008
1278 #define IOBAXIS0TRANSERRREQINFOH	0x000c
1279 #define  REQTYPE_RD(src)		(((src) & BIT(0)))
1280 #define  ERRADDRH_RD(src)		(((src) & 0xffc00000) >> 22)
1281 #define IOBAXIS1TRANSERRINTSTS		0x0010
1282 #define IOBAXIS1TRANSERRINTMSK		0x0014
1283 #define IOBAXIS1TRANSERRREQINFOL	0x0018
1284 #define IOBAXIS1TRANSERRREQINFOH	0x001c
1285 #define IOBPATRANSERRINTSTS		0x0020
1286 #define  IOBPA_M_REQIDRAM_CORRUPT_MASK	BIT(7)
1287 #define  IOBPA_REQIDRAM_CORRUPT_MASK	BIT(6)
1288 #define  IOBPA_M_TRANS_CORRUPT_MASK	BIT(5)
1289 #define  IOBPA_TRANS_CORRUPT_MASK	BIT(4)
1290 #define  IOBPA_M_WDATA_CORRUPT_MASK	BIT(3)
1291 #define  IOBPA_WDATA_CORRUPT_MASK	BIT(2)
1292 #define  IOBPA_M_RDATA_CORRUPT_MASK	BIT(1)
1293 #define  IOBPA_RDATA_CORRUPT_MASK	BIT(0)
1294 #define IOBBATRANSERRINTSTS		0x0030
1295 #define  M_ILLEGAL_ACCESS_MASK		BIT(15)
1296 #define  ILLEGAL_ACCESS_MASK		BIT(14)
1297 #define  M_WIDRAM_CORRUPT_MASK		BIT(13)
1298 #define  WIDRAM_CORRUPT_MASK		BIT(12)
1299 #define  M_RIDRAM_CORRUPT_MASK		BIT(11)
1300 #define  RIDRAM_CORRUPT_MASK		BIT(10)
1301 #define  M_TRANS_CORRUPT_MASK		BIT(9)
1302 #define  TRANS_CORRUPT_MASK		BIT(8)
1303 #define  M_WDATA_CORRUPT_MASK		BIT(7)
1304 #define  WDATA_CORRUPT_MASK		BIT(6)
1305 #define  M_RBM_POISONED_REQ_MASK	BIT(5)
1306 #define  RBM_POISONED_REQ_MASK		BIT(4)
1307 #define  M_XGIC_POISONED_REQ_MASK	BIT(3)
1308 #define  XGIC_POISONED_REQ_MASK		BIT(2)
1309 #define  M_WRERR_RESP_MASK		BIT(1)
1310 #define  WRERR_RESP_MASK		BIT(0)
1311 #define IOBBATRANSERRREQINFOL		0x0038
1312 #define IOBBATRANSERRREQINFOH		0x003c
1313 #define  REQTYPE_F2_RD(src)		((src) & BIT(0))
1314 #define  ERRADDRH_F2_RD(src)		(((src) & 0xffc00000) >> 22)
1315 #define IOBBATRANSERRCSWREQID		0x0040
1316 #define XGICTRANSERRINTSTS		0x0050
1317 #define  M_WR_ACCESS_ERR_MASK		BIT(3)
1318 #define  WR_ACCESS_ERR_MASK		BIT(2)
1319 #define  M_RD_ACCESS_ERR_MASK		BIT(1)
1320 #define  RD_ACCESS_ERR_MASK		BIT(0)
1321 #define XGICTRANSERRINTMSK		0x0054
1322 #define XGICTRANSERRREQINFO		0x0058
1323 #define  REQTYPE_MASK			BIT(26)
1324 #define  ERRADDR_RD(src)		((src) & 0x03ffffff)
1325 #define GLBL_ERR_STS			0x0800
1326 #define  MDED_ERR_MASK			BIT(3)
1327 #define  DED_ERR_MASK			BIT(2)
1328 #define  MSEC_ERR_MASK			BIT(1)
1329 #define  SEC_ERR_MASK			BIT(0)
1330 #define GLBL_SEC_ERRL			0x0810
1331 #define GLBL_SEC_ERRH			0x0818
1332 #define GLBL_MSEC_ERRL			0x0820
1333 #define GLBL_MSEC_ERRH			0x0828
1334 #define GLBL_DED_ERRL			0x0830
1335 #define GLBL_DED_ERRLMASK		0x0834
1336 #define GLBL_DED_ERRH			0x0838
1337 #define GLBL_DED_ERRHMASK		0x083c
1338 #define GLBL_MDED_ERRL			0x0840
1339 #define GLBL_MDED_ERRLMASK		0x0844
1340 #define GLBL_MDED_ERRH			0x0848
1341 #define GLBL_MDED_ERRHMASK		0x084c
1342 
1343 /* IO Bus Registers */
1344 #define RBCSR				0x0000
1345 #define STICKYERR_MASK			BIT(0)
1346 #define RBEIR				0x0008
1347 #define AGENT_OFFLINE_ERR_MASK		BIT(30)
1348 #define UNIMPL_RBPAGE_ERR_MASK		BIT(29)
1349 #define WORD_ALIGNED_ERR_MASK		BIT(28)
1350 #define PAGE_ACCESS_ERR_MASK		BIT(27)
1351 #define WRITE_ACCESS_MASK		BIT(26)
1352 
/*
 * SoC agent name for each MEMERRINTSTS bit on v1 hardware: bit i maps to
 * soc_mem_err_v1[i] (see xgene_edac_soc_check()).
 */
static const char * const soc_mem_err_v1[] = {
	"10GbE0",
	"10GbE1",
	"Security",
	"SATA45",
	"SATA23/ETH23",
	"SATA01/ETH01",
	"USB1",
	"USB0",
	"QML",
	"QM0",
	"QM1 (XGbE01)",
	"PCIE4",
	"PCIE3",
	"PCIE2",
	"PCIE1",
	"PCIE0",
	"CTX Manager",
	"OCM",
	"1GbE",
	"CLE",
	"AHBC",
	"PktDMA",
	"GFC",
	"MSLIM",
	"10GbE2",
	"10GbE3",
	"QM2 (XGbE23)",
	"IOB",
	"unknown",
	"unknown",
	"unknown",
	"unknown",
};
1387 
/*
 * Report XGIC transaction errors and IOB internal memory (single/double
 * bit) errors, acknowledging each serviced status register.
 */
static void xgene_edac_iob_gic_report(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 err_addr_lo;
	u32 err_addr_hi;
	u32 reg;
	u32 info;

	/* GIC transaction error interrupt */
	reg = readl(ctx->dev_csr + XGICTRANSERRINTSTS);
	if (!reg)
		goto chk_iob_err;
	dev_err(edac_dev->dev, "XGIC transaction error\n");
	if (reg & RD_ACCESS_ERR_MASK)
		dev_err(edac_dev->dev, "XGIC read size error\n");
	if (reg & M_RD_ACCESS_ERR_MASK)
		dev_err(edac_dev->dev, "Multiple XGIC read size error\n");
	if (reg & WR_ACCESS_ERR_MASK)
		dev_err(edac_dev->dev, "XGIC write size error\n");
	if (reg & M_WR_ACCESS_ERR_MASK)
		dev_err(edac_dev->dev, "Multiple XGIC write size error\n");
	info = readl(ctx->dev_csr + XGICTRANSERRREQINFO);
	dev_err(edac_dev->dev, "XGIC %s access @ 0x%08X (0x%08X)\n",
		str_read_write(info & REQTYPE_MASK), ERRADDR_RD(info),
		info);
	/* Acknowledge: write the handled bits back to the status register */
	writel(reg, ctx->dev_csr + XGICTRANSERRINTSTS);

chk_iob_err:
	/* IOB memory error */
	reg = readl(ctx->dev_csr + GLBL_ERR_STS);
	if (!reg)
		return;
	if (reg & SEC_ERR_MASK) {
		err_addr_lo = readl(ctx->dev_csr + GLBL_SEC_ERRL);
		err_addr_hi = readl(ctx->dev_csr + GLBL_SEC_ERRH);
		dev_err(edac_dev->dev,
			"IOB single-bit correctable memory at 0x%08X.%08X error\n",
			err_addr_lo, err_addr_hi);
		/* Write back the latched address registers to clear them */
		writel(err_addr_lo, ctx->dev_csr + GLBL_SEC_ERRL);
		writel(err_addr_hi, ctx->dev_csr + GLBL_SEC_ERRH);
	}
	if (reg & MSEC_ERR_MASK) {
		err_addr_lo = readl(ctx->dev_csr + GLBL_MSEC_ERRL);
		err_addr_hi = readl(ctx->dev_csr + GLBL_MSEC_ERRH);
		dev_err(edac_dev->dev,
			"IOB multiple single-bit correctable memory at 0x%08X.%08X error\n",
			err_addr_lo, err_addr_hi);
		writel(err_addr_lo, ctx->dev_csr + GLBL_MSEC_ERRL);
		writel(err_addr_hi, ctx->dev_csr + GLBL_MSEC_ERRH);
	}
	if (reg & (SEC_ERR_MASK | MSEC_ERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);

	if (reg & DED_ERR_MASK) {
		err_addr_lo = readl(ctx->dev_csr + GLBL_DED_ERRL);
		err_addr_hi = readl(ctx->dev_csr + GLBL_DED_ERRH);
		dev_err(edac_dev->dev,
			"IOB double-bit uncorrectable memory at 0x%08X.%08X error\n",
			err_addr_lo, err_addr_hi);
		writel(err_addr_lo, ctx->dev_csr + GLBL_DED_ERRL);
		writel(err_addr_hi, ctx->dev_csr + GLBL_DED_ERRH);
	}
	if (reg & MDED_ERR_MASK) {
		err_addr_lo = readl(ctx->dev_csr + GLBL_MDED_ERRL);
		err_addr_hi = readl(ctx->dev_csr + GLBL_MDED_ERRH);
		dev_err(edac_dev->dev,
			"Multiple IOB double-bit uncorrectable memory at 0x%08X.%08X error\n",
			err_addr_lo, err_addr_hi);
		writel(err_addr_lo, ctx->dev_csr + GLBL_MDED_ERRL);
		writel(err_addr_hi, ctx->dev_csr + GLBL_MDED_ERRH);
	}
	if (reg & (DED_ERR_MASK | MDED_ERR_MASK))
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
}
1462 
/*
 * Report register-bus (RB) access errors and IOB bridge agent (BA)
 * transaction errors, clearing the latched status as it goes.
 */
static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 err_addr_lo;
	u32 err_addr_hi;
	u32 reg;

	/* If the register bus resource isn't available, just skip it */
	if (!ctx->edac->rb_map)
		goto rb_skip;

	/*
	 * Check RB access errors
	 * 1. Out of range
	 * 2. Un-implemented page
	 * 3. Un-aligned access
	 * 4. Offline slave IP
	 */
	if (regmap_read(ctx->edac->rb_map, RBCSR, &reg))
		return;
	if (reg & STICKYERR_MASK) {
		bool write;

		dev_err(edac_dev->dev, "IOB bus access error(s)\n");
		if (regmap_read(ctx->edac->rb_map, RBEIR, &reg))
			return;
		/* RBEIR reports whether the faulting access was a write */
		write = reg & WRITE_ACCESS_MASK ? 1 : 0;
		if (reg & AGENT_OFFLINE_ERR_MASK)
			dev_err(edac_dev->dev,
				"IOB bus %s access to offline agent error\n",
				str_write_read(write));
		if (reg & UNIMPL_RBPAGE_ERR_MASK)
			dev_err(edac_dev->dev,
				"IOB bus %s access to unimplemented page error\n",
				str_write_read(write));
		if (reg & WORD_ALIGNED_ERR_MASK)
			dev_err(edac_dev->dev,
				"IOB bus %s word aligned access error\n",
				str_write_read(write));
		if (reg & PAGE_ACCESS_ERR_MASK)
			dev_err(edac_dev->dev,
				"IOB bus %s to page out of range access error\n",
				str_write_read(write));
		/* Write zero to clear the sticky error state */
		if (regmap_write(ctx->edac->rb_map, RBEIR, 0))
			return;
		if (regmap_write(ctx->edac->rb_map, RBCSR, 0))
			return;
	}
rb_skip:

	/* IOB Bridge agent transaction error interrupt */
	reg = readl(ctx->dev_csr + IOBBATRANSERRINTSTS);
	if (!reg)
		return;

	dev_err(edac_dev->dev, "IOB bridge agent (BA) transaction error\n");
	if (reg & WRERR_RESP_MASK)
		dev_err(edac_dev->dev, "IOB BA write response error\n");
	if (reg & M_WRERR_RESP_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA write response error\n");
	if (reg & XGIC_POISONED_REQ_MASK)
		dev_err(edac_dev->dev, "IOB BA XGIC poisoned write error\n");
	if (reg & M_XGIC_POISONED_REQ_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA XGIC poisoned write error\n");
	if (reg & RBM_POISONED_REQ_MASK)
		dev_err(edac_dev->dev, "IOB BA RBM poisoned write error\n");
	if (reg & M_RBM_POISONED_REQ_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA RBM poisoned write error\n");
	if (reg & WDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB BA write error\n");
	if (reg & M_WDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev, "Multiple IOB BA write error\n");
	if (reg & TRANS_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB BA transaction error\n");
	if (reg & M_TRANS_CORRUPT_MASK)
		dev_err(edac_dev->dev, "Multiple IOB BA transaction error\n");
	if (reg & RIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"IOB BA RDIDRAM read transaction ID error\n");
	if (reg & M_RIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA RDIDRAM read transaction ID error\n");
	if (reg & WIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"IOB BA RDIDRAM write transaction ID error\n");
	if (reg & M_WIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA RDIDRAM write transaction ID error\n");
	if (reg & ILLEGAL_ACCESS_MASK)
		dev_err(edac_dev->dev,
			"IOB BA XGIC/RB illegal access error\n");
	if (reg & M_ILLEGAL_ACCESS_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA XGIC/RB illegal access error\n");

	err_addr_lo = readl(ctx->dev_csr + IOBBATRANSERRREQINFOL);
	err_addr_hi = readl(ctx->dev_csr + IOBBATRANSERRREQINFOH);
	dev_err(edac_dev->dev, "IOB BA %s access at 0x%02X.%08X (0x%08X)\n",
		str_read_write(REQTYPE_F2_RD(err_addr_hi)),
		ERRADDRH_F2_RD(err_addr_hi), err_addr_lo, err_addr_hi);
	if (reg & WRERR_RESP_MASK)
		dev_err(edac_dev->dev, "IOB BA requestor ID 0x%08X\n",
			readl(ctx->dev_csr + IOBBATRANSERRCSWREQID));
	/* Acknowledge: write the handled bits back to the status register */
	writel(reg, ctx->dev_csr + IOBBATRANSERRINTSTS);
}
1571 
/*
 * Report IOB processing agent (PA) transaction errors and AXI slave 0/1
 * illegal access errors, acknowledging each serviced status register.
 */
static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 err_addr_lo;
	u32 err_addr_hi;
	u32 reg;

	/* IOB Processing agent transaction error interrupt */
	reg = readl(ctx->dev_csr + IOBPATRANSERRINTSTS);
	if (!reg)
		goto chk_iob_axi0;
	dev_err(edac_dev->dev, "IOB processing agent (PA) transaction error\n");
	if (reg & IOBPA_RDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB PA read data RAM error\n");
	if (reg & IOBPA_M_RDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB PA read data RAM error\n");
	if (reg & IOBPA_WDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB PA write data RAM error\n");
	if (reg & IOBPA_M_WDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB PA write data RAM error\n");
	if (reg & IOBPA_TRANS_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB PA transaction error\n");
	if (reg & IOBPA_M_TRANS_CORRUPT_MASK)
		dev_err(edac_dev->dev, "Multiple IOB PA transaction error\n");
	if (reg & IOBPA_REQIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB PA transaction ID RAM error\n");
	if (reg & IOBPA_M_REQIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB PA transaction ID RAM error\n");
	/* Acknowledge: write the handled bits back to the status register */
	writel(reg, ctx->dev_csr + IOBPATRANSERRINTSTS);

chk_iob_axi0:
	/* IOB AXI0 Error */
	reg = readl(ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);
	if (!reg)
		goto chk_iob_axi1;
	err_addr_lo = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOL);
	err_addr_hi = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOH);
	dev_err(edac_dev->dev,
		"%sAXI slave 0 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
		reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
		str_read_write(REQTYPE_RD(err_addr_hi)),
		ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
	writel(reg, ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);

chk_iob_axi1:
	/* IOB AXI1 Error */
	reg = readl(ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
	if (!reg)
		return;
	err_addr_lo = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOL);
	err_addr_hi = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOH);
	/*
	 * NOTE(review): the AXIS0 "multiple error" mask is reused here;
	 * presumably AXIS1 has the identical bit layout (no separate AXIS1
	 * masks are defined) -- confirm against the register spec.
	 */
	dev_err(edac_dev->dev,
		"%sAXI slave 1 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
		reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
		str_read_write(REQTYPE_RD(err_addr_hi)),
		ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
	writel(reg, ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
}
1633 
/*
 * Top-level SoC error check: snapshot the PCP high/low priority and
 * memory error status, then dispatch to the per-agent reporters.
 */
static void xgene_edac_soc_check(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	const char * const *soc_mem_err = NULL;
	u32 pcp_hp_stat;
	u32 pcp_lp_stat;
	u32 reg;
	int i;

	xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
	xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
	xgene_edac_pcp_rd(ctx->edac, MEMERRINTSTS, &reg);
	/* Nothing pending from any SoC error source */
	if (!((pcp_hp_stat & (IOB_PA_ERR_MASK | IOB_BA_ERR_MASK |
			      IOB_XGIC_ERR_MASK | IOB_RB_ERR_MASK)) ||
	      (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) || reg))
		return;

	if (pcp_hp_stat & IOB_XGIC_ERR_MASK)
		xgene_edac_iob_gic_report(edac_dev);

	if (pcp_hp_stat & (IOB_RB_ERR_MASK | IOB_BA_ERR_MASK))
		xgene_edac_rb_report(edac_dev);

	if (pcp_hp_stat & IOB_PA_ERR_MASK)
		xgene_edac_pa_report(edac_dev);

	if (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) {
		dev_info(edac_dev->dev,
			 "CSW switch trace correctable memory parity error\n");
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
	}

	if (!reg)
		return;
	/* Only v1 hardware has a known per-bit agent name table */
	if (ctx->version == 1)
		soc_mem_err = soc_mem_err_v1;
	if (!soc_mem_err) {
		/* Unknown hardware revision: report the raw status word */
		dev_err(edac_dev->dev, "SoC memory parity error 0x%08X\n",
			reg);
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
		return;
	}
	/*
	 * NOTE(review): the loop covers bits 0-30 only although the name
	 * table has 32 entries; an error flagged in bit 31 would not be
	 * reported here -- confirm whether that bit is reserved.
	 */
	for (i = 0; i < 31; i++) {
		if (reg & (1 << i)) {
			dev_err(edac_dev->dev, "%s memory parity error\n",
				soc_mem_err[i]);
			edac_device_handle_ue(edac_dev, 0, 0,
					      edac_dev->ctl_name);
		}
	}
}
1685 
/*
 * Enable or disable the SoC error interrupt sources. Only takes effect
 * when the device is operating in interrupt mode.
 */
static void xgene_edac_soc_hw_init(struct edac_device_ctl_info *edac_dev,
				   bool enable)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;

	/* Enable SoC IP error interrupt */
	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		if (enable) {
			/* Clearing the PCP mask bits unmasks the interrupts */
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       IOB_PA_ERR_MASK |
					       IOB_BA_ERR_MASK |
					       IOB_XGIC_ERR_MASK |
					       IOB_RB_ERR_MASK);
			xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
					       CSW_SWITCH_TRACE_ERR_MASK);
		} else {
			xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
					       IOB_PA_ERR_MASK |
					       IOB_BA_ERR_MASK |
					       IOB_XGIC_ERR_MASK |
					       IOB_RB_ERR_MASK);
			xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
					       CSW_SWITCH_TRACE_ERR_MASK);
		}

		/* Writing 0 unmasks all per-block interrupt sources */
		writel(enable ? 0x0 : 0xFFFFFFFF,
		       ctx->dev_csr + IOBAXIS0TRANSERRINTMSK);
		writel(enable ? 0x0 : 0xFFFFFFFF,
		       ctx->dev_csr + IOBAXIS1TRANSERRINTMSK);
		writel(enable ? 0x0 : 0xFFFFFFFF,
		       ctx->dev_csr + XGICTRANSERRINTMSK);

		/*
		 * NOTE(review): setbits with 0x0 on enable is a no-op; if a
		 * prior disable set all MEMERRINTMSK bits they are never
		 * cleared again. Looks like this should clear the mask bits
		 * on enable -- confirm against the PCP register spec.
		 */
		xgene_edac_pcp_setbits(ctx->edac, MEMERRINTMSK,
				       enable ? 0x0 : 0xFFFFFFFF);
	}
}
1722 
/*
 * Register the SoC error device described by @np. Returns 0 on success or
 * a negative errno on failure.
 */
static int xgene_edac_soc_add(struct xgene_edac *edac, struct device_node *np,
			      int version)
{
	struct edac_device_ctl_info *edac_dev;
	struct xgene_edac_dev_ctx *ctx;
	void __iomem *dev_csr;
	struct resource res;
	int edac_idx;
	int rc;

	/* Group devm allocations so a failed add releases them all at once */
	if (!devres_open_group(edac->dev, xgene_edac_soc_add, GFP_KERNEL))
		return -ENOMEM;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no SoC resource address\n");
		goto err_release_group;
	}
	dev_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(dev_csr)) {
		dev_err(edac->dev,
			"devm_ioremap_resource failed for soc resource address\n");
		rc = PTR_ERR(dev_csr);
		goto err_release_group;
	}

	edac_idx = edac_device_alloc_index();
	edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
					      "SOC", 1, "SOC", 1, 2, edac_idx);
	if (!edac_dev) {
		rc = -ENOMEM;
		goto err_release_group;
	}

	ctx = edac_dev->pvt_info;
	ctx->dev_csr = dev_csr;
	ctx->name = "xgene_soc_err";
	ctx->edac_idx = edac_idx;
	ctx->edac = edac;
	ctx->edac_dev = edac_dev;
	/* Local copy of the parent device; edac_dev->dev points at it */
	ctx->ddev = *edac->dev;
	ctx->version = version;
	edac_dev->dev = &ctx->ddev;
	edac_dev->ctl_name = ctx->name;
	edac_dev->dev_name = ctx->name;
	edac_dev->mod_name = EDAC_MOD_STR;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		edac_dev->edac_check = xgene_edac_soc_check;

	/* The code treats a positive return as registration failure */
	rc = edac_device_add_device(edac_dev);
	if (rc > 0) {
		dev_err(edac->dev, "failed edac_device_add_device()\n");
		rc = -ENOMEM;
		goto err_ctl_free;
	}

	if (edac_op_state == EDAC_OPSTATE_INT)
		edac_dev->op_state = OP_RUNNING_INTERRUPT;

	list_add(&ctx->next, &edac->socs);

	/* Turn on hardware error detection/interrupts */
	xgene_edac_soc_hw_init(edac_dev, 1);

	/* Success: keep the devm allocations but drop the group marker */
	devres_remove_group(edac->dev, xgene_edac_soc_add);

	dev_info(edac->dev, "X-Gene EDAC SoC registered\n");

	return 0;

err_ctl_free:
	edac_device_free_ctl_info(edac_dev);
err_release_group:
	devres_release_group(edac->dev, xgene_edac_soc_add);
	return rc;
}
1799 
xgene_edac_soc_remove(struct xgene_edac_dev_ctx * soc)1800 static int xgene_edac_soc_remove(struct xgene_edac_dev_ctx *soc)
1801 {
1802 	struct edac_device_ctl_info *edac_dev = soc->edac_dev;
1803 
1804 	xgene_edac_soc_hw_init(edac_dev, 0);
1805 	edac_device_del_device(soc->edac->dev);
1806 	edac_device_free_ctl_info(edac_dev);
1807 	return 0;
1808 }
1809 
/*
 * xgene_edac_isr - shared interrupt handler for all X-Gene EDAC sources
 * @irq:    interrupt number (unused; same handler serves all lines)
 * @dev_id: the struct xgene_edac driver context
 *
 * Reads the high- and low-priority PCP error status registers and then
 * dispatches the per-block check routines for every registered MCU, PMD,
 * L3 and SoC error device.  Always returns IRQ_HANDLED.
 */
static irqreturn_t xgene_edac_isr(int irq, void *dev_id)
{
	struct xgene_edac *ctx = dev_id;
	struct xgene_edac_pmd_ctx *pmd;
	struct xgene_edac_dev_ctx *node;
	unsigned int pcp_hp_stat;
	unsigned int pcp_lp_stat;

	xgene_edac_pcp_rd(ctx, PCPHPERRINTSTS, &pcp_hp_stat);
	xgene_edac_pcp_rd(ctx, PCPLPERRINTSTS, &pcp_lp_stat);
	/* Any MCU error bit set: poll every registered memory controller. */
	if ((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
	    (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
	    (MCU_CORR_ERR_MASK & pcp_lp_stat)) {
		struct xgene_edac_mc_ctx *mcu;

		list_for_each_entry(mcu, &ctx->mcus, next)
			xgene_edac_mc_check(mcu->mci);
	}

	/* Each PMD has its own status bit: PMD0_MERR_MASK shifted by index. */
	list_for_each_entry(pmd, &ctx->pmds, next) {
		if ((PMD0_MERR_MASK << pmd->pmd) & pcp_hp_stat)
			xgene_edac_pmd_check(pmd->edac_dev);
	}

	/* L3 and SoC blocks are checked unconditionally on every interrupt. */
	list_for_each_entry(node, &ctx->l3s, next)
		xgene_edac_l3_check(node->edac_dev);

	list_for_each_entry(node, &ctx->socs, next)
		xgene_edac_soc_check(node->edac_dev);

	return IRQ_HANDLED;
}
1842 
/*
 * xgene_edac_probe - platform driver probe for the X-Gene EDAC complex
 * @pdev: platform device for the top-level "apm,xgene-edac" node
 *
 * Looks up the syscon regmaps, maps the PCP CSR space, optionally wires
 * the three error interrupts (interrupt mode only) and then registers an
 * EDAC device for every enabled child node (MC/PMD/L3/SoC blocks).
 * Returns 0 on success or a negative errno.
 */
static int xgene_edac_probe(struct platform_device *pdev)
{
	struct xgene_edac *edac;
	struct device_node *child;
	struct resource *res;
	int rc;

	edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL);
	if (!edac)
		return -ENOMEM;

	edac->dev = &pdev->dev;
	platform_set_drvdata(pdev, edac);
	INIT_LIST_HEAD(&edac->mcus);
	INIT_LIST_HEAD(&edac->pmds);
	INIT_LIST_HEAD(&edac->l3s);
	INIT_LIST_HEAD(&edac->socs);
	spin_lock_init(&edac->lock);
	mutex_init(&edac->mc_lock);

	/* Mandatory syscon regmaps: csw, mcba, mcbb and efuse. */
	edac->csw_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
							"regmap-csw");
	if (IS_ERR(edac->csw_map)) {
		dev_err(edac->dev, "unable to get syscon regmap csw\n");
		rc = PTR_ERR(edac->csw_map);
		goto out_err;
	}

	edac->mcba_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
							 "regmap-mcba");
	if (IS_ERR(edac->mcba_map)) {
		dev_err(edac->dev, "unable to get syscon regmap mcba\n");
		rc = PTR_ERR(edac->mcba_map);
		goto out_err;
	}

	edac->mcbb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
							 "regmap-mcbb");
	if (IS_ERR(edac->mcbb_map)) {
		dev_err(edac->dev, "unable to get syscon regmap mcbb\n");
		rc = PTR_ERR(edac->mcbb_map);
		goto out_err;
	}
	edac->efuse_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
							  "regmap-efuse");
	if (IS_ERR(edac->efuse_map)) {
		dev_err(edac->dev, "unable to get syscon regmap efuse\n");
		rc = PTR_ERR(edac->efuse_map);
		goto out_err;
	}

	/*
	 * NOTE: The register bus resource is optional for compatibility
	 * reason.
	 */
	edac->rb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
						       "regmap-rb");
	if (IS_ERR(edac->rb_map)) {
		dev_warn(edac->dev, "missing syscon regmap rb\n");
		edac->rb_map = NULL;
	}

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	edac->pcp_csr = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(edac->pcp_csr)) {
		dev_err(&pdev->dev, "no PCP resource address\n");
		rc = PTR_ERR(edac->pcp_csr);
		goto out_err;
	}

	/* Interrupt mode: all three IRQ lines share the same handler. */
	if (edac_op_state == EDAC_OPSTATE_INT) {
		int irq;
		int i;

		for (i = 0; i < 3; i++) {
			irq = platform_get_irq_optional(pdev, i);
			if (irq < 0) {
				dev_err(&pdev->dev, "No IRQ resource\n");
				rc = irq;
				goto out_err;
			}
			rc = devm_request_irq(&pdev->dev, irq,
					      xgene_edac_isr, IRQF_SHARED,
					      dev_name(&pdev->dev), edac);
			if (rc) {
				dev_err(&pdev->dev,
					"Could not request IRQ %d\n", irq);
				goto out_err;
			}
		}
	}

	edac->dfs = edac_debugfs_create_dir(pdev->dev.kobj.name);

	/*
	 * Register each enabled child block.  Per-child failures are
	 * deliberately ignored so one bad block does not abort the rest.
	 */
	for_each_child_of_node(pdev->dev.of_node, child) {
		if (!of_device_is_available(child))
			continue;
		if (of_device_is_compatible(child, "apm,xgene-edac-mc"))
			xgene_edac_mc_add(edac, child);
		if (of_device_is_compatible(child, "apm,xgene-edac-pmd"))
			xgene_edac_pmd_add(edac, child, 1);
		if (of_device_is_compatible(child, "apm,xgene-edac-pmd-v2"))
			xgene_edac_pmd_add(edac, child, 2);
		if (of_device_is_compatible(child, "apm,xgene-edac-l3"))
			xgene_edac_l3_add(edac, child, 1);
		if (of_device_is_compatible(child, "apm,xgene-edac-l3-v2"))
			xgene_edac_l3_add(edac, child, 2);
		if (of_device_is_compatible(child, "apm,xgene-edac-soc"))
			xgene_edac_soc_add(edac, child, 0);
		if (of_device_is_compatible(child, "apm,xgene-edac-soc-v1"))
			xgene_edac_soc_add(edac, child, 1);
	}

	return 0;

out_err:
	return rc;
}
1961 
xgene_edac_remove(struct platform_device * pdev)1962 static void xgene_edac_remove(struct platform_device *pdev)
1963 {
1964 	struct xgene_edac *edac = dev_get_drvdata(&pdev->dev);
1965 	struct xgene_edac_mc_ctx *mcu;
1966 	struct xgene_edac_mc_ctx *temp_mcu;
1967 	struct xgene_edac_pmd_ctx *pmd;
1968 	struct xgene_edac_pmd_ctx *temp_pmd;
1969 	struct xgene_edac_dev_ctx *node;
1970 	struct xgene_edac_dev_ctx *temp_node;
1971 
1972 	list_for_each_entry_safe(mcu, temp_mcu, &edac->mcus, next)
1973 		xgene_edac_mc_remove(mcu);
1974 
1975 	list_for_each_entry_safe(pmd, temp_pmd, &edac->pmds, next)
1976 		xgene_edac_pmd_remove(pmd);
1977 
1978 	list_for_each_entry_safe(node, temp_node, &edac->l3s, next)
1979 		xgene_edac_l3_remove(node);
1980 
1981 	list_for_each_entry_safe(node, temp_node, &edac->socs, next)
1982 		xgene_edac_soc_remove(node);
1983 }
1984 
/* Device-tree match table: binds this driver to "apm,xgene-edac" nodes. */
static const struct of_device_id xgene_edac_of_match[] = {
	{ .compatible = "apm,xgene-edac" },
	{},
};
MODULE_DEVICE_TABLE(of, xgene_edac_of_match);

/* Platform driver glue for the X-Gene EDAC top-level device. */
static struct platform_driver xgene_edac_driver = {
	.probe = xgene_edac_probe,
	.remove = xgene_edac_remove,
	.driver = {
		.name = "xgene-edac",
		.of_match_table = xgene_edac_of_match,
	},
};
1999 
xgene_edac_init(void)2000 static int __init xgene_edac_init(void)
2001 {
2002 	int rc;
2003 
2004 	if (ghes_get_devices())
2005 		return -EBUSY;
2006 
2007 	/* Make sure error reporting method is sane */
2008 	switch (edac_op_state) {
2009 	case EDAC_OPSTATE_POLL:
2010 	case EDAC_OPSTATE_INT:
2011 		break;
2012 	default:
2013 		edac_op_state = EDAC_OPSTATE_INT;
2014 		break;
2015 	}
2016 
2017 	rc = platform_driver_register(&xgene_edac_driver);
2018 	if (rc) {
2019 		edac_printk(KERN_ERR, EDAC_MOD_STR,
2020 			    "EDAC fails to register\n");
2021 		goto reg_failed;
2022 	}
2023 
2024 	return 0;
2025 
2026 reg_failed:
2027 	return rc;
2028 }
2029 module_init(xgene_edac_init);
2030 
/* Module exit: unregister the platform driver (probe cleanup is devres). */
static void __exit xgene_edac_exit(void)
{
	platform_driver_unregister(&xgene_edac_driver);
}
module_exit(xgene_edac_exit);
2036 
/* Module metadata and the edac_op_state reporting-mode parameter. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Feng Kan <fkan@apm.com>");
MODULE_DESCRIPTION("APM X-Gene EDAC driver");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state,
		 "EDAC error reporting state: 0=Poll, 2=Interrupt");