1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * APM X-Gene SoC EDAC (error detection and correction)
4 *
5 * Copyright (c) 2015, Applied Micro Circuits Corporation
6 * Author: Feng Kan <fkan@apm.com>
7 * Loc Ho <lho@apm.com>
8 */
9
10 #include <linux/ctype.h>
11 #include <linux/edac.h>
12 #include <linux/interrupt.h>
13 #include <linux/mfd/syscon.h>
14 #include <linux/module.h>
15 #include <linux/of.h>
16 #include <linux/of_address.h>
17 #include <linux/regmap.h>
18 #include <linux/string_choices.h>
19
20 #include "edac_module.h"
21
22 #define EDAC_MOD_STR "xgene_edac"
23
24 /* Global error configuration status registers (CSR) */
25 #define PCPHPERRINTSTS 0x0000
26 #define PCPHPERRINTMSK 0x0004
27 #define MCU_CTL_ERR_MASK BIT(12)
28 #define IOB_PA_ERR_MASK BIT(11)
29 #define IOB_BA_ERR_MASK BIT(10)
30 #define IOB_XGIC_ERR_MASK BIT(9)
31 #define IOB_RB_ERR_MASK BIT(8)
32 #define L3C_UNCORR_ERR_MASK BIT(5)
33 #define MCU_UNCORR_ERR_MASK BIT(4)
34 #define PMD3_MERR_MASK BIT(3)
35 #define PMD2_MERR_MASK BIT(2)
36 #define PMD1_MERR_MASK BIT(1)
37 #define PMD0_MERR_MASK BIT(0)
38 #define PCPLPERRINTSTS 0x0008
39 #define PCPLPERRINTMSK 0x000C
40 #define CSW_SWITCH_TRACE_ERR_MASK BIT(2)
41 #define L3C_CORR_ERR_MASK BIT(1)
42 #define MCU_CORR_ERR_MASK BIT(0)
43 #define MEMERRINTSTS 0x0010
44 #define MEMERRINTMSK 0x0014
45
/* Top-level driver context shared by all MCU/PMD/L3/SoC error devices */
struct xgene_edac {
	struct device *dev;
	struct regmap *csw_map;		/* CPU switch fabric CSR */
	struct regmap *mcba_map;	/* memory controller bridge A CSR */
	struct regmap *mcbb_map;	/* memory controller bridge B CSR */
	struct regmap *efuse_map;	/* efuse CSR (PMD disable bits) */
	struct regmap *rb_map;
	void __iomem *pcp_csr;		/* PCP global error CSR region */
	spinlock_t lock;		/* protects RMW of PCP CSRs */
	struct dentry *dfs;		/* debugfs root for this instance */

	/* per-subsystem contexts registered by the *_add() helpers */
	struct list_head mcus;
	struct list_head pmds;
	struct list_head l3s;
	struct list_head socs;

	/* serializes MCU registration bookkeeping below */
	struct mutex mc_lock;
	int mc_active_mask;		/* MCUs present per MCB/MCU config */
	int mc_registered_mask;		/* MCUs that completed registration */
};
66
/* Read one 32-bit PCP global-error CSR at offset @reg into @val. */
static void xgene_edac_pcp_rd(struct xgene_edac *edac, u32 reg, u32 *val)
{
	*val = readl(edac->pcp_csr + reg);
}
71
/*
 * Clear @bits_mask in the PCP CSR at @reg. The read-modify-write runs
 * under the PCP lock so concurrent updates do not lose bits.
 */
static void xgene_edac_pcp_clrbits(struct xgene_edac *edac, u32 reg,
				   u32 bits_mask)
{
	u32 v;

	spin_lock(&edac->lock);
	v = readl(edac->pcp_csr + reg);
	writel(v & ~bits_mask, edac->pcp_csr + reg);
	spin_unlock(&edac->lock);
}
83
/*
 * Set @bits_mask in the PCP CSR at @reg. The read-modify-write runs
 * under the PCP lock so concurrent updates do not lose bits.
 */
static void xgene_edac_pcp_setbits(struct xgene_edac *edac, u32 reg,
				   u32 bits_mask)
{
	u32 v;

	spin_lock(&edac->lock);
	v = readl(edac->pcp_csr + reg);
	writel(v | bits_mask, edac->pcp_csr + reg);
	spin_unlock(&edac->lock);
}
95
96 /* Memory controller error CSR */
97 #define MCU_MAX_RANK 8
98 #define MCU_RANK_STRIDE 0x40
99
100 #define MCUGECR 0x0110
101 #define MCU_GECR_DEMANDUCINTREN_MASK BIT(0)
102 #define MCU_GECR_BACKUCINTREN_MASK BIT(1)
103 #define MCU_GECR_CINTREN_MASK BIT(2)
104 #define MUC_GECR_MCUADDRERREN_MASK BIT(9)
105 #define MCUGESR 0x0114
106 #define MCU_GESR_ADDRNOMATCH_ERR_MASK BIT(7)
107 #define MCU_GESR_ADDRMULTIMATCH_ERR_MASK BIT(6)
108 #define MCU_GESR_PHYP_ERR_MASK BIT(3)
109 #define MCUESRR0 0x0314
110 #define MCU_ESRR_MULTUCERR_MASK BIT(3)
111 #define MCU_ESRR_BACKUCERR_MASK BIT(2)
112 #define MCU_ESRR_DEMANDUCERR_MASK BIT(1)
113 #define MCU_ESRR_CERR_MASK BIT(0)
114 #define MCUESRRA0 0x0318
115 #define MCUEBLRR0 0x031c
116 #define MCU_EBLRR_ERRBANK_RD(src) (((src) & 0x00000007) >> 0)
117 #define MCUERCRR0 0x0320
118 #define MCU_ERCRR_ERRROW_RD(src) (((src) & 0xFFFF0000) >> 16)
119 #define MCU_ERCRR_ERRCOL_RD(src) ((src) & 0x00000FFF)
120 #define MCUSBECNT0 0x0324
121 #define MCU_SBECNT_COUNT(src) ((src) & 0xFFFF)
122
123 #define CSW_CSWCR 0x0000
124 #define CSW_CSWCR_DUALMCB_MASK BIT(0)
125
126 #define MCBADDRMR 0x0000
127 #define MCBADDRMR_MCU_INTLV_MODE_MASK BIT(3)
128 #define MCBADDRMR_DUALMCU_MODE_MASK BIT(2)
129 #define MCBADDRMR_MCB_INTLV_MODE_MASK BIT(1)
130 #define MCBADDRMR_ADDRESS_MODE_MASK BIT(0)
131
/* Per-MCU (memory controller unit) context */
struct xgene_edac_mc_ctx {
	struct list_head next;		/* link on xgene_edac->mcus */
	char *name;
	struct mem_ctl_info *mci;	/* back-pointer to the EDAC MC */
	struct xgene_edac *edac;	/* parent driver context */
	void __iomem *mcu_csr;		/* this MCU's CSR region */
	u32 mcu_id;			/* index from "memory-controller" DT property */
};
140
xgene_edac_mc_err_inject_write(struct file * file,const char __user * data,size_t count,loff_t * ppos)141 static ssize_t xgene_edac_mc_err_inject_write(struct file *file,
142 const char __user *data,
143 size_t count, loff_t *ppos)
144 {
145 struct mem_ctl_info *mci = file->private_data;
146 struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
147 int i;
148
149 for (i = 0; i < MCU_MAX_RANK; i++) {
150 writel(MCU_ESRR_MULTUCERR_MASK | MCU_ESRR_BACKUCERR_MASK |
151 MCU_ESRR_DEMANDUCERR_MASK | MCU_ESRR_CERR_MASK,
152 ctx->mcu_csr + MCUESRRA0 + i * MCU_RANK_STRIDE);
153 }
154 return count;
155 }
156
/* Write-only debugfs node; writes trigger xgene_edac_mc_err_inject_write() */
static const struct file_operations xgene_edac_mc_debug_inject_fops = {
	.open = simple_open,
	.write = xgene_edac_mc_err_inject_write,
	.llseek = generic_file_llseek,
};
162
xgene_edac_mc_create_debugfs_node(struct mem_ctl_info * mci)163 static void xgene_edac_mc_create_debugfs_node(struct mem_ctl_info *mci)
164 {
165 if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
166 return;
167
168 if (!mci->debugfs)
169 return;
170
171 edac_debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci,
172 &xgene_edac_mc_debug_inject_fops);
173 }
174
/*
 * Poll/check handler for one MCU: scan every rank for correctable and
 * uncorrectable errors, report them to the EDAC core, then clear the
 * hardware status registers (write-to-clear for MCUESRR, zero for the
 * bank/row-col/count capture registers).
 */
static void xgene_edac_mc_check(struct mem_ctl_info *mci)
{
	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
	unsigned int pcp_hp_stat;
	unsigned int pcp_lp_stat;
	u32 reg;
	u32 rank;
	u32 bank;
	u32 count;
	u32 col_row;

	/* Cheap top-level gate: skip the rank scan unless the PCP status
	 * shows an MCU uncorrectable, control, or correctable error.
	 */
	xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
	xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
	if (!((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
	      (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
	      (MCU_CORR_ERR_MASK & pcp_lp_stat)))
		return;

	for (rank = 0; rank < MCU_MAX_RANK; rank++) {
		reg = readl(ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);

		/* Detect uncorrectable memory error */
		if (reg & (MCU_ESRR_DEMANDUCERR_MASK |
			   MCU_ESRR_BACKUCERR_MASK)) {
			/* Detected uncorrectable memory error */
			edac_mc_chipset_printk(mci, KERN_ERR, "X-Gene",
				"MCU uncorrectable error at rank %d\n", rank);

			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
				1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
		}

		/* Detect correctable memory error */
		if (reg & MCU_ESRR_CERR_MASK) {
			/* Capture registers pinpoint the failing cell */
			bank = readl(ctx->mcu_csr + MCUEBLRR0 +
				     rank * MCU_RANK_STRIDE);
			col_row = readl(ctx->mcu_csr + MCUERCRR0 +
					rank * MCU_RANK_STRIDE);
			count = readl(ctx->mcu_csr + MCUSBECNT0 +
				      rank * MCU_RANK_STRIDE);
			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
				"MCU correctable error at rank %d bank %d column %d row %d count %d\n",
				rank, MCU_EBLRR_ERRBANK_RD(bank),
				MCU_ERCRR_ERRCOL_RD(col_row),
				MCU_ERCRR_ERRROW_RD(col_row),
				MCU_SBECNT_COUNT(count));

			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
				1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
		}

		/* Clear all error registers */
		writel(0x0, ctx->mcu_csr + MCUEBLRR0 + rank * MCU_RANK_STRIDE);
		writel(0x0, ctx->mcu_csr + MCUERCRR0 + rank * MCU_RANK_STRIDE);
		writel(0x0, ctx->mcu_csr + MCUSBECNT0 +
		       rank * MCU_RANK_STRIDE);
		/* Write back the latched bits to acknowledge them */
		writel(reg, ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);
	}

	/* Detect memory controller error */
	reg = readl(ctx->mcu_csr + MCUGESR);
	if (reg) {
		if (reg & MCU_GESR_ADDRNOMATCH_ERR_MASK)
			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
				"MCU address miss-match error\n");
		if (reg & MCU_GESR_ADDRMULTIMATCH_ERR_MASK)
			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
				"MCU address multi-match error\n");

		writel(reg, ctx->mcu_csr + MCUGESR);
	}
}
247
/*
 * Enable or disable error interrupts for one MCU. The per-MCU GECR bits
 * are toggled unconditionally; the shared top-level PCP mask is only
 * unmasked once every active MCU has registered (see comment below), and
 * is re-masked on the first disable. No-op unless running in IRQ mode.
 */
static void xgene_edac_mc_irq_ctl(struct mem_ctl_info *mci, bool enable)
{
	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
	unsigned int val;

	if (edac_op_state != EDAC_OPSTATE_INT)
		return;

	/* mc_lock protects mc_registered_mask and the PCP mask update */
	mutex_lock(&ctx->edac->mc_lock);

	/*
	 * As there is only single bit for enable error and interrupt mask,
	 * we must only enable top level interrupt after all MCUs are
	 * registered. Otherwise, if there is an error and the corresponding
	 * MCU has not registered, the interrupt will never get cleared. To
	 * determine all MCU have registered, we will keep track of active
	 * MCUs and registered MCUs.
	 */
	if (enable) {
		/* Set registered MCU bit */
		ctx->edac->mc_registered_mask |= 1 << ctx->mcu_id;

		/* Enable interrupt after all active MCU registered */
		if (ctx->edac->mc_registered_mask ==
		    ctx->edac->mc_active_mask) {
			/* Enable memory controller top level interrupt */
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       MCU_UNCORR_ERR_MASK |
					       MCU_CTL_ERR_MASK);
			xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
					       MCU_CORR_ERR_MASK);
		}

		/* Enable MCU interrupt and error reporting */
		val = readl(ctx->mcu_csr + MCUGECR);
		val |= MCU_GECR_DEMANDUCINTREN_MASK |
		       MCU_GECR_BACKUCINTREN_MASK |
		       MCU_GECR_CINTREN_MASK |
		       MUC_GECR_MCUADDRERREN_MASK;
		writel(val, ctx->mcu_csr + MCUGECR);
	} else {
		/* Disable MCU interrupt */
		val = readl(ctx->mcu_csr + MCUGECR);
		val &= ~(MCU_GECR_DEMANDUCINTREN_MASK |
			 MCU_GECR_BACKUCINTREN_MASK |
			 MCU_GECR_CINTREN_MASK |
			 MUC_GECR_MCUADDRERREN_MASK);
		writel(val, ctx->mcu_csr + MCUGECR);

		/* Disable memory controller top level interrupt */
		xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
				       MCU_UNCORR_ERR_MASK | MCU_CTL_ERR_MASK);
		xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
				       MCU_CORR_ERR_MASK);

		/* Clear registered MCU bit */
		ctx->edac->mc_registered_mask &= ~(1 << ctx->mcu_id);
	}

	mutex_unlock(&ctx->edac->mc_lock);
}
309
xgene_edac_mc_is_active(struct xgene_edac_mc_ctx * ctx,int mc_idx)310 static int xgene_edac_mc_is_active(struct xgene_edac_mc_ctx *ctx, int mc_idx)
311 {
312 unsigned int reg;
313 u32 mcu_mask;
314
315 if (regmap_read(ctx->edac->csw_map, CSW_CSWCR, ®))
316 return 0;
317
318 if (reg & CSW_CSWCR_DUALMCB_MASK) {
319 /*
320 * Dual MCB active - Determine if all 4 active or just MCU0
321 * and MCU2 active
322 */
323 if (regmap_read(ctx->edac->mcbb_map, MCBADDRMR, ®))
324 return 0;
325 mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
326 } else {
327 /*
328 * Single MCB active - Determine if MCU0/MCU1 or just MCU0
329 * active
330 */
331 if (regmap_read(ctx->edac->mcba_map, MCBADDRMR, ®))
332 return 0;
333 mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
334 }
335
336 /* Save active MC mask if hasn't set already */
337 if (!ctx->edac->mc_active_mask)
338 ctx->edac->mc_active_mask = mcu_mask;
339
340 return (mcu_mask & (1 << mc_idx)) ? 1 : 0;
341 }
342
/*
 * Probe and register one memory controller described by DT node @np.
 * Resources are tracked in a devres group so a failure at any step
 * releases everything acquired so far; on success the group is merged
 * into the parent device. Returns 0 on success, negative errno on
 * failure (including -ENODEV for MCUs that are present but inactive).
 */
static int xgene_edac_mc_add(struct xgene_edac *edac, struct device_node *np)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer layers[2];
	struct xgene_edac_mc_ctx tmp_ctx;
	struct xgene_edac_mc_ctx *ctx;
	struct resource res;
	int rc;

	/* Build up context on the stack first; copied into the mci's
	 * pvt_info only after edac_mc_alloc() succeeds.
	 */
	memset(&tmp_ctx, 0, sizeof(tmp_ctx));
	tmp_ctx.edac = edac;

	if (!devres_open_group(edac->dev, xgene_edac_mc_add, GFP_KERNEL))
		return -ENOMEM;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no MCU resource address\n");
		goto err_group;
	}
	tmp_ctx.mcu_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(tmp_ctx.mcu_csr)) {
		dev_err(edac->dev, "unable to map MCU resource\n");
		rc = PTR_ERR(tmp_ctx.mcu_csr);
		goto err_group;
	}

	/* Ignore non-active MCU */
	if (of_property_read_u32(np, "memory-controller", &tmp_ctx.mcu_id)) {
		dev_err(edac->dev, "no memory-controller property\n");
		rc = -ENODEV;
		goto err_group;
	}
	if (!xgene_edac_mc_is_active(&tmp_ctx, tmp_ctx.mcu_id)) {
		rc = -ENODEV;
		goto err_group;
	}

	/* 4 virtual chip-selects x 2 channels per MCU */
	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
	layers[0].size = 4;
	layers[0].is_virt_csrow = true;
	layers[1].type = EDAC_MC_LAYER_CHANNEL;
	layers[1].size = 2;
	layers[1].is_virt_csrow = false;
	mci = edac_mc_alloc(tmp_ctx.mcu_id, ARRAY_SIZE(layers), layers,
			    sizeof(*ctx));
	if (!mci) {
		rc = -ENOMEM;
		goto err_group;
	}

	ctx = mci->pvt_info;
	*ctx = tmp_ctx;		/* Copy over resource value */
	ctx->name = "xgene_edac_mc_err";
	ctx->mci = mci;
	mci->pdev = &mci->dev;
	mci->ctl_name = ctx->name;
	mci->dev_name = ctx->name;

	mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_RDDR2 | MEM_FLAG_RDDR3 |
			 MEM_FLAG_DDR | MEM_FLAG_DDR2 | MEM_FLAG_DDR3;
	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
	mci->edac_cap = EDAC_FLAG_SECDED;
	mci->mod_name = EDAC_MOD_STR;
	mci->ctl_page_to_phys = NULL;
	mci->scrub_cap = SCRUB_FLAG_HW_SRC;
	mci->scrub_mode = SCRUB_HW_SRC;

	/* Only install the poll callback when not interrupt-driven */
	if (edac_op_state == EDAC_OPSTATE_POLL)
		mci->edac_check = xgene_edac_mc_check;

	if (edac_mc_add_mc(mci)) {
		dev_err(edac->dev, "edac_mc_add_mc failed\n");
		rc = -EINVAL;
		goto err_free;
	}

	xgene_edac_mc_create_debugfs_node(mci);

	list_add(&ctx->next, &edac->mcus);

	xgene_edac_mc_irq_ctl(mci, true);

	devres_remove_group(edac->dev, xgene_edac_mc_add);

	dev_info(edac->dev, "X-Gene EDAC MC registered\n");
	return 0;

err_free:
	edac_mc_free(mci);
err_group:
	devres_release_group(edac->dev, xgene_edac_mc_add);
	return rc;
}
437
xgene_edac_mc_remove(struct xgene_edac_mc_ctx * mcu)438 static int xgene_edac_mc_remove(struct xgene_edac_mc_ctx *mcu)
439 {
440 xgene_edac_mc_irq_ctl(mcu->mci, false);
441 edac_mc_del_mc(&mcu->mci->dev);
442 edac_mc_free(mcu->mci);
443 return 0;
444 }
445
446 /* CPU L1/L2 error CSR */
447 #define MAX_CPU_PER_PMD 2
448 #define CPU_CSR_STRIDE 0x00100000
449 #define CPU_L2C_PAGE 0x000D0000
450 #define CPU_MEMERR_L2C_PAGE 0x000E0000
451 #define CPU_MEMERR_CPU_PAGE 0x000F0000
452
453 #define MEMERR_CPU_ICFECR_PAGE_OFFSET 0x0000
454 #define MEMERR_CPU_ICFESR_PAGE_OFFSET 0x0004
455 #define MEMERR_CPU_ICFESR_ERRWAY_RD(src) (((src) & 0xFF000000) >> 24)
456 #define MEMERR_CPU_ICFESR_ERRINDEX_RD(src) (((src) & 0x003F0000) >> 16)
457 #define MEMERR_CPU_ICFESR_ERRINFO_RD(src) (((src) & 0x0000FF00) >> 8)
458 #define MEMERR_CPU_ICFESR_ERRTYPE_RD(src) (((src) & 0x00000070) >> 4)
459 #define MEMERR_CPU_ICFESR_MULTCERR_MASK BIT(2)
460 #define MEMERR_CPU_ICFESR_CERR_MASK BIT(0)
461 #define MEMERR_CPU_LSUESR_PAGE_OFFSET 0x000c
462 #define MEMERR_CPU_LSUESR_ERRWAY_RD(src) (((src) & 0xFF000000) >> 24)
463 #define MEMERR_CPU_LSUESR_ERRINDEX_RD(src) (((src) & 0x003F0000) >> 16)
464 #define MEMERR_CPU_LSUESR_ERRINFO_RD(src) (((src) & 0x0000FF00) >> 8)
465 #define MEMERR_CPU_LSUESR_ERRTYPE_RD(src) (((src) & 0x00000070) >> 4)
466 #define MEMERR_CPU_LSUESR_MULTCERR_MASK BIT(2)
467 #define MEMERR_CPU_LSUESR_CERR_MASK BIT(0)
468 #define MEMERR_CPU_LSUECR_PAGE_OFFSET 0x0008
469 #define MEMERR_CPU_MMUECR_PAGE_OFFSET 0x0010
470 #define MEMERR_CPU_MMUESR_PAGE_OFFSET 0x0014
471 #define MEMERR_CPU_MMUESR_ERRWAY_RD(src) (((src) & 0xFF000000) >> 24)
472 #define MEMERR_CPU_MMUESR_ERRINDEX_RD(src) (((src) & 0x007F0000) >> 16)
473 #define MEMERR_CPU_MMUESR_ERRINFO_RD(src) (((src) & 0x0000FF00) >> 8)
474 #define MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK BIT(7)
475 #define MEMERR_CPU_MMUESR_ERRTYPE_RD(src) (((src) & 0x00000070) >> 4)
476 #define MEMERR_CPU_MMUESR_MULTCERR_MASK BIT(2)
477 #define MEMERR_CPU_MMUESR_CERR_MASK BIT(0)
478 #define MEMERR_CPU_ICFESRA_PAGE_OFFSET 0x0804
479 #define MEMERR_CPU_LSUESRA_PAGE_OFFSET 0x080c
480 #define MEMERR_CPU_MMUESRA_PAGE_OFFSET 0x0814
481
482 #define MEMERR_L2C_L2ECR_PAGE_OFFSET 0x0000
483 #define MEMERR_L2C_L2ESR_PAGE_OFFSET 0x0004
484 #define MEMERR_L2C_L2ESR_ERRSYN_RD(src) (((src) & 0xFF000000) >> 24)
485 #define MEMERR_L2C_L2ESR_ERRWAY_RD(src) (((src) & 0x00FC0000) >> 18)
486 #define MEMERR_L2C_L2ESR_ERRCPU_RD(src) (((src) & 0x00020000) >> 17)
487 #define MEMERR_L2C_L2ESR_ERRGROUP_RD(src) (((src) & 0x0000E000) >> 13)
488 #define MEMERR_L2C_L2ESR_ERRACTION_RD(src) (((src) & 0x00001C00) >> 10)
489 #define MEMERR_L2C_L2ESR_ERRTYPE_RD(src) (((src) & 0x00000300) >> 8)
490 #define MEMERR_L2C_L2ESR_MULTUCERR_MASK BIT(3)
491 #define MEMERR_L2C_L2ESR_MULTICERR_MASK BIT(2)
492 #define MEMERR_L2C_L2ESR_UCERR_MASK BIT(1)
493 #define MEMERR_L2C_L2ESR_ERR_MASK BIT(0)
494 #define MEMERR_L2C_L2EALR_PAGE_OFFSET 0x0008
495 #define CPUX_L2C_L2RTOCR_PAGE_OFFSET 0x0010
496 #define MEMERR_L2C_L2EAHR_PAGE_OFFSET 0x000c
497 #define CPUX_L2C_L2RTOSR_PAGE_OFFSET 0x0014
498 #define MEMERR_L2C_L2RTOSR_MULTERR_MASK BIT(1)
499 #define MEMERR_L2C_L2RTOSR_ERR_MASK BIT(0)
500 #define CPUX_L2C_L2RTOALR_PAGE_OFFSET 0x0018
501 #define CPUX_L2C_L2RTOAHR_PAGE_OFFSET 0x001c
502 #define MEMERR_L2C_L2ESRA_PAGE_OFFSET 0x0804
503
504 /*
505 * Processor Module Domain (PMD) context - Context for a pair of processors.
506 * Each PMD consists of 2 CPUs and a shared L2 cache. Each CPU consists of
507 * its own L1 cache.
508 */
struct xgene_edac_pmd_ctx {
	struct list_head next;			/* link on xgene_edac->pmds */
	struct device ddev;			/* device handed to the EDAC core */
	char *name;
	struct xgene_edac *edac;		/* parent driver context */
	struct edac_device_ctl_info *edac_dev;
	void __iomem *pmd_csr;			/* this PMD's CSR region */
	u32 pmd;				/* PMD index from DT */
	int version;				/* SoC revision; gates L2 RTO feature */
};
519
/*
 * Check one CPU of a PMD for L1 errors across its three error sources
 * (instruction cache/fetch ICF, load-store unit LSU, MMU). Each source
 * follows the same pattern: read the status register, decode and log it,
 * write it back to clear, and report a correctable event to the EDAC core.
 */
static void xgene_edac_pmd_l1_check(struct edac_device_ctl_info *edac_dev,
				    int cpu_idx)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_f;
	u32 val;

	/* Per-CPU memory-error page within the PMD CSR region */
	pg_f = ctx->pmd_csr + cpu_idx * CPU_CSR_STRIDE + CPU_MEMERR_CPU_PAGE;

	/* --- Instruction cache/fetch (ICF) errors --- */
	val = readl(pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);
	if (!val)
		goto chk_lsu;
	dev_err(edac_dev->dev,
		"CPU%d L1 memory error ICF 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
		MEMERR_CPU_ICFESR_ERRWAY_RD(val),
		MEMERR_CPU_ICFESR_ERRINDEX_RD(val),
		MEMERR_CPU_ICFESR_ERRINFO_RD(val));
	if (val & MEMERR_CPU_ICFESR_CERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable error\n");
	if (val & MEMERR_CPU_ICFESR_MULTCERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable error\n");
	switch (MEMERR_CPU_ICFESR_ERRTYPE_RD(val)) {
	case 1:
		dev_err(edac_dev->dev, "L1 TLB multiple hit\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "Way select multiple hit\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "Physical tag parity error\n");
		break;
	case 4:
	case 5:
		dev_err(edac_dev->dev, "L1 data parity error\n");
		break;
	case 6:
		dev_err(edac_dev->dev, "L1 pre-decode parity error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);

	if (val & (MEMERR_CPU_ICFESR_CERR_MASK |
		   MEMERR_CPU_ICFESR_MULTCERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);

chk_lsu:
	/* --- Load-store unit (LSU) errors --- */
	val = readl(pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);
	if (!val)
		goto chk_mmu;
	dev_err(edac_dev->dev,
		"CPU%d memory error LSU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
		MEMERR_CPU_LSUESR_ERRWAY_RD(val),
		MEMERR_CPU_LSUESR_ERRINDEX_RD(val),
		MEMERR_CPU_LSUESR_ERRINFO_RD(val));
	if (val & MEMERR_CPU_LSUESR_CERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable error\n");
	if (val & MEMERR_CPU_LSUESR_MULTCERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable error\n");
	switch (MEMERR_CPU_LSUESR_ERRTYPE_RD(val)) {
	case 0:
		dev_err(edac_dev->dev, "Load tag error\n");
		break;
	case 1:
		dev_err(edac_dev->dev, "Load data error\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "WSL multihit error\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "Store tag error\n");
		break;
	case 4:
		dev_err(edac_dev->dev,
			"DTB multihit from load pipeline error\n");
		break;
	case 5:
		dev_err(edac_dev->dev,
			"DTB multihit from store pipeline error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);

	if (val & (MEMERR_CPU_LSUESR_CERR_MASK |
		   MEMERR_CPU_LSUESR_MULTCERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);

chk_mmu:
	/* --- MMU errors --- */
	val = readl(pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);
	if (!val)
		return;
	dev_err(edac_dev->dev,
		"CPU%d memory error MMU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X %s\n",
		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
		MEMERR_CPU_MMUESR_ERRWAY_RD(val),
		MEMERR_CPU_MMUESR_ERRINDEX_RD(val),
		MEMERR_CPU_MMUESR_ERRINFO_RD(val),
		val & MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK ? "LSU" : "ICF");
	if (val & MEMERR_CPU_MMUESR_CERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable error\n");
	if (val & MEMERR_CPU_MMUESR_MULTCERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable error\n");
	switch (MEMERR_CPU_MMUESR_ERRTYPE_RD(val)) {
	case 0:
		dev_err(edac_dev->dev, "Stage 1 UTB hit error\n");
		break;
	case 1:
		dev_err(edac_dev->dev, "Stage 1 UTB miss error\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "Stage 1 UTB allocate error\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "TMO operation single bank error\n");
		break;
	case 4:
		dev_err(edac_dev->dev, "Stage 2 UTB error\n");
		break;
	case 5:
		dev_err(edac_dev->dev, "Stage 2 UTB miss error\n");
		break;
	case 6:
		dev_err(edac_dev->dev, "Stage 2 UTB allocate error\n");
		break;
	case 7:
		dev_err(edac_dev->dev, "TMO operation multiple bank error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);

	/* MMU errors are always reported as correctable here */
	edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
}
659
/*
 * Check the PMD's shared L2 cache: decode and report ESR error status
 * (with the captured error address from L2EALR/L2EAHR), then check the
 * separate L2 request-timeout status. Both status registers are
 * write-to-clear.
 */
static void xgene_edac_pmd_l2_check(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_d;
	void __iomem *pg_e;
	u32 val_hi;
	u32 val_lo;
	u32 val;

	/* Check L2 */
	pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
	val = readl(pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);
	if (!val)
		goto chk_l2c;
	/* Captured 64-bit error address, low/high halves */
	val_lo = readl(pg_e + MEMERR_L2C_L2EALR_PAGE_OFFSET);
	val_hi = readl(pg_e + MEMERR_L2C_L2EAHR_PAGE_OFFSET);
	dev_err(edac_dev->dev,
		"PMD%d memory error L2C L2ESR 0x%08X @ 0x%08X.%08X\n",
		ctx->pmd, val, val_hi, val_lo);
	dev_err(edac_dev->dev,
		"ErrSyndrome 0x%02X ErrWay 0x%02X ErrCpu %d ErrGroup 0x%02X ErrAction 0x%02X\n",
		MEMERR_L2C_L2ESR_ERRSYN_RD(val),
		MEMERR_L2C_L2ESR_ERRWAY_RD(val),
		MEMERR_L2C_L2ESR_ERRCPU_RD(val),
		MEMERR_L2C_L2ESR_ERRGROUP_RD(val),
		MEMERR_L2C_L2ESR_ERRACTION_RD(val));

	if (val & MEMERR_L2C_L2ESR_ERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable error\n");
	if (val & MEMERR_L2C_L2ESR_MULTICERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable error\n");
	if (val & MEMERR_L2C_L2ESR_UCERR_MASK)
		dev_err(edac_dev->dev, "One or more uncorrectable error\n");
	if (val & MEMERR_L2C_L2ESR_MULTUCERR_MASK)
		dev_err(edac_dev->dev, "Multiple uncorrectable error\n");

	switch (MEMERR_L2C_L2ESR_ERRTYPE_RD(val)) {
	case 0:
		dev_err(edac_dev->dev, "Outbound SDB parity error\n");
		break;
	case 1:
		dev_err(edac_dev->dev, "Inbound SDB parity error\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "Tag ECC error\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "Data ECC error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);

	/* Report CE and/or UE to the EDAC core as indicated */
	if (val & (MEMERR_L2C_L2ESR_ERR_MASK |
		   MEMERR_L2C_L2ESR_MULTICERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
	if (val & (MEMERR_L2C_L2ESR_UCERR_MASK |
		   MEMERR_L2C_L2ESR_MULTUCERR_MASK))
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);

chk_l2c:
	/* Check if any memory request timed out on L2 cache */
	pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
	val = readl(pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
	if (val) {
		val_lo = readl(pg_d + CPUX_L2C_L2RTOALR_PAGE_OFFSET);
		val_hi = readl(pg_d + CPUX_L2C_L2RTOAHR_PAGE_OFFSET);
		dev_err(edac_dev->dev,
			"PMD%d L2C error L2C RTOSR 0x%08X @ 0x%08X.%08X\n",
			ctx->pmd, val, val_hi, val_lo);
		writel(val, pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
	}
}
734
xgene_edac_pmd_check(struct edac_device_ctl_info * edac_dev)735 static void xgene_edac_pmd_check(struct edac_device_ctl_info *edac_dev)
736 {
737 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
738 unsigned int pcp_hp_stat;
739 int i;
740
741 xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
742 if (!((PMD0_MERR_MASK << ctx->pmd) & pcp_hp_stat))
743 return;
744
745 /* Check CPU L1 error */
746 for (i = 0; i < MAX_CPU_PER_PMD; i++)
747 xgene_edac_pmd_l1_check(edac_dev, i);
748
749 /* Check CPU L2 error */
750 xgene_edac_pmd_l2_check(edac_dev);
751 }
752
/* Enable L1 memory-error capture for one CPU within the PMD. */
static void xgene_edac_pmd_cpu_hw_cfg(struct edac_device_ctl_info *edac_dev,
				      int cpu)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_f = ctx->pmd_csr + cpu * CPU_CSR_STRIDE +
			     CPU_MEMERR_CPU_PAGE;

	/*
	 * Enable CPU memory error:
	 *  MEMERR_CPU_ICFESRA, MEMERR_CPU_LSUESRA, and MEMERR_CPU_MMUESRA
	 * NOTE(review): 0x301/0x101 are hardware-defined enable values
	 * (per the APM register spec, not derivable from this file).
	 */
	writel(0x00000301, pg_f + MEMERR_CPU_ICFECR_PAGE_OFFSET);
	writel(0x00000301, pg_f + MEMERR_CPU_LSUECR_PAGE_OFFSET);
	writel(0x00000101, pg_f + MEMERR_CPU_MMUECR_PAGE_OFFSET);
}
768
/* Enable L2 memory-error capture (and, on newer SoCs, request timeout). */
static void xgene_edac_pmd_hw_cfg(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
	void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;

	/* Enable PMD memory error - MEMERR_L2C_L2ECR and L2C_L2RTOCR
	 * NOTE(review): 0x703/0x119 are hardware-defined enable values.
	 */
	writel(0x00000703, pg_e + MEMERR_L2C_L2ECR_PAGE_OFFSET);
	/* Configure L2C HW request time out feature if supported */
	if (ctx->version > 1)
		writel(0x00000119, pg_d + CPUX_L2C_L2RTOCR_PAGE_OFFSET);
}
781
/*
 * Enable/disable error reporting for this PMD: toggle its bit in the
 * top-level PCP mask (IRQ mode only) and, on enable, configure the L2
 * and per-CPU L1 error-capture hardware.
 */
static void xgene_edac_pmd_hw_ctl(struct edac_device_ctl_info *edac_dev,
				  bool enable)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	u32 mask = PMD0_MERR_MASK << ctx->pmd;
	int cpu;

	/* Enable PMD error interrupt */
	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		if (enable)
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK, mask);
		else
			xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK, mask);
	}

	if (!enable)
		return;

	xgene_edac_pmd_hw_cfg(edac_dev);

	/* Two CPUs per a PMD */
	for (cpu = 0; cpu < MAX_CPU_PER_PMD; cpu++)
		xgene_edac_pmd_cpu_hw_cfg(edac_dev, cpu);
}
806
xgene_edac_pmd_l1_inject_ctrl_write(struct file * file,const char __user * data,size_t count,loff_t * ppos)807 static ssize_t xgene_edac_pmd_l1_inject_ctrl_write(struct file *file,
808 const char __user *data,
809 size_t count, loff_t *ppos)
810 {
811 struct edac_device_ctl_info *edac_dev = file->private_data;
812 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
813 void __iomem *cpux_pg_f;
814 int i;
815
816 for (i = 0; i < MAX_CPU_PER_PMD; i++) {
817 cpux_pg_f = ctx->pmd_csr + i * CPU_CSR_STRIDE +
818 CPU_MEMERR_CPU_PAGE;
819
820 writel(MEMERR_CPU_ICFESR_MULTCERR_MASK |
821 MEMERR_CPU_ICFESR_CERR_MASK,
822 cpux_pg_f + MEMERR_CPU_ICFESRA_PAGE_OFFSET);
823 writel(MEMERR_CPU_LSUESR_MULTCERR_MASK |
824 MEMERR_CPU_LSUESR_CERR_MASK,
825 cpux_pg_f + MEMERR_CPU_LSUESRA_PAGE_OFFSET);
826 writel(MEMERR_CPU_MMUESR_MULTCERR_MASK |
827 MEMERR_CPU_MMUESR_CERR_MASK,
828 cpux_pg_f + MEMERR_CPU_MMUESRA_PAGE_OFFSET);
829 }
830 return count;
831 }
832
xgene_edac_pmd_l2_inject_ctrl_write(struct file * file,const char __user * data,size_t count,loff_t * ppos)833 static ssize_t xgene_edac_pmd_l2_inject_ctrl_write(struct file *file,
834 const char __user *data,
835 size_t count, loff_t *ppos)
836 {
837 struct edac_device_ctl_info *edac_dev = file->private_data;
838 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
839 void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
840
841 writel(MEMERR_L2C_L2ESR_MULTUCERR_MASK |
842 MEMERR_L2C_L2ESR_MULTICERR_MASK |
843 MEMERR_L2C_L2ESR_UCERR_MASK |
844 MEMERR_L2C_L2ESR_ERR_MASK,
845 pg_e + MEMERR_L2C_L2ESRA_PAGE_OFFSET);
846 return count;
847 }
848
/* Debugfs injectors: [0] = per-CPU L1, [1] = shared L2; NULL-terminated */
static const struct file_operations xgene_edac_pmd_debug_inject_fops[] = {
	{
	.open = simple_open,
	.write = xgene_edac_pmd_l1_inject_ctrl_write,
	.llseek = generic_file_llseek, },
	{
	.open = simple_open,
	.write = xgene_edac_pmd_l2_inject_ctrl_write,
	.llseek = generic_file_llseek, },
	{ }
};
860
861 static void
xgene_edac_pmd_create_debugfs_nodes(struct edac_device_ctl_info * edac_dev)862 xgene_edac_pmd_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
863 {
864 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
865 struct dentry *dbgfs_dir;
866 char name[10];
867
868 if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
869 return;
870
871 snprintf(name, sizeof(name), "PMD%d", ctx->pmd);
872 dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
873 if (!dbgfs_dir)
874 return;
875
876 edac_debugfs_create_file("l1_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
877 &xgene_edac_pmd_debug_inject_fops[0]);
878 edac_debugfs_create_file("l2_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
879 &xgene_edac_pmd_debug_inject_fops[1]);
880 }
881
/*
 * A set efuse bit marks its PMD as fused off; the PMD is available only
 * when its bit is clear. Returns 1 if available, 0 otherwise.
 */
static int xgene_edac_pmd_available(u32 efuse, int pmd)
{
	return !(efuse & BIT(pmd));
}
886
/*
 * Probe and register one PMD described by DT node @np. Resources are
 * tracked in a devres group so any failure unwinds cleanly; PMDs that
 * are fused off in the efuse are skipped with -ENODEV. @version selects
 * SoC-revision-specific configuration (L2 request timeout).
 */
static int xgene_edac_pmd_add(struct xgene_edac *edac, struct device_node *np,
			      int version)
{
	struct edac_device_ctl_info *edac_dev;
	struct xgene_edac_pmd_ctx *ctx;
	struct resource res;
	char edac_name[10];
	u32 pmd;
	int rc;
	u32 val;

	if (!devres_open_group(edac->dev, xgene_edac_pmd_add, GFP_KERNEL))
		return -ENOMEM;

	/* Determine if this PMD is disabled */
	if (of_property_read_u32(np, "pmd-controller", &pmd)) {
		dev_err(edac->dev, "no pmd-controller property\n");
		rc = -ENODEV;
		goto err_group;
	}
	rc = regmap_read(edac->efuse_map, 0, &val);
	if (rc)
		goto err_group;
	if (!xgene_edac_pmd_available(val, pmd)) {
		rc = -ENODEV;
		goto err_group;
	}

	/* One "l2c" block with one instance of 2 sub-units (the 2 CPUs) */
	snprintf(edac_name, sizeof(edac_name), "l2c%d", pmd);
	edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
					      edac_name, 1, "l2c", 1, 2,
					      edac_device_alloc_index());
	if (!edac_dev) {
		rc = -ENOMEM;
		goto err_group;
	}

	ctx = edac_dev->pvt_info;
	ctx->name = "xgene_pmd_err";
	ctx->pmd = pmd;
	ctx->edac = edac;
	ctx->edac_dev = edac_dev;
	/* Hand the EDAC core a copy of the parent device */
	ctx->ddev = *edac->dev;
	ctx->version = version;
	edac_dev->dev = &ctx->ddev;
	edac_dev->ctl_name = ctx->name;
	edac_dev->dev_name = ctx->name;
	edac_dev->mod_name = EDAC_MOD_STR;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no PMD resource address\n");
		goto err_free;
	}
	ctx->pmd_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(ctx->pmd_csr)) {
		dev_err(edac->dev,
			"devm_ioremap_resource failed for PMD resource address\n");
		rc = PTR_ERR(ctx->pmd_csr);
		goto err_free;
	}

	/* Poll callback only when not interrupt-driven */
	if (edac_op_state == EDAC_OPSTATE_POLL)
		edac_dev->edac_check = xgene_edac_pmd_check;

	xgene_edac_pmd_create_debugfs_nodes(edac_dev);

	/* NOTE(review): failure here is reported as a positive return —
	 * matches this driver's use of the EDAC device API.
	 */
	rc = edac_device_add_device(edac_dev);
	if (rc > 0) {
		dev_err(edac->dev, "edac_device_add_device failed\n");
		rc = -ENOMEM;
		goto err_free;
	}

	if (edac_op_state == EDAC_OPSTATE_INT)
		edac_dev->op_state = OP_RUNNING_INTERRUPT;

	list_add(&ctx->next, &edac->pmds);

	xgene_edac_pmd_hw_ctl(edac_dev, 1);

	devres_remove_group(edac->dev, xgene_edac_pmd_add);

	dev_info(edac->dev, "X-Gene EDAC PMD%d registered\n", ctx->pmd);
	return 0;

err_free:
	edac_device_free_ctl_info(edac_dev);
err_group:
	devres_release_group(edac->dev, xgene_edac_pmd_add);
	return rc;
}
979
xgene_edac_pmd_remove(struct xgene_edac_pmd_ctx * pmd)980 static int xgene_edac_pmd_remove(struct xgene_edac_pmd_ctx *pmd)
981 {
982 struct edac_device_ctl_info *edac_dev = pmd->edac_dev;
983
984 xgene_edac_pmd_hw_ctl(edac_dev, 0);
985 edac_device_del_device(edac_dev->dev);
986 edac_device_free_ctl_info(edac_dev);
987 return 0;
988 }
989
990 /* L3 Error device */
991 #define L3C_ESR (0x0A * 4)
992 #define L3C_ESR_DATATAG_MASK BIT(9)
993 #define L3C_ESR_MULTIHIT_MASK BIT(8)
994 #define L3C_ESR_UCEVICT_MASK BIT(6)
995 #define L3C_ESR_MULTIUCERR_MASK BIT(5)
996 #define L3C_ESR_MULTICERR_MASK BIT(4)
997 #define L3C_ESR_UCERR_MASK BIT(3)
998 #define L3C_ESR_CERR_MASK BIT(2)
999 #define L3C_ESR_UCERRINTR_MASK BIT(1)
1000 #define L3C_ESR_CERRINTR_MASK BIT(0)
1001 #define L3C_ECR (0x0B * 4)
1002 #define L3C_ECR_UCINTREN BIT(3)
1003 #define L3C_ECR_CINTREN BIT(2)
1004 #define L3C_UCERREN BIT(1)
1005 #define L3C_CERREN BIT(0)
1006 #define L3C_ELR (0x0C * 4)
1007 #define L3C_ELR_ERRSYN(src) ((src & 0xFF800000) >> 23)
1008 #define L3C_ELR_ERRWAY(src) ((src & 0x007E0000) >> 17)
1009 #define L3C_ELR_AGENTID(src) ((src & 0x0001E000) >> 13)
1010 #define L3C_ELR_ERRGRP(src) ((src & 0x00000F00) >> 8)
1011 #define L3C_ELR_OPTYPE(src) ((src & 0x000000F0) >> 4)
1012 #define L3C_ELR_PADDRHIGH(src) (src & 0x0000000F)
1013 #define L3C_AELR (0x0D * 4)
1014 #define L3C_BELR (0x0E * 4)
1015 #define L3C_BELR_BANK(src) (src & 0x0000000F)
1016
/* Per-instance context shared by the L3 and SoC EDAC error devices */
struct xgene_edac_dev_ctx {
	struct list_head next;			/* link in parent's l3s/socs list */
	struct device ddev;			/* local copy of the parent device */
	char *name;				/* EDAC ctl/dev name */
	struct xgene_edac *edac;		/* parent EDAC context */
	struct edac_device_ctl_info *edac_dev;	/* registered EDAC device */
	int edac_idx;				/* from edac_device_alloc_index() */
	void __iomem *dev_csr;			/* device CSR region */
	int version;				/* hardware revision (1 or 2) */
};
1027
1028 /*
1029 * Version 1 of the L3 controller has broken single bit correctable logic for
1030 * certain error syndromes. Log them as uncorrectable in that case.
1031 */
xgene_edac_l3_promote_to_uc_err(u32 l3cesr,u32 l3celr)1032 static bool xgene_edac_l3_promote_to_uc_err(u32 l3cesr, u32 l3celr)
1033 {
1034 if (l3cesr & L3C_ESR_DATATAG_MASK) {
1035 switch (L3C_ELR_ERRSYN(l3celr)) {
1036 case 0x13C:
1037 case 0x0B4:
1038 case 0x007:
1039 case 0x00D:
1040 case 0x00E:
1041 case 0x019:
1042 case 0x01A:
1043 case 0x01C:
1044 case 0x04E:
1045 case 0x041:
1046 return true;
1047 }
1048 } else if (L3C_ELR_ERRWAY(l3celr) == 9)
1049 return true;
1050
1051 return false;
1052 }
1053
/*
 * Poll/IRQ check routine for L3 cache errors.  Decodes the error syndrome
 * and address registers, logs the details, clears the error latch and then
 * reports CE/UE events to the EDAC core.  On v1 hardware certain
 * "correctable" errors are promoted to uncorrectable (see
 * xgene_edac_l3_promote_to_uc_err()).
 */
static void xgene_edac_l3_check(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 l3cesr;
	u32 l3celr;
	u32 l3caelr;
	u32 l3cbelr;

	l3cesr = readl(ctx->dev_csr + L3C_ESR);
	if (!(l3cesr & (L3C_ESR_UCERR_MASK | L3C_ESR_CERR_MASK)))
		return;

	if (l3cesr & L3C_ESR_UCERR_MASK)
		dev_err(edac_dev->dev, "L3C uncorrectable error\n");
	if (l3cesr & L3C_ESR_CERR_MASK)
		dev_warn(edac_dev->dev, "L3C correctable error\n");

	/* Latch the error detail registers before clearing the status */
	l3celr = readl(ctx->dev_csr + L3C_ELR);
	l3caelr = readl(ctx->dev_csr + L3C_AELR);
	l3cbelr = readl(ctx->dev_csr + L3C_BELR);
	if (l3cesr & L3C_ESR_MULTIHIT_MASK)
		dev_err(edac_dev->dev, "L3C multiple hit error\n");
	if (l3cesr & L3C_ESR_UCEVICT_MASK)
		dev_err(edac_dev->dev,
			"L3C dropped eviction of line with error\n");
	if (l3cesr & L3C_ESR_MULTIUCERR_MASK)
		dev_err(edac_dev->dev, "L3C multiple uncorrectable error\n");
	/* DATATAG set => data-array error; clear => tag-array error */
	if (l3cesr & L3C_ESR_DATATAG_MASK)
		dev_err(edac_dev->dev,
			"L3C data error syndrome 0x%X group 0x%X\n",
			L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRGRP(l3celr));
	else
		dev_err(edac_dev->dev,
			"L3C tag error syndrome 0x%X Way of Tag 0x%X Agent ID 0x%X Operation type 0x%X\n",
			L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRWAY(l3celr),
			L3C_ELR_AGENTID(l3celr), L3C_ELR_OPTYPE(l3celr));
	/*
	 * NOTE: Address [41:38] in L3C_ELR_PADDRHIGH(l3celr).
	 * Address [37:6] in l3caelr. Lower 6 bits are zero.
	 */
	dev_err(edac_dev->dev, "L3C error address 0x%08X.%08X bank %d\n",
		L3C_ELR_PADDRHIGH(l3celr) << 6 | (l3caelr >> 26),
		(l3caelr & 0x3FFFFFFF) << 6, L3C_BELR_BANK(l3cbelr));
	dev_err(edac_dev->dev,
		"L3C error status register value 0x%X\n", l3cesr);

	/* Clear L3C error interrupt */
	writel(0, ctx->dev_csr + L3C_ESR);

	/* v1 erratum: report as UE and skip normal CE/UE accounting */
	if (ctx->version <= 1 &&
	    xgene_edac_l3_promote_to_uc_err(l3cesr, l3celr)) {
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
		return;
	}
	if (l3cesr & L3C_ESR_CERR_MASK)
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
	if (l3cesr & L3C_ESR_UCERR_MASK)
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
}
1113
/*
 * Enable or quiesce L3 error reporting.  Error *detection* is always left
 * enabled; @enable only controls interrupt delivery, both at the L3C
 * controller (L3C_ECR) and at the PCP top-level interrupt mask registers.
 * Interrupt masking is touched only when running in interrupt mode.
 */
static void xgene_edac_l3_hw_init(struct edac_device_ctl_info *edac_dev,
				  bool enable)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 val;

	val = readl(ctx->dev_csr + L3C_ECR);
	val |= L3C_UCERREN | L3C_CERREN;
	/* On disable, we just disable interrupt but keep error enabled */
	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		if (enable)
			val |= L3C_ECR_UCINTREN | L3C_ECR_CINTREN;
		else
			val &= ~(L3C_ECR_UCINTREN | L3C_ECR_CINTREN);
	}
	writel(val, ctx->dev_csr + L3C_ECR);

	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		/* Enable/disable L3 error top level interrupt */
		if (enable) {
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       L3C_UNCORR_ERR_MASK);
			xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
					       L3C_CORR_ERR_MASK);
		} else {
			xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
					       L3C_UNCORR_ERR_MASK);
			xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
					       L3C_CORR_ERR_MASK);
		}
	}
}
1146
/*
 * Debugfs write handler: force-set every L3C error status bit so the error
 * reporting path can be exercised.  The user data written is ignored; the
 * write always "consumes" @count bytes.
 */
static ssize_t xgene_edac_l3_inject_ctrl_write(struct file *file,
					       const char __user *data,
					       size_t count, loff_t *ppos)
{
	struct edac_device_ctl_info *edac_dev = file->private_data;
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;

	/* Generate all errors */
	writel(0xFFFFFFFF, ctx->dev_csr + L3C_ESR);
	return count;
}
1158
/* Debugfs fops for the L3 error-injection control file */
static const struct file_operations xgene_edac_l3_debug_inject_fops = {
	.open = simple_open,
	.write = xgene_edac_l3_inject_ctrl_write,
	.llseek = generic_file_llseek
};
1164
1165 static void
xgene_edac_l3_create_debugfs_nodes(struct edac_device_ctl_info * edac_dev)1166 xgene_edac_l3_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
1167 {
1168 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1169 struct dentry *dbgfs_dir;
1170 char name[10];
1171
1172 if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
1173 return;
1174
1175 snprintf(name, sizeof(name), "l3c%d", ctx->edac_idx);
1176 dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
1177 if (!dbgfs_dir)
1178 return;
1179
1180 debugfs_create_file("l3_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
1181 &xgene_edac_l3_debug_inject_fops);
1182 }
1183
/*
 * Probe one L3 cache error device described by @np and register it with
 * the EDAC core.  Mirrors xgene_edac_pmd_add(): all devm allocations live
 * in a devres group keyed on this function so any failure unwinds cleanly.
 *
 * @edac:    parent X-Gene EDAC context
 * @np:      device tree node of the L3 controller
 * @version: L3 hardware revision (1 or 2)
 *
 * Return: 0 on success, negative errno on failure.
 */
static int xgene_edac_l3_add(struct xgene_edac *edac, struct device_node *np,
			     int version)
{
	struct edac_device_ctl_info *edac_dev;
	struct xgene_edac_dev_ctx *ctx;
	struct resource res;
	void __iomem *dev_csr;
	int edac_idx;
	int rc = 0;

	if (!devres_open_group(edac->dev, xgene_edac_l3_add, GFP_KERNEL))
		return -ENOMEM;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no L3 resource address\n");
		goto err_release_group;
	}
	dev_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(dev_csr)) {
		dev_err(edac->dev,
			"devm_ioremap_resource failed for L3 resource address\n");
		rc = PTR_ERR(dev_csr);
		goto err_release_group;
	}

	edac_idx = edac_device_alloc_index();
	edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
					      "l3c", 1, "l3c", 1, 0, edac_idx);
	if (!edac_dev) {
		rc = -ENOMEM;
		goto err_release_group;
	}

	ctx = edac_dev->pvt_info;
	ctx->dev_csr = dev_csr;
	ctx->name = "xgene_l3_err";
	ctx->edac_idx = edac_idx;
	ctx->edac = edac;
	ctx->edac_dev = edac_dev;
	/* Shallow copy of the parent device; EDAC lookups use &ctx->ddev */
	ctx->ddev = *edac->dev;
	ctx->version = version;
	edac_dev->dev = &ctx->ddev;
	edac_dev->ctl_name = ctx->name;
	edac_dev->dev_name = ctx->name;
	edac_dev->mod_name = EDAC_MOD_STR;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		edac_dev->edac_check = xgene_edac_l3_check;

	xgene_edac_l3_create_debugfs_nodes(edac_dev);

	rc = edac_device_add_device(edac_dev);
	if (rc > 0) {
		dev_err(edac->dev, "failed edac_device_add_device()\n");
		rc = -ENOMEM;
		goto err_ctl_free;
	}

	if (edac_op_state == EDAC_OPSTATE_INT)
		edac_dev->op_state = OP_RUNNING_INTERRUPT;

	list_add(&ctx->next, &edac->l3s);

	/* Enable L3 error reporting hardware */
	xgene_edac_l3_hw_init(edac_dev, 1);

	devres_remove_group(edac->dev, xgene_edac_l3_add);

	dev_info(edac->dev, "X-Gene EDAC L3 registered\n");
	return 0;

err_ctl_free:
	edac_device_free_ctl_info(edac_dev);
err_release_group:
	devres_release_group(edac->dev, xgene_edac_l3_add);
	return rc;
}
1261
xgene_edac_l3_remove(struct xgene_edac_dev_ctx * l3)1262 static int xgene_edac_l3_remove(struct xgene_edac_dev_ctx *l3)
1263 {
1264 struct edac_device_ctl_info *edac_dev = l3->edac_dev;
1265
1266 xgene_edac_l3_hw_init(edac_dev, 0);
1267 edac_device_del_device(l3->edac->dev);
1268 edac_device_free_ctl_info(edac_dev);
1269 return 0;
1270 }
1271
1272 /* SoC error device */
1273 #define IOBAXIS0TRANSERRINTSTS 0x0000
1274 #define IOBAXIS0_M_ILLEGAL_ACCESS_MASK BIT(1)
1275 #define IOBAXIS0_ILLEGAL_ACCESS_MASK BIT(0)
1276 #define IOBAXIS0TRANSERRINTMSK 0x0004
1277 #define IOBAXIS0TRANSERRREQINFOL 0x0008
1278 #define IOBAXIS0TRANSERRREQINFOH 0x000c
1279 #define REQTYPE_RD(src) (((src) & BIT(0)))
1280 #define ERRADDRH_RD(src) (((src) & 0xffc00000) >> 22)
1281 #define IOBAXIS1TRANSERRINTSTS 0x0010
1282 #define IOBAXIS1TRANSERRINTMSK 0x0014
1283 #define IOBAXIS1TRANSERRREQINFOL 0x0018
1284 #define IOBAXIS1TRANSERRREQINFOH 0x001c
1285 #define IOBPATRANSERRINTSTS 0x0020
1286 #define IOBPA_M_REQIDRAM_CORRUPT_MASK BIT(7)
1287 #define IOBPA_REQIDRAM_CORRUPT_MASK BIT(6)
1288 #define IOBPA_M_TRANS_CORRUPT_MASK BIT(5)
1289 #define IOBPA_TRANS_CORRUPT_MASK BIT(4)
1290 #define IOBPA_M_WDATA_CORRUPT_MASK BIT(3)
1291 #define IOBPA_WDATA_CORRUPT_MASK BIT(2)
1292 #define IOBPA_M_RDATA_CORRUPT_MASK BIT(1)
1293 #define IOBPA_RDATA_CORRUPT_MASK BIT(0)
1294 #define IOBBATRANSERRINTSTS 0x0030
1295 #define M_ILLEGAL_ACCESS_MASK BIT(15)
1296 #define ILLEGAL_ACCESS_MASK BIT(14)
1297 #define M_WIDRAM_CORRUPT_MASK BIT(13)
1298 #define WIDRAM_CORRUPT_MASK BIT(12)
1299 #define M_RIDRAM_CORRUPT_MASK BIT(11)
1300 #define RIDRAM_CORRUPT_MASK BIT(10)
1301 #define M_TRANS_CORRUPT_MASK BIT(9)
1302 #define TRANS_CORRUPT_MASK BIT(8)
1303 #define M_WDATA_CORRUPT_MASK BIT(7)
1304 #define WDATA_CORRUPT_MASK BIT(6)
1305 #define M_RBM_POISONED_REQ_MASK BIT(5)
1306 #define RBM_POISONED_REQ_MASK BIT(4)
1307 #define M_XGIC_POISONED_REQ_MASK BIT(3)
1308 #define XGIC_POISONED_REQ_MASK BIT(2)
1309 #define M_WRERR_RESP_MASK BIT(1)
1310 #define WRERR_RESP_MASK BIT(0)
1311 #define IOBBATRANSERRREQINFOL 0x0038
1312 #define IOBBATRANSERRREQINFOH 0x003c
1313 #define REQTYPE_F2_RD(src) ((src) & BIT(0))
1314 #define ERRADDRH_F2_RD(src) (((src) & 0xffc00000) >> 22)
1315 #define IOBBATRANSERRCSWREQID 0x0040
1316 #define XGICTRANSERRINTSTS 0x0050
1317 #define M_WR_ACCESS_ERR_MASK BIT(3)
1318 #define WR_ACCESS_ERR_MASK BIT(2)
1319 #define M_RD_ACCESS_ERR_MASK BIT(1)
1320 #define RD_ACCESS_ERR_MASK BIT(0)
1321 #define XGICTRANSERRINTMSK 0x0054
1322 #define XGICTRANSERRREQINFO 0x0058
1323 #define REQTYPE_MASK BIT(26)
1324 #define ERRADDR_RD(src) ((src) & 0x03ffffff)
1325 #define GLBL_ERR_STS 0x0800
1326 #define MDED_ERR_MASK BIT(3)
1327 #define DED_ERR_MASK BIT(2)
1328 #define MSEC_ERR_MASK BIT(1)
1329 #define SEC_ERR_MASK BIT(0)
1330 #define GLBL_SEC_ERRL 0x0810
1331 #define GLBL_SEC_ERRH 0x0818
1332 #define GLBL_MSEC_ERRL 0x0820
1333 #define GLBL_MSEC_ERRH 0x0828
1334 #define GLBL_DED_ERRL 0x0830
1335 #define GLBL_DED_ERRLMASK 0x0834
1336 #define GLBL_DED_ERRH 0x0838
1337 #define GLBL_DED_ERRHMASK 0x083c
1338 #define GLBL_MDED_ERRL 0x0840
1339 #define GLBL_MDED_ERRLMASK 0x0844
1340 #define GLBL_MDED_ERRH 0x0848
1341 #define GLBL_MDED_ERRHMASK 0x084c
1342
1343 /* IO Bus Registers */
1344 #define RBCSR 0x0000
1345 #define STICKYERR_MASK BIT(0)
1346 #define RBEIR 0x0008
1347 #define AGENT_OFFLINE_ERR_MASK BIT(30)
1348 #define UNIMPL_RBPAGE_ERR_MASK BIT(29)
1349 #define WORD_ALIGNED_ERR_MASK BIT(28)
1350 #define PAGE_ACCESS_ERR_MASK BIT(27)
1351 #define WRITE_ACCESS_MASK BIT(26)
1352
/*
 * SoC IP block names indexed by MEMERRINTSTS bit position (X-Gene v1).
 * Used by xgene_edac_soc_check() to name the block that raised a memory
 * parity error.
 */
static const char * const soc_mem_err_v1[] = {
	"10GbE0",
	"10GbE1",
	"Security",
	"SATA45",
	"SATA23/ETH23",
	"SATA01/ETH01",
	"USB1",
	"USB0",
	"QML",
	"QM0",
	"QM1 (XGbE01)",
	"PCIE4",
	"PCIE3",
	"PCIE2",
	"PCIE1",
	"PCIE0",
	"CTX Manager",
	"OCM",
	"1GbE",
	"CLE",
	"AHBC",
	"PktDMA",
	"GFC",
	"MSLIM",
	"10GbE2",
	"10GbE3",
	"QM2 (XGbE23)",
	"IOB",
	"unknown",
	"unknown",
	"unknown",
	"unknown",
};
1387
/*
 * Report and clear XGIC transaction errors and IOB internal memory
 * (SEC/DED) errors.  Correctable (SEC/MSEC) and uncorrectable (DED/MDED)
 * events are fed to the EDAC core; each latched address register is
 * written back to itself to clear the logged error.
 */
static void xgene_edac_iob_gic_report(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 err_addr_lo;
	u32 err_addr_hi;
	u32 reg;
	u32 info;

	/* GIC transaction error interrupt */
	reg = readl(ctx->dev_csr + XGICTRANSERRINTSTS);
	if (!reg)
		goto chk_iob_err;
	dev_err(edac_dev->dev, "XGIC transaction error\n");
	if (reg & RD_ACCESS_ERR_MASK)
		dev_err(edac_dev->dev, "XGIC read size error\n");
	if (reg & M_RD_ACCESS_ERR_MASK)
		dev_err(edac_dev->dev, "Multiple XGIC read size error\n");
	if (reg & WR_ACCESS_ERR_MASK)
		dev_err(edac_dev->dev, "XGIC write size error\n");
	if (reg & M_WR_ACCESS_ERR_MASK)
		dev_err(edac_dev->dev, "Multiple XGIC write size error\n");
	info = readl(ctx->dev_csr + XGICTRANSERRREQINFO);
	dev_err(edac_dev->dev, "XGIC %s access @ 0x%08X (0x%08X)\n",
		str_read_write(info & REQTYPE_MASK), ERRADDR_RD(info),
		info);
	/* Write-back clears the latched transaction error status */
	writel(reg, ctx->dev_csr + XGICTRANSERRINTSTS);

chk_iob_err:
	/* IOB memory error */
	reg = readl(ctx->dev_csr + GLBL_ERR_STS);
	if (!reg)
		return;
	if (reg & SEC_ERR_MASK) {
		err_addr_lo = readl(ctx->dev_csr + GLBL_SEC_ERRL);
		err_addr_hi = readl(ctx->dev_csr + GLBL_SEC_ERRH);
		dev_err(edac_dev->dev,
			"IOB single-bit correctable memory at 0x%08X.%08X error\n",
			err_addr_lo, err_addr_hi);
		writel(err_addr_lo, ctx->dev_csr + GLBL_SEC_ERRL);
		writel(err_addr_hi, ctx->dev_csr + GLBL_SEC_ERRH);
	}
	if (reg & MSEC_ERR_MASK) {
		err_addr_lo = readl(ctx->dev_csr + GLBL_MSEC_ERRL);
		err_addr_hi = readl(ctx->dev_csr + GLBL_MSEC_ERRH);
		dev_err(edac_dev->dev,
			"IOB multiple single-bit correctable memory at 0x%08X.%08X error\n",
			err_addr_lo, err_addr_hi);
		writel(err_addr_lo, ctx->dev_csr + GLBL_MSEC_ERRL);
		writel(err_addr_hi, ctx->dev_csr + GLBL_MSEC_ERRH);
	}
	if (reg & (SEC_ERR_MASK | MSEC_ERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);

	if (reg & DED_ERR_MASK) {
		err_addr_lo = readl(ctx->dev_csr + GLBL_DED_ERRL);
		err_addr_hi = readl(ctx->dev_csr + GLBL_DED_ERRH);
		dev_err(edac_dev->dev,
			"IOB double-bit uncorrectable memory at 0x%08X.%08X error\n",
			err_addr_lo, err_addr_hi);
		writel(err_addr_lo, ctx->dev_csr + GLBL_DED_ERRL);
		writel(err_addr_hi, ctx->dev_csr + GLBL_DED_ERRH);
	}
	if (reg & MDED_ERR_MASK) {
		err_addr_lo = readl(ctx->dev_csr + GLBL_MDED_ERRL);
		err_addr_hi = readl(ctx->dev_csr + GLBL_MDED_ERRH);
		dev_err(edac_dev->dev,
			"Multiple IOB double-bit uncorrectable memory at 0x%08X.%08X error\n",
			err_addr_lo, err_addr_hi);
		writel(err_addr_lo, ctx->dev_csr + GLBL_MDED_ERRL);
		writel(err_addr_hi, ctx->dev_csr + GLBL_MDED_ERRH);
	}
	if (reg & (DED_ERR_MASK | MDED_ERR_MASK))
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
}
1462
/*
 * Report and clear register-bus (RB) access errors and IOB bridge agent
 * (BA) transaction errors.  The RB portion is read through the optional
 * rb_map regmap; any regmap failure silently aborts the whole report since
 * the bus itself may be unusable.
 */
static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 err_addr_lo;
	u32 err_addr_hi;
	u32 reg;

	/* If the register bus resource isn't available, just skip it */
	if (!ctx->edac->rb_map)
		goto rb_skip;

	/*
	 * Check RB access errors
	 * 1. Out of range
	 * 2. Un-implemented page
	 * 3. Un-aligned access
	 * 4. Offline slave IP
	 */
	if (regmap_read(ctx->edac->rb_map, RBCSR, &reg))
		return;
	if (reg & STICKYERR_MASK) {
		bool write;

		dev_err(edac_dev->dev, "IOB bus access error(s)\n");
		if (regmap_read(ctx->edac->rb_map, RBEIR, &reg))
			return;
		write = reg & WRITE_ACCESS_MASK ? 1 : 0;
		if (reg & AGENT_OFFLINE_ERR_MASK)
			dev_err(edac_dev->dev,
				"IOB bus %s access to offline agent error\n",
				str_write_read(write));
		if (reg & UNIMPL_RBPAGE_ERR_MASK)
			dev_err(edac_dev->dev,
				"IOB bus %s access to unimplemented page error\n",
				str_write_read(write));
		if (reg & WORD_ALIGNED_ERR_MASK)
			dev_err(edac_dev->dev,
				"IOB bus %s word aligned access error\n",
				str_write_read(write));
		if (reg & PAGE_ACCESS_ERR_MASK)
			dev_err(edac_dev->dev,
				"IOB bus %s to page out of range access error\n",
				str_write_read(write));
		/* Clear the sticky error indication and status */
		if (regmap_write(ctx->edac->rb_map, RBEIR, 0))
			return;
		if (regmap_write(ctx->edac->rb_map, RBCSR, 0))
			return;
	}
rb_skip:

	/* IOB Bridge agent transaction error interrupt */
	reg = readl(ctx->dev_csr + IOBBATRANSERRINTSTS);
	if (!reg)
		return;

	dev_err(edac_dev->dev, "IOB bridge agent (BA) transaction error\n");
	if (reg & WRERR_RESP_MASK)
		dev_err(edac_dev->dev, "IOB BA write response error\n");
	if (reg & M_WRERR_RESP_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA write response error\n");
	if (reg & XGIC_POISONED_REQ_MASK)
		dev_err(edac_dev->dev, "IOB BA XGIC poisoned write error\n");
	if (reg & M_XGIC_POISONED_REQ_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA XGIC poisoned write error\n");
	if (reg & RBM_POISONED_REQ_MASK)
		dev_err(edac_dev->dev, "IOB BA RBM poisoned write error\n");
	if (reg & M_RBM_POISONED_REQ_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA RBM poisoned write error\n");
	if (reg & WDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB BA write error\n");
	if (reg & M_WDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev, "Multiple IOB BA write error\n");
	if (reg & TRANS_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB BA transaction error\n");
	if (reg & M_TRANS_CORRUPT_MASK)
		dev_err(edac_dev->dev, "Multiple IOB BA transaction error\n");
	if (reg & RIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"IOB BA RDIDRAM read transaction ID error\n");
	if (reg & M_RIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA RDIDRAM read transaction ID error\n");
	if (reg & WIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"IOB BA RDIDRAM write transaction ID error\n");
	if (reg & M_WIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA RDIDRAM write transaction ID error\n");
	if (reg & ILLEGAL_ACCESS_MASK)
		dev_err(edac_dev->dev,
			"IOB BA XGIC/RB illegal access error\n");
	if (reg & M_ILLEGAL_ACCESS_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA XGIC/RB illegal access error\n");

	err_addr_lo = readl(ctx->dev_csr + IOBBATRANSERRREQINFOL);
	err_addr_hi = readl(ctx->dev_csr + IOBBATRANSERRREQINFOH);
	dev_err(edac_dev->dev, "IOB BA %s access at 0x%02X.%08X (0x%08X)\n",
		str_read_write(REQTYPE_F2_RD(err_addr_hi)),
		ERRADDRH_F2_RD(err_addr_hi), err_addr_lo, err_addr_hi);
	if (reg & WRERR_RESP_MASK)
		dev_err(edac_dev->dev, "IOB BA requestor ID 0x%08X\n",
			readl(ctx->dev_csr + IOBBATRANSERRCSWREQID));
	/* Write-back clears the latched BA error status */
	writel(reg, ctx->dev_csr + IOBBATRANSERRINTSTS);
}
1571
/*
 * Report and clear IOB processing agent (PA) transaction errors and
 * AXI slave 0/1 illegal-access errors.  Each status register read is
 * written back to itself to clear the latched error.
 */
static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 err_addr_lo;
	u32 err_addr_hi;
	u32 reg;

	/* IOB Processing agent transaction error interrupt */
	reg = readl(ctx->dev_csr + IOBPATRANSERRINTSTS);
	if (!reg)
		goto chk_iob_axi0;
	dev_err(edac_dev->dev, "IOB processing agent (PA) transaction error\n");
	if (reg & IOBPA_RDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB PA read data RAM error\n");
	if (reg & IOBPA_M_RDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB PA read data RAM error\n");
	if (reg & IOBPA_WDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB PA write data RAM error\n");
	if (reg & IOBPA_M_WDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB PA write data RAM error\n");
	if (reg & IOBPA_TRANS_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB PA transaction error\n");
	if (reg & IOBPA_M_TRANS_CORRUPT_MASK)
		dev_err(edac_dev->dev, "Multiple IOB PA transaction error\n");
	if (reg & IOBPA_REQIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB PA transaction ID RAM error\n");
	if (reg & IOBPA_M_REQIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB PA transaction ID RAM error\n");
	writel(reg, ctx->dev_csr + IOBPATRANSERRINTSTS);

chk_iob_axi0:
	/* IOB AXI0 Error */
	reg = readl(ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);
	if (!reg)
		goto chk_iob_axi1;
	err_addr_lo = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOL);
	err_addr_hi = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOH);
	dev_err(edac_dev->dev,
		"%sAXI slave 0 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
		reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
		str_read_write(REQTYPE_RD(err_addr_hi)),
		ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
	writel(reg, ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);

chk_iob_axi1:
	/* IOB AXI1 Error */
	reg = readl(ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
	if (!reg)
		return;
	err_addr_lo = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOL);
	err_addr_hi = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOH);
	/* NOTE(review): the AXIS0 "multiple" mask is reused here;
	 * presumably the AXIS1 status register has the same bit layout -
	 * confirm against the register specification. */
	dev_err(edac_dev->dev,
		"%sAXI slave 1 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
		reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
		str_read_write(REQTYPE_RD(err_addr_hi)),
		ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
	writel(reg, ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
}
1633
/*
 * Poll/IRQ check routine for SoC-level errors.  Dispatches to the XGIC,
 * RB/BA and PA reporters based on the PCP high-priority status, reports
 * CSW switch trace parity errors, then decodes per-IP memory parity
 * errors from MEMERRINTSTS (named via soc_mem_err_v1 on v1 hardware,
 * raw value otherwise).
 */
static void xgene_edac_soc_check(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	const char * const *soc_mem_err = NULL;
	u32 pcp_hp_stat;
	u32 pcp_lp_stat;
	u32 reg;
	int i;

	xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
	xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
	xgene_edac_pcp_rd(ctx->edac, MEMERRINTSTS, &reg);
	/* Nothing pending for any of the SoC sub-blocks */
	if (!((pcp_hp_stat & (IOB_PA_ERR_MASK | IOB_BA_ERR_MASK |
			      IOB_XGIC_ERR_MASK | IOB_RB_ERR_MASK)) ||
	      (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) || reg))
		return;

	if (pcp_hp_stat & IOB_XGIC_ERR_MASK)
		xgene_edac_iob_gic_report(edac_dev);

	if (pcp_hp_stat & (IOB_RB_ERR_MASK | IOB_BA_ERR_MASK))
		xgene_edac_rb_report(edac_dev);

	if (pcp_hp_stat & IOB_PA_ERR_MASK)
		xgene_edac_pa_report(edac_dev);

	if (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) {
		dev_info(edac_dev->dev,
			 "CSW switch trace correctable memory parity error\n");
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
	}

	if (!reg)
		return;
	if (ctx->version == 1)
		soc_mem_err = soc_mem_err_v1;
	if (!soc_mem_err) {
		/* No name table for this hardware revision; log raw value */
		dev_err(edac_dev->dev, "SoC memory parity error 0x%08X\n",
			reg);
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
		return;
	}
	/* NOTE(review): only bits 0-30 are scanned; bit 31 ("unknown" in
	 * soc_mem_err_v1) is never reported - confirm this is intended. */
	for (i = 0; i < 31; i++) {
		if (reg & (1 << i)) {
			dev_err(edac_dev->dev, "%s memory parity error\n",
				soc_mem_err[i]);
			edac_device_handle_ue(edac_dev, 0, 0,
					      edac_dev->ctl_name);
		}
	}
}
1685
/*
 * Enable or quiesce SoC error interrupt delivery: PCP top-level masks for
 * the IOB sub-blocks and CSW trace, plus the per-block AXI/XGIC mask
 * registers.  Only takes effect when running in interrupt mode.
 */
static void xgene_edac_soc_hw_init(struct edac_device_ctl_info *edac_dev,
				   bool enable)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;

	/* Enable SoC IP error interrupt */
	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		if (enable) {
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       IOB_PA_ERR_MASK |
					       IOB_BA_ERR_MASK |
					       IOB_XGIC_ERR_MASK |
					       IOB_RB_ERR_MASK);
			xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
					       CSW_SWITCH_TRACE_ERR_MASK);
		} else {
			xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
					       IOB_PA_ERR_MASK |
					       IOB_BA_ERR_MASK |
					       IOB_XGIC_ERR_MASK |
					       IOB_RB_ERR_MASK);
			xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
					       CSW_SWITCH_TRACE_ERR_MASK);
		}

		/* 0 unmasks (enables) all bits; 0xFFFFFFFF masks them all */
		writel(enable ? 0x0 : 0xFFFFFFFF,
		       ctx->dev_csr + IOBAXIS0TRANSERRINTMSK);
		writel(enable ? 0x0 : 0xFFFFFFFF,
		       ctx->dev_csr + IOBAXIS1TRANSERRINTMSK);
		writel(enable ? 0x0 : 0xFFFFFFFF,
		       ctx->dev_csr + XGICTRANSERRINTMSK);

		/* NOTE(review): setbits with a 0x0 mask is a no-op, so the
		 * enable path never clears MEMERRINTMSK - confirm whether a
		 * clrbits was intended for the enable case. */
		xgene_edac_pcp_setbits(ctx->edac, MEMERRINTMSK,
				       enable ? 0x0 : 0xFFFFFFFF);
	}
}
1722
/*
 * Probe the SoC-level error device described by @np and register it with
 * the EDAC core.  Mirrors xgene_edac_l3_add(): all devm allocations live
 * in a devres group keyed on this function so any failure unwinds cleanly.
 *
 * @edac:    parent X-Gene EDAC context
 * @np:      device tree node of the SoC error block
 * @version: hardware revision (1 or 2)
 *
 * Return: 0 on success, negative errno on failure.
 */
static int xgene_edac_soc_add(struct xgene_edac *edac, struct device_node *np,
			      int version)
{
	struct edac_device_ctl_info *edac_dev;
	struct xgene_edac_dev_ctx *ctx;
	void __iomem *dev_csr;
	struct resource res;
	int edac_idx;
	int rc;

	if (!devres_open_group(edac->dev, xgene_edac_soc_add, GFP_KERNEL))
		return -ENOMEM;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no SoC resource address\n");
		goto err_release_group;
	}
	dev_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(dev_csr)) {
		dev_err(edac->dev,
			"devm_ioremap_resource failed for soc resource address\n");
		rc = PTR_ERR(dev_csr);
		goto err_release_group;
	}

	edac_idx = edac_device_alloc_index();
	edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
					      "SOC", 1, "SOC", 1, 2, edac_idx);
	if (!edac_dev) {
		rc = -ENOMEM;
		goto err_release_group;
	}

	ctx = edac_dev->pvt_info;
	ctx->dev_csr = dev_csr;
	ctx->name = "xgene_soc_err";
	ctx->edac_idx = edac_idx;
	ctx->edac = edac;
	ctx->edac_dev = edac_dev;
	/* Shallow copy of the parent device; EDAC lookups use &ctx->ddev */
	ctx->ddev = *edac->dev;
	ctx->version = version;
	edac_dev->dev = &ctx->ddev;
	edac_dev->ctl_name = ctx->name;
	edac_dev->dev_name = ctx->name;
	edac_dev->mod_name = EDAC_MOD_STR;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		edac_dev->edac_check = xgene_edac_soc_check;

	rc = edac_device_add_device(edac_dev);
	if (rc > 0) {
		dev_err(edac->dev, "failed edac_device_add_device()\n");
		rc = -ENOMEM;
		goto err_ctl_free;
	}

	if (edac_op_state == EDAC_OPSTATE_INT)
		edac_dev->op_state = OP_RUNNING_INTERRUPT;

	list_add(&ctx->next, &edac->socs);

	/* Enable SoC error reporting hardware */
	xgene_edac_soc_hw_init(edac_dev, 1);

	devres_remove_group(edac->dev, xgene_edac_soc_add);

	dev_info(edac->dev, "X-Gene EDAC SoC registered\n");

	return 0;

err_ctl_free:
	edac_device_free_ctl_info(edac_dev);
err_release_group:
	devres_release_group(edac->dev, xgene_edac_soc_add);
	return rc;
}
1799
xgene_edac_soc_remove(struct xgene_edac_dev_ctx * soc)1800 static int xgene_edac_soc_remove(struct xgene_edac_dev_ctx *soc)
1801 {
1802 struct edac_device_ctl_info *edac_dev = soc->edac_dev;
1803
1804 xgene_edac_soc_hw_init(edac_dev, 0);
1805 edac_device_del_device(soc->edac->dev);
1806 edac_device_free_ctl_info(edac_dev);
1807 return 0;
1808 }
1809
/*
 * Shared interrupt handler for the top-level EDAC interrupt lines.  Reads
 * the PCP high/low priority status registers and dispatches to the check
 * routine of every registered MCU, PMD, L3 and SoC device; the individual
 * check routines read and clear their own per-block status.
 */
static irqreturn_t xgene_edac_isr(int irq, void *dev_id)
{
	struct xgene_edac *ctx = dev_id;
	struct xgene_edac_pmd_ctx *pmd;
	struct xgene_edac_dev_ctx *node;
	unsigned int pcp_hp_stat;
	unsigned int pcp_lp_stat;

	xgene_edac_pcp_rd(ctx, PCPHPERRINTSTS, &pcp_hp_stat);
	xgene_edac_pcp_rd(ctx, PCPLPERRINTSTS, &pcp_lp_stat);
	/* Memory controller errors: uncorrectable, control, or correctable */
	if ((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
	    (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
	    (MCU_CORR_ERR_MASK & pcp_lp_stat)) {
		struct xgene_edac_mc_ctx *mcu;

		list_for_each_entry(mcu, &ctx->mcus, next)
			xgene_edac_mc_check(mcu->mci);
	}

	/* Each PMD has its own status bit (PMD0_MERR_MASK << pmd index) */
	list_for_each_entry(pmd, &ctx->pmds, next) {
		if ((PMD0_MERR_MASK << pmd->pmd) & pcp_hp_stat)
			xgene_edac_pmd_check(pmd->edac_dev);
	}

	list_for_each_entry(node, &ctx->l3s, next)
		xgene_edac_l3_check(node->edac_dev);

	list_for_each_entry(node, &ctx->socs, next)
		xgene_edac_soc_check(node->edac_dev);

	return IRQ_HANDLED;
}
1842
xgene_edac_probe(struct platform_device * pdev)1843 static int xgene_edac_probe(struct platform_device *pdev)
1844 {
1845 struct xgene_edac *edac;
1846 struct device_node *child;
1847 struct resource *res;
1848 int rc;
1849
1850 edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL);
1851 if (!edac)
1852 return -ENOMEM;
1853
1854 edac->dev = &pdev->dev;
1855 platform_set_drvdata(pdev, edac);
1856 INIT_LIST_HEAD(&edac->mcus);
1857 INIT_LIST_HEAD(&edac->pmds);
1858 INIT_LIST_HEAD(&edac->l3s);
1859 INIT_LIST_HEAD(&edac->socs);
1860 spin_lock_init(&edac->lock);
1861 mutex_init(&edac->mc_lock);
1862
1863 edac->csw_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1864 "regmap-csw");
1865 if (IS_ERR(edac->csw_map)) {
1866 dev_err(edac->dev, "unable to get syscon regmap csw\n");
1867 rc = PTR_ERR(edac->csw_map);
1868 goto out_err;
1869 }
1870
1871 edac->mcba_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1872 "regmap-mcba");
1873 if (IS_ERR(edac->mcba_map)) {
1874 dev_err(edac->dev, "unable to get syscon regmap mcba\n");
1875 rc = PTR_ERR(edac->mcba_map);
1876 goto out_err;
1877 }
1878
1879 edac->mcbb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1880 "regmap-mcbb");
1881 if (IS_ERR(edac->mcbb_map)) {
1882 dev_err(edac->dev, "unable to get syscon regmap mcbb\n");
1883 rc = PTR_ERR(edac->mcbb_map);
1884 goto out_err;
1885 }
1886 edac->efuse_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1887 "regmap-efuse");
1888 if (IS_ERR(edac->efuse_map)) {
1889 dev_err(edac->dev, "unable to get syscon regmap efuse\n");
1890 rc = PTR_ERR(edac->efuse_map);
1891 goto out_err;
1892 }
1893
1894 /*
1895 * NOTE: The register bus resource is optional for compatibility
1896 * reason.
1897 */
1898 edac->rb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1899 "regmap-rb");
1900 if (IS_ERR(edac->rb_map)) {
1901 dev_warn(edac->dev, "missing syscon regmap rb\n");
1902 edac->rb_map = NULL;
1903 }
1904
1905 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1906 edac->pcp_csr = devm_ioremap_resource(&pdev->dev, res);
1907 if (IS_ERR(edac->pcp_csr)) {
1908 dev_err(&pdev->dev, "no PCP resource address\n");
1909 rc = PTR_ERR(edac->pcp_csr);
1910 goto out_err;
1911 }
1912
1913 if (edac_op_state == EDAC_OPSTATE_INT) {
1914 int irq;
1915 int i;
1916
1917 for (i = 0; i < 3; i++) {
1918 irq = platform_get_irq_optional(pdev, i);
1919 if (irq < 0) {
1920 dev_err(&pdev->dev, "No IRQ resource\n");
1921 rc = irq;
1922 goto out_err;
1923 }
1924 rc = devm_request_irq(&pdev->dev, irq,
1925 xgene_edac_isr, IRQF_SHARED,
1926 dev_name(&pdev->dev), edac);
1927 if (rc) {
1928 dev_err(&pdev->dev,
1929 "Could not request IRQ %d\n", irq);
1930 goto out_err;
1931 }
1932 }
1933 }
1934
1935 edac->dfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
1936
1937 for_each_child_of_node(pdev->dev.of_node, child) {
1938 if (!of_device_is_available(child))
1939 continue;
1940 if (of_device_is_compatible(child, "apm,xgene-edac-mc"))
1941 xgene_edac_mc_add(edac, child);
1942 if (of_device_is_compatible(child, "apm,xgene-edac-pmd"))
1943 xgene_edac_pmd_add(edac, child, 1);
1944 if (of_device_is_compatible(child, "apm,xgene-edac-pmd-v2"))
1945 xgene_edac_pmd_add(edac, child, 2);
1946 if (of_device_is_compatible(child, "apm,xgene-edac-l3"))
1947 xgene_edac_l3_add(edac, child, 1);
1948 if (of_device_is_compatible(child, "apm,xgene-edac-l3-v2"))
1949 xgene_edac_l3_add(edac, child, 2);
1950 if (of_device_is_compatible(child, "apm,xgene-edac-soc"))
1951 xgene_edac_soc_add(edac, child, 0);
1952 if (of_device_is_compatible(child, "apm,xgene-edac-soc-v1"))
1953 xgene_edac_soc_add(edac, child, 1);
1954 }
1955
1956 return 0;
1957
1958 out_err:
1959 return rc;
1960 }
1961
xgene_edac_remove(struct platform_device * pdev)1962 static void xgene_edac_remove(struct platform_device *pdev)
1963 {
1964 struct xgene_edac *edac = dev_get_drvdata(&pdev->dev);
1965 struct xgene_edac_mc_ctx *mcu;
1966 struct xgene_edac_mc_ctx *temp_mcu;
1967 struct xgene_edac_pmd_ctx *pmd;
1968 struct xgene_edac_pmd_ctx *temp_pmd;
1969 struct xgene_edac_dev_ctx *node;
1970 struct xgene_edac_dev_ctx *temp_node;
1971
1972 list_for_each_entry_safe(mcu, temp_mcu, &edac->mcus, next)
1973 xgene_edac_mc_remove(mcu);
1974
1975 list_for_each_entry_safe(pmd, temp_pmd, &edac->pmds, next)
1976 xgene_edac_pmd_remove(pmd);
1977
1978 list_for_each_entry_safe(node, temp_node, &edac->l3s, next)
1979 xgene_edac_l3_remove(node);
1980
1981 list_for_each_entry_safe(node, temp_node, &edac->socs, next)
1982 xgene_edac_soc_remove(node);
1983 }
1984
/*
 * Device-tree match table. A single top-level "apm,xgene-edac" node is
 * expected; the individual EDAC sub-blocks are its children (see the
 * compatible strings handled in xgene_edac_probe()).
 */
static const struct of_device_id xgene_edac_of_match[] = {
	{ .compatible = "apm,xgene-edac" },
	{},
};
MODULE_DEVICE_TABLE(of, xgene_edac_of_match);
1990
/* Platform driver glue for the top-level X-Gene EDAC device. */
static struct platform_driver xgene_edac_driver = {
	.probe = xgene_edac_probe,
	.remove = xgene_edac_remove,
	.driver = {
		.name = "xgene-edac",
		.of_match_table = xgene_edac_of_match,
	},
};
1999
xgene_edac_init(void)2000 static int __init xgene_edac_init(void)
2001 {
2002 int rc;
2003
2004 if (ghes_get_devices())
2005 return -EBUSY;
2006
2007 /* Make sure error reporting method is sane */
2008 switch (edac_op_state) {
2009 case EDAC_OPSTATE_POLL:
2010 case EDAC_OPSTATE_INT:
2011 break;
2012 default:
2013 edac_op_state = EDAC_OPSTATE_INT;
2014 break;
2015 }
2016
2017 rc = platform_driver_register(&xgene_edac_driver);
2018 if (rc) {
2019 edac_printk(KERN_ERR, EDAC_MOD_STR,
2020 "EDAC fails to register\n");
2021 goto reg_failed;
2022 }
2023
2024 return 0;
2025
2026 reg_failed:
2027 return rc;
2028 }
2029 module_init(xgene_edac_init);
2030
xgene_edac_exit(void)2031 static void __exit xgene_edac_exit(void)
2032 {
2033 platform_driver_unregister(&xgene_edac_driver);
2034 }
2035 module_exit(xgene_edac_exit);
2036
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Feng Kan <fkan@apm.com>");
MODULE_DESCRIPTION("APM X-Gene EDAC driver");
/* Read-only in sysfs (0444): the reporting mode is fixed at load time */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state,
		 "EDAC error reporting state: 0=Poll, 2=Interrupt");