// SPDX-License-Identifier: GPL-2.0
/*
 * Bluefield-specific EDAC driver.
 *
 * Copyright (c) 2019 Mellanox Technologies.
 */

#include <linux/acpi.h>
#include <linux/arm-smccc.h>
#include <linux/bitfield.h>
#include <linux/edac.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/platform_device.h>

#include "edac_module.h"

#define DRIVER_NAME		"bluefield-edac"

/*
 * Mellanox BlueField EMI (External Memory Interface) register definitions.
 */

#define MLXBF_ECC_CNT			0x340
#define MLXBF_ECC_CNT__SERR_CNT		GENMASK(15, 0)
#define MLXBF_ECC_CNT__DERR_CNT		GENMASK(31, 16)

#define MLXBF_ECC_ERR			0x348
#define MLXBF_ECC_ERR__SECC		BIT(0)
#define MLXBF_ECC_ERR__DECC		BIT(16)

#define MLXBF_ECC_LATCH_SEL		0x354
#define MLXBF_ECC_LATCH_SEL__START	BIT(24)

#define MLXBF_ERR_ADDR_0		0x358

#define MLXBF_ERR_ADDR_1		0x37c

#define MLXBF_SYNDROM			0x35c
#define MLXBF_SYNDROM__DERR		BIT(0)
#define MLXBF_SYNDROM__SERR		BIT(1)
#define MLXBF_SYNDROM__SYN		GENMASK(25, 16)

#define MLXBF_ADD_INFO			0x364
#define MLXBF_ADD_INFO__ERR_PRANK	GENMASK(9, 8)

#define MLXBF_EDAC_MAX_DIMM_PER_MC	2
#define MLXBF_EDAC_ERROR_GRAIN		8

#define MLXBF_WRITE_REG_32		(0x82000009)
#define MLXBF_READ_REG_32		(0x8200000A)
#define MLXBF_SIP_SVC_VERSION		(0x8200ff03)

#define MLXBF_SMCCC_ACCESS_VIOLATION	(-4)

#define MLXBF_SVC_REQ_MAJOR		0
#define MLXBF_SVC_REQ_MINOR		3

/*
 * Request MLXBF_SIP_GET_DIMM_INFO
 *
 * Retrieve information about the DIMM in a given slot.
 *
 * Call register usage:
 * a0: MLXBF_SIP_GET_DIMM_INFO
 * a1: (Memory controller index) << 16 | (DIMM index in memory controller)
 * a2-7: not used.
 *
 * Return status:
 * a0: MLXBF_DIMM_INFO defined below describing the DIMM.
 * a1-3: not used.
 */
#define MLXBF_SIP_GET_DIMM_INFO		0x82000008

/* Format for the SMC response about the memory information */
#define MLXBF_DIMM_INFO__SIZE_GB	GENMASK_ULL(15, 0)
#define MLXBF_DIMM_INFO__IS_RDIMM	BIT(16)
#define MLXBF_DIMM_INFO__IS_LRDIMM	BIT(17)
#define MLXBF_DIMM_INFO__IS_NVDIMM	BIT(18)
#define MLXBF_DIMM_INFO__RANKS		GENMASK_ULL(23, 21)
#define MLXBF_DIMM_INFO__PACKAGE_X	GENMASK_ULL(31, 24)

struct bluefield_edac_priv {
	/* pointer to device structure */
	struct device *dev;
	/* rank count reported for each DIMM slot */
	int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC];
	/* EMI register block (physical address only in secure-access mode) */
	void __iomem *emi_base;
	/* number of DIMM slots on this memory controller */
	int dimm_per_mc;
	/* access to secure regs supported */
	bool svc_sreg_support;
	/* SMC table# for secure regs access */
	u32 sreg_tbl;
};

/* Issue an SMC with a single argument and return the value from register a0. */
static u64 smc_call1(u64 smc_op, u64 smc_arg)
{
	struct arm_smccc_res res;

	arm_smccc_smc(smc_op, smc_arg, 0, 0, 0, 0, 0, 0, &res);

	return res.a0;
}

/* Read a 32-bit EMI register through the secure (SMC) register-access service. */
static int secure_readl(void __iomem *addr, u32 *result, u32 sreg_tbl)
{
	struct arm_smccc_res res;
	int status;

	arm_smccc_smc(MLXBF_READ_REG_32, sreg_tbl, (uintptr_t)addr,
		      0, 0, 0, 0, 0, &res);

	status = res.a0;

	if (status == SMCCC_RET_NOT_SUPPORTED ||
	    status == MLXBF_SMCCC_ACCESS_VIOLATION)
		return -1;

	*result = (u32)res.a1;
	return 0;
}

/* Write a 32-bit EMI register through the secure (SMC) register-access service. */
static int secure_writel(void __iomem *addr, u32 data, u32 sreg_tbl)
{
	struct arm_smccc_res res;
	int status;

	arm_smccc_smc(MLXBF_WRITE_REG_32, sreg_tbl, data, (uintptr_t)addr,
		      0, 0, 0, 0, &res);

	status = res.a0;

	if (status == SMCCC_RET_NOT_SUPPORTED ||
	    status == MLXBF_SMCCC_ACCESS_VIOLATION)
		return -1;
	else
		return 0;
}

/* Read an EMI register, using either direct MMIO or the SMC service. */
static int bluefield_edac_readl(struct bluefield_edac_priv *priv, u32 offset, u32 *result)
{
	void __iomem *addr;
	int err = 0;

	addr = priv->emi_base + offset;

	if (priv->svc_sreg_support)
		err = secure_readl(addr, result, priv->sreg_tbl);
	else
		*result = readl(addr);

	return err;
}

/* Write an EMI register, using either direct MMIO or the SMC service. */
static int bluefield_edac_writel(struct bluefield_edac_priv *priv, u32 offset, u32 data)
{
	void __iomem *addr;
	int err = 0;

	addr = priv->emi_base + offset;

	if (priv->svc_sreg_support)
		err = secure_writel(addr, data, priv->sreg_tbl);
	else
		writel(data, addr);

	return err;
}

/*
 * Gather the ECC information from the External Memory Interface registers
 * and report it to the EDAC handler.
 */
static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
					int error_cnt,
					int is_single_ecc)
{
	struct bluefield_edac_priv *priv = mci->pvt_info;
	u32 dram_additional_info, err_prank, edea0, edea1;
	u32 ecc_latch_select, dram_syndrom, serr, derr, syndrom;
	enum hw_event_mc_err_type ecc_type;
	u64 ecc_dimm_addr;
	int ecc_dimm, err;

	ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED :
				   HW_EVENT_ERR_UNCORRECTED;

	/*
	 * Tell the External Memory Interface to populate the relevant
	 * registers with information about the last ECC error occurrence.
	 */
	ecc_latch_select = MLXBF_ECC_LATCH_SEL__START;
	err = bluefield_edac_writel(priv, MLXBF_ECC_LATCH_SEL, ecc_latch_select);
	if (err)
		dev_err(priv->dev, "ECC latch select write failed.\n");

	/*
	 * Verify that the ECC reported info in the registers is of the
	 * same type as the one asked to report. If not, just report the
	 * error without the detailed information.
	 */
	err = bluefield_edac_readl(priv, MLXBF_SYNDROM, &dram_syndrom);
	if (err) {
		dev_err(priv->dev, "DRAM syndrom read failed.\n");
		return;
	}

	serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom);
	derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom);
	syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom);

	if ((is_single_ecc && !serr) || (!is_single_ecc && !derr)) {
		edac_mc_handle_error(ecc_type, mci, error_cnt, 0, 0, 0,
				     0, 0, -1, mci->ctl_name, "");
		return;
	}

	err = bluefield_edac_readl(priv, MLXBF_ADD_INFO, &dram_additional_info);
	if (err) {
		dev_err(priv->dev, "DRAM additional info read failed.\n");
		return;
	}

	err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info);

	ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0;

	err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_0, &edea0);
	if (err) {
		dev_err(priv->dev, "Error addr 0 read failed.\n");
		return;
	}

	err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_1, &edea1);
	if (err) {
		dev_err(priv->dev, "Error addr 1 read failed.\n");
		return;
	}

	ecc_dimm_addr = ((u64)edea1 << 32) | edea0;

	edac_mc_handle_error(ecc_type, mci, error_cnt,
			     PFN_DOWN(ecc_dimm_addr),
			     offset_in_page(ecc_dimm_addr),
			     syndrom, ecc_dimm, 0, 0, mci->ctl_name, "");
}

static void bluefield_edac_check(struct mem_ctl_info *mci)
{
	struct bluefield_edac_priv *priv = mci->pvt_info;
	u32 ecc_count, single_error_count, double_error_count, ecc_error = 0;
	int err;

	/*
	 * The memory controller might not be initialized by the firmware
	 * when there isn't memory, which may lead to bad register readings.
	 */
	if (mci->edac_cap == EDAC_FLAG_NONE)
		return;

	err = bluefield_edac_readl(priv, MLXBF_ECC_CNT, &ecc_count);
	if (err) {
		dev_err(priv->dev, "ECC count read failed.\n");
		return;
	}

	single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count);
	double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count);

	if (single_error_count) {
		ecc_error |= MLXBF_ECC_ERR__SECC;

		bluefield_gather_report_ecc(mci, single_error_count, 1);
	}

	if (double_error_count) {
		ecc_error |= MLXBF_ECC_ERR__DECC;

		bluefield_gather_report_ecc(mci, double_error_count, 0);
	}

	/* Write to clear reported errors. */
	if (ecc_count) {
		err = bluefield_edac_writel(priv, MLXBF_ECC_ERR, ecc_error);
		if (err)
			dev_err(priv->dev, "ECC Error write failed.\n");
	}
}

/* Initialize the DIMMs information for the given memory controller. */
static void bluefield_edac_init_dimms(struct mem_ctl_info *mci)
{
	struct bluefield_edac_priv *priv = mci->pvt_info;
	u64 mem_ctrl_idx = mci->mc_idx;
	struct dimm_info *dimm;
	u64 smc_info, smc_arg;
	int is_empty = 1, i;

	for (i = 0; i < priv->dimm_per_mc; i++) {
		dimm = mci->dimms[i];

		smc_arg = mem_ctrl_idx << 16 | i;
		smc_info = smc_call1(MLXBF_SIP_GET_DIMM_INFO, smc_arg);

		if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info)) {
			dimm->mtype = MEM_EMPTY;
			continue;
		}

		is_empty = 0;

		dimm->edac_mode = EDAC_SECDED;

		if (FIELD_GET(MLXBF_DIMM_INFO__IS_NVDIMM, smc_info))
			dimm->mtype = MEM_NVDIMM;
		else if (FIELD_GET(MLXBF_DIMM_INFO__IS_LRDIMM, smc_info))
			dimm->mtype = MEM_LRDDR4;
		else if (FIELD_GET(MLXBF_DIMM_INFO__IS_RDIMM, smc_info))
			dimm->mtype = MEM_RDDR4;
		else
			dimm->mtype = MEM_DDR4;

		dimm->nr_pages =
			FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info) *
			(SZ_1G / PAGE_SIZE);
		dimm->grain = MLXBF_EDAC_ERROR_GRAIN;

		/* Mem controller for BlueField only supports x4, x8 and x16 */
		switch (FIELD_GET(MLXBF_DIMM_INFO__PACKAGE_X, smc_info)) {
		case 4:
			dimm->dtype = DEV_X4;
			break;
		case 8:
			dimm->dtype = DEV_X8;
			break;
		case 16:
			dimm->dtype = DEV_X16;
			break;
		default:
			dimm->dtype = DEV_UNKNOWN;
		}

		priv->dimm_ranks[i] =
			FIELD_GET(MLXBF_DIMM_INFO__RANKS, smc_info);
	}

	if (is_empty)
		mci->edac_cap = EDAC_FLAG_NONE;
	else
		mci->edac_cap = EDAC_FLAG_SECDED;
}

static int bluefield_edac_mc_probe(struct platform_device *pdev)
{
	struct bluefield_edac_priv *priv;
	struct device *dev = &pdev->dev;
	struct edac_mc_layer layers[1];
	struct arm_smccc_res res;
	struct mem_ctl_info *mci;
	struct resource *emi_res;
	unsigned int mc_idx, dimm_count;
	int rc, ret;

	/* Read the MSS (Memory SubSystem) index from ACPI table. */
	if (device_property_read_u32(dev, "mss_number", &mc_idx)) {
		dev_warn(dev, "bf_edac: MSS number unknown\n");
		return -EINVAL;
	}

	/* Read the DIMMs per MC from ACPI table. */
	if (device_property_read_u32(dev, "dimm_per_mc", &dimm_count)) {
		dev_warn(dev, "bf_edac: DIMMs per MC unknown\n");
		return -EINVAL;
	}

	if (dimm_count > MLXBF_EDAC_MAX_DIMM_PER_MC) {
		dev_warn(dev, "bf_edac: DIMMs per MC not valid\n");
		return -EINVAL;
	}

	emi_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!emi_res)
		return -EINVAL;

	layers[0].type = EDAC_MC_LAYER_SLOT;
	layers[0].size = dimm_count;
	layers[0].is_virt_csrow = true;

	mci = edac_mc_alloc(mc_idx, ARRAY_SIZE(layers), layers, sizeof(*priv));
	if (!mci)
		return -ENOMEM;

	priv = mci->pvt_info;
	priv->dev = dev;

	/*
	 * The "sec_reg_block" property in the ACPI table determines the method
	 * the driver uses to access the EMI registers:
	 * a) property is not present - directly access registers via readl/writel
	 * b) property is present - indirectly access registers via SMC calls
	 *    (assuming required Silicon Provider service version found)
	 */
	if (device_property_read_u32(dev, "sec_reg_block", &priv->sreg_tbl)) {
		priv->svc_sreg_support = false;
	} else {
		/*
		 * Check for minimum required Arm Silicon Provider (SiP) service
		 * version, ensuring support of required SMC function IDs.
		 */
		arm_smccc_smc(MLXBF_SIP_SVC_VERSION, 0, 0, 0, 0, 0, 0, 0, &res);
		if (res.a0 == MLXBF_SVC_REQ_MAJOR &&
		    res.a1 >= MLXBF_SVC_REQ_MINOR) {
			priv->svc_sreg_support = true;
		} else {
			dev_err(dev, "Required SMCs are not supported.\n");
			ret = -EINVAL;
			goto err;
		}
	}

	priv->dimm_per_mc = dimm_count;
	if (!priv->svc_sreg_support) {
		priv->emi_base = devm_ioremap_resource(dev, emi_res);
		if (IS_ERR(priv->emi_base)) {
			dev_err(dev, "failed to map EMI IO resource\n");
			ret = PTR_ERR(priv->emi_base);
			goto err;
		}
	} else {
		/*
		 * Secure-access mode: keep the physical address only, it is
		 * passed to the SMC handler and never dereferenced here.
		 */
		priv->emi_base = (void __iomem *)emi_res->start;
	}

	mci->pdev = dev;
	mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 |
			 MEM_FLAG_LRDDR4 | MEM_FLAG_NVDIMM;
	mci->edac_ctl_cap = EDAC_FLAG_SECDED;

	mci->mod_name = DRIVER_NAME;
	mci->ctl_name = "BlueField_Memory_Controller";
	mci->dev_name = dev_name(dev);
	mci->edac_check = bluefield_edac_check;

	/* Initialize mci with the actual populated DIMM information. */
	bluefield_edac_init_dimms(mci);

	platform_set_drvdata(pdev, mci);

	/* Register with EDAC core */
	rc = edac_mc_add_mc(mci);
	if (rc) {
		dev_err(dev, "failed to register with EDAC core\n");
		ret = rc;
		goto err;
	}

	/* Only POLL mode supported so far. */
	edac_op_state = EDAC_OPSTATE_POLL;

	return 0;

err:
	edac_mc_free(mci);

	return ret;
}

static void bluefield_edac_mc_remove(struct platform_device *pdev)
{
	struct mem_ctl_info *mci = platform_get_drvdata(pdev);

	edac_mc_del_mc(&pdev->dev);
	edac_mc_free(mci);
}

static const struct acpi_device_id bluefield_mc_acpi_ids[] = {
	{"MLNXBF08", 0},
	{}
};

MODULE_DEVICE_TABLE(acpi, bluefield_mc_acpi_ids);

static struct platform_driver bluefield_edac_mc_driver = {
	.driver = {
		.name = DRIVER_NAME,
		.acpi_match_table = bluefield_mc_acpi_ids,
	},
	.probe = bluefield_edac_mc_probe,
	.remove = bluefield_edac_mc_remove,
};

module_platform_driver(bluefield_edac_mc_driver);

MODULE_DESCRIPTION("Mellanox BlueField memory edac driver");
MODULE_AUTHOR("Mellanox Technologies");
MODULE_LICENSE("GPL v2");