// SPDX-License-Identifier: GPL-2.0-only
/*
 * CXL EDAC memory feature driver.
 *
 * Copyright (c) 2024-2025 HiSilicon Limited.
 *
 * - Supports functions to configure the EDAC features of
 *   CXL memory devices.
 * - Registers with the EDAC device subsystem driver to expose
 *   the feature sysfs attributes to userspace for configuring
 *   the CXL memory RAS features.
 */

#include <linux/cleanup.h>
#include <linux/edac.h>
#include <linux/limits.h>
#include <linux/unaligned.h>
#include <linux/xarray.h>
#include <cxl/features.h>
#include <cxl.h>
#include <cxlmem.h>
#include "core.h"
#include "trace.h"

#define CXL_NR_EDAC_DEV_FEATURES 7

#define CXL_SCRUB_NO_REGION -1

struct cxl_patrol_scrub_context {
	u8 instance;
	u16 get_feat_size;
	u16 set_feat_size;
	u8 get_version;
	u8 set_version;
	u16 effects;
	struct cxl_memdev *cxlmd;
	struct cxl_region *cxlr;
};

/*
 * See CXL spec rev 3.2 @8.2.10.9.11.1 Table 8-222 Device Patrol Scrub Control
 * Feature Readable Attributes.
 */
struct cxl_scrub_rd_attrbs {
	u8 scrub_cycle_cap;
	__le16 scrub_cycle_hours;
	u8 scrub_flags;
} __packed;

/*
 * See CXL spec rev 3.2 @8.2.10.9.11.1 Table 8-223 Device Patrol Scrub Control
 * Feature Writable Attributes.
 */
struct cxl_scrub_wr_attrbs {
	u8 scrub_cycle_hours;
	u8 scrub_flags;
} __packed;

#define CXL_SCRUB_CONTROL_CHANGEABLE	BIT(0)
#define CXL_SCRUB_CONTROL_REALTIME	BIT(1)
#define CXL_SCRUB_CONTROL_CYCLE_MASK	GENMASK(7, 0)
#define CXL_SCRUB_CONTROL_MIN_CYCLE_MASK GENMASK(15, 8)
#define CXL_SCRUB_CONTROL_ENABLE	BIT(0)

#define CXL_GET_SCRUB_CYCLE_CHANGEABLE(cap) \
	FIELD_GET(CXL_SCRUB_CONTROL_CHANGEABLE, cap)
#define CXL_GET_SCRUB_CYCLE(cycle) \
	FIELD_GET(CXL_SCRUB_CONTROL_CYCLE_MASK, cycle)
#define CXL_GET_SCRUB_MIN_CYCLE(cycle) \
	FIELD_GET(CXL_SCRUB_CONTROL_MIN_CYCLE_MASK, cycle)
#define CXL_GET_SCRUB_EN_STS(flags) FIELD_GET(CXL_SCRUB_CONTROL_ENABLE, flags)

#define CXL_SET_SCRUB_CYCLE(cycle) \
	FIELD_PREP(CXL_SCRUB_CONTROL_CYCLE_MASK, cycle)
#define CXL_SET_SCRUB_EN(en) FIELD_PREP(CXL_SCRUB_CONTROL_ENABLE, en)

static int cxl_mem_scrub_get_attrbs(struct cxl_mailbox *cxl_mbox, u8 *cap,
				    u16 *cycle, u8 *flags, u8 *min_cycle)
{
	size_t rd_data_size = sizeof(struct cxl_scrub_rd_attrbs);
	size_t data_size;
	struct cxl_scrub_rd_attrbs *rd_attrbs __free(kfree) =
		kzalloc(rd_data_size, GFP_KERNEL);
	if (!rd_attrbs)
		return -ENOMEM;

	data_size = cxl_get_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
				    CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
				    rd_data_size, 0, NULL);
	if (!data_size)
		return -EIO;

	*cap = rd_attrbs->scrub_cycle_cap;
	*cycle = le16_to_cpu(rd_attrbs->scrub_cycle_hours);
	*flags = rd_attrbs->scrub_flags;
	if (min_cycle)
		*min_cycle = CXL_GET_SCRUB_MIN_CYCLE(*cycle);

	return 0;
}

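/*
 * Resolve patrol scrub attributes for the scope carried by the context: a
 * bare memdev is queried directly, while a region walks every endpoint
 * memdev backing it and reports the smallest minimum scrub cycle seen
 * across those devices.
 */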
static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx,
				u8 *cap, u16 *cycle, u8 *flags, u8 *min_cycle)
{
	struct cxl_mailbox *cxl_mbox;
	u8 min_scrub_cycle = U8_MAX;
	struct cxl_region_params *p;
	struct cxl_memdev *cxlmd;
	struct cxl_region *cxlr;
	int i, ret;

	if (!cxl_ps_ctx->cxlr) {
		cxl_mbox = &cxl_ps_ctx->cxlmd->cxlds->cxl_mbox;
		return cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle,
						flags, min_cycle);
	}

	struct rw_semaphore *region_lock __free(rwsem_read_release) =
		rwsem_read_intr_acquire(&cxl_region_rwsem);
	if (!region_lock)
		return -EINTR;

	cxlr = cxl_ps_ctx->cxlr;
	p = &cxlr->params;

	for (i = 0; i < p->nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];

		cxlmd = cxled_to_memdev(cxled);
		cxl_mbox = &cxlmd->cxlds->cxl_mbox;
		ret = cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle, flags,
					       min_cycle);
		if (ret)
			return ret;

		if (min_cycle)
			min_scrub_cycle = min(*min_cycle, min_scrub_cycle);
	}

	if (min_cycle)
		*min_cycle = min_scrub_cycle;

	return 0;
}

static int cxl_scrub_set_attrbs_region(struct device *dev,
				       struct cxl_patrol_scrub_context *cxl_ps_ctx,
				       u8 cycle, u8 flags)
{
	struct cxl_scrub_wr_attrbs wr_attrbs;
	struct cxl_mailbox *cxl_mbox;
	struct cxl_region_params *p;
	struct cxl_memdev *cxlmd;
	struct cxl_region *cxlr;
	int ret, i;

	struct rw_semaphore *region_lock __free(rwsem_read_release) =
		rwsem_read_intr_acquire(&cxl_region_rwsem);
	if (!region_lock)
		return -EINTR;

	cxlr = cxl_ps_ctx->cxlr;
	p = &cxlr->params;
	wr_attrbs.scrub_cycle_hours = cycle;
	wr_attrbs.scrub_flags = flags;

	for (i = 0; i < p->nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];

		cxlmd = cxled_to_memdev(cxled);
		cxl_mbox = &cxlmd->cxlds->cxl_mbox;
		ret = cxl_set_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
				      cxl_ps_ctx->set_version, &wr_attrbs,
				      sizeof(wr_attrbs),
				      CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET,
				      0, NULL);
		if (ret)
			return ret;

		if (cycle != cxlmd->scrub_cycle) {
			if (cxlmd->scrub_region_id != CXL_SCRUB_NO_REGION)
				dev_info(dev,
					 "Device scrub rate(%d hours) set by region%d rate overwritten by region%d scrub rate(%d hours)\n",
					 cxlmd->scrub_cycle,
					 cxlmd->scrub_region_id, cxlr->id,
					 cycle);

			cxlmd->scrub_cycle = cycle;
			cxlmd->scrub_region_id = cxlr->id;
		}
	}

	return 0;
}

static int cxl_scrub_set_attrbs_device(struct device *dev,
				       struct cxl_patrol_scrub_context *cxl_ps_ctx,
				       u8 cycle, u8 flags)
{
	struct cxl_scrub_wr_attrbs wr_attrbs;
	struct cxl_mailbox *cxl_mbox;
	struct cxl_memdev *cxlmd;
	int ret;

	wr_attrbs.scrub_cycle_hours = cycle;
	wr_attrbs.scrub_flags = flags;

	cxlmd = cxl_ps_ctx->cxlmd;
	cxl_mbox = &cxlmd->cxlds->cxl_mbox;
	ret = cxl_set_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
			      cxl_ps_ctx->set_version, &wr_attrbs,
			      sizeof(wr_attrbs),
			      CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET, 0,
			      NULL);
	if (ret)
		return ret;

	if (cycle != cxlmd->scrub_cycle) {
		if (cxlmd->scrub_region_id != CXL_SCRUB_NO_REGION)
			dev_info(dev,
				 "Device scrub rate(%d hours) set by region%d rate overwritten with device local scrub rate(%d hours)\n",
				 cxlmd->scrub_cycle, cxlmd->scrub_region_id,
				 cycle);

		cxlmd->scrub_cycle = cycle;
		cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;
	}

	return 0;
}

static int cxl_scrub_set_attrbs(struct device *dev,
				struct cxl_patrol_scrub_context *cxl_ps_ctx,
				u8 cycle, u8 flags)
{
	if (cxl_ps_ctx->cxlr)
		return cxl_scrub_set_attrbs_region(dev, cxl_ps_ctx, cycle, flags);

	return cxl_scrub_set_attrbs_device(dev, cxl_ps_ctx, cycle, flags);
}

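/*
 * EDAC scrub control callbacks. The EDAC core expresses the scrub cycle in
 * seconds while the CXL feature encodes it in hours, hence the conversions
 * below; for example, a request of 43200 seconds programs a 12 hour patrol
 * scrub cycle.
 */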
static int cxl_patrol_scrub_get_enabled_bg(struct device *dev, void *drv_data,
					   bool *enabled)
{
	struct cxl_patrol_scrub_context *ctx = drv_data;
	u8 cap, flags;
	u16 cycle;
	int ret;

	ret = cxl_scrub_get_attrbs(ctx, &cap, &cycle, &flags, NULL);
	if (ret)
		return ret;

	*enabled = CXL_GET_SCRUB_EN_STS(flags);

	return 0;
}

static int cxl_patrol_scrub_set_enabled_bg(struct device *dev, void *drv_data,
					   bool enable)
{
	struct cxl_patrol_scrub_context *ctx = drv_data;
	u8 cap, flags, wr_cycle;
	u16 rd_cycle;
	int ret;

	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	ret = cxl_scrub_get_attrbs(ctx, &cap, &rd_cycle, &flags, NULL);
	if (ret)
		return ret;

	wr_cycle = CXL_GET_SCRUB_CYCLE(rd_cycle);
	flags = CXL_SET_SCRUB_EN(enable);

	return cxl_scrub_set_attrbs(dev, ctx, wr_cycle, flags);
}

static int cxl_patrol_scrub_get_min_scrub_cycle(struct device *dev,
						void *drv_data, u32 *min)
{
	struct cxl_patrol_scrub_context *ctx = drv_data;
	u8 cap, flags, min_cycle;
	u16 cycle;
	int ret;

	ret = cxl_scrub_get_attrbs(ctx, &cap, &cycle, &flags, &min_cycle);
	if (ret)
		return ret;

	*min = min_cycle * 3600;

	return 0;
}

static int cxl_patrol_scrub_get_max_scrub_cycle(struct device *dev,
						void *drv_data, u32 *max)
{
	*max = U8_MAX * 3600; /* Max set by register size */

	return 0;
}

static int cxl_patrol_scrub_get_scrub_cycle(struct device *dev, void *drv_data,
					    u32 *scrub_cycle_secs)
{
	struct cxl_patrol_scrub_context *ctx = drv_data;
	u8 cap, flags;
	u16 cycle;
	int ret;

	ret = cxl_scrub_get_attrbs(ctx, &cap, &cycle, &flags, NULL);
	if (ret)
		return ret;

	*scrub_cycle_secs = CXL_GET_SCRUB_CYCLE(cycle) * 3600;

	return 0;
}

static int cxl_patrol_scrub_set_scrub_cycle(struct device *dev, void *drv_data,
					    u32 scrub_cycle_secs)
{
	struct cxl_patrol_scrub_context *ctx = drv_data;
	u8 scrub_cycle_hours = scrub_cycle_secs / 3600;
	u8 cap, wr_cycle, flags, min_cycle;
	u16 rd_cycle;
	int ret;

	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	ret = cxl_scrub_get_attrbs(ctx, &cap, &rd_cycle, &flags, &min_cycle);
	if (ret)
		return ret;

	if (!CXL_GET_SCRUB_CYCLE_CHANGEABLE(cap))
		return -EOPNOTSUPP;

	if (scrub_cycle_hours < min_cycle) {
		dev_dbg(dev, "Invalid CXL patrol scrub cycle(%d) to set\n",
			scrub_cycle_hours);
		dev_dbg(dev,
			"Minimum supported CXL patrol scrub cycle in hours: %d\n",
			min_cycle);
		return -EINVAL;
	}
	wr_cycle = CXL_SET_SCRUB_CYCLE(scrub_cycle_hours);

	return cxl_scrub_set_attrbs(dev, ctx, wr_cycle, flags);
}

static const struct edac_scrub_ops cxl_ps_scrub_ops = {
	.get_enabled_bg = cxl_patrol_scrub_get_enabled_bg,
	.set_enabled_bg = cxl_patrol_scrub_set_enabled_bg,
	.get_min_cycle = cxl_patrol_scrub_get_min_scrub_cycle,
	.get_max_cycle = cxl_patrol_scrub_get_max_scrub_cycle,
	.get_cycle_duration = cxl_patrol_scrub_get_scrub_cycle,
	.set_cycle_duration = cxl_patrol_scrub_set_scrub_cycle,
};

static int cxl_memdev_scrub_init(struct cxl_memdev *cxlmd,
				 struct edac_dev_feature *ras_feature,
				 u8 scrub_inst)
{
	struct cxl_patrol_scrub_context *cxl_ps_ctx;
	struct cxl_feat_entry *feat_entry;
	u8 cap, flags;
	u16 cycle;
	int rc;

	feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
				      &CXL_FEAT_PATROL_SCRUB_UUID);
	if (IS_ERR(feat_entry))
		return -EOPNOTSUPP;

	if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
		return -EOPNOTSUPP;

	cxl_ps_ctx = devm_kzalloc(&cxlmd->dev, sizeof(*cxl_ps_ctx), GFP_KERNEL);
	if (!cxl_ps_ctx)
		return -ENOMEM;

	*cxl_ps_ctx = (struct cxl_patrol_scrub_context){
		.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
		.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
		.get_version = feat_entry->get_feat_ver,
		.set_version = feat_entry->set_feat_ver,
		.effects = le16_to_cpu(feat_entry->effects),
		.instance = scrub_inst,
		.cxlmd = cxlmd,
	};

	rc = cxl_mem_scrub_get_attrbs(&cxlmd->cxlds->cxl_mbox, &cap, &cycle,
				      &flags, NULL);
	if (rc)
		return rc;

	cxlmd->scrub_cycle = CXL_GET_SCRUB_CYCLE(cycle);
	cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;

	ras_feature->ft_type = RAS_FEAT_SCRUB;
	ras_feature->instance = cxl_ps_ctx->instance;
	ras_feature->scrub_ops = &cxl_ps_scrub_ops;
	ras_feature->ctx = cxl_ps_ctx;

	return 0;
}

static int cxl_region_scrub_init(struct cxl_region *cxlr,
				 struct edac_dev_feature *ras_feature,
				 u8 scrub_inst)
{
	struct cxl_patrol_scrub_context *cxl_ps_ctx;
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_feat_entry *feat_entry = NULL;
	struct cxl_memdev *cxlmd;
	u8 cap, flags;
	u16 cycle;
	int i, rc;

	/*
	 * The cxl_region_rwsem must be held if the code below is used in a
	 * context other than region probe. Here it runs at region probe time,
	 * so the lock is not required.
	 */
	for (i = 0; i < p->nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];

		cxlmd = cxled_to_memdev(cxled);
		feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
					      &CXL_FEAT_PATROL_SCRUB_UUID);
		if (IS_ERR(feat_entry))
			return -EOPNOTSUPP;

		if (!(le32_to_cpu(feat_entry->flags) &
		      CXL_FEATURE_F_CHANGEABLE))
			return -EOPNOTSUPP;

		rc = cxl_mem_scrub_get_attrbs(&cxlmd->cxlds->cxl_mbox, &cap,
					      &cycle, &flags, NULL);
		if (rc)
			return rc;

		cxlmd->scrub_cycle = CXL_GET_SCRUB_CYCLE(cycle);
		cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;
	}

	cxl_ps_ctx = devm_kzalloc(&cxlr->dev, sizeof(*cxl_ps_ctx), GFP_KERNEL);
	if (!cxl_ps_ctx)
		return -ENOMEM;

	*cxl_ps_ctx = (struct cxl_patrol_scrub_context){
		.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
		.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
		.get_version = feat_entry->get_feat_ver,
		.set_version = feat_entry->set_feat_ver,
		.effects = le16_to_cpu(feat_entry->effects),
		.instance = scrub_inst,
		.cxlr = cxlr,
	};

	ras_feature->ft_type = RAS_FEAT_SCRUB;
	ras_feature->instance = cxl_ps_ctx->instance;
	ras_feature->scrub_ops = &cxl_ps_scrub_ops;
	ras_feature->ctx = cxl_ps_ctx;

	return 0;
}

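/*
 * DDR5 Error Check Scrub (ECS) control. The Get Feature response carries one
 * cxl_ecs_fru_rd_attrbs entry per media FRU following the common log
 * capability byte, which is how num_media_frus is derived at init time.
 */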
struct cxl_ecs_context {
	u16 num_media_frus;
	u16 get_feat_size;
	u16 set_feat_size;
	u8 get_version;
	u8 set_version;
	u16 effects;
	struct cxl_memdev *cxlmd;
};

/*
 * See CXL spec rev 3.2 @8.2.10.9.11.2 Table 8-225 DDR5 ECS Control Feature
 * Readable Attributes.
 */
struct cxl_ecs_fru_rd_attrbs {
	u8 ecs_cap;
	__le16 ecs_config;
	u8 ecs_flags;
} __packed;

struct cxl_ecs_rd_attrbs {
	u8 ecs_log_cap;
	struct cxl_ecs_fru_rd_attrbs fru_attrbs[];
} __packed;

/*
 * See CXL spec rev 3.2 @8.2.10.9.11.2 Table 8-226 DDR5 ECS Control Feature
 * Writable Attributes.
 */
struct cxl_ecs_fru_wr_attrbs {
	__le16 ecs_config;
} __packed;

struct cxl_ecs_wr_attrbs {
	u8 ecs_log_cap;
	struct cxl_ecs_fru_wr_attrbs fru_attrbs[];
} __packed;

#define CXL_ECS_LOG_ENTRY_TYPE_MASK	GENMASK(1, 0)
#define CXL_ECS_REALTIME_REPORT_CAP_MASK BIT(0)
#define CXL_ECS_THRESHOLD_COUNT_MASK	GENMASK(2, 0)
#define CXL_ECS_COUNT_MODE_MASK		BIT(3)
#define CXL_ECS_RESET_COUNTER_MASK	BIT(4)
#define CXL_ECS_RESET_COUNTER		1

enum {
	ECS_THRESHOLD_256 = 256,
	ECS_THRESHOLD_1024 = 1024,
	ECS_THRESHOLD_4096 = 4096,
};

enum {
	ECS_THRESHOLD_IDX_256 = 3,
	ECS_THRESHOLD_IDX_1024 = 4,
	ECS_THRESHOLD_IDX_4096 = 5,
};

static const u16 ecs_supp_threshold[] = {
	[ECS_THRESHOLD_IDX_256] = 256,
	[ECS_THRESHOLD_IDX_1024] = 1024,
	[ECS_THRESHOLD_IDX_4096] = 4096,
};

enum {
	ECS_LOG_ENTRY_TYPE_DRAM = 0x0,
	ECS_LOG_ENTRY_TYPE_MEM_MEDIA_FRU = 0x1,
};

enum cxl_ecs_count_mode {
	ECS_MODE_COUNTS_ROWS = 0,
	ECS_MODE_COUNTS_CODEWORDS = 1,
};

static int cxl_mem_ecs_get_attrbs(struct device *dev,
				  struct cxl_ecs_context *cxl_ecs_ctx,
				  int fru_id, u8 *log_cap, u16 *config)
{
	struct cxl_memdev *cxlmd = cxl_ecs_ctx->cxlmd;
	struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
	struct cxl_ecs_fru_rd_attrbs *fru_rd_attrbs;
	size_t rd_data_size;
	size_t data_size;

	rd_data_size = cxl_ecs_ctx->get_feat_size;

	struct cxl_ecs_rd_attrbs *rd_attrbs __free(kvfree) =
		kvzalloc(rd_data_size, GFP_KERNEL);
	if (!rd_attrbs)
		return -ENOMEM;

	data_size = cxl_get_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
				    CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
				    rd_data_size, 0, NULL);
	if (!data_size)
		return -EIO;

	fru_rd_attrbs = rd_attrbs->fru_attrbs;
	*log_cap = rd_attrbs->ecs_log_cap;
	*config = le16_to_cpu(fru_rd_attrbs[fru_id].ecs_config);

	return 0;
}

static int cxl_mem_ecs_set_attrbs(struct device *dev,
				  struct cxl_ecs_context *cxl_ecs_ctx,
				  int fru_id, u8 log_cap, u16 config)
{
	struct cxl_memdev *cxlmd = cxl_ecs_ctx->cxlmd;
	struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
	struct cxl_ecs_fru_rd_attrbs *fru_rd_attrbs;
	struct cxl_ecs_fru_wr_attrbs *fru_wr_attrbs;
	size_t rd_data_size, wr_data_size;
	u16 num_media_frus, count;
	size_t data_size;

	num_media_frus = cxl_ecs_ctx->num_media_frus;
	rd_data_size = cxl_ecs_ctx->get_feat_size;
	wr_data_size = cxl_ecs_ctx->set_feat_size;
	struct cxl_ecs_rd_attrbs *rd_attrbs __free(kvfree) =
		kvzalloc(rd_data_size, GFP_KERNEL);
	if (!rd_attrbs)
		return -ENOMEM;

	data_size = cxl_get_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
				    CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
				    rd_data_size, 0, NULL);
	if (!data_size)
		return -EIO;

	struct cxl_ecs_wr_attrbs *wr_attrbs __free(kvfree) =
		kvzalloc(wr_data_size, GFP_KERNEL);
	if (!wr_attrbs)
		return -ENOMEM;

	/*
	 * Fill writable attributes from the current attributes read
	 * for all the media FRUs.
	 */
	fru_rd_attrbs = rd_attrbs->fru_attrbs;
	fru_wr_attrbs = wr_attrbs->fru_attrbs;
	wr_attrbs->ecs_log_cap = log_cap;
	for (count = 0; count < num_media_frus; count++)
		fru_wr_attrbs[count].ecs_config =
			fru_rd_attrbs[count].ecs_config;

	fru_wr_attrbs[fru_id].ecs_config = cpu_to_le16(config);

	return cxl_set_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
			       cxl_ecs_ctx->set_version, wr_attrbs,
			       wr_data_size,
			       CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET,
			       0, NULL);
}

static u8 cxl_get_ecs_log_entry_type(u8 log_cap, u16 config)
{
	return FIELD_GET(CXL_ECS_LOG_ENTRY_TYPE_MASK, log_cap);
}

static u16 cxl_get_ecs_threshold(u8 log_cap, u16 config)
{
	u8 index = FIELD_GET(CXL_ECS_THRESHOLD_COUNT_MASK, config);

	return ecs_supp_threshold[index];
}

static u8 cxl_get_ecs_count_mode(u8 log_cap, u16 config)
{
	return FIELD_GET(CXL_ECS_COUNT_MODE_MASK, config);
}

#define CXL_ECS_GET_ATTR(attrb)                                            \
	static int cxl_ecs_get_##attrb(struct device *dev, void *drv_data, \
				       int fru_id, u32 *val)               \
	{                                                                  \
		struct cxl_ecs_context *ctx = drv_data;                    \
		u8 log_cap;                                                \
		u16 config;                                                \
		int ret;                                                   \
									   \
		ret = cxl_mem_ecs_get_attrbs(dev, ctx, fru_id, &log_cap,   \
					     &config);                     \
		if (ret)                                                   \
			return ret;                                        \
									   \
		*val = cxl_get_ecs_##attrb(log_cap, config);               \
									   \
		return 0;                                                  \
	}

CXL_ECS_GET_ATTR(log_entry_type)
CXL_ECS_GET_ATTR(count_mode)
CXL_ECS_GET_ATTR(threshold)

static int cxl_set_ecs_log_entry_type(struct device *dev, u8 *log_cap,
				      u16 *config, u32 val)
{
	if (val != ECS_LOG_ENTRY_TYPE_DRAM &&
	    val != ECS_LOG_ENTRY_TYPE_MEM_MEDIA_FRU)
		return -EINVAL;

	*log_cap = FIELD_PREP(CXL_ECS_LOG_ENTRY_TYPE_MASK, val);

	return 0;
}

static int cxl_set_ecs_threshold(struct device *dev, u8 *log_cap, u16 *config,
				 u32 val)
{
	*config &= ~CXL_ECS_THRESHOLD_COUNT_MASK;

	switch (val) {
	case ECS_THRESHOLD_256:
		*config |= FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK,
				      ECS_THRESHOLD_IDX_256);
		break;
	case ECS_THRESHOLD_1024:
		*config |= FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK,
				      ECS_THRESHOLD_IDX_1024);
		break;
	case ECS_THRESHOLD_4096:
		*config |= FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK,
				      ECS_THRESHOLD_IDX_4096);
		break;
	default:
		dev_dbg(dev, "Invalid CXL ECS threshold count(%d) to set\n",
			val);
		dev_dbg(dev, "Supported ECS threshold counts: %u, %u, %u\n",
			ECS_THRESHOLD_256, ECS_THRESHOLD_1024,
			ECS_THRESHOLD_4096);
		return -EINVAL;
	}

	return 0;
}

static int cxl_set_ecs_count_mode(struct device *dev, u8 *log_cap, u16 *config,
				  u32 val)
{
	if (val != ECS_MODE_COUNTS_ROWS && val != ECS_MODE_COUNTS_CODEWORDS) {
		dev_dbg(dev, "Invalid CXL ECS scrub mode(%d) to set\n", val);
		dev_dbg(dev,
			"Supported ECS Modes: 0: ECS counts rows with errors, 1: ECS counts codewords with errors\n");
		return -EINVAL;
	}

	*config &= ~CXL_ECS_COUNT_MODE_MASK;
	*config |= FIELD_PREP(CXL_ECS_COUNT_MODE_MASK, val);

	return 0;
}

static int cxl_set_ecs_reset_counter(struct device *dev, u8 *log_cap,
				     u16 *config, u32 val)
{
	if (val != CXL_ECS_RESET_COUNTER)
		return -EINVAL;

	*config &= ~CXL_ECS_RESET_COUNTER_MASK;
	*config |= FIELD_PREP(CXL_ECS_RESET_COUNTER_MASK, val);

	return 0;
}

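/*
 * CXL_ECS_SET_ATTR() below expands to one EDAC setter per writable ECS
 * attribute (cxl_ecs_set_threshold(), cxl_ecs_set_count_mode(), ...), each
 * doing a privileged read-modify-write of the selected FRU's config word
 * through the helpers above.
 */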
#define CXL_ECS_SET_ATTR(attrb)                                               \
	static int cxl_ecs_set_##attrb(struct device *dev, void *drv_data,   \
				       int fru_id, u32 val)                   \
	{                                                                     \
		struct cxl_ecs_context *ctx = drv_data;                       \
		u8 log_cap;                                                   \
		u16 config;                                                   \
		int ret;                                                      \
									      \
		if (!capable(CAP_SYS_RAWIO))                                  \
			return -EPERM;                                        \
									      \
		ret = cxl_mem_ecs_get_attrbs(dev, ctx, fru_id, &log_cap,      \
					     &config);                        \
		if (ret)                                                      \
			return ret;                                           \
									      \
		ret = cxl_set_ecs_##attrb(dev, &log_cap, &config, val);       \
		if (ret)                                                      \
			return ret;                                           \
									      \
		return cxl_mem_ecs_set_attrbs(dev, ctx, fru_id, log_cap,      \
					      config);                        \
	}
CXL_ECS_SET_ATTR(log_entry_type)
CXL_ECS_SET_ATTR(count_mode)
CXL_ECS_SET_ATTR(reset_counter)
CXL_ECS_SET_ATTR(threshold)

static const struct edac_ecs_ops cxl_ecs_ops = {
	.get_log_entry_type = cxl_ecs_get_log_entry_type,
	.set_log_entry_type = cxl_ecs_set_log_entry_type,
	.get_mode = cxl_ecs_get_count_mode,
	.set_mode = cxl_ecs_set_count_mode,
	.reset = cxl_ecs_set_reset_counter,
	.get_threshold = cxl_ecs_get_threshold,
	.set_threshold = cxl_ecs_set_threshold,
};

static int cxl_memdev_ecs_init(struct cxl_memdev *cxlmd,
			       struct edac_dev_feature *ras_feature)
{
	struct cxl_ecs_context *cxl_ecs_ctx;
	struct cxl_feat_entry *feat_entry;
	int num_media_frus;

	feat_entry =
		cxl_feature_info(to_cxlfs(cxlmd->cxlds), &CXL_FEAT_ECS_UUID);
	if (IS_ERR(feat_entry))
		return -EOPNOTSUPP;

	if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
		return -EOPNOTSUPP;

	num_media_frus = (le16_to_cpu(feat_entry->get_feat_size) -
			  sizeof(struct cxl_ecs_rd_attrbs)) /
			 sizeof(struct cxl_ecs_fru_rd_attrbs);
	if (!num_media_frus)
		return -EOPNOTSUPP;

	cxl_ecs_ctx =
		devm_kzalloc(&cxlmd->dev, sizeof(*cxl_ecs_ctx), GFP_KERNEL);
	if (!cxl_ecs_ctx)
		return -ENOMEM;

	*cxl_ecs_ctx = (struct cxl_ecs_context){
		.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
		.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
		.get_version = feat_entry->get_feat_ver,
		.set_version = feat_entry->set_feat_ver,
		.effects = le16_to_cpu(feat_entry->effects),
		.num_media_frus = num_media_frus,
		.cxlmd = cxlmd,
	};

	ras_feature->ft_type = RAS_FEAT_ECS;
	ras_feature->ecs_ops = &cxl_ecs_ops;
	ras_feature->ctx = cxl_ecs_ctx;
	ras_feature->ecs_info.num_media_frus = num_media_frus;

	return 0;
}

/*
 * Perform Maintenance CXL 3.2 Spec 8.2.10.7.1
 */

/*
 * Perform Maintenance input payload
 * CXL rev 3.2 section 8.2.10.7.1 Table 8-117
 */
struct cxl_mbox_maintenance_hdr {
	u8 op_class;
	u8 op_subclass;
} __packed;

static int cxl_perform_maintenance(struct cxl_mailbox *cxl_mbox, u8 class,
				   u8 subclass, void *data_in,
				   size_t data_in_size)
{
	struct cxl_memdev_maintenance_pi {
		struct cxl_mbox_maintenance_hdr hdr;
		u8 data[];
	} __packed;
	struct cxl_mbox_cmd mbox_cmd;
	size_t hdr_size;

	struct cxl_memdev_maintenance_pi *pi __free(kvfree) =
		kvzalloc(cxl_mbox->payload_size, GFP_KERNEL);
	if (!pi)
		return -ENOMEM;

	pi->hdr.op_class = class;
	pi->hdr.op_subclass = subclass;
	hdr_size = sizeof(pi->hdr);
	/*
	 * Check that the mailbox payload is large enough for the
	 * maintenance data transfer.
	 */
	if (hdr_size + data_in_size > cxl_mbox->payload_size)
		return -ENOMEM;

	memcpy(pi->data, data_in, data_in_size);
	mbox_cmd = (struct cxl_mbox_cmd){
		.opcode = CXL_MBOX_OP_DO_MAINTENANCE,
		.size_in = hdr_size + data_in_size,
		.payload_in = pi,
	};

	return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
}

/*
 * Support for checking whether the attributes of a memory repair operation
 * match an error record captured during the current boot.
 */

struct cxl_mem_err_rec {
	struct xarray rec_gen_media;
	struct xarray rec_dram;
};

enum cxl_mem_repair_type {
	CXL_PPR,
	CXL_CACHELINE_SPARING,
	CXL_ROW_SPARING,
	CXL_BANK_SPARING,
	CXL_RANK_SPARING,
	CXL_REPAIR_MAX,
};

/**
 * struct cxl_mem_repair_attrbs - CXL memory repair attributes
 * @dpa: DPA of memory to repair
 * @nibble_mask: nibble mask, identifies one or more nibbles on the memory bus
 * @row: row of memory to repair
 * @column: column of memory to repair
 * @channel: channel of memory to repair
 * @sub_channel: sub channel of memory to repair
 * @rank: rank of memory to repair
 * @bank_group: bank group of memory to repair
 * @bank: bank of memory to repair
 * @repair_type: repair type, e.g. PPR, memory sparing, etc.
 */
struct cxl_mem_repair_attrbs {
	u64 dpa;
	u32 nibble_mask;
	u32 row;
	u16 column;
	u8 channel;
	u8 sub_channel;
	u8 rank;
	u8 bank_group;
	u8 bank;
	enum cxl_mem_repair_type repair_type;
};

static struct cxl_event_gen_media *
cxl_find_rec_gen_media(struct cxl_memdev *cxlmd,
		       struct cxl_mem_repair_attrbs *attrbs)
{
	struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
	struct cxl_event_gen_media *rec;

	if (!array_rec)
		return NULL;

	rec = xa_load(&array_rec->rec_gen_media, attrbs->dpa);
	if (!rec)
		return NULL;

	if (attrbs->repair_type == CXL_PPR)
		return rec;

	return NULL;
}

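/*
 * Match a cached DRAM event record against the requested repair attributes.
 * Channel and rank validity are mandatory, each sparing granularity
 * additionally requires its address fields to be valid and equal, and
 * optional fields such as the nibble mask or sub-channel are only compared
 * when the record marks them valid.
 */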
static struct cxl_event_dram *
cxl_find_rec_dram(struct cxl_memdev *cxlmd,
		  struct cxl_mem_repair_attrbs *attrbs)
{
	struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
	struct cxl_event_dram *rec;
	u16 validity_flags;

	if (!array_rec)
		return NULL;

	rec = xa_load(&array_rec->rec_dram, attrbs->dpa);
	if (!rec)
		return NULL;

	validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags);
	if (!(validity_flags & CXL_DER_VALID_CHANNEL) ||
	    !(validity_flags & CXL_DER_VALID_RANK))
		return NULL;

	switch (attrbs->repair_type) {
	case CXL_PPR:
		if (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
		    get_unaligned_le24(rec->nibble_mask) == attrbs->nibble_mask)
			return rec;
		break;
	case CXL_CACHELINE_SPARING:
		if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
		    !(validity_flags & CXL_DER_VALID_BANK) ||
		    !(validity_flags & CXL_DER_VALID_ROW) ||
		    !(validity_flags & CXL_DER_VALID_COLUMN))
			return NULL;

		if (rec->media_hdr.channel == attrbs->channel &&
		    rec->media_hdr.rank == attrbs->rank &&
		    rec->bank_group == attrbs->bank_group &&
		    rec->bank == attrbs->bank &&
		    get_unaligned_le24(rec->row) == attrbs->row &&
		    get_unaligned_le16(rec->column) == attrbs->column &&
		    (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
		     get_unaligned_le24(rec->nibble_mask) ==
			     attrbs->nibble_mask) &&
		    (!(validity_flags & CXL_DER_VALID_SUB_CHANNEL) ||
		     rec->sub_channel == attrbs->sub_channel))
			return rec;
		break;
	case CXL_ROW_SPARING:
		if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
		    !(validity_flags & CXL_DER_VALID_BANK) ||
		    !(validity_flags & CXL_DER_VALID_ROW))
			return NULL;

		if (rec->media_hdr.channel == attrbs->channel &&
		    rec->media_hdr.rank == attrbs->rank &&
		    rec->bank_group == attrbs->bank_group &&
		    rec->bank == attrbs->bank &&
		    get_unaligned_le24(rec->row) == attrbs->row &&
		    (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
		     get_unaligned_le24(rec->nibble_mask) ==
			     attrbs->nibble_mask))
			return rec;
		break;
	case CXL_BANK_SPARING:
		if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
		    !(validity_flags & CXL_DER_VALID_BANK))
			return NULL;

		if (rec->media_hdr.channel == attrbs->channel &&
		    rec->media_hdr.rank == attrbs->rank &&
		    rec->bank_group == attrbs->bank_group &&
		    rec->bank == attrbs->bank &&
		    (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
		     get_unaligned_le24(rec->nibble_mask) ==
			     attrbs->nibble_mask))
			return rec;
		break;
	case CXL_RANK_SPARING:
		if (rec->media_hdr.channel == attrbs->channel &&
		    rec->media_hdr.rank == attrbs->rank &&
		    (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
		     get_unaligned_le24(rec->nibble_mask) ==
			     attrbs->nibble_mask))
			return rec;
		break;
	default:
		return NULL;
	}

	return NULL;
}

#define CXL_MAX_STORAGE_DAYS 10
#define CXL_MAX_STORAGE_TIME_SECS (CXL_MAX_STORAGE_DAYS * 24 * 60 * 60)

static void cxl_del_expired_gmedia_recs(struct xarray *rec_xarray,
					struct cxl_event_gen_media *cur_rec)
{
	u64 cur_ts = le64_to_cpu(cur_rec->media_hdr.hdr.timestamp);
	struct cxl_event_gen_media *rec;
	unsigned long index;
	u64 delta_ts_secs;

	xa_for_each(rec_xarray, index, rec) {
		delta_ts_secs = (cur_ts -
			le64_to_cpu(rec->media_hdr.hdr.timestamp)) / 1000000000ULL;
		if (delta_ts_secs >= CXL_MAX_STORAGE_TIME_SECS) {
			xa_erase(rec_xarray, index);
			kfree(rec);
		}
	}
}

static void cxl_del_expired_dram_recs(struct xarray *rec_xarray,
				      struct cxl_event_dram *cur_rec)
{
	u64 cur_ts = le64_to_cpu(cur_rec->media_hdr.hdr.timestamp);
	struct cxl_event_dram *rec;
	unsigned long index;
	u64 delta_secs;

	xa_for_each(rec_xarray, index, rec) {
		delta_secs = (cur_ts -
			le64_to_cpu(rec->media_hdr.hdr.timestamp)) / 1000000000ULL;
		if (delta_secs >= CXL_MAX_STORAGE_TIME_SECS) {
			xa_erase(rec_xarray, index);
			kfree(rec);
		}
	}
}

#define CXL_MAX_REC_STORAGE_COUNT 200

static void cxl_del_overflow_old_recs(struct xarray *rec_xarray)
{
	void *err_rec;
	unsigned long index, count = 0;

	xa_for_each(rec_xarray, index, err_rec)
		count++;

	if (count <= CXL_MAX_REC_STORAGE_COUNT)
		return;

	count -= CXL_MAX_REC_STORAGE_COUNT;
	xa_for_each(rec_xarray, index, err_rec) {
		xa_erase(rec_xarray, index);
		kfree(err_rec);
		count--;
		if (!count)
			break;
	}
}

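/**
 * cxl_store_rec_gen_media() - Cache a General Media event record
 * @cxlmd: CXL memory device that reported the event
 * @evt: event payload received from the device
 *
 * Records are stored per DPA so that a later repair request can be validated
 * against errors seen during the current boot. Expired and overflowing
 * entries are pruned on every insert.
 *
 * Returns 0 on success or a negative error code.
 */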
int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, union cxl_event *evt)
{
	struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
	struct cxl_event_gen_media *rec;
	void *old_rec;

	if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
		return 0;

	rec = kmemdup(&evt->gen_media, sizeof(*rec), GFP_KERNEL);
	if (!rec)
		return -ENOMEM;

	old_rec = xa_store(&array_rec->rec_gen_media,
			   le64_to_cpu(rec->media_hdr.phys_addr), rec,
			   GFP_KERNEL);
	if (xa_is_err(old_rec))
		return xa_err(old_rec);

	kfree(old_rec);

	cxl_del_expired_gmedia_recs(&array_rec->rec_gen_media, rec);
	cxl_del_overflow_old_recs(&array_rec->rec_gen_media);

	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_store_rec_gen_media, "CXL");

int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt)
{
	struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
	struct cxl_event_dram *rec;
	void *old_rec;

	if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
		return 0;

	rec = kmemdup(&evt->dram, sizeof(*rec), GFP_KERNEL);
	if (!rec)
		return -ENOMEM;

	old_rec = xa_store(&array_rec->rec_dram,
			   le64_to_cpu(rec->media_hdr.phys_addr), rec,
			   GFP_KERNEL);
	if (xa_is_err(old_rec))
		return xa_err(old_rec);

	kfree(old_rec);

	cxl_del_expired_dram_recs(&array_rec->rec_dram, rec);
	cxl_del_overflow_old_recs(&array_rec->rec_dram);

	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_store_rec_dram, "CXL");

static bool cxl_is_memdev_memory_online(const struct cxl_memdev *cxlmd)
{
	struct cxl_port *port = cxlmd->endpoint;

	if (port && cxl_num_decoders_committed(port))
		return true;

	return false;
}

/*
 * CXL memory sparing control
 */
enum cxl_mem_sparing_granularity {
	CXL_MEM_SPARING_CACHELINE,
	CXL_MEM_SPARING_ROW,
	CXL_MEM_SPARING_BANK,
	CXL_MEM_SPARING_RANK,
	CXL_MEM_SPARING_MAX
};

struct cxl_mem_sparing_context {
	struct cxl_memdev *cxlmd;
	uuid_t repair_uuid;
	u16 get_feat_size;
	u16 set_feat_size;
	u16 effects;
	u8 instance;
	u8 get_version;
	u8 set_version;
	u8 op_class;
	u8 op_subclass;
	bool cap_safe_when_in_use;
	bool cap_hard_sparing;
	bool cap_soft_sparing;
	u8 channel;
	u8 rank;
	u8 bank_group;
	u32 nibble_mask;
	u64 dpa;
	u32 row;
	u16 column;
	u8 bank;
	u8 sub_channel;
	enum edac_mem_repair_type repair_type;
	bool persist_mode;
};

#define CXL_SPARING_RD_CAP_SAFE_IN_USE_MASK	BIT(0)
#define CXL_SPARING_RD_CAP_HARD_SPARING_MASK	BIT(1)
#define CXL_SPARING_RD_CAP_SOFT_SPARING_MASK	BIT(2)

#define CXL_SPARING_WR_DEVICE_INITIATED_MASK	BIT(0)

#define CXL_SPARING_QUERY_RESOURCE_FLAG		BIT(0)
#define CXL_SET_HARD_SPARING_FLAG		BIT(1)
#define CXL_SPARING_SUB_CHNL_VALID_FLAG		BIT(2)
#define CXL_SPARING_NIB_MASK_VALID_FLAG		BIT(3)

#define CXL_GET_SPARING_SAFE_IN_USE(flags) \
	(FIELD_GET(CXL_SPARING_RD_CAP_SAFE_IN_USE_MASK, flags) ^ 1)
#define CXL_GET_CAP_HARD_SPARING(flags) \
	FIELD_GET(CXL_SPARING_RD_CAP_HARD_SPARING_MASK, flags)
#define CXL_GET_CAP_SOFT_SPARING(flags) \
	FIELD_GET(CXL_SPARING_RD_CAP_SOFT_SPARING_MASK, flags)

#define CXL_SET_SPARING_QUERY_RESOURCE(val) \
	FIELD_PREP(CXL_SPARING_QUERY_RESOURCE_FLAG, val)
#define CXL_SET_HARD_SPARING(val) \
	FIELD_PREP(CXL_SET_HARD_SPARING_FLAG, val)
#define CXL_SET_SPARING_SUB_CHNL_VALID(val) \
	FIELD_PREP(CXL_SPARING_SUB_CHNL_VALID_FLAG, val)
#define CXL_SET_SPARING_NIB_MASK_VALID(val) \
	FIELD_PREP(CXL_SPARING_NIB_MASK_VALID_FLAG, val)

/*
 * See CXL spec rev 3.2 @8.2.10.7.2.3 Table 8-134 Memory Sparing Feature
 * Readable Attributes.
 */
struct cxl_memdev_repair_rd_attrbs_hdr {
	u8 max_op_latency;
	__le16 op_cap;
	__le16 op_mode;
	u8 op_class;
	u8 op_subclass;
	u8 rsvd[9];
} __packed;

struct cxl_memdev_sparing_rd_attrbs {
	struct cxl_memdev_repair_rd_attrbs_hdr hdr;
	u8 rsvd;
	__le16 restriction_flags;
} __packed;

/*
 * See CXL spec rev 3.2 @8.2.10.7.1.4 Table 8-120 Memory Sparing Input Payload.
 */
struct cxl_memdev_sparing_in_payload {
	u8 flags;
	u8 channel;
	u8 rank;
	u8 nibble_mask[3];
	u8 bank_group;
	u8 bank;
	u8 row[3];
	__le16 column;
	u8 sub_channel;
} __packed;

static int
cxl_mem_sparing_get_attrbs(struct cxl_mem_sparing_context *cxl_sparing_ctx)
{
	size_t rd_data_size = sizeof(struct cxl_memdev_sparing_rd_attrbs);
	struct cxl_memdev *cxlmd = cxl_sparing_ctx->cxlmd;
	struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
	u16 restriction_flags;
	size_t data_size;
	u16 return_code;
	struct cxl_memdev_sparing_rd_attrbs *rd_attrbs __free(kfree) =
		kzalloc(rd_data_size, GFP_KERNEL);
	if (!rd_attrbs)
		return -ENOMEM;

	data_size = cxl_get_feature(cxl_mbox, &cxl_sparing_ctx->repair_uuid,
				    CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
				    rd_data_size, 0, &return_code);
	if (!data_size)
		return -EIO;

	cxl_sparing_ctx->op_class = rd_attrbs->hdr.op_class;
	cxl_sparing_ctx->op_subclass = rd_attrbs->hdr.op_subclass;
	restriction_flags = le16_to_cpu(rd_attrbs->restriction_flags);
	cxl_sparing_ctx->cap_safe_when_in_use =
		CXL_GET_SPARING_SAFE_IN_USE(restriction_flags);
	cxl_sparing_ctx->cap_hard_sparing =
		CXL_GET_CAP_HARD_SPARING(restriction_flags);
	cxl_sparing_ctx->cap_soft_sparing =
		CXL_GET_CAP_SOFT_SPARING(restriction_flags);

	return 0;
}

static struct cxl_event_dram *
cxl_mem_get_rec_dram(struct cxl_memdev *cxlmd,
		     struct cxl_mem_sparing_context *ctx)
{
	struct cxl_mem_repair_attrbs attrbs = { 0 };

	attrbs.dpa = ctx->dpa;
	attrbs.channel = ctx->channel;
	attrbs.rank = ctx->rank;
	attrbs.nibble_mask = ctx->nibble_mask;
	switch (ctx->repair_type) {
	case EDAC_REPAIR_CACHELINE_SPARING:
		attrbs.repair_type = CXL_CACHELINE_SPARING;
		attrbs.bank_group = ctx->bank_group;
		attrbs.bank = ctx->bank;
		attrbs.row = ctx->row;
		attrbs.column = ctx->column;
		attrbs.sub_channel = ctx->sub_channel;
		break;
	case EDAC_REPAIR_ROW_SPARING:
		attrbs.repair_type = CXL_ROW_SPARING;
		attrbs.bank_group = ctx->bank_group;
		attrbs.bank = ctx->bank;
		attrbs.row = ctx->row;
		break;
	case EDAC_REPAIR_BANK_SPARING:
		attrbs.repair_type = CXL_BANK_SPARING;
		attrbs.bank_group = ctx->bank_group;
		attrbs.bank = ctx->bank;
		break;
	case EDAC_REPAIR_RANK_SPARING:
		attrbs.repair_type = CXL_RANK_SPARING;
		break;
	default:
		return NULL;
	}

	return cxl_find_rec_dram(cxlmd, &attrbs);
}

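/*
 * Issue the memory sparing maintenance operation. When the device cannot
 * spare safely while the memory is in use, the backing memory must be
 * offline; otherwise the requested address must match a DRAM error record
 * captured during the current boot.
 */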
static int
cxl_mem_perform_sparing(struct device *dev,
			struct cxl_mem_sparing_context *cxl_sparing_ctx)
{
	struct cxl_memdev *cxlmd = cxl_sparing_ctx->cxlmd;
	struct cxl_memdev_sparing_in_payload sparing_pi;
	struct cxl_event_dram *rec = NULL;
	u16 validity_flags = 0;

	struct rw_semaphore *region_lock __free(rwsem_read_release) =
		rwsem_read_intr_acquire(&cxl_region_rwsem);
	if (!region_lock)
		return -EINTR;

	struct rw_semaphore *dpa_lock __free(rwsem_read_release) =
		rwsem_read_intr_acquire(&cxl_dpa_rwsem);
	if (!dpa_lock)
		return -EINTR;

	if (!cxl_sparing_ctx->cap_safe_when_in_use) {
		/* Memory to repair must be offline */
		if (cxl_is_memdev_memory_online(cxlmd))
			return -EBUSY;
	} else {
		if (cxl_is_memdev_memory_online(cxlmd)) {
			rec = cxl_mem_get_rec_dram(cxlmd, cxl_sparing_ctx);
			if (!rec)
				return -EINVAL;

			if (!get_unaligned_le16(rec->media_hdr.validity_flags))
				return -EINVAL;
		}
	}

	memset(&sparing_pi, 0, sizeof(sparing_pi));
	sparing_pi.flags = CXL_SET_SPARING_QUERY_RESOURCE(0);
	if (cxl_sparing_ctx->persist_mode)
		sparing_pi.flags |= CXL_SET_HARD_SPARING(1);

	if (rec)
		validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags);

	switch (cxl_sparing_ctx->repair_type) {
	case EDAC_REPAIR_CACHELINE_SPARING:
		sparing_pi.column = cpu_to_le16(cxl_sparing_ctx->column);
		if (!rec || (validity_flags & CXL_DER_VALID_SUB_CHANNEL)) {
			sparing_pi.flags |= CXL_SET_SPARING_SUB_CHNL_VALID(1);
			sparing_pi.sub_channel = cxl_sparing_ctx->sub_channel;
		}
		fallthrough;
	case EDAC_REPAIR_ROW_SPARING:
		put_unaligned_le24(cxl_sparing_ctx->row, sparing_pi.row);
		fallthrough;
	case EDAC_REPAIR_BANK_SPARING:
		sparing_pi.bank_group = cxl_sparing_ctx->bank_group;
		sparing_pi.bank = cxl_sparing_ctx->bank;
		fallthrough;
	case EDAC_REPAIR_RANK_SPARING:
		sparing_pi.rank = cxl_sparing_ctx->rank;
		fallthrough;
	default:
		sparing_pi.channel = cxl_sparing_ctx->channel;
		if ((rec && (validity_flags & CXL_DER_VALID_NIBBLE)) ||
		    (!rec && (!cxl_sparing_ctx->nibble_mask ||
			      (cxl_sparing_ctx->nibble_mask & 0xFFFFFF)))) {
			sparing_pi.flags |= CXL_SET_SPARING_NIB_MASK_VALID(1);
			put_unaligned_le24(cxl_sparing_ctx->nibble_mask,
					   sparing_pi.nibble_mask);
		}
		break;
	}

	return cxl_perform_maintenance(&cxlmd->cxlds->cxl_mbox,
				       cxl_sparing_ctx->op_class,
				       cxl_sparing_ctx->op_subclass,
				       &sparing_pi, sizeof(sparing_pi));
}

static int cxl_mem_sparing_get_repair_type(struct device *dev, void *drv_data,
					   const char **repair_type)
{
	struct cxl_mem_sparing_context *ctx = drv_data;

	switch (ctx->repair_type) {
	case EDAC_REPAIR_CACHELINE_SPARING:
	case EDAC_REPAIR_ROW_SPARING:
	case EDAC_REPAIR_BANK_SPARING:
	case EDAC_REPAIR_RANK_SPARING:
		*repair_type = edac_repair_type[ctx->repair_type];
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

#define CXL_SPARING_GET_ATTR(attrb, data_type)                       \
	static int cxl_mem_sparing_get_##attrb(                      \
		struct device *dev, void *drv_data, data_type *val)  \
	{                                                            \
		struct cxl_mem_sparing_context *ctx = drv_data;      \
								     \
		*val = ctx->attrb;                                   \
								     \
		return 0;                                            \
	}
CXL_SPARING_GET_ATTR(persist_mode, bool)
CXL_SPARING_GET_ATTR(dpa, u64)
CXL_SPARING_GET_ATTR(nibble_mask, u32)
CXL_SPARING_GET_ATTR(bank_group, u32)
CXL_SPARING_GET_ATTR(bank, u32)
CXL_SPARING_GET_ATTR(rank, u32)
CXL_SPARING_GET_ATTR(row, u32)
CXL_SPARING_GET_ATTR(column, u32)
CXL_SPARING_GET_ATTR(channel, u32)
CXL_SPARING_GET_ATTR(sub_channel, u32)

#define CXL_SPARING_SET_ATTR(attrb, data_type)                       \
	static int cxl_mem_sparing_set_##attrb(struct device *dev,  \
					       void *drv_data,       \
					       data_type val)        \
	{                                                            \
		struct cxl_mem_sparing_context *ctx = drv_data;      \
								     \
		ctx->attrb = val;                                    \
								     \
		return 0;                                            \
	}
CXL_SPARING_SET_ATTR(nibble_mask, u32)
CXL_SPARING_SET_ATTR(bank_group, u32)
CXL_SPARING_SET_ATTR(bank, u32)
CXL_SPARING_SET_ATTR(rank, u32)
CXL_SPARING_SET_ATTR(row, u32)
CXL_SPARING_SET_ATTR(column, u32)
CXL_SPARING_SET_ATTR(channel, u32)
CXL_SPARING_SET_ATTR(sub_channel, u32)

static int cxl_mem_sparing_set_persist_mode(struct device *dev, void *drv_data,
					    bool persist_mode)
{
	struct cxl_mem_sparing_context *ctx = drv_data;

	if ((persist_mode && ctx->cap_hard_sparing) ||
	    (!persist_mode && ctx->cap_soft_sparing))
		ctx->persist_mode = persist_mode;
	else
		return -EOPNOTSUPP;

	return 0;
}

static int cxl_get_mem_sparing_safe_when_in_use(struct device *dev,
						void *drv_data, bool *safe)
{
	struct cxl_mem_sparing_context *ctx = drv_data;

	*safe = ctx->cap_safe_when_in_use;

	return 0;
}

static int cxl_mem_sparing_get_min_dpa(struct device *dev, void *drv_data,
				       u64 *min_dpa)
{
	struct cxl_mem_sparing_context *ctx = drv_data;
	struct cxl_memdev *cxlmd = ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	*min_dpa = cxlds->dpa_res.start;

	return 0;
}

static int cxl_mem_sparing_get_max_dpa(struct device *dev, void *drv_data,
				       u64 *max_dpa)
{
	struct cxl_mem_sparing_context *ctx = drv_data;
	struct cxl_memdev *cxlmd = ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	*max_dpa = cxlds->dpa_res.end;

	return 0;
}

static int cxl_mem_sparing_set_dpa(struct device *dev, void *drv_data, u64 dpa)
{
	struct cxl_mem_sparing_context *ctx = drv_data;
	struct cxl_memdev *cxlmd = ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end)
		return -EINVAL;

	ctx->dpa = dpa;

	return 0;
}

static int cxl_do_mem_sparing(struct device *dev, void *drv_data, u32 val)
{
	struct cxl_mem_sparing_context *ctx = drv_data;

	if (val != EDAC_DO_MEM_REPAIR)
		return -EINVAL;

	return cxl_mem_perform_sparing(dev, ctx);
}

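/*
 * The sparing ops are composed incrementally: RANK_OPS is the common core,
 * BANK_OPS adds the bank and bank group controls, ROW_OPS adds the row, and
 * CACHELINE_OPS adds the column and sub-channel, mirroring the address
 * fields each sparing granularity needs.
 */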
#define RANK_OPS                                                              \
	.get_repair_type = cxl_mem_sparing_get_repair_type,                  \
	.get_persist_mode = cxl_mem_sparing_get_persist_mode,                \
	.set_persist_mode = cxl_mem_sparing_set_persist_mode,                \
	.get_repair_safe_when_in_use = cxl_get_mem_sparing_safe_when_in_use, \
	.get_min_dpa = cxl_mem_sparing_get_min_dpa,                          \
	.get_max_dpa = cxl_mem_sparing_get_max_dpa,                          \
	.get_dpa = cxl_mem_sparing_get_dpa,                                  \
	.set_dpa = cxl_mem_sparing_set_dpa,                                  \
	.get_nibble_mask = cxl_mem_sparing_get_nibble_mask,                  \
	.set_nibble_mask = cxl_mem_sparing_set_nibble_mask,                  \
	.get_rank = cxl_mem_sparing_get_rank,                                \
	.set_rank = cxl_mem_sparing_set_rank,                                \
	.get_channel = cxl_mem_sparing_get_channel,                          \
	.set_channel = cxl_mem_sparing_set_channel,                          \
	.do_repair = cxl_do_mem_sparing

#define BANK_OPS                                                    \
	RANK_OPS, .get_bank_group = cxl_mem_sparing_get_bank_group, \
	.set_bank_group = cxl_mem_sparing_set_bank_group,           \
	.get_bank = cxl_mem_sparing_get_bank,                       \
	.set_bank = cxl_mem_sparing_set_bank

#define ROW_OPS                                       \
	BANK_OPS, .get_row = cxl_mem_sparing_get_row, \
	.set_row = cxl_mem_sparing_set_row

#define CACHELINE_OPS                                        \
	ROW_OPS, .get_column = cxl_mem_sparing_get_column,   \
	.set_column = cxl_mem_sparing_set_column,            \
	.get_sub_channel = cxl_mem_sparing_get_sub_channel,  \
	.set_sub_channel = cxl_mem_sparing_set_sub_channel

static const struct edac_mem_repair_ops cxl_rank_sparing_ops = {
	RANK_OPS,
};

static const struct edac_mem_repair_ops cxl_bank_sparing_ops = {
	BANK_OPS,
};

static const struct edac_mem_repair_ops cxl_row_sparing_ops = {
	ROW_OPS,
};

static const struct edac_mem_repair_ops cxl_cacheline_sparing_ops = {
	CACHELINE_OPS,
};

struct cxl_mem_sparing_desc {
	const uuid_t repair_uuid;
	enum edac_mem_repair_type repair_type;
	const struct edac_mem_repair_ops *repair_ops;
};

static const struct cxl_mem_sparing_desc mem_sparing_desc[] = {
	{
		.repair_uuid = CXL_FEAT_CACHELINE_SPARING_UUID,
		.repair_type = EDAC_REPAIR_CACHELINE_SPARING,
		.repair_ops = &cxl_cacheline_sparing_ops,
	},
	{
		.repair_uuid = CXL_FEAT_ROW_SPARING_UUID,
		.repair_type = EDAC_REPAIR_ROW_SPARING,
		.repair_ops = &cxl_row_sparing_ops,
	},
	{
		.repair_uuid = CXL_FEAT_BANK_SPARING_UUID,
		.repair_type = EDAC_REPAIR_BANK_SPARING,
		.repair_ops = &cxl_bank_sparing_ops,
	},
	{
		.repair_uuid = CXL_FEAT_RANK_SPARING_UUID,
		.repair_type = EDAC_REPAIR_RANK_SPARING,
		.repair_ops = &cxl_rank_sparing_ops,
	},
};

static int cxl_memdev_sparing_init(struct cxl_memdev *cxlmd,
				   struct edac_dev_feature *ras_feature,
				   const struct cxl_mem_sparing_desc *desc,
				   u8 repair_inst)
{
	struct cxl_mem_sparing_context *cxl_sparing_ctx;
	struct cxl_feat_entry *feat_entry;
	int ret;

	feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
				      &desc->repair_uuid);
	if (IS_ERR(feat_entry))
		return -EOPNOTSUPP;

	if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
		return -EOPNOTSUPP;

	cxl_sparing_ctx = devm_kzalloc(&cxlmd->dev, sizeof(*cxl_sparing_ctx),
				       GFP_KERNEL);
	if (!cxl_sparing_ctx)
		return -ENOMEM;

	*cxl_sparing_ctx = (struct cxl_mem_sparing_context){
		.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
		.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
		.get_version = feat_entry->get_feat_ver,
		.set_version = feat_entry->set_feat_ver,
		.effects = le16_to_cpu(feat_entry->effects),
		.cxlmd = cxlmd,
		.repair_type = desc->repair_type,
		.instance = repair_inst++,
	};
	uuid_copy(&cxl_sparing_ctx->repair_uuid, &desc->repair_uuid);

	ret = cxl_mem_sparing_get_attrbs(cxl_sparing_ctx);
	if (ret)
		return ret;

	if ((cxl_sparing_ctx->cap_soft_sparing &&
	     cxl_sparing_ctx->cap_hard_sparing) ||
	    cxl_sparing_ctx->cap_soft_sparing)
		cxl_sparing_ctx->persist_mode = 0;
	else if (cxl_sparing_ctx->cap_hard_sparing)
		cxl_sparing_ctx->persist_mode = 1;
	else
		return -EOPNOTSUPP;

	ras_feature->ft_type = RAS_FEAT_MEM_REPAIR;
	ras_feature->instance = cxl_sparing_ctx->instance;
	ras_feature->mem_repair_ops = desc->repair_ops;
	ras_feature->ctx = cxl_sparing_ctx;

	return 0;
}

/*
 * CXL memory soft PPR & hard PPR control
 */
struct cxl_ppr_context {
	uuid_t repair_uuid;
	u8 instance;
	u16 get_feat_size;
	u16 set_feat_size;
	u8 get_version;
	u8 set_version;
	u16 effects;
	u8 op_class;
	u8 op_subclass;
	bool cap_dpa;
	bool cap_nib_mask;
	bool media_accessible;
	bool data_retained;
	struct cxl_memdev *cxlmd;
	enum edac_mem_repair_type repair_type;
	bool persist_mode;
	u64 dpa;
	u32 nibble_mask;
};

/*
 * See CXL rev 3.2 @8.2.10.7.2.1 Table 8-128 sPPR Feature Readable Attributes
 *
 * See CXL rev 3.2 @8.2.10.7.2.2 Table 8-131 hPPR Feature Readable Attributes
 */

#define CXL_PPR_OP_CAP_DEVICE_INITIATED	BIT(0)
#define CXL_PPR_OP_MODE_DEV_INITIATED	BIT(0)

#define CXL_PPR_FLAG_DPA_SUPPORT_MASK	BIT(0)
#define CXL_PPR_FLAG_NIB_SUPPORT_MASK	BIT(1)
#define CXL_PPR_FLAG_MEM_SPARING_EV_REC_SUPPORT_MASK	BIT(2)
#define CXL_PPR_FLAG_DEV_INITED_PPR_AT_BOOT_CAP_MASK	BIT(3)

#define CXL_PPR_RESTRICTION_FLAG_MEDIA_ACCESSIBLE_MASK	BIT(0)
#define CXL_PPR_RESTRICTION_FLAG_DATA_RETAINED_MASK	BIT(2)

#define CXL_PPR_SPARING_EV_REC_EN_MASK		BIT(0)
#define CXL_PPR_DEV_INITED_PPR_AT_BOOT_EN_MASK	BIT(1)

#define CXL_PPR_GET_CAP_DPA(flags) \
	FIELD_GET(CXL_PPR_FLAG_DPA_SUPPORT_MASK, flags)
#define CXL_PPR_GET_CAP_NIB_MASK(flags) \
	FIELD_GET(CXL_PPR_FLAG_NIB_SUPPORT_MASK, flags)
#define CXL_PPR_GET_MEDIA_ACCESSIBLE(restriction_flags) \
	(FIELD_GET(CXL_PPR_RESTRICTION_FLAG_MEDIA_ACCESSIBLE_MASK, \
		   restriction_flags) ^ 1)
#define CXL_PPR_GET_DATA_RETAINED(restriction_flags) \
	(FIELD_GET(CXL_PPR_RESTRICTION_FLAG_DATA_RETAINED_MASK, \
		   restriction_flags) ^ 1)

struct cxl_memdev_ppr_rd_attrbs {
	struct cxl_memdev_repair_rd_attrbs_hdr hdr;
	u8 ppr_flags;
	__le16 restriction_flags;
	u8 ppr_op_mode;
} __packed;

/*
 * See CXL rev 3.2 @8.2.10.7.1.2 Table 8-118 sPPR Maintenance Input Payload
 *
 * See CXL rev 3.2 @8.2.10.7.1.3 Table 8-119 hPPR Maintenance Input Payload
 */
struct cxl_memdev_ppr_maintenance_attrbs {
	u8 flags;
	__le64 dpa;
	u8 nibble_mask[3];
} __packed;

static int cxl_mem_ppr_get_attrbs(struct cxl_ppr_context *cxl_ppr_ctx)
{
	size_t rd_data_size = sizeof(struct cxl_memdev_ppr_rd_attrbs);
	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
	struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
	u16 restriction_flags;
	size_t data_size;
	u16 return_code;

	struct cxl_memdev_ppr_rd_attrbs *rd_attrbs __free(kfree) =
		kmalloc(rd_data_size, GFP_KERNEL);
	if (!rd_attrbs)
		return -ENOMEM;

	data_size = cxl_get_feature(cxl_mbox, &cxl_ppr_ctx->repair_uuid,
				    CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
				    rd_data_size, 0, &return_code);
	if (!data_size)
		return -EIO;

	cxl_ppr_ctx->op_class = rd_attrbs->hdr.op_class;
	cxl_ppr_ctx->op_subclass = rd_attrbs->hdr.op_subclass;
	cxl_ppr_ctx->cap_dpa = CXL_PPR_GET_CAP_DPA(rd_attrbs->ppr_flags);
	cxl_ppr_ctx->cap_nib_mask =
		CXL_PPR_GET_CAP_NIB_MASK(rd_attrbs->ppr_flags);

	restriction_flags = le16_to_cpu(rd_attrbs->restriction_flags);
	cxl_ppr_ctx->media_accessible =
		CXL_PPR_GET_MEDIA_ACCESSIBLE(restriction_flags);
	cxl_ppr_ctx->data_retained =
		CXL_PPR_GET_DATA_RETAINED(restriction_flags);

	return 0;
}

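/*
 * Issue the PPR maintenance operation. As with sparing, repair of online
 * memory is only attempted when the media remains accessible and data is
 * retained during the repair, and only for a DPA with a matching error
 * record from the current boot.
 */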
static int cxl_mem_perform_ppr(struct cxl_ppr_context *cxl_ppr_ctx)
{
	struct cxl_memdev_ppr_maintenance_attrbs maintenance_attrbs;
	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
	struct cxl_mem_repair_attrbs attrbs = { 0 };

	struct rw_semaphore *region_lock __free(rwsem_read_release) =
		rwsem_read_intr_acquire(&cxl_region_rwsem);
	if (!region_lock)
		return -EINTR;

	struct rw_semaphore *dpa_lock __free(rwsem_read_release) =
		rwsem_read_intr_acquire(&cxl_dpa_rwsem);
	if (!dpa_lock)
		return -EINTR;

	if (!cxl_ppr_ctx->media_accessible || !cxl_ppr_ctx->data_retained) {
		/* Memory to repair must be offline */
		if (cxl_is_memdev_memory_online(cxlmd))
			return -EBUSY;
	} else {
		if (cxl_is_memdev_memory_online(cxlmd)) {
			/* Check memory to repair is from the current boot */
			attrbs.repair_type = CXL_PPR;
			attrbs.dpa = cxl_ppr_ctx->dpa;
			attrbs.nibble_mask = cxl_ppr_ctx->nibble_mask;
			if (!cxl_find_rec_dram(cxlmd, &attrbs) &&
			    !cxl_find_rec_gen_media(cxlmd, &attrbs))
				return -EINVAL;
		}
	}

	memset(&maintenance_attrbs, 0, sizeof(maintenance_attrbs));
	maintenance_attrbs.flags = 0;
	maintenance_attrbs.dpa = cpu_to_le64(cxl_ppr_ctx->dpa);
	put_unaligned_le24(cxl_ppr_ctx->nibble_mask,
			   maintenance_attrbs.nibble_mask);

	return cxl_perform_maintenance(&cxlmd->cxlds->cxl_mbox,
				       cxl_ppr_ctx->op_class,
				       cxl_ppr_ctx->op_subclass,
				       &maintenance_attrbs,
				       sizeof(maintenance_attrbs));
}

static int cxl_ppr_get_repair_type(struct device *dev, void *drv_data,
				   const char **repair_type)
{
	*repair_type = edac_repair_type[EDAC_REPAIR_PPR];

	return 0;
}

static int cxl_ppr_get_persist_mode(struct device *dev, void *drv_data,
				    bool *persist_mode)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	*persist_mode = cxl_ppr_ctx->persist_mode;

	return 0;
}

static int cxl_get_ppr_safe_when_in_use(struct device *dev, void *drv_data,
					bool *safe)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	*safe = cxl_ppr_ctx->media_accessible & cxl_ppr_ctx->data_retained;

	return 0;
}

static int cxl_ppr_get_min_dpa(struct device *dev, void *drv_data, u64 *min_dpa)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	*min_dpa = cxlds->dpa_res.start;

	return 0;
}

static int cxl_ppr_get_max_dpa(struct device *dev, void *drv_data, u64 *max_dpa)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	*max_dpa = cxlds->dpa_res.end;

	return 0;
}

static int cxl_ppr_get_dpa(struct device *dev, void *drv_data, u64 *dpa)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	*dpa = cxl_ppr_ctx->dpa;

	return 0;
}

static int cxl_ppr_set_dpa(struct device *dev, void *drv_data, u64 dpa)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end)
		return -EINVAL;

	cxl_ppr_ctx->dpa = dpa;

	return 0;
}

static int cxl_ppr_get_nibble_mask(struct device *dev, void *drv_data,
				   u32 *nibble_mask)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	*nibble_mask = cxl_ppr_ctx->nibble_mask;

	return 0;
}

static int cxl_ppr_set_nibble_mask(struct device *dev, void *drv_data,
				   u32 nibble_mask)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	cxl_ppr_ctx->nibble_mask = nibble_mask;

	return 0;
}

static int cxl_do_ppr(struct device *dev, void *drv_data, u32 val)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	if (!cxl_ppr_ctx->dpa || val != EDAC_DO_MEM_REPAIR)
		return -EINVAL;

	return cxl_mem_perform_ppr(cxl_ppr_ctx);
}

static const struct edac_mem_repair_ops cxl_sppr_ops = {
	.get_repair_type = cxl_ppr_get_repair_type,
	.get_persist_mode = cxl_ppr_get_persist_mode,
	.get_repair_safe_when_in_use = cxl_get_ppr_safe_when_in_use,
	.get_min_dpa = cxl_ppr_get_min_dpa,
	.get_max_dpa = cxl_ppr_get_max_dpa,
	.get_dpa = cxl_ppr_get_dpa,
	.set_dpa = cxl_ppr_set_dpa,
	.get_nibble_mask = cxl_ppr_get_nibble_mask,
	.set_nibble_mask = cxl_ppr_set_nibble_mask,
	.do_repair = cxl_do_ppr,
};

static int cxl_memdev_soft_ppr_init(struct cxl_memdev *cxlmd,
				    struct edac_dev_feature *ras_feature,
				    u8 repair_inst)
{
	struct cxl_ppr_context *cxl_sppr_ctx;
	struct cxl_feat_entry *feat_entry;
	int ret;

	feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
				      &CXL_FEAT_SPPR_UUID);
	if (IS_ERR(feat_entry))
		return -EOPNOTSUPP;

	if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
		return -EOPNOTSUPP;

	cxl_sppr_ctx =
		devm_kzalloc(&cxlmd->dev, sizeof(*cxl_sppr_ctx), GFP_KERNEL);
	if (!cxl_sppr_ctx)
		return -ENOMEM;

	*cxl_sppr_ctx = (struct cxl_ppr_context){
		.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
		.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
		.get_version = feat_entry->get_feat_ver,
		.set_version = feat_entry->set_feat_ver,
		.effects = le16_to_cpu(feat_entry->effects),
		.cxlmd = cxlmd,
		.repair_type = EDAC_REPAIR_PPR,
		.persist_mode = 0,
		.instance = repair_inst,
	};
	uuid_copy(&cxl_sppr_ctx->repair_uuid, &CXL_FEAT_SPPR_UUID);

	ret = cxl_mem_ppr_get_attrbs(cxl_sppr_ctx);
	if (ret)
		return ret;

	ras_feature->ft_type = RAS_FEAT_MEM_REPAIR;
	ras_feature->instance = cxl_sppr_ctx->instance;
	ras_feature->mem_repair_ops = &cxl_sppr_ops;
	ras_feature->ctx = cxl_sppr_ctx;

	return 0;
}

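/**
 * devm_cxl_memdev_edac_register() - Register CXL memdev RAS features with EDAC
 * @cxlmd: CXL memory device
 *
 * Probes the optional patrol scrub, ECS, memory sparing and soft PPR features
 * and registers whichever subset the device advertises as changeable; a
 * feature reporting -EOPNOTSUPP is silently skipped.
 *
 * Returns 0 on success or a negative error code.
 */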
int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd)
{
	struct edac_dev_feature ras_features[CXL_NR_EDAC_DEV_FEATURES];
	int num_ras_features = 0;
	u8 repair_inst = 0;
	int rc;

	if (IS_ENABLED(CONFIG_CXL_EDAC_SCRUB)) {
		rc = cxl_memdev_scrub_init(cxlmd, &ras_features[num_ras_features], 0);
		if (rc < 0 && rc != -EOPNOTSUPP)
			return rc;

		if (rc != -EOPNOTSUPP)
			num_ras_features++;
	}

	if (IS_ENABLED(CONFIG_CXL_EDAC_ECS)) {
		rc = cxl_memdev_ecs_init(cxlmd, &ras_features[num_ras_features]);
		if (rc < 0 && rc != -EOPNOTSUPP)
			return rc;

		if (rc != -EOPNOTSUPP)
			num_ras_features++;
	}

	if (IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR)) {
		for (int i = 0; i < CXL_MEM_SPARING_MAX; i++) {
			rc = cxl_memdev_sparing_init(cxlmd,
						     &ras_features[num_ras_features],
						     &mem_sparing_desc[i], repair_inst);
			if (rc == -EOPNOTSUPP)
				continue;
			if (rc < 0)
				return rc;

			repair_inst++;
			num_ras_features++;
		}

		rc = cxl_memdev_soft_ppr_init(cxlmd, &ras_features[num_ras_features],
					      repair_inst);
		if (rc < 0 && rc != -EOPNOTSUPP)
			return rc;

		if (rc != -EOPNOTSUPP) {
			repair_inst++;
			num_ras_features++;
		}

		if (repair_inst) {
			struct cxl_mem_err_rec *array_rec =
				devm_kzalloc(&cxlmd->dev, sizeof(*array_rec),
					     GFP_KERNEL);
			if (!array_rec)
				return -ENOMEM;

			xa_init(&array_rec->rec_gen_media);
			xa_init(&array_rec->rec_dram);
			cxlmd->err_rec_array = array_rec;
		}
	}

	if (!num_ras_features)
		return -EINVAL;

	char *cxl_dev_name __free(kfree) =
		kasprintf(GFP_KERNEL, "cxl_%s", dev_name(&cxlmd->dev));
	if (!cxl_dev_name)
		return -ENOMEM;

	return edac_dev_register(&cxlmd->dev, cxl_dev_name, NULL,
				 num_ras_features, ras_features);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_edac_register, "CXL");

int devm_cxl_region_edac_register(struct cxl_region *cxlr)
{
	struct edac_dev_feature ras_features[CXL_NR_EDAC_DEV_FEATURES];
	int num_ras_features = 0;
	int rc;

	if (!IS_ENABLED(CONFIG_CXL_EDAC_SCRUB))
		return 0;

	rc = cxl_region_scrub_init(cxlr, &ras_features[num_ras_features], 0);
	if (rc < 0)
		return rc;

	num_ras_features++;

	char *cxl_dev_name __free(kfree) =
		kasprintf(GFP_KERNEL, "cxl_%s", dev_name(&cxlr->dev));
	if (!cxl_dev_name)
		return -ENOMEM;

	return edac_dev_register(&cxlr->dev, cxl_dev_name, NULL,
				 num_ras_features, ras_features);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_region_edac_register, "CXL");

void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd)
{
	struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
	struct cxl_event_gen_media *rec_gen_media;
	struct cxl_event_dram *rec_dram;
	unsigned long index;

	if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
		return;

	xa_for_each(&array_rec->rec_dram, index, rec_dram)
		kfree(rec_dram);
	xa_destroy(&array_rec->rec_dram);

	xa_for_each(&array_rec->rec_gen_media, index, rec_gen_media)
		kfree(rec_gen_media);
	xa_destroy(&array_rec->rec_gen_media);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_edac_release, "CXL");