1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * CXL EDAC memory feature driver.
4 *
5 * Copyright (c) 2024-2025 HiSilicon Limited.
6 *
7 * - Provides functions to configure the EDAC features of
8 * CXL memory devices.
9 * - Registers with the EDAC device subsystem driver to expose
10 * the feature sysfs attributes to the user for configuring
11 * CXL memory RAS features.
12 */
13
14 #include <linux/cleanup.h>
15 #include <linux/edac.h>
16 #include <linux/limits.h>
17 #include <linux/unaligned.h>
18 #include <linux/xarray.h>
19 #include <cxl/features.h>
20 #include <cxl.h>
21 #include <cxlmem.h>
22 #include "core.h"
23 #include "trace.h"
24
25 #define CXL_NR_EDAC_DEV_FEATURES 7
26
27 #define CXL_SCRUB_NO_REGION -1
28
29 struct cxl_patrol_scrub_context {
30 u8 instance;
31 u16 get_feat_size;
32 u16 set_feat_size;
33 u8 get_version;
34 u8 set_version;
35 u16 effects;
36 struct cxl_memdev *cxlmd;
37 struct cxl_region *cxlr;
38 };
39
40 /*
41 * See CXL spec rev 3.2 @8.2.10.9.11.1 Table 8-222 Device Patrol Scrub Control
42 * Feature Readable Attributes.
43 */
44 struct cxl_scrub_rd_attrbs {
45 u8 scrub_cycle_cap;
46 __le16 scrub_cycle_hours;
47 u8 scrub_flags;
48 } __packed;
49
50 /*
51 * See CXL spec rev 3.2 @8.2.10.9.11.1 Table 8-223 Device Patrol Scrub Control
52 * Feature Writable Attributes.
53 */
54 struct cxl_scrub_wr_attrbs {
55 u8 scrub_cycle_hours;
56 u8 scrub_flags;
57 } __packed;
58
59 #define CXL_SCRUB_CONTROL_CHANGEABLE BIT(0)
60 #define CXL_SCRUB_CONTROL_REALTIME BIT(1)
61 #define CXL_SCRUB_CONTROL_CYCLE_MASK GENMASK(7, 0)
62 #define CXL_SCRUB_CONTROL_MIN_CYCLE_MASK GENMASK(15, 8)
63 #define CXL_SCRUB_CONTROL_ENABLE BIT(0)
64
65 #define CXL_GET_SCRUB_CYCLE_CHANGEABLE(cap) \
66 FIELD_GET(CXL_SCRUB_CONTROL_CHANGEABLE, cap)
67 #define CXL_GET_SCRUB_CYCLE(cycle) \
68 FIELD_GET(CXL_SCRUB_CONTROL_CYCLE_MASK, cycle)
69 #define CXL_GET_SCRUB_MIN_CYCLE(cycle) \
70 FIELD_GET(CXL_SCRUB_CONTROL_MIN_CYCLE_MASK, cycle)
71 #define CXL_GET_SCRUB_EN_STS(flags) FIELD_GET(CXL_SCRUB_CONTROL_ENABLE, flags)
72
73 #define CXL_SET_SCRUB_CYCLE(cycle) \
74 FIELD_PREP(CXL_SCRUB_CONTROL_CYCLE_MASK, cycle)
75 #define CXL_SET_SCRUB_EN(en) FIELD_PREP(CXL_SCRUB_CONTROL_ENABLE, en)
76
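/*
 * The 16-bit scrub_cycle_hours value returned by Get Feature packs two
 * fields: bits [7:0] carry the current scrub cycle in hours and bits
 * [15:8] carry the minimum supported cycle (see the masks above). An
 * illustrative decode, not compiled into the driver:
 *
 *	u16 raw = le16_to_cpu(rd_attrbs->scrub_cycle_hours);
 *	u8 cur_hours = CXL_GET_SCRUB_CYCLE(raw);
 *	u8 min_hours = CXL_GET_SCRUB_MIN_CYCLE(raw);
 *
 * The EDAC scrub interface works in seconds, so the callbacks below
 * convert to and from hours with a factor of 3600.
 */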
77 static int cxl_mem_scrub_get_attrbs(struct cxl_mailbox *cxl_mbox, u8 *cap,
78 u16 *cycle, u8 *flags, u8 *min_cycle)
79 {
80 size_t rd_data_size = sizeof(struct cxl_scrub_rd_attrbs);
81 size_t data_size;
82 struct cxl_scrub_rd_attrbs *rd_attrbs __free(kfree) =
83 kzalloc(rd_data_size, GFP_KERNEL);
84 if (!rd_attrbs)
85 return -ENOMEM;
86
87 data_size = cxl_get_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
88 CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
89 rd_data_size, 0, NULL);
90 if (!data_size)
91 return -EIO;
92
93 *cap = rd_attrbs->scrub_cycle_cap;
94 *cycle = le16_to_cpu(rd_attrbs->scrub_cycle_hours);
95 *flags = rd_attrbs->scrub_flags;
96 if (min_cycle)
97 *min_cycle = CXL_GET_SCRUB_MIN_CYCLE(*cycle);
98
99 return 0;
100 }
101
102 static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx,
103 u8 *cap, u16 *cycle, u8 *flags, u8 *min_cycle)
104 {
105 struct cxl_mailbox *cxl_mbox;
106 struct cxl_region_params *p;
107 struct cxl_memdev *cxlmd;
108 struct cxl_region *cxlr;
109 u8 min_scrub_cycle = 0;
110 int i, ret;
111
112 if (!cxl_ps_ctx->cxlr) {
113 cxl_mbox = &cxl_ps_ctx->cxlmd->cxlds->cxl_mbox;
114 return cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle,
115 flags, min_cycle);
116 }
117
118 ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
119 if ((ret = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
120 return ret;
121
122 cxlr = cxl_ps_ctx->cxlr;
123 p = &cxlr->params;
124
125 for (i = 0; i < p->nr_targets; i++) {
126 struct cxl_endpoint_decoder *cxled = p->targets[i];
127
128 cxlmd = cxled_to_memdev(cxled);
129 cxl_mbox = &cxlmd->cxlds->cxl_mbox;
130 ret = cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle, flags,
131 min_cycle);
132 if (ret)
133 return ret;
134
135 /*
136 * The min_scrub_cycle of a region is the max of minimum scrub
137 * cycles supported by memdevs that back the region.
138 */
139 if (min_cycle)
140 min_scrub_cycle = max(*min_cycle, min_scrub_cycle);
141 }
142
143 if (min_cycle)
144 *min_cycle = min_scrub_cycle;
145
146 return 0;
147 }
148
149 static int cxl_scrub_set_attrbs_region(struct device *dev,
150 struct cxl_patrol_scrub_context *cxl_ps_ctx,
151 u8 cycle, u8 flags)
152 {
153 struct cxl_scrub_wr_attrbs wr_attrbs;
154 struct cxl_mailbox *cxl_mbox;
155 struct cxl_region_params *p;
156 struct cxl_memdev *cxlmd;
157 struct cxl_region *cxlr;
158 int ret, i;
159
160 ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
161 if ((ret = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
162 return ret;
163
164 cxlr = cxl_ps_ctx->cxlr;
165 p = &cxlr->params;
166 wr_attrbs.scrub_cycle_hours = cycle;
167 wr_attrbs.scrub_flags = flags;
168
169 for (i = 0; i < p->nr_targets; i++) {
170 struct cxl_endpoint_decoder *cxled = p->targets[i];
171
172 cxlmd = cxled_to_memdev(cxled);
173 cxl_mbox = &cxlmd->cxlds->cxl_mbox;
174 ret = cxl_set_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
175 cxl_ps_ctx->set_version, &wr_attrbs,
176 sizeof(wr_attrbs),
177 CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET,
178 0, NULL);
179 if (ret)
180 return ret;
181
182 if (cycle != cxlmd->scrub_cycle) {
183 if (cxlmd->scrub_region_id != CXL_SCRUB_NO_REGION)
184 dev_info(dev,
185 "Device scrub rate(%d hours) set by region%d rate overwritten by region%d scrub rate(%d hours)\n",
186 cxlmd->scrub_cycle,
187 cxlmd->scrub_region_id, cxlr->id,
188 cycle);
189
190 cxlmd->scrub_cycle = cycle;
191 cxlmd->scrub_region_id = cxlr->id;
192 }
193 }
194
195 return 0;
196 }
197
198 static int cxl_scrub_set_attrbs_device(struct device *dev,
199 struct cxl_patrol_scrub_context *cxl_ps_ctx,
200 u8 cycle, u8 flags)
201 {
202 struct cxl_scrub_wr_attrbs wr_attrbs;
203 struct cxl_mailbox *cxl_mbox;
204 struct cxl_memdev *cxlmd;
205 int ret;
206
207 wr_attrbs.scrub_cycle_hours = cycle;
208 wr_attrbs.scrub_flags = flags;
209
210 cxlmd = cxl_ps_ctx->cxlmd;
211 cxl_mbox = &cxlmd->cxlds->cxl_mbox;
212 ret = cxl_set_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
213 cxl_ps_ctx->set_version, &wr_attrbs,
214 sizeof(wr_attrbs),
215 CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET, 0,
216 NULL);
217 if (ret)
218 return ret;
219
220 if (cycle != cxlmd->scrub_cycle) {
221 if (cxlmd->scrub_region_id != CXL_SCRUB_NO_REGION)
222 dev_info(dev,
223 "Device scrub rate(%d hours) set by region%d rate overwritten with device local scrub rate(%d hours)\n",
224 cxlmd->scrub_cycle, cxlmd->scrub_region_id,
225 cycle);
226
227 cxlmd->scrub_cycle = cycle;
228 cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;
229 }
230
231 return 0;
232 }
233
234 static int cxl_scrub_set_attrbs(struct device *dev,
235 struct cxl_patrol_scrub_context *cxl_ps_ctx,
236 u8 cycle, u8 flags)
237 {
238 if (cxl_ps_ctx->cxlr)
239 return cxl_scrub_set_attrbs_region(dev, cxl_ps_ctx, cycle, flags);
240
241 return cxl_scrub_set_attrbs_device(dev, cxl_ps_ctx, cycle, flags);
242 }
243
244 static int cxl_patrol_scrub_get_enabled_bg(struct device *dev, void *drv_data,
245 bool *enabled)
246 {
247 struct cxl_patrol_scrub_context *ctx = drv_data;
248 u8 cap, flags;
249 u16 cycle;
250 int ret;
251
252 ret = cxl_scrub_get_attrbs(ctx, &cap, &cycle, &flags, NULL);
253 if (ret)
254 return ret;
255
256 *enabled = CXL_GET_SCRUB_EN_STS(flags);
257
258 return 0;
259 }
260
261 static int cxl_patrol_scrub_set_enabled_bg(struct device *dev, void *drv_data,
262 bool enable)
263 {
264 struct cxl_patrol_scrub_context *ctx = drv_data;
265 u8 cap, flags, wr_cycle;
266 u16 rd_cycle;
267 int ret;
268
269 if (!capable(CAP_SYS_RAWIO))
270 return -EPERM;
271
272 ret = cxl_scrub_get_attrbs(ctx, &cap, &rd_cycle, &flags, NULL);
273 if (ret)
274 return ret;
275
276 wr_cycle = CXL_GET_SCRUB_CYCLE(rd_cycle);
277 flags = CXL_SET_SCRUB_EN(enable);
278
279 return cxl_scrub_set_attrbs(dev, ctx, wr_cycle, flags);
280 }
281
282 static int cxl_patrol_scrub_get_min_scrub_cycle(struct device *dev,
283 void *drv_data, u32 *min)
284 {
285 struct cxl_patrol_scrub_context *ctx = drv_data;
286 u8 cap, flags, min_cycle;
287 u16 cycle;
288 int ret;
289
290 ret = cxl_scrub_get_attrbs(ctx, &cap, &cycle, &flags, &min_cycle);
291 if (ret)
292 return ret;
293
294 *min = min_cycle * 3600;
295
296 return 0;
297 }
298
299 static int cxl_patrol_scrub_get_max_scrub_cycle(struct device *dev,
300 void *drv_data, u32 *max)
301 {
302 *max = U8_MAX * 3600; /* Max set by register size */
303
304 return 0;
305 }
306
307 static int cxl_patrol_scrub_get_scrub_cycle(struct device *dev, void *drv_data,
308 u32 *scrub_cycle_secs)
309 {
310 struct cxl_patrol_scrub_context *ctx = drv_data;
311 u8 cap, flags;
312 u16 cycle;
313 int ret;
314
315 ret = cxl_scrub_get_attrbs(ctx, &cap, &cycle, &flags, NULL);
316 if (ret)
317 return ret;
318
319 *scrub_cycle_secs = CXL_GET_SCRUB_CYCLE(cycle) * 3600;
320
321 return 0;
322 }
323
324 static int cxl_patrol_scrub_set_scrub_cycle(struct device *dev, void *drv_data,
325 u32 scrub_cycle_secs)
326 {
327 struct cxl_patrol_scrub_context *ctx = drv_data;
328 u8 scrub_cycle_hours = scrub_cycle_secs / 3600;
329 u8 cap, wr_cycle, flags, min_cycle;
330 u16 rd_cycle;
331 int ret;
332
333 if (!capable(CAP_SYS_RAWIO))
334 return -EPERM;
335
336 ret = cxl_scrub_get_attrbs(ctx, &cap, &rd_cycle, &flags, &min_cycle);
337 if (ret)
338 return ret;
339
340 if (!CXL_GET_SCRUB_CYCLE_CHANGEABLE(cap))
341 return -EOPNOTSUPP;
342
343 if (scrub_cycle_hours < min_cycle) {
344 dev_dbg(dev, "Invalid CXL patrol scrub cycle(%d) to set\n",
345 scrub_cycle_hours);
346 dev_dbg(dev,
347 "Minimum supported CXL patrol scrub cycle in hour %d\n",
348 min_cycle);
349 return -EINVAL;
350 }
351 wr_cycle = CXL_SET_SCRUB_CYCLE(scrub_cycle_hours);
352
353 return cxl_scrub_set_attrbs(dev, ctx, wr_cycle, flags);
354 }
355
356 static const struct edac_scrub_ops cxl_ps_scrub_ops = {
357 .get_enabled_bg = cxl_patrol_scrub_get_enabled_bg,
358 .set_enabled_bg = cxl_patrol_scrub_set_enabled_bg,
359 .get_min_cycle = cxl_patrol_scrub_get_min_scrub_cycle,
360 .get_max_cycle = cxl_patrol_scrub_get_max_scrub_cycle,
361 .get_cycle_duration = cxl_patrol_scrub_get_scrub_cycle,
362 .set_cycle_duration = cxl_patrol_scrub_set_scrub_cycle,
363 };
364
365 static int cxl_memdev_scrub_init(struct cxl_memdev *cxlmd,
366 struct edac_dev_feature *ras_feature,
367 u8 scrub_inst)
368 {
369 struct cxl_patrol_scrub_context *cxl_ps_ctx;
370 struct cxl_feat_entry *feat_entry;
371 u8 cap, flags;
372 u16 cycle;
373 int rc;
374
375 feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
376 &CXL_FEAT_PATROL_SCRUB_UUID);
377 if (IS_ERR(feat_entry))
378 return -EOPNOTSUPP;
379
380 if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
381 return -EOPNOTSUPP;
382
383 cxl_ps_ctx = devm_kzalloc(&cxlmd->dev, sizeof(*cxl_ps_ctx), GFP_KERNEL);
384 if (!cxl_ps_ctx)
385 return -ENOMEM;
386
387 *cxl_ps_ctx = (struct cxl_patrol_scrub_context){
388 .get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
389 .set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
390 .get_version = feat_entry->get_feat_ver,
391 .set_version = feat_entry->set_feat_ver,
392 .effects = le16_to_cpu(feat_entry->effects),
393 .instance = scrub_inst,
394 .cxlmd = cxlmd,
395 };
396
397 rc = cxl_mem_scrub_get_attrbs(&cxlmd->cxlds->cxl_mbox, &cap, &cycle,
398 &flags, NULL);
399 if (rc)
400 return rc;
401
402 cxlmd->scrub_cycle = CXL_GET_SCRUB_CYCLE(cycle);
403 cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;
404
405 ras_feature->ft_type = RAS_FEAT_SCRUB;
406 ras_feature->instance = cxl_ps_ctx->instance;
407 ras_feature->scrub_ops = &cxl_ps_scrub_ops;
408 ras_feature->ctx = cxl_ps_ctx;
409
410 return 0;
411 }
412
413 static int cxl_region_scrub_init(struct cxl_region *cxlr,
414 struct edac_dev_feature *ras_feature,
415 u8 scrub_inst)
416 {
417 struct cxl_patrol_scrub_context *cxl_ps_ctx;
418 struct cxl_region_params *p = &cxlr->params;
419 struct cxl_feat_entry *feat_entry = NULL;
420 struct cxl_memdev *cxlmd;
421 u8 cap, flags;
422 u16 cycle;
423 int i, rc;
424
425 /*
426 * The cxl_region_rwsem must be held if the code below is used in a context
427 * other than when the region is in the probe state, as shown here.
428 */
429 for (i = 0; i < p->nr_targets; i++) {
430 struct cxl_endpoint_decoder *cxled = p->targets[i];
431
432 cxlmd = cxled_to_memdev(cxled);
433 feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
434 &CXL_FEAT_PATROL_SCRUB_UUID);
435 if (IS_ERR(feat_entry))
436 return -EOPNOTSUPP;
437
438 if (!(le32_to_cpu(feat_entry->flags) &
439 CXL_FEATURE_F_CHANGEABLE))
440 return -EOPNOTSUPP;
441
442 rc = cxl_mem_scrub_get_attrbs(&cxlmd->cxlds->cxl_mbox, &cap,
443 &cycle, &flags, NULL);
444 if (rc)
445 return rc;
446
447 cxlmd->scrub_cycle = CXL_GET_SCRUB_CYCLE(cycle);
448 cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;
449 }
450
451 cxl_ps_ctx = devm_kzalloc(&cxlr->dev, sizeof(*cxl_ps_ctx), GFP_KERNEL);
452 if (!cxl_ps_ctx)
453 return -ENOMEM;
454
455 *cxl_ps_ctx = (struct cxl_patrol_scrub_context){
456 .get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
457 .set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
458 .get_version = feat_entry->get_feat_ver,
459 .set_version = feat_entry->set_feat_ver,
460 .effects = le16_to_cpu(feat_entry->effects),
461 .instance = scrub_inst,
462 .cxlr = cxlr,
463 };
464
465 ras_feature->ft_type = RAS_FEAT_SCRUB;
466 ras_feature->instance = cxl_ps_ctx->instance;
467 ras_feature->scrub_ops = &cxl_ps_scrub_ops;
468 ras_feature->ctx = cxl_ps_ctx;
469
470 return 0;
471 }
472
473 struct cxl_ecs_context {
474 u16 num_media_frus;
475 u16 get_feat_size;
476 u16 set_feat_size;
477 u8 get_version;
478 u8 set_version;
479 u16 effects;
480 struct cxl_memdev *cxlmd;
481 };
482
483 /*
484 * See CXL spec rev 3.2 @8.2.10.9.11.2 Table 8-225 DDR5 ECS Control Feature
485 * Readable Attributes.
486 */
487 struct cxl_ecs_fru_rd_attrbs {
488 u8 ecs_cap;
489 __le16 ecs_config;
490 u8 ecs_flags;
491 } __packed;
492
493 struct cxl_ecs_rd_attrbs {
494 u8 ecs_log_cap;
495 struct cxl_ecs_fru_rd_attrbs fru_attrbs[];
496 } __packed;
497
498 /*
499 * See CXL spec rev 3.2 @8.2.10.9.11.2 Table 8-226 DDR5 ECS Control Feature
500 * Writable Attributes.
501 */
502 struct cxl_ecs_fru_wr_attrbs {
503 __le16 ecs_config;
504 } __packed;
505
506 struct cxl_ecs_wr_attrbs {
507 u8 ecs_log_cap;
508 struct cxl_ecs_fru_wr_attrbs fru_attrbs[];
509 } __packed;
510
511 #define CXL_ECS_LOG_ENTRY_TYPE_MASK GENMASK(1, 0)
512 #define CXL_ECS_REALTIME_REPORT_CAP_MASK BIT(0)
513 #define CXL_ECS_THRESHOLD_COUNT_MASK GENMASK(2, 0)
514 #define CXL_ECS_COUNT_MODE_MASK BIT(3)
515 #define CXL_ECS_RESET_COUNTER_MASK BIT(4)
516 #define CXL_ECS_RESET_COUNTER 1
517
518 enum {
519 ECS_THRESHOLD_256 = 256,
520 ECS_THRESHOLD_1024 = 1024,
521 ECS_THRESHOLD_4096 = 4096,
522 };
523
524 enum {
525 ECS_THRESHOLD_IDX_256 = 3,
526 ECS_THRESHOLD_IDX_1024 = 4,
527 ECS_THRESHOLD_IDX_4096 = 5,
528 };
529
530 static const u16 ecs_supp_threshold[] = {
531 [ECS_THRESHOLD_IDX_256] = 256,
532 [ECS_THRESHOLD_IDX_1024] = 1024,
533 [ECS_THRESHOLD_IDX_4096] = 4096,
534 };
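/*
 * The device encodes the ECS threshold as an index in the config word,
 * not as the raw codeword count; ecs_supp_threshold[] maps the index
 * back to 256/1024/4096. A minimal sketch of the translation, for
 * illustration only:
 *
 *	u8 idx = FIELD_GET(CXL_ECS_THRESHOLD_COUNT_MASK, config);
 *	u16 count = ecs_supp_threshold[idx];
 *
 * cxl_get_ecs_threshold() and cxl_set_ecs_threshold() below perform the
 * same mapping in each direction.
 */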
535
536 enum {
537 ECS_LOG_ENTRY_TYPE_DRAM = 0x0,
538 ECS_LOG_ENTRY_TYPE_MEM_MEDIA_FRU = 0x1,
539 };
540
541 enum cxl_ecs_count_mode {
542 ECS_MODE_COUNTS_ROWS = 0,
543 ECS_MODE_COUNTS_CODEWORDS = 1,
544 };
545
546 static int cxl_mem_ecs_get_attrbs(struct device *dev,
547 struct cxl_ecs_context *cxl_ecs_ctx,
548 int fru_id, u8 *log_cap, u16 *config)
549 {
550 struct cxl_memdev *cxlmd = cxl_ecs_ctx->cxlmd;
551 struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
552 struct cxl_ecs_fru_rd_attrbs *fru_rd_attrbs;
553 size_t rd_data_size;
554 size_t data_size;
555
556 rd_data_size = cxl_ecs_ctx->get_feat_size;
557
558 struct cxl_ecs_rd_attrbs *rd_attrbs __free(kvfree) =
559 kvzalloc(rd_data_size, GFP_KERNEL);
560 if (!rd_attrbs)
561 return -ENOMEM;
562
563 data_size = cxl_get_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
564 CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
565 rd_data_size, 0, NULL);
566 if (!data_size)
567 return -EIO;
568
569 fru_rd_attrbs = rd_attrbs->fru_attrbs;
570 *log_cap = rd_attrbs->ecs_log_cap;
571 *config = le16_to_cpu(fru_rd_attrbs[fru_id].ecs_config);
572
573 return 0;
574 }
575
576 static int cxl_mem_ecs_set_attrbs(struct device *dev,
577 struct cxl_ecs_context *cxl_ecs_ctx,
578 int fru_id, u8 log_cap, u16 config)
579 {
580 struct cxl_memdev *cxlmd = cxl_ecs_ctx->cxlmd;
581 struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
582 struct cxl_ecs_fru_rd_attrbs *fru_rd_attrbs;
583 struct cxl_ecs_fru_wr_attrbs *fru_wr_attrbs;
584 size_t rd_data_size, wr_data_size;
585 u16 num_media_frus, count;
586 size_t data_size;
587
588 num_media_frus = cxl_ecs_ctx->num_media_frus;
589 rd_data_size = cxl_ecs_ctx->get_feat_size;
590 wr_data_size = cxl_ecs_ctx->set_feat_size;
591 struct cxl_ecs_rd_attrbs *rd_attrbs __free(kvfree) =
592 kvzalloc(rd_data_size, GFP_KERNEL);
593 if (!rd_attrbs)
594 return -ENOMEM;
595
596 data_size = cxl_get_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
597 CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
598 rd_data_size, 0, NULL);
599 if (!data_size)
600 return -EIO;
601
602 struct cxl_ecs_wr_attrbs *wr_attrbs __free(kvfree) =
603 kvzalloc(wr_data_size, GFP_KERNEL);
604 if (!wr_attrbs)
605 return -ENOMEM;
606
607 /*
608 * Fill writable attributes from the current attributes read
609 * for all the media FRUs.
610 */
611 fru_rd_attrbs = rd_attrbs->fru_attrbs;
612 fru_wr_attrbs = wr_attrbs->fru_attrbs;
613 wr_attrbs->ecs_log_cap = log_cap;
614 for (count = 0; count < num_media_frus; count++)
615 fru_wr_attrbs[count].ecs_config =
616 fru_rd_attrbs[count].ecs_config;
617
618 fru_wr_attrbs[fru_id].ecs_config = cpu_to_le16(config);
619
620 return cxl_set_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
621 cxl_ecs_ctx->set_version, wr_attrbs,
622 wr_data_size,
623 CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET,
624 0, NULL);
625 }
626
627 static u8 cxl_get_ecs_log_entry_type(u8 log_cap, u16 config)
628 {
629 return FIELD_GET(CXL_ECS_LOG_ENTRY_TYPE_MASK, log_cap);
630 }
631
632 static u16 cxl_get_ecs_threshold(u8 log_cap, u16 config)
633 {
634 u8 index = FIELD_GET(CXL_ECS_THRESHOLD_COUNT_MASK, config);
635
636 return ecs_supp_threshold[index];
637 }
638
639 static u8 cxl_get_ecs_count_mode(u8 log_cap, u16 config)
640 {
641 return FIELD_GET(CXL_ECS_COUNT_MODE_MASK, config);
642 }
643
644 #define CXL_ECS_GET_ATTR(attrb) \
645 static int cxl_ecs_get_##attrb(struct device *dev, void *drv_data, \
646 int fru_id, u32 *val) \
647 { \
648 struct cxl_ecs_context *ctx = drv_data; \
649 u8 log_cap; \
650 u16 config; \
651 int ret; \
652 \
653 ret = cxl_mem_ecs_get_attrbs(dev, ctx, fru_id, &log_cap, \
654 &config); \
655 if (ret) \
656 return ret; \
657 \
658 *val = cxl_get_ecs_##attrb(log_cap, config); \
659 \
660 return 0; \
661 }
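/*
 * Each CXL_ECS_GET_ATTR(attrb) invocation below generates an EDAC
 * callback named cxl_ecs_get_<attrb>(). As a rough sketch (not the
 * literal preprocessor output), the threshold case is equivalent to:
 *
 *	static int cxl_ecs_get_threshold(struct device *dev, void *drv_data,
 *					 int fru_id, u32 *val)
 *	{
 *		read log_cap/config via cxl_mem_ecs_get_attrbs(), then
 *		*val = cxl_get_ecs_threshold(log_cap, config);
 *	}
 */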
662
663 CXL_ECS_GET_ATTR(log_entry_type)
664 CXL_ECS_GET_ATTR(count_mode)
665 CXL_ECS_GET_ATTR(threshold)
666
667 static int cxl_set_ecs_log_entry_type(struct device *dev, u8 *log_cap,
668 u16 *config, u32 val)
669 {
670 if (val != ECS_LOG_ENTRY_TYPE_DRAM &&
671 val != ECS_LOG_ENTRY_TYPE_MEM_MEDIA_FRU)
672 return -EINVAL;
673
674 *log_cap = FIELD_PREP(CXL_ECS_LOG_ENTRY_TYPE_MASK, val);
675
676 return 0;
677 }
678
679 static int cxl_set_ecs_threshold(struct device *dev, u8 *log_cap, u16 *config,
680 u32 val)
681 {
682 *config &= ~CXL_ECS_THRESHOLD_COUNT_MASK;
683
684 switch (val) {
685 case ECS_THRESHOLD_256:
686 *config |= FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK,
687 ECS_THRESHOLD_IDX_256);
688 break;
689 case ECS_THRESHOLD_1024:
690 *config |= FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK,
691 ECS_THRESHOLD_IDX_1024);
692 break;
693 case ECS_THRESHOLD_4096:
694 *config |= FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK,
695 ECS_THRESHOLD_IDX_4096);
696 break;
697 default:
698 dev_dbg(dev, "Invalid CXL ECS threshold count(%u) to set\n",
699 val);
700 dev_dbg(dev, "Supported ECS threshold counts: %u, %u, %u\n",
701 ECS_THRESHOLD_256, ECS_THRESHOLD_1024,
702 ECS_THRESHOLD_4096);
703 return -EINVAL;
704 }
705
706 return 0;
707 }
708
709 static int cxl_set_ecs_count_mode(struct device *dev, u8 *log_cap, u16 *config,
710 u32 val)
711 {
712 if (val != ECS_MODE_COUNTS_ROWS && val != ECS_MODE_COUNTS_CODEWORDS) {
713 dev_dbg(dev, "Invalid CXL ECS scrub mode(%d) to set\n", val);
714 dev_dbg(dev,
715 "Supported ECS Modes: 0: ECS counts rows with errors,"
716 " 1: ECS counts codewords with errors\n");
717 return -EINVAL;
718 }
719
720 *config &= ~CXL_ECS_COUNT_MODE_MASK;
721 *config |= FIELD_PREP(CXL_ECS_COUNT_MODE_MASK, val);
722
723 return 0;
724 }
725
726 static int cxl_set_ecs_reset_counter(struct device *dev, u8 *log_cap,
727 u16 *config, u32 val)
728 {
729 if (val != CXL_ECS_RESET_COUNTER)
730 return -EINVAL;
731
732 *config &= ~CXL_ECS_RESET_COUNTER_MASK;
733 *config |= FIELD_PREP(CXL_ECS_RESET_COUNTER_MASK, val);
734
735 return 0;
736 }
737
738 #define CXL_ECS_SET_ATTR(attrb) \
739 static int cxl_ecs_set_##attrb(struct device *dev, void *drv_data, \
740 int fru_id, u32 val) \
741 { \
742 struct cxl_ecs_context *ctx = drv_data; \
743 u8 log_cap; \
744 u16 config; \
745 int ret; \
746 \
747 if (!capable(CAP_SYS_RAWIO)) \
748 return -EPERM; \
749 \
750 ret = cxl_mem_ecs_get_attrbs(dev, ctx, fru_id, &log_cap, \
751 &config); \
752 if (ret) \
753 return ret; \
754 \
755 ret = cxl_set_ecs_##attrb(dev, &log_cap, &config, val); \
756 if (ret) \
757 return ret; \
758 \
759 return cxl_mem_ecs_set_attrbs(dev, ctx, fru_id, log_cap, \
760 config); \
761 }
762 CXL_ECS_SET_ATTR(log_entry_type)
763 CXL_ECS_SET_ATTR(count_mode)
764 CXL_ECS_SET_ATTR(reset_counter)
765 CXL_ECS_SET_ATTR(threshold)
766
767 static const struct edac_ecs_ops cxl_ecs_ops = {
768 .get_log_entry_type = cxl_ecs_get_log_entry_type,
769 .set_log_entry_type = cxl_ecs_set_log_entry_type,
770 .get_mode = cxl_ecs_get_count_mode,
771 .set_mode = cxl_ecs_set_count_mode,
772 .reset = cxl_ecs_set_reset_counter,
773 .get_threshold = cxl_ecs_get_threshold,
774 .set_threshold = cxl_ecs_set_threshold,
775 };
776
777 static int cxl_memdev_ecs_init(struct cxl_memdev *cxlmd,
778 struct edac_dev_feature *ras_feature)
779 {
780 struct cxl_ecs_context *cxl_ecs_ctx;
781 struct cxl_feat_entry *feat_entry;
782 int num_media_frus;
783
784 feat_entry =
785 cxl_feature_info(to_cxlfs(cxlmd->cxlds), &CXL_FEAT_ECS_UUID);
786 if (IS_ERR(feat_entry))
787 return -EOPNOTSUPP;
788
789 if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
790 return -EOPNOTSUPP;
791
792 num_media_frus = (le16_to_cpu(feat_entry->get_feat_size) -
793 sizeof(struct cxl_ecs_rd_attrbs)) /
794 sizeof(struct cxl_ecs_fru_rd_attrbs);
795 if (!num_media_frus)
796 return -EOPNOTSUPP;
797
798 cxl_ecs_ctx =
799 devm_kzalloc(&cxlmd->dev, sizeof(*cxl_ecs_ctx), GFP_KERNEL);
800 if (!cxl_ecs_ctx)
801 return -ENOMEM;
802
803 *cxl_ecs_ctx = (struct cxl_ecs_context){
804 .get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
805 .set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
806 .get_version = feat_entry->get_feat_ver,
807 .set_version = feat_entry->set_feat_ver,
808 .effects = le16_to_cpu(feat_entry->effects),
809 .num_media_frus = num_media_frus,
810 .cxlmd = cxlmd,
811 };
812
813 ras_feature->ft_type = RAS_FEAT_ECS;
814 ras_feature->ecs_ops = &cxl_ecs_ops;
815 ras_feature->ctx = cxl_ecs_ctx;
816 ras_feature->ecs_info.num_media_frus = num_media_frus;
817
818 return 0;
819 }
820
821 /*
822 * Perform Maintenance CXL 3.2 Spec 8.2.10.7.1
823 */
824
825 /*
826 * Perform Maintenance input payload
827 * CXL rev 3.2 section 8.2.10.7.1 Table 8-117
828 */
829 struct cxl_mbox_maintenance_hdr {
830 u8 op_class;
831 u8 op_subclass;
832 } __packed;
833
834 static int cxl_perform_maintenance(struct cxl_mailbox *cxl_mbox, u8 class,
835 u8 subclass, void *data_in,
836 size_t data_in_size)
837 {
838 struct cxl_memdev_maintenance_pi {
839 struct cxl_mbox_maintenance_hdr hdr;
840 u8 data[];
841 } __packed;
842 struct cxl_mbox_cmd mbox_cmd;
843 size_t hdr_size;
844
845 struct cxl_memdev_maintenance_pi *pi __free(kvfree) =
846 kvzalloc(cxl_mbox->payload_size, GFP_KERNEL);
847 if (!pi)
848 return -ENOMEM;
849
850 pi->hdr.op_class = class;
851 pi->hdr.op_subclass = subclass;
852 hdr_size = sizeof(pi->hdr);
853 /*
854 * Check that the mbox payload is large enough for the
855 * maintenance header and data transfer.
856 */
857 if (hdr_size + data_in_size > cxl_mbox->payload_size)
858 return -ENOMEM;
859
860 memcpy(pi->data, data_in, data_in_size);
861 mbox_cmd = (struct cxl_mbox_cmd){
862 .opcode = CXL_MBOX_OP_DO_MAINTENANCE,
863 .size_in = hdr_size + data_in_size,
864 .payload_in = pi,
865 };
866
867 return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
868 }
869
870 /*
871 * Support for determining whether a memory operation's attributes
872 * are from the current boot or not.
873 */
874
875 struct cxl_mem_err_rec {
876 struct xarray rec_gen_media;
877 struct xarray rec_dram;
878 };
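/*
 * Each memdev caches recent General Media and DRAM event records in the
 * two xarrays above, keyed by the physical address (DPA) reported in the
 * event. Repair paths consult this cache to confirm that the DPA being
 * repaired was reported faulty during the current boot. Illustrative
 * lookup only, not compiled:
 *
 *	struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
 *	struct cxl_event_dram *rec = xa_load(&array_rec->rec_dram, dpa);
 *	if (rec)
 *		the DPA was reported in a DRAM event this boot
 */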
879
880 enum cxl_mem_repair_type {
881 CXL_PPR,
882 CXL_CACHELINE_SPARING,
883 CXL_ROW_SPARING,
884 CXL_BANK_SPARING,
885 CXL_RANK_SPARING,
886 CXL_REPAIR_MAX,
887 };
888
889 /**
890 * struct cxl_mem_repair_attrbs - CXL memory repair attributes
891 * @dpa: DPA of memory to repair
892 * @nibble_mask: nibble mask, identifies one or more nibbles on the memory bus
893 * @row: row of memory to repair
894 * @column: column of memory to repair
895 * @channel: channel of memory to repair
896 * @sub_channel: sub channel of memory to repair
897 * @rank: rank of memory to repair
898 * @bank_group: bank group of memory to repair
899 * @bank: bank of memory to repair
900 * @repair_type: repair type, e.g. PPR, memory sparing, etc.
901 */
902 struct cxl_mem_repair_attrbs {
903 u64 dpa;
904 u32 nibble_mask;
905 u32 row;
906 u16 column;
907 u8 channel;
908 u8 sub_channel;
909 u8 rank;
910 u8 bank_group;
911 u8 bank;
912 enum cxl_mem_repair_type repair_type;
913 };
914
915 static struct cxl_event_gen_media *
916 cxl_find_rec_gen_media(struct cxl_memdev *cxlmd,
917 struct cxl_mem_repair_attrbs *attrbs)
918 {
919 struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
920 struct cxl_event_gen_media *rec;
921
922 if (!array_rec)
923 return NULL;
924
925 rec = xa_load(&array_rec->rec_gen_media, attrbs->dpa);
926 if (!rec)
927 return NULL;
928
929 if (attrbs->repair_type == CXL_PPR)
930 return rec;
931
932 return NULL;
933 }
934
935 static struct cxl_event_dram *
936 cxl_find_rec_dram(struct cxl_memdev *cxlmd,
937 struct cxl_mem_repair_attrbs *attrbs)
938 {
939 struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
940 struct cxl_event_dram *rec;
941 u16 validity_flags;
942
943 if (!array_rec)
944 return NULL;
945
946 rec = xa_load(&array_rec->rec_dram, attrbs->dpa);
947 if (!rec)
948 return NULL;
949
950 validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags);
951 if (!(validity_flags & CXL_DER_VALID_CHANNEL) ||
952 !(validity_flags & CXL_DER_VALID_RANK))
953 return NULL;
954
955 switch (attrbs->repair_type) {
956 case CXL_PPR:
957 if (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
958 get_unaligned_le24(rec->nibble_mask) == attrbs->nibble_mask)
959 return rec;
960 break;
961 case CXL_CACHELINE_SPARING:
962 if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
963 !(validity_flags & CXL_DER_VALID_BANK) ||
964 !(validity_flags & CXL_DER_VALID_ROW) ||
965 !(validity_flags & CXL_DER_VALID_COLUMN))
966 return NULL;
967
968 if (rec->media_hdr.channel == attrbs->channel &&
969 rec->media_hdr.rank == attrbs->rank &&
970 rec->bank_group == attrbs->bank_group &&
971 rec->bank == attrbs->bank &&
972 get_unaligned_le24(rec->row) == attrbs->row &&
973 get_unaligned_le16(rec->column) == attrbs->column &&
974 (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
975 get_unaligned_le24(rec->nibble_mask) ==
976 attrbs->nibble_mask) &&
977 (!(validity_flags & CXL_DER_VALID_SUB_CHANNEL) ||
978 rec->sub_channel == attrbs->sub_channel))
979 return rec;
980 break;
981 case CXL_ROW_SPARING:
982 if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
983 !(validity_flags & CXL_DER_VALID_BANK) ||
984 !(validity_flags & CXL_DER_VALID_ROW))
985 return NULL;
986
987 if (rec->media_hdr.channel == attrbs->channel &&
988 rec->media_hdr.rank == attrbs->rank &&
989 rec->bank_group == attrbs->bank_group &&
990 rec->bank == attrbs->bank &&
991 get_unaligned_le24(rec->row) == attrbs->row &&
992 (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
993 get_unaligned_le24(rec->nibble_mask) ==
994 attrbs->nibble_mask))
995 return rec;
996 break;
997 case CXL_BANK_SPARING:
998 if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
999 !(validity_flags & CXL_DER_VALID_BANK))
1000 return NULL;
1001
1002 if (rec->media_hdr.channel == attrbs->channel &&
1003 rec->media_hdr.rank == attrbs->rank &&
1004 rec->bank_group == attrbs->bank_group &&
1005 rec->bank == attrbs->bank &&
1006 (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
1007 get_unaligned_le24(rec->nibble_mask) ==
1008 attrbs->nibble_mask))
1009 return rec;
1010 break;
1011 case CXL_RANK_SPARING:
1012 if (rec->media_hdr.channel == attrbs->channel &&
1013 rec->media_hdr.rank == attrbs->rank &&
1014 (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
1015 get_unaligned_le24(rec->nibble_mask) ==
1016 attrbs->nibble_mask))
1017 return rec;
1018 break;
1019 default:
1020 return NULL;
1021 }
1022
1023 return NULL;
1024 }
1025
1026 #define CXL_MAX_STORAGE_DAYS 10
1027 #define CXL_MAX_STORAGE_TIME_SECS (CXL_MAX_STORAGE_DAYS * 24 * 60 * 60)
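/*
 * 10 days works out to 10 * 24 * 60 * 60 = 864000 seconds. Event
 * timestamps are in nanoseconds, hence the division by 1000000000 in the
 * expiry helpers below.
 */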
1028
1029 static void cxl_del_expired_gmedia_recs(struct xarray *rec_xarray,
1030 struct cxl_event_gen_media *cur_rec)
1031 {
1032 u64 cur_ts = le64_to_cpu(cur_rec->media_hdr.hdr.timestamp);
1033 struct cxl_event_gen_media *rec;
1034 unsigned long index;
1035 u64 delta_ts_secs;
1036
1037 xa_for_each(rec_xarray, index, rec) {
1038 delta_ts_secs = (cur_ts -
1039 le64_to_cpu(rec->media_hdr.hdr.timestamp)) / 1000000000ULL;
1040 if (delta_ts_secs >= CXL_MAX_STORAGE_TIME_SECS) {
1041 xa_erase(rec_xarray, index);
1042 kfree(rec);
1043 }
1044 }
1045 }
1046
1047 static void cxl_del_expired_dram_recs(struct xarray *rec_xarray,
1048 struct cxl_event_dram *cur_rec)
1049 {
1050 u64 cur_ts = le64_to_cpu(cur_rec->media_hdr.hdr.timestamp);
1051 struct cxl_event_dram *rec;
1052 unsigned long index;
1053 u64 delta_secs;
1054
1055 xa_for_each(rec_xarray, index, rec) {
1056 delta_secs = (cur_ts -
1057 le64_to_cpu(rec->media_hdr.hdr.timestamp)) / 1000000000ULL;
1058 if (delta_secs >= CXL_MAX_STORAGE_TIME_SECS) {
1059 xa_erase(rec_xarray, index);
1060 kfree(rec);
1061 }
1062 }
1063 }
1064
1065 #define CXL_MAX_REC_STORAGE_COUNT 200
1066
1067 static void cxl_del_overflow_old_recs(struct xarray *rec_xarray)
1068 {
1069 void *err_rec;
1070 unsigned long index, count = 0;
1071
1072 xa_for_each(rec_xarray, index, err_rec)
1073 count++;
1074
1075 if (count <= CXL_MAX_REC_STORAGE_COUNT)
1076 return;
1077
1078 count -= CXL_MAX_REC_STORAGE_COUNT;
1079 xa_for_each(rec_xarray, index, err_rec) {
1080 xa_erase(rec_xarray, index);
1081 kfree(err_rec);
1082 count--;
1083 if (!count)
1084 break;
1085 }
1086 }
1087
1088 int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, union cxl_event *evt)
1089 {
1090 struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
1091 struct cxl_event_gen_media *rec;
1092 void *old_rec;
1093
1094 if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
1095 return 0;
1096
1097 rec = kmemdup(&evt->gen_media, sizeof(*rec), GFP_KERNEL);
1098 if (!rec)
1099 return -ENOMEM;
1100
1101 old_rec = xa_store(&array_rec->rec_gen_media,
1102 le64_to_cpu(rec->media_hdr.phys_addr), rec,
1103 GFP_KERNEL);
1104 if (xa_is_err(old_rec)) {
1105 kfree(rec);
1106 return xa_err(old_rec);
1107 }
1108
1109 kfree(old_rec);
1110
1111 cxl_del_expired_gmedia_recs(&array_rec->rec_gen_media, rec);
1112 cxl_del_overflow_old_recs(&array_rec->rec_gen_media);
1113
1114 return 0;
1115 }
1116 EXPORT_SYMBOL_NS_GPL(cxl_store_rec_gen_media, "CXL");
1117
1118 int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt)
1119 {
1120 struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
1121 struct cxl_event_dram *rec;
1122 void *old_rec;
1123
1124 if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
1125 return 0;
1126
1127 rec = kmemdup(&evt->dram, sizeof(*rec), GFP_KERNEL);
1128 if (!rec)
1129 return -ENOMEM;
1130
1131 old_rec = xa_store(&array_rec->rec_dram,
1132 le64_to_cpu(rec->media_hdr.phys_addr), rec,
1133 GFP_KERNEL);
1134 if (xa_is_err(old_rec)) {
1135 kfree(rec);
1136 return xa_err(old_rec);
1137 }
1138
1139 kfree(old_rec);
1140
1141 cxl_del_expired_dram_recs(&array_rec->rec_dram, rec);
1142 cxl_del_overflow_old_recs(&array_rec->rec_dram);
1143
1144 return 0;
1145 }
1146 EXPORT_SYMBOL_NS_GPL(cxl_store_rec_dram, "CXL");
1147
1148 static bool cxl_is_memdev_memory_online(const struct cxl_memdev *cxlmd)
1149 {
1150 struct cxl_port *port = cxlmd->endpoint;
1151
1152 if (port && cxl_num_decoders_committed(port))
1153 return true;
1154
1155 return false;
1156 }
1157
1158 /*
1159 * CXL memory sparing control
1160 */
1161 enum cxl_mem_sparing_granularity {
1162 CXL_MEM_SPARING_CACHELINE,
1163 CXL_MEM_SPARING_ROW,
1164 CXL_MEM_SPARING_BANK,
1165 CXL_MEM_SPARING_RANK,
1166 CXL_MEM_SPARING_MAX
1167 };
1168
1169 struct cxl_mem_sparing_context {
1170 struct cxl_memdev *cxlmd;
1171 uuid_t repair_uuid;
1172 u16 get_feat_size;
1173 u16 set_feat_size;
1174 u16 effects;
1175 u8 instance;
1176 u8 get_version;
1177 u8 set_version;
1178 u8 op_class;
1179 u8 op_subclass;
1180 bool cap_safe_when_in_use;
1181 bool cap_hard_sparing;
1182 bool cap_soft_sparing;
1183 u8 channel;
1184 u8 rank;
1185 u8 bank_group;
1186 u32 nibble_mask;
1187 u64 dpa;
1188 u32 row;
1189 u16 column;
1190 u8 bank;
1191 u8 sub_channel;
1192 enum edac_mem_repair_type repair_type;
1193 bool persist_mode;
1194 };
1195
1196 #define CXL_SPARING_RD_CAP_SAFE_IN_USE_MASK BIT(0)
1197 #define CXL_SPARING_RD_CAP_HARD_SPARING_MASK BIT(1)
1198 #define CXL_SPARING_RD_CAP_SOFT_SPARING_MASK BIT(2)
1199
1200 #define CXL_SPARING_WR_DEVICE_INITIATED_MASK BIT(0)
1201
1202 #define CXL_SPARING_QUERY_RESOURCE_FLAG BIT(0)
1203 #define CXL_SET_HARD_SPARING_FLAG BIT(1)
1204 #define CXL_SPARING_SUB_CHNL_VALID_FLAG BIT(2)
1205 #define CXL_SPARING_NIB_MASK_VALID_FLAG BIT(3)
1206
1207 #define CXL_GET_SPARING_SAFE_IN_USE(flags) \
1208 (FIELD_GET(CXL_SPARING_RD_CAP_SAFE_IN_USE_MASK, \
1209 flags) ^ 1)
1210 #define CXL_GET_CAP_HARD_SPARING(flags) \
1211 FIELD_GET(CXL_SPARING_RD_CAP_HARD_SPARING_MASK, \
1212 flags)
1213 #define CXL_GET_CAP_SOFT_SPARING(flags) \
1214 FIELD_GET(CXL_SPARING_RD_CAP_SOFT_SPARING_MASK, \
1215 flags)
1216
1217 #define CXL_SET_SPARING_QUERY_RESOURCE(val) \
1218 FIELD_PREP(CXL_SPARING_QUERY_RESOURCE_FLAG, val)
1219 #define CXL_SET_HARD_SPARING(val) \
1220 FIELD_PREP(CXL_SET_HARD_SPARING_FLAG, val)
1221 #define CXL_SET_SPARING_SUB_CHNL_VALID(val) \
1222 FIELD_PREP(CXL_SPARING_SUB_CHNL_VALID_FLAG, val)
1223 #define CXL_SET_SPARING_NIB_MASK_VALID(val) \
1224 FIELD_PREP(CXL_SPARING_NIB_MASK_VALID_FLAG, val)
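/*
 * A hedged sketch of how the flag helpers above combine for a hard
 * sparing request that also carries a valid sub-channel and nibble mask
 * (illustration only; the real flags are assembled conditionally in
 * cxl_mem_perform_sparing() based on the repair type and event record):
 *
 *	u8 flags = CXL_SET_SPARING_QUERY_RESOURCE(0) |
 *		   CXL_SET_HARD_SPARING(1) |
 *		   CXL_SET_SPARING_SUB_CHNL_VALID(1) |
 *		   CXL_SET_SPARING_NIB_MASK_VALID(1);
 */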
1225
1226 /*
1227 * See CXL spec rev 3.2 @8.2.10.7.2.3 Table 8-134 Memory Sparing Feature
1228 * Readable Attributes.
1229 */
1230 struct cxl_memdev_repair_rd_attrbs_hdr {
1231 u8 max_op_latency;
1232 __le16 op_cap;
1233 __le16 op_mode;
1234 u8 op_class;
1235 u8 op_subclass;
1236 u8 rsvd[9];
1237 } __packed;
1238
1239 struct cxl_memdev_sparing_rd_attrbs {
1240 struct cxl_memdev_repair_rd_attrbs_hdr hdr;
1241 u8 rsvd;
1242 __le16 restriction_flags;
1243 } __packed;
1244
1245 /*
1246 * See CXL spec rev 3.2 @8.2.10.7.1.4 Table 8-120 Memory Sparing Input Payload.
1247 */
1248 struct cxl_memdev_sparing_in_payload {
1249 u8 flags;
1250 u8 channel;
1251 u8 rank;
1252 u8 nibble_mask[3];
1253 u8 bank_group;
1254 u8 bank;
1255 u8 row[3];
1256 __le16 column;
1257 u8 sub_channel;
1258 } __packed;
1259
1260 static int
1261 cxl_mem_sparing_get_attrbs(struct cxl_mem_sparing_context *cxl_sparing_ctx)
1262 {
1263 size_t rd_data_size = sizeof(struct cxl_memdev_sparing_rd_attrbs);
1264 struct cxl_memdev *cxlmd = cxl_sparing_ctx->cxlmd;
1265 struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
1266 u16 restriction_flags;
1267 size_t data_size;
1268 u16 return_code;
1269 struct cxl_memdev_sparing_rd_attrbs *rd_attrbs __free(kfree) =
1270 kzalloc(rd_data_size, GFP_KERNEL);
1271 if (!rd_attrbs)
1272 return -ENOMEM;
1273
1274 data_size = cxl_get_feature(cxl_mbox, &cxl_sparing_ctx->repair_uuid,
1275 CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
1276 rd_data_size, 0, &return_code);
1277 if (!data_size)
1278 return -EIO;
1279
1280 cxl_sparing_ctx->op_class = rd_attrbs->hdr.op_class;
1281 cxl_sparing_ctx->op_subclass = rd_attrbs->hdr.op_subclass;
1282 restriction_flags = le16_to_cpu(rd_attrbs->restriction_flags);
1283 cxl_sparing_ctx->cap_safe_when_in_use =
1284 CXL_GET_SPARING_SAFE_IN_USE(restriction_flags);
1285 cxl_sparing_ctx->cap_hard_sparing =
1286 CXL_GET_CAP_HARD_SPARING(restriction_flags);
1287 cxl_sparing_ctx->cap_soft_sparing =
1288 CXL_GET_CAP_SOFT_SPARING(restriction_flags);
1289
1290 return 0;
1291 }
1292
1293 static struct cxl_event_dram *
1294 cxl_mem_get_rec_dram(struct cxl_memdev *cxlmd,
1295 struct cxl_mem_sparing_context *ctx)
1296 {
1297 struct cxl_mem_repair_attrbs attrbs = { 0 };
1298
1299 attrbs.dpa = ctx->dpa;
1300 attrbs.channel = ctx->channel;
1301 attrbs.rank = ctx->rank;
1302 attrbs.nibble_mask = ctx->nibble_mask;
1303 switch (ctx->repair_type) {
1304 case EDAC_REPAIR_CACHELINE_SPARING:
1305 attrbs.repair_type = CXL_CACHELINE_SPARING;
1306 attrbs.bank_group = ctx->bank_group;
1307 attrbs.bank = ctx->bank;
1308 attrbs.row = ctx->row;
1309 attrbs.column = ctx->column;
1310 attrbs.sub_channel = ctx->sub_channel;
1311 break;
1312 case EDAC_REPAIR_ROW_SPARING:
1313 attrbs.repair_type = CXL_ROW_SPARING;
1314 attrbs.bank_group = ctx->bank_group;
1315 attrbs.bank = ctx->bank;
1316 attrbs.row = ctx->row;
1317 break;
1318 case EDAC_REPAIR_BANK_SPARING:
1319 attrbs.repair_type = CXL_BANK_SPARING;
1320 attrbs.bank_group = ctx->bank_group;
1321 attrbs.bank = ctx->bank;
1322 break;
1323 case EDAC_REPAIR_RANK_SPARING:
1324 attrbs.repair_type = CXL_RANK_SPARING;
1325 break;
1326 default:
1327 return NULL;
1328 }
1329
1330 return cxl_find_rec_dram(cxlmd, &attrbs);
1331 }
1332
1333 static int
1334 cxl_mem_perform_sparing(struct device *dev,
1335 struct cxl_mem_sparing_context *cxl_sparing_ctx)
1336 {
1337 struct cxl_memdev *cxlmd = cxl_sparing_ctx->cxlmd;
1338 struct cxl_memdev_sparing_in_payload sparing_pi;
1339 struct cxl_event_dram *rec = NULL;
1340 u16 validity_flags = 0;
1341 int ret;
1342
1343 ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
1344 if ((ret = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
1345 return ret;
1346
1347 ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
1348 if ((ret = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
1349 return ret;
1350
1351 if (!cxl_sparing_ctx->cap_safe_when_in_use) {
1352 /* Memory to repair must be offline */
1353 if (cxl_is_memdev_memory_online(cxlmd))
1354 return -EBUSY;
1355 } else {
1356 if (cxl_is_memdev_memory_online(cxlmd)) {
1357 rec = cxl_mem_get_rec_dram(cxlmd, cxl_sparing_ctx);
1358 if (!rec)
1359 return -EINVAL;
1360
1361 if (!get_unaligned_le16(rec->media_hdr.validity_flags))
1362 return -EINVAL;
1363 }
1364 }
1365
1366 memset(&sparing_pi, 0, sizeof(sparing_pi));
1367 sparing_pi.flags = CXL_SET_SPARING_QUERY_RESOURCE(0);
1368 if (cxl_sparing_ctx->persist_mode)
1369 sparing_pi.flags |= CXL_SET_HARD_SPARING(1);
1370
1371 if (rec)
1372 validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags);
1373
1374 switch (cxl_sparing_ctx->repair_type) {
1375 case EDAC_REPAIR_CACHELINE_SPARING:
1376 sparing_pi.column = cpu_to_le16(cxl_sparing_ctx->column);
1377 if (!rec || (validity_flags & CXL_DER_VALID_SUB_CHANNEL)) {
1378 sparing_pi.flags |= CXL_SET_SPARING_SUB_CHNL_VALID(1);
1379 sparing_pi.sub_channel = cxl_sparing_ctx->sub_channel;
1380 }
1381 fallthrough;
1382 case EDAC_REPAIR_ROW_SPARING:
1383 put_unaligned_le24(cxl_sparing_ctx->row, sparing_pi.row);
1384 fallthrough;
1385 case EDAC_REPAIR_BANK_SPARING:
1386 sparing_pi.bank_group = cxl_sparing_ctx->bank_group;
1387 sparing_pi.bank = cxl_sparing_ctx->bank;
1388 fallthrough;
1389 case EDAC_REPAIR_RANK_SPARING:
1390 sparing_pi.rank = cxl_sparing_ctx->rank;
1391 fallthrough;
1392 default:
1393 sparing_pi.channel = cxl_sparing_ctx->channel;
1394 if ((rec && (validity_flags & CXL_DER_VALID_NIBBLE)) ||
1395 (!rec && (!cxl_sparing_ctx->nibble_mask ||
1396 (cxl_sparing_ctx->nibble_mask & 0xFFFFFF)))) {
1397 sparing_pi.flags |= CXL_SET_SPARING_NIB_MASK_VALID(1);
1398 put_unaligned_le24(cxl_sparing_ctx->nibble_mask,
1399 sparing_pi.nibble_mask);
1400 }
1401 break;
1402 }
1403
1404 return cxl_perform_maintenance(&cxlmd->cxlds->cxl_mbox,
1405 cxl_sparing_ctx->op_class,
1406 cxl_sparing_ctx->op_subclass,
1407 &sparing_pi, sizeof(sparing_pi));
1408 }
1409
1410 static int cxl_mem_sparing_get_repair_type(struct device *dev, void *drv_data,
1411 const char **repair_type)
1412 {
1413 struct cxl_mem_sparing_context *ctx = drv_data;
1414
1415 switch (ctx->repair_type) {
1416 case EDAC_REPAIR_CACHELINE_SPARING:
1417 case EDAC_REPAIR_ROW_SPARING:
1418 case EDAC_REPAIR_BANK_SPARING:
1419 case EDAC_REPAIR_RANK_SPARING:
1420 *repair_type = edac_repair_type[ctx->repair_type];
1421 break;
1422 default:
1423 return -EINVAL;
1424 }
1425
1426 return 0;
1427 }
1428
1429 #define CXL_SPARING_GET_ATTR(attrb, data_type) \
1430 static int cxl_mem_sparing_get_##attrb( \
1431 struct device *dev, void *drv_data, data_type *val) \
1432 { \
1433 struct cxl_mem_sparing_context *ctx = drv_data; \
1434 \
1435 *val = ctx->attrb; \
1436 \
1437 return 0; \
1438 }
1439 CXL_SPARING_GET_ATTR(persist_mode, bool)
1440 CXL_SPARING_GET_ATTR(dpa, u64)
1441 CXL_SPARING_GET_ATTR(nibble_mask, u32)
1442 CXL_SPARING_GET_ATTR(bank_group, u32)
1443 CXL_SPARING_GET_ATTR(bank, u32)
1444 CXL_SPARING_GET_ATTR(rank, u32)
1445 CXL_SPARING_GET_ATTR(row, u32)
1446 CXL_SPARING_GET_ATTR(column, u32)
1447 CXL_SPARING_GET_ATTR(channel, u32)
1448 CXL_SPARING_GET_ATTR(sub_channel, u32)
1449
1450 #define CXL_SPARING_SET_ATTR(attrb, data_type) \
1451 static int cxl_mem_sparing_set_##attrb(struct device *dev, \
1452 void *drv_data, data_type val) \
1453 { \
1454 struct cxl_mem_sparing_context *ctx = drv_data; \
1455 \
1456 ctx->attrb = val; \
1457 \
1458 return 0; \
1459 }
1460 CXL_SPARING_SET_ATTR(nibble_mask, u32)
1461 CXL_SPARING_SET_ATTR(bank_group, u32)
1462 CXL_SPARING_SET_ATTR(bank, u32)
1463 CXL_SPARING_SET_ATTR(rank, u32)
1464 CXL_SPARING_SET_ATTR(row, u32)
1465 CXL_SPARING_SET_ATTR(column, u32)
1466 CXL_SPARING_SET_ATTR(channel, u32)
1467 CXL_SPARING_SET_ATTR(sub_channel, u32)
1468
1469 static int cxl_mem_sparing_set_persist_mode(struct device *dev, void *drv_data,
1470 bool persist_mode)
1471 {
1472 struct cxl_mem_sparing_context *ctx = drv_data;
1473
1474 if ((persist_mode && ctx->cap_hard_sparing) ||
1475 (!persist_mode && ctx->cap_soft_sparing))
1476 ctx->persist_mode = persist_mode;
1477 else
1478 return -EOPNOTSUPP;
1479
1480 return 0;
1481 }
1482
1483 static int cxl_get_mem_sparing_safe_when_in_use(struct device *dev,
1484 void *drv_data, bool *safe)
1485 {
1486 struct cxl_mem_sparing_context *ctx = drv_data;
1487
1488 *safe = ctx->cap_safe_when_in_use;
1489
1490 return 0;
1491 }
1492
1493 static int cxl_mem_sparing_get_min_dpa(struct device *dev, void *drv_data,
1494 u64 *min_dpa)
1495 {
1496 struct cxl_mem_sparing_context *ctx = drv_data;
1497 struct cxl_memdev *cxlmd = ctx->cxlmd;
1498 struct cxl_dev_state *cxlds = cxlmd->cxlds;
1499
1500 *min_dpa = cxlds->dpa_res.start;
1501
1502 return 0;
1503 }
1504
1505 static int cxl_mem_sparing_get_max_dpa(struct device *dev, void *drv_data,
1506 u64 *max_dpa)
1507 {
1508 struct cxl_mem_sparing_context *ctx = drv_data;
1509 struct cxl_memdev *cxlmd = ctx->cxlmd;
1510 struct cxl_dev_state *cxlds = cxlmd->cxlds;
1511
1512 *max_dpa = cxlds->dpa_res.end;
1513
1514 return 0;
1515 }
1516
1517 static int cxl_mem_sparing_set_dpa(struct device *dev, void *drv_data, u64 dpa)
1518 {
1519 struct cxl_mem_sparing_context *ctx = drv_data;
1520 struct cxl_memdev *cxlmd = ctx->cxlmd;
1521 struct cxl_dev_state *cxlds = cxlmd->cxlds;
1522
1523 if (!cxl_resource_contains_addr(&cxlds->dpa_res, dpa))
1524 return -EINVAL;
1525
1526 ctx->dpa = dpa;
1527
1528 return 0;
1529 }
1530
1531 static int cxl_do_mem_sparing(struct device *dev, void *drv_data, u32 val)
1532 {
1533 struct cxl_mem_sparing_context *ctx = drv_data;
1534
1535 if (val != EDAC_DO_MEM_REPAIR)
1536 return -EINVAL;
1537
1538 return cxl_mem_perform_sparing(dev, ctx);
1539 }
1540
1541 #define RANK_OPS \
1542 .get_repair_type = cxl_mem_sparing_get_repair_type, \
1543 .get_persist_mode = cxl_mem_sparing_get_persist_mode, \
1544 .set_persist_mode = cxl_mem_sparing_set_persist_mode, \
1545 .get_repair_safe_when_in_use = cxl_get_mem_sparing_safe_when_in_use, \
1546 .get_min_dpa = cxl_mem_sparing_get_min_dpa, \
1547 .get_max_dpa = cxl_mem_sparing_get_max_dpa, \
1548 .get_dpa = cxl_mem_sparing_get_dpa, \
1549 .set_dpa = cxl_mem_sparing_set_dpa, \
1550 .get_nibble_mask = cxl_mem_sparing_get_nibble_mask, \
1551 .set_nibble_mask = cxl_mem_sparing_set_nibble_mask, \
1552 .get_rank = cxl_mem_sparing_get_rank, \
1553 .set_rank = cxl_mem_sparing_set_rank, \
1554 .get_channel = cxl_mem_sparing_get_channel, \
1555 .set_channel = cxl_mem_sparing_set_channel, \
1556 .do_repair = cxl_do_mem_sparing
1557
1558 #define BANK_OPS \
1559 RANK_OPS, .get_bank_group = cxl_mem_sparing_get_bank_group, \
1560 .set_bank_group = cxl_mem_sparing_set_bank_group, \
1561 .get_bank = cxl_mem_sparing_get_bank, \
1562 .set_bank = cxl_mem_sparing_set_bank
1563
1564 #define ROW_OPS \
1565 BANK_OPS, .get_row = cxl_mem_sparing_get_row, \
1566 .set_row = cxl_mem_sparing_set_row
1567
1568 #define CACHELINE_OPS \
1569 ROW_OPS, .get_column = cxl_mem_sparing_get_column, \
1570 .set_column = cxl_mem_sparing_set_column, \
1571 .get_sub_channel = cxl_mem_sparing_get_sub_channel, \
1572 .set_sub_channel = cxl_mem_sparing_set_sub_channel
1573
1574 static const struct edac_mem_repair_ops cxl_rank_sparing_ops = {
1575 RANK_OPS,
1576 };
1577
1578 static const struct edac_mem_repair_ops cxl_bank_sparing_ops = {
1579 BANK_OPS,
1580 };
1581
1582 static const struct edac_mem_repair_ops cxl_row_sparing_ops = {
1583 ROW_OPS,
1584 };
1585
1586 static const struct edac_mem_repair_ops cxl_cacheline_sparing_ops = {
1587 CACHELINE_OPS,
1588 };
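/*
 * The four ops tables above nest: RANK_OPS is the common base, BANK_OPS
 * adds the bank/bank_group accessors, ROW_OPS adds the row, and
 * CACHELINE_OPS adds the column and sub-channel. cxl_cacheline_sparing_ops
 * therefore also carries every rank, bank and row callback.
 */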
1589
1590 struct cxl_mem_sparing_desc {
1591 const uuid_t repair_uuid;
1592 enum edac_mem_repair_type repair_type;
1593 const struct edac_mem_repair_ops *repair_ops;
1594 };
1595
1596 static const struct cxl_mem_sparing_desc mem_sparing_desc[] = {
1597 {
1598 .repair_uuid = CXL_FEAT_CACHELINE_SPARING_UUID,
1599 .repair_type = EDAC_REPAIR_CACHELINE_SPARING,
1600 .repair_ops = &cxl_cacheline_sparing_ops,
1601 },
1602 {
1603 .repair_uuid = CXL_FEAT_ROW_SPARING_UUID,
1604 .repair_type = EDAC_REPAIR_ROW_SPARING,
1605 .repair_ops = &cxl_row_sparing_ops,
1606 },
1607 {
1608 .repair_uuid = CXL_FEAT_BANK_SPARING_UUID,
1609 .repair_type = EDAC_REPAIR_BANK_SPARING,
1610 .repair_ops = &cxl_bank_sparing_ops,
1611 },
1612 {
1613 .repair_uuid = CXL_FEAT_RANK_SPARING_UUID,
1614 .repair_type = EDAC_REPAIR_RANK_SPARING,
1615 .repair_ops = &cxl_rank_sparing_ops,
1616 },
1617 };
1618
1619 static int cxl_memdev_sparing_init(struct cxl_memdev *cxlmd,
1620 struct edac_dev_feature *ras_feature,
1621 const struct cxl_mem_sparing_desc *desc,
1622 u8 repair_inst)
1623 {
1624 struct cxl_mem_sparing_context *cxl_sparing_ctx;
1625 struct cxl_feat_entry *feat_entry;
1626 int ret;
1627
1628 feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
1629 &desc->repair_uuid);
1630 if (IS_ERR(feat_entry))
1631 return -EOPNOTSUPP;
1632
1633 if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
1634 return -EOPNOTSUPP;
1635
1636 cxl_sparing_ctx = devm_kzalloc(&cxlmd->dev, sizeof(*cxl_sparing_ctx),
1637 GFP_KERNEL);
1638 if (!cxl_sparing_ctx)
1639 return -ENOMEM;
1640
1641 *cxl_sparing_ctx = (struct cxl_mem_sparing_context){
1642 .get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
1643 .set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
1644 .get_version = feat_entry->get_feat_ver,
1645 .set_version = feat_entry->set_feat_ver,
1646 .effects = le16_to_cpu(feat_entry->effects),
1647 .cxlmd = cxlmd,
1648 .repair_type = desc->repair_type,
1649 .instance = repair_inst++,
1650 };
1651 uuid_copy(&cxl_sparing_ctx->repair_uuid, &desc->repair_uuid);
1652
1653 ret = cxl_mem_sparing_get_attrbs(cxl_sparing_ctx);
1654 if (ret)
1655 return ret;
1656
1657 if ((cxl_sparing_ctx->cap_soft_sparing &&
1658 cxl_sparing_ctx->cap_hard_sparing) ||
1659 cxl_sparing_ctx->cap_soft_sparing)
1660 cxl_sparing_ctx->persist_mode = 0;
1661 else if (cxl_sparing_ctx->cap_hard_sparing)
1662 cxl_sparing_ctx->persist_mode = 1;
1663 else
1664 return -EOPNOTSUPP;
1665
1666 ras_feature->ft_type = RAS_FEAT_MEM_REPAIR;
1667 ras_feature->instance = cxl_sparing_ctx->instance;
1668 ras_feature->mem_repair_ops = desc->repair_ops;
1669 ras_feature->ctx = cxl_sparing_ctx;
1670
1671 return 0;
1672 }
1673
1674 /*
1675 * CXL memory soft PPR & hard PPR control
1676 */
1677 struct cxl_ppr_context {
1678 uuid_t repair_uuid;
1679 u8 instance;
1680 u16 get_feat_size;
1681 u16 set_feat_size;
1682 u8 get_version;
1683 u8 set_version;
1684 u16 effects;
1685 u8 op_class;
1686 u8 op_subclass;
1687 bool cap_dpa;
1688 bool cap_nib_mask;
1689 bool media_accessible;
1690 bool data_retained;
1691 struct cxl_memdev *cxlmd;
1692 enum edac_mem_repair_type repair_type;
1693 bool persist_mode;
1694 u64 dpa;
1695 u32 nibble_mask;
1696 };
1697
1698 /*
1699 * See CXL rev 3.2 @8.2.10.7.2.1 Table 8-128 sPPR Feature Readable Attributes
1700 *
1701 * See CXL rev 3.2 @8.2.10.7.2.2 Table 8-131 hPPR Feature Readable Attributes
1702 */
1703
1704 #define CXL_PPR_OP_CAP_DEVICE_INITIATED BIT(0)
1705 #define CXL_PPR_OP_MODE_DEV_INITIATED BIT(0)
1706
1707 #define CXL_PPR_FLAG_DPA_SUPPORT_MASK BIT(0)
1708 #define CXL_PPR_FLAG_NIB_SUPPORT_MASK BIT(1)
1709 #define CXL_PPR_FLAG_MEM_SPARING_EV_REC_SUPPORT_MASK BIT(2)
1710 #define CXL_PPR_FLAG_DEV_INITED_PPR_AT_BOOT_CAP_MASK BIT(3)
1711
1712 #define CXL_PPR_RESTRICTION_FLAG_MEDIA_ACCESSIBLE_MASK BIT(0)
1713 #define CXL_PPR_RESTRICTION_FLAG_DATA_RETAINED_MASK BIT(2)
1714
1715 #define CXL_PPR_SPARING_EV_REC_EN_MASK BIT(0)
1716 #define CXL_PPR_DEV_INITED_PPR_AT_BOOT_EN_MASK BIT(1)
1717
1718 #define CXL_PPR_GET_CAP_DPA(flags) \
1719 FIELD_GET(CXL_PPR_FLAG_DPA_SUPPORT_MASK, flags)
1720 #define CXL_PPR_GET_CAP_NIB_MASK(flags) \
1721 FIELD_GET(CXL_PPR_FLAG_NIB_SUPPORT_MASK, flags)
1722 #define CXL_PPR_GET_MEDIA_ACCESSIBLE(restriction_flags) \
1723 (FIELD_GET(CXL_PPR_RESTRICTION_FLAG_MEDIA_ACCESSIBLE_MASK, \
1724 restriction_flags) ^ 1)
1725 #define CXL_PPR_GET_DATA_RETAINED(restriction_flags) \
1726 (FIELD_GET(CXL_PPR_RESTRICTION_FLAG_DATA_RETAINED_MASK, \
1727 restriction_flags) ^ 1)
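/*
 * Note the XOR with 1 above: the restriction flags report these bits as
 * restrictions, so the helpers invert them into the positive capabilities
 * cached in the PPR context (media_accessible, data_retained). The same
 * pattern is used by CXL_GET_SPARING_SAFE_IN_USE() for memory sparing.
 */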
1728
1729 struct cxl_memdev_ppr_rd_attrbs {
1730 struct cxl_memdev_repair_rd_attrbs_hdr hdr;
1731 u8 ppr_flags;
1732 __le16 restriction_flags;
1733 u8 ppr_op_mode;
1734 } __packed;
1735
1736 /*
1737 * See CXL rev 3.2 @8.2.10.7.1.2 Table 8-118 sPPR Maintenance Input Payload
1738 *
1739 * See CXL rev 3.2 @8.2.10.7.1.3 Table 8-119 hPPR Maintenance Input Payload
1740 */
1741 struct cxl_memdev_ppr_maintenance_attrbs {
1742 u8 flags;
1743 __le64 dpa;
1744 u8 nibble_mask[3];
1745 } __packed;
1746
1747 static int cxl_mem_ppr_get_attrbs(struct cxl_ppr_context *cxl_ppr_ctx)
1748 {
1749 size_t rd_data_size = sizeof(struct cxl_memdev_ppr_rd_attrbs);
1750 struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
1751 struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
1752 u16 restriction_flags;
1753 size_t data_size;
1754 u16 return_code;
1755
1756 struct cxl_memdev_ppr_rd_attrbs *rd_attrbs __free(kfree) =
1757 kmalloc(rd_data_size, GFP_KERNEL);
1758 if (!rd_attrbs)
1759 return -ENOMEM;
1760
1761 data_size = cxl_get_feature(cxl_mbox, &cxl_ppr_ctx->repair_uuid,
1762 CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
1763 rd_data_size, 0, &return_code);
1764 if (!data_size)
1765 return -EIO;
1766
1767 cxl_ppr_ctx->op_class = rd_attrbs->hdr.op_class;
1768 cxl_ppr_ctx->op_subclass = rd_attrbs->hdr.op_subclass;
1769 cxl_ppr_ctx->cap_dpa = CXL_PPR_GET_CAP_DPA(rd_attrbs->ppr_flags);
1770 cxl_ppr_ctx->cap_nib_mask =
1771 CXL_PPR_GET_CAP_NIB_MASK(rd_attrbs->ppr_flags);
1772
1773 restriction_flags = le16_to_cpu(rd_attrbs->restriction_flags);
1774 cxl_ppr_ctx->media_accessible =
1775 CXL_PPR_GET_MEDIA_ACCESSIBLE(restriction_flags);
1776 cxl_ppr_ctx->data_retained =
1777 CXL_PPR_GET_DATA_RETAINED(restriction_flags);
1778
1779 return 0;
1780 }
1781
1782 static int cxl_mem_perform_ppr(struct cxl_ppr_context *cxl_ppr_ctx)
1783 {
1784 struct cxl_memdev_ppr_maintenance_attrbs maintenance_attrbs;
1785 struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
1786 struct cxl_mem_repair_attrbs attrbs = { 0 };
1787 int ret;
1788
1789 ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
1790 if ((ret = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
1791 return ret;
1792
1793 ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
1794 if ((ret = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
1795 return ret;
1796
1797 if (!cxl_ppr_ctx->media_accessible || !cxl_ppr_ctx->data_retained) {
1798 /* Memory to repair must be offline */
1799 if (cxl_is_memdev_memory_online(cxlmd))
1800 return -EBUSY;
1801 } else {
1802 if (cxl_is_memdev_memory_online(cxlmd)) {
1803 /* Check memory to repair is from the current boot */
1804 attrbs.repair_type = CXL_PPR;
1805 attrbs.dpa = cxl_ppr_ctx->dpa;
1806 attrbs.nibble_mask = cxl_ppr_ctx->nibble_mask;
1807 if (!cxl_find_rec_dram(cxlmd, &attrbs) &&
1808 !cxl_find_rec_gen_media(cxlmd, &attrbs))
1809 return -EINVAL;
1810 }
1811 }
1812
1813 memset(&maintenance_attrbs, 0, sizeof(maintenance_attrbs));
1814 maintenance_attrbs.flags = 0;
1815 maintenance_attrbs.dpa = cpu_to_le64(cxl_ppr_ctx->dpa);
1816 put_unaligned_le24(cxl_ppr_ctx->nibble_mask,
1817 maintenance_attrbs.nibble_mask);
1818
1819 return cxl_perform_maintenance(&cxlmd->cxlds->cxl_mbox,
1820 cxl_ppr_ctx->op_class,
1821 cxl_ppr_ctx->op_subclass,
1822 &maintenance_attrbs,
1823 sizeof(maintenance_attrbs));
1824 }
1825
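/*
 * EDAC memory repair callbacks backing the soft PPR sysfs attributes.
 * The getters report attributes cached in the PPR context; the setters
 * only stage the DPA and nibble mask, the repair itself is triggered
 * through cxl_do_ppr().
 */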
static int cxl_ppr_get_repair_type(struct device *dev, void *drv_data,
				   const char **repair_type)
{
	*repair_type = edac_repair_type[EDAC_REPAIR_PPR];

	return 0;
}

static int cxl_ppr_get_persist_mode(struct device *dev, void *drv_data,
				    bool *persist_mode)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	*persist_mode = cxl_ppr_ctx->persist_mode;

	return 0;
}

static int cxl_get_ppr_safe_when_in_use(struct device *dev, void *drv_data,
					bool *safe)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	*safe = cxl_ppr_ctx->media_accessible & cxl_ppr_ctx->data_retained;

	return 0;
}

static int cxl_ppr_get_min_dpa(struct device *dev, void *drv_data, u64 *min_dpa)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	*min_dpa = cxlds->dpa_res.start;

	return 0;
}

static int cxl_ppr_get_max_dpa(struct device *dev, void *drv_data, u64 *max_dpa)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	*max_dpa = cxlds->dpa_res.end;

	return 0;
}

static int cxl_ppr_get_dpa(struct device *dev, void *drv_data, u64 *dpa)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	*dpa = cxl_ppr_ctx->dpa;

	return 0;
}

static int cxl_ppr_set_dpa(struct device *dev, void *drv_data, u64 dpa)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	if (!cxl_resource_contains_addr(&cxlds->dpa_res, dpa))
		return -EINVAL;

	cxl_ppr_ctx->dpa = dpa;

	return 0;
}

static int cxl_ppr_get_nibble_mask(struct device *dev, void *drv_data,
				   u32 *nibble_mask)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	*nibble_mask = cxl_ppr_ctx->nibble_mask;

	return 0;
}

static int cxl_ppr_set_nibble_mask(struct device *dev, void *drv_data,
				   u32 nibble_mask)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;

	cxl_ppr_ctx->nibble_mask = nibble_mask;

	return 0;
}

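/*
 * Validate the repair request and the staged DPA before handing off to
 * cxl_mem_perform_ppr().
 */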
static int cxl_do_ppr(struct device *dev, void *drv_data, u32 val)
{
	struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	if (val != EDAC_DO_MEM_REPAIR ||
	    !cxl_resource_contains_addr(&cxlds->dpa_res, cxl_ppr_ctx->dpa))
		return -EINVAL;

	return cxl_mem_perform_ppr(cxl_ppr_ctx);
}

static const struct edac_mem_repair_ops cxl_sppr_ops = {
	.get_repair_type = cxl_ppr_get_repair_type,
	.get_persist_mode = cxl_ppr_get_persist_mode,
	.get_repair_safe_when_in_use = cxl_get_ppr_safe_when_in_use,
	.get_min_dpa = cxl_ppr_get_min_dpa,
	.get_max_dpa = cxl_ppr_get_max_dpa,
	.get_dpa = cxl_ppr_get_dpa,
	.set_dpa = cxl_ppr_set_dpa,
	.get_nibble_mask = cxl_ppr_get_nibble_mask,
	.set_nibble_mask = cxl_ppr_set_nibble_mask,
	.do_repair = cxl_do_ppr,
};

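/*
 * Probe the soft PPR (sPPR) feature and, when it is supported and
 * changeable, populate the EDAC memory repair feature descriptor with the
 * sPPR context and operations.
 */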
static int cxl_memdev_soft_ppr_init(struct cxl_memdev *cxlmd,
				    struct edac_dev_feature *ras_feature,
				    u8 repair_inst)
{
	struct cxl_ppr_context *cxl_sppr_ctx;
	struct cxl_feat_entry *feat_entry;
	int ret;

	feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
				      &CXL_FEAT_SPPR_UUID);
	if (IS_ERR(feat_entry))
		return -EOPNOTSUPP;

	if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
		return -EOPNOTSUPP;

	cxl_sppr_ctx =
		devm_kzalloc(&cxlmd->dev, sizeof(*cxl_sppr_ctx), GFP_KERNEL);
	if (!cxl_sppr_ctx)
		return -ENOMEM;

	*cxl_sppr_ctx = (struct cxl_ppr_context){
		.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
		.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
		.get_version = feat_entry->get_feat_ver,
		.set_version = feat_entry->set_feat_ver,
		.effects = le16_to_cpu(feat_entry->effects),
		.cxlmd = cxlmd,
		.repair_type = EDAC_REPAIR_PPR,
		.persist_mode = 0,
		.instance = repair_inst,
	};
	uuid_copy(&cxl_sppr_ctx->repair_uuid, &CXL_FEAT_SPPR_UUID);

	ret = cxl_mem_ppr_get_attrbs(cxl_sppr_ctx);
	if (ret)
		return ret;

	ras_feature->ft_type = RAS_FEAT_MEM_REPAIR;
	ras_feature->instance = cxl_sppr_ctx->instance;
	ras_feature->mem_repair_ops = &cxl_sppr_ops;
	ras_feature->ctx = cxl_sppr_ctx;

	return 0;
}

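/*
 * Register the per-memdev EDAC RAS features (patrol scrub, ECS, memory
 * sparing and soft PPR) that the device supports, up to
 * CXL_NR_EDAC_DEV_FEATURES. Features that report -EOPNOTSUPP are skipped;
 * registration fails only when no feature is available at all.
 */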
int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd)
{
	struct edac_dev_feature ras_features[CXL_NR_EDAC_DEV_FEATURES];
	int num_ras_features = 0;
	u8 repair_inst = 0;
	int rc;

	if (IS_ENABLED(CONFIG_CXL_EDAC_SCRUB)) {
		rc = cxl_memdev_scrub_init(cxlmd, &ras_features[num_ras_features], 0);
		if (rc < 0 && rc != -EOPNOTSUPP)
			return rc;

		if (rc != -EOPNOTSUPP)
			num_ras_features++;
	}

	if (IS_ENABLED(CONFIG_CXL_EDAC_ECS)) {
		rc = cxl_memdev_ecs_init(cxlmd, &ras_features[num_ras_features]);
		if (rc < 0 && rc != -EOPNOTSUPP)
			return rc;

		if (rc != -EOPNOTSUPP)
			num_ras_features++;
	}

	if (IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR)) {
		for (int i = 0; i < CXL_MEM_SPARING_MAX; i++) {
			rc = cxl_memdev_sparing_init(cxlmd,
						     &ras_features[num_ras_features],
						     &mem_sparing_desc[i], repair_inst);
			if (rc == -EOPNOTSUPP)
				continue;
			if (rc < 0)
				return rc;

			repair_inst++;
			num_ras_features++;
		}

		rc = cxl_memdev_soft_ppr_init(cxlmd, &ras_features[num_ras_features],
					      repair_inst);
		if (rc < 0 && rc != -EOPNOTSUPP)
			return rc;

		if (rc != -EOPNOTSUPP) {
			repair_inst++;
			num_ras_features++;
		}

		if (repair_inst) {
			struct cxl_mem_err_rec *array_rec =
				devm_kzalloc(&cxlmd->dev, sizeof(*array_rec),
					     GFP_KERNEL);
			if (!array_rec)
				return -ENOMEM;

			xa_init(&array_rec->rec_gen_media);
			xa_init(&array_rec->rec_dram);
			cxlmd->err_rec_array = array_rec;
		}
	}

	if (!num_ras_features)
		return -EINVAL;

	char *cxl_dev_name __free(kfree) =
		kasprintf(GFP_KERNEL, "cxl_%s", dev_name(&cxlmd->dev));
	if (!cxl_dev_name)
		return -ENOMEM;

	return edac_dev_register(&cxlmd->dev, cxl_dev_name, NULL,
				 num_ras_features, ras_features);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_edac_register, "CXL");

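/*
 * Region-scoped EDAC registration currently exposes only the patrol scrub
 * feature; it is a no-op when CONFIG_CXL_EDAC_SCRUB is disabled.
 */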
int devm_cxl_region_edac_register(struct cxl_region *cxlr)
{
	struct edac_dev_feature ras_features[CXL_NR_EDAC_DEV_FEATURES];
	int num_ras_features = 0;
	int rc;

	if (!IS_ENABLED(CONFIG_CXL_EDAC_SCRUB))
		return 0;

	rc = cxl_region_scrub_init(cxlr, &ras_features[num_ras_features], 0);
	if (rc < 0)
		return rc;

	num_ras_features++;

	char *cxl_dev_name __free(kfree) =
		kasprintf(GFP_KERNEL, "cxl_%s", dev_name(&cxlr->dev));
	if (!cxl_dev_name)
		return -ENOMEM;

	return edac_dev_register(&cxlr->dev, cxl_dev_name, NULL,
				 num_ras_features, ras_features);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_region_edac_register, "CXL");

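/*
 * Free the cached DRAM and general media event records used for memory
 * repair before the memdev goes away.
 */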
void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd)
{
	struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
	struct cxl_event_gen_media *rec_gen_media;
	struct cxl_event_dram *rec_dram;
	unsigned long index;

	if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
		return;

	xa_for_each(&array_rec->rec_dram, index, rec_dram)
		kfree(rec_dram);
	xa_destroy(&array_rec->rec_dram);

	xa_for_each(&array_rec->rec_gen_media, index, rec_gen_media)
		kfree(rec_gen_media);
	xa_destroy(&array_rec->rec_gen_media);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_edac_release, "CXL");