/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Common code for both the skx_edac driver and the Intel 10nm server EDAC
 * driver. Originally split out from the skx_edac driver.
 *
 * Copyright (c) 2018, Intel Corporation.
 */
8
9 #ifndef _SKX_COMM_EDAC_H
10 #define _SKX_COMM_EDAC_H
11
12 #include <linux/bits.h>
13 #include <asm/mce.h>
14
/*
 * NOTE(review): presumably the size, in bytes, of the message buffers used by
 * the drivers sharing this header; no user is visible from here -- confirm.
 */
#define MSG_SIZE		1024
16
/*
 * Debug macros
 *
 * Both wrappers insert the fixed "skx" prefix argument and forward every
 * other argument unchanged to edac_printk() / edac_mc_chipset_printk().
 */
#define skx_printk(level, fmt, ...)				\
	edac_printk(level, "skx", fmt, ##__VA_ARGS__)

#define skx_mc_printk(mci, level, fmt, ...)			\
	edac_mc_chipset_printk(mci, level, "skx", fmt, ##__VA_ARGS__)
25
/*
 * Get a bit field at register value <v>, from bit <lo> to bit <hi>
 * (both ends inclusive). The field is shifted down so that bit <lo> of <v>
 * ends up at bit 0 of the result.
 *
 * Note: <lo> is evaluated twice, so it must not have side effects.
 */
#define GET_BITFIELD(v, lo, hi)	\
	(((v) & GENMASK_ULL((hi), (lo))) >> (lo))
31
#define SKX_NUM_CHANNELS	3	/* Channels per memory controller */
#define SKX_NUM_DIMMS		2	/* Max DIMMS per channel */

#define I10NM_NUM_DDR_CHANNELS	2
#define I10NM_NUM_DDR_DIMMS	2

#define I10NM_NUM_HBM_CHANNELS	2
#define I10NM_NUM_HBM_DIMMS	1

/* Larger of the DDR and HBM limits, so one bound covers both kinds. */
#define I10NM_NUM_CHANNELS	MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS)
#define I10NM_NUM_DIMMS		MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS)

/*
 * Larger of the SKX and I10NM limits. These size the chan[] and dimms[]
 * arrays embedded in struct skx_dev.
 */
#define NUM_CHANNELS	MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS)
#define NUM_DIMMS	MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS)
46
/* Bit 15 of register value <r>. */
#define IS_DIMM_PRESENT(r)		GET_BITFIELD(r, 15, 15)
/* Bit <i> of register value <r>. */
#define IS_NVDIMM_PRESENT(r, i)		GET_BITFIELD(r, i, i)
49
/* 4-bit ECC mode field held in bits 62:59 of the value <m>. */
#define MCI_MISC_ECC_MODE(m)	(((m) >> 59) & 0xf)
#define MCI_MISC_ECC_DDRT	8	/* read from DDRT */
52
/*
 * According to Intel Architecture spec vol 3B,
 * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding"
 * memory errors should fit one of these masks:
 *	000f 0000 1mmm cccc (binary)
 *	000f 0010 1mmm cccc (binary)	[RAM used as cache]
 * where:
 *	f = Correction Report Filtering Bit. If 1, subsequent errors
 *	    won't be shown
 *	mmm = error type
 *	cccc = channel
 *
 * The mask below drops the f, mmm and cccc bits, so the masked code can be
 * compared directly against the MCACOD_*_ERR values.
 */
#define MCACOD_MEM_ERR_MASK	0xef80
/*
 * Errors from either the memory of the 1-level memory system or the
 * 2nd level memory (the slow "far" memory) of the 2-level memory system.
 */
#define MCACOD_MEM_CTL_ERR	0x0080
/*
 * Errors from the 1st level memory (the fast "near" memory as cache)
 * of the 2-level memory system.
 */
#define MCACOD_EXT_MEM_ERR	0x0280
76
/*
 * Array bounds used by struct reg_rrl.
 */
/* Max RRL register sets per {,sub-,pseudo-}channel. */
#define NUM_RRL_SET		4
/* Max RRL registers per set. */
#define NUM_RRL_REG		6
/* Max correctable error count registers. */
#define NUM_CECNT_REG		8
83
84 /* Modes of RRL register set. */
85 enum rrl_mode {
86 /* Last read error from patrol scrub. */
87 LRE_SCRUB,
88 /* Last read error from demand. */
89 LRE_DEMAND,
90 /* First read error from patrol scrub. */
91 FRE_SCRUB,
92 /* First read error from demand. */
93 FRE_DEMAND,
94 };
95
96 /* RRL registers per {,sub-,pseudo-}channel. */
97 struct reg_rrl {
98 /* RRL register parts. */
99 int set_num, reg_num;
100 enum rrl_mode modes[NUM_RRL_SET];
101 u32 offsets[NUM_RRL_SET][NUM_RRL_REG];
102 /* RRL register widths in byte per set. */
103 u8 widths[NUM_RRL_REG];
104 /* RRL control bits of the first register per set. */
105 u32 v_mask;
106 u32 uc_mask;
107 u32 over_mask;
108 u32 en_patspr_mask;
109 u32 noover_mask;
110 u32 en_mask;
111
112 /* CORRERRCNT register parts. */
113 int cecnt_num;
114 u32 cecnt_offsets[NUM_CECNT_REG];
115 u8 cecnt_widths[NUM_CECNT_REG];
116 };
117
118 /*
119 * Each cpu socket contains some pci devices that provide global
120 * information, and also some that are local to each of the two
121 * memory controllers on the die.
122 */
123 struct skx_dev {
124 /* {skx,i10nm}_edac */
125 u8 bus[4];
126 int seg;
127 struct pci_dev *sad_all;
128 struct pci_dev *util_all;
129 struct pci_dev *uracu;
130 struct pci_dev *pcu_cr3;
131 u32 mcroute;
132
133 /* imh_edac */
134 /* System-view MMIO base physical addresses. */
135 u64 mmio_base_h_north;
136 u64 mmio_base_h_south;
137 int pkg;
138
139 int num_imc;
140 struct list_head list;
141 struct skx_imc {
142 /* i10nm_edac */
143 struct pci_dev *mdev;
144
145 /* imh_edac */
146 struct device *dev;
147
148 struct mem_ctl_info *mci;
149 void __iomem *mbase;
150 int chan_mmio_sz;
151 int num_channels; /* channels per memory controller */
152 int num_dimms; /* dimms per channel */
153 bool hbm_mc;
154 u8 mc; /* system wide mc# */
155 u8 lmc; /* socket relative mc# */
156 u8 src_id;
157 /*
158 * Some server BIOS may hide certain memory controllers, and the
159 * EDAC driver skips those hidden memory controllers. However, the
160 * ADXL still decodes memory error address using physical memory
161 * controller indices. The mapping table is used to convert the
162 * physical indices (reported by ADXL) to the logical indices
163 * (used the EDAC driver) of present memory controllers during the
164 * error handling process.
165 */
166 u8 mc_mapping;
167 struct skx_channel {
168 struct pci_dev *cdev;
169 struct pci_dev *edev;
170 /*
171 * Two groups of RRL control registers per channel to save default RRL
172 * settings of two {sub-,pseudo-}channels in Linux RRL control mode.
173 */
174 u32 rrl_ctl[2][NUM_RRL_SET];
175 struct skx_dimm {
176 u8 close_pg;
177 u8 bank_xor_enable;
178 u8 fine_grain_bank;
179 u8 rowbits;
180 u8 colbits;
181 } dimms[NUM_DIMMS];
182 } chan[NUM_CHANNELS];
183 } imc[];
184 };
185
/*
 * Wrapper holding a pointer to one memory controller (struct skx_imc).
 * NOTE(review): presumably used as the per-mci private data -- confirm
 * against the code that allocates the mem_ctl_info.
 */
struct skx_pvt {
	struct skx_imc *imc;
};
189
/*
 * Platform identifier stored in res_config.type.
 * NOTE(review): the names look like CPU generation code names; the exact
 * platforms they stand for are not stated in this header.
 */
enum type {
	SKX,
	I10NM,
	SPR,
	GNR,
	DMR,
};
197
/*
 * Indices of the address components (socket, memory controller, channel,
 * DIMM, chip select). The INDEX_NM_* group starts at INDEX_NM_FIRST and is
 * also used as bit positions by the BIT_NM_* macros; INDEX_MAX is the total
 * number of indices.
 * NOTE(review): "NM" presumably stands for near memory -- confirm.
 */
enum {
	INDEX_SOCKET,
	INDEX_MEMCTRL,
	INDEX_CHANNEL,
	INDEX_DIMM,
	INDEX_CS,
	INDEX_NM_FIRST,
	INDEX_NM_MEMCTRL = INDEX_NM_FIRST,
	INDEX_NM_CHANNEL,
	INDEX_NM_DIMM,
	INDEX_NM_CS,
	INDEX_MAX
};
211
/*
 * Where a reported error came from.
 * NOTE(review): 1LM / 2LM_NM / 2LM_FM presumably mean 1-level memory,
 * 2-level near memory and 2-level far memory respectively -- confirm.
 */
enum error_source {
	ERR_SRC_1LM,
	ERR_SRC_2LM_NM,
	ERR_SRC_2LM_FM,
	ERR_SRC_NOT_MEMORY,
};
218
/* Single-bit 64-bit masks, one per INDEX_NM_* value. */
#define BIT_NM_MEMCTRL	BIT_ULL(INDEX_NM_MEMCTRL)
#define BIT_NM_CHANNEL	BIT_ULL(INDEX_NM_CHANNEL)
#define BIT_NM_DIMM	BIT_ULL(INDEX_NM_DIMM)
#define BIT_NM_CS	BIT_ULL(INDEX_NM_CS)
223
224 struct decoded_addr {
225 struct mce *mce;
226 struct skx_dev *dev;
227 u64 addr;
228 int socket;
229 int imc;
230 int channel;
231 u64 chan_addr;
232 int sktways;
233 int chanways;
234 int dimm;
235 int cs;
236 int rank;
237 int channel_rank;
238 u64 rank_address;
239 int row;
240 int column;
241 int bank_address;
242 int bank_group;
243 bool decoded_by_adxl;
244 };
245
246 struct pci_bdf {
247 u32 bus : 8;
248 u32 dev : 5;
249 u32 fun : 3;
250 };
251
252 struct res_config {
253 enum type type;
254 /* DDR memory controllers per socket */
255 int ddr_imc_num;
256 /* DDR channels per DDR memory controller */
257 int ddr_chan_num;
258 /* DDR DIMMs per DDR memory channel */
259 int ddr_dimm_num;
260 /* Per DDR channel memory-mapped I/O size */
261 int ddr_chan_mmio_sz;
262 /* HBM memory controllers per socket */
263 int hbm_imc_num;
264 /* HBM channels per HBM memory controller */
265 int hbm_chan_num;
266 /* HBM DIMMs per HBM memory channel */
267 int hbm_dimm_num;
268 /* Per HBM channel memory-mapped I/O size */
269 int hbm_chan_mmio_sz;
270 bool support_ddr5;
271 /* RRL register sets per DDR channel */
272 struct reg_rrl *reg_rrl_ddr;
273 /* RRL register sets per HBM channel */
274 struct reg_rrl *reg_rrl_hbm[2];
275 union {
276 /* {skx,i10nm}_edac */
277 struct {
278 /* Configuration agent device ID */
279 unsigned int decs_did;
280 /* Default bus number configuration register offset */
281 int busno_cfg_offset;
282 struct pci_bdf sad_all_bdf;
283 struct pci_bdf pcu_cr3_bdf;
284 struct pci_bdf util_all_bdf;
285 struct pci_bdf uracu_bdf;
286 struct pci_bdf ddr_mdev_bdf;
287 struct pci_bdf hbm_mdev_bdf;
288 int sad_all_offset;
289 };
290 /* imh_edac */
291 struct {
292 /* MMIO base physical address in local package view */
293 u64 mmio_base_l_north;
294 u64 mmio_base_l_south;
295 u64 ddr_imc_base;
296 u64 ddr_reg_mcmtr_offset;
297 u8 ddr_reg_mcmtr_width;
298 u64 ddr_reg_dimmmtr_offset;
299 u8 ddr_reg_dimmmtr_width;
300 u64 ubox_base;
301 u32 ubox_size;
302 u32 ubox_reg_mmio_base_offset;
303 u8 ubox_reg_mmio_base_width;
304 u32 ubox_reg_socket_id_offset;
305 u8 ubox_reg_socket_id_width;
306 u64 pcu_base;
307 u32 pcu_size;
308 u32 pcu_reg_capid3_offset;
309 u8 pcu_reg_capid3_width;
310 u64 sca_base;
311 u32 sca_size;
312 u32 sca_reg_tolm_offset;
313 u8 sca_reg_tolm_width;
314 u32 sca_reg_tohm_offset;
315 u8 sca_reg_tohm_width;
316 u64 ha_base;
317 u32 ha_size;
318 u32 ha_reg_mode_offset;
319 u8 ha_reg_mode_width;
320 };
321 };
322 };
323
324 typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
325 struct res_config *cfg);
326 typedef bool (*skx_decode_f)(struct decoded_addr *res);
327 typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err);
328
329 int skx_adxl_get(void);
330 void skx_adxl_put(void);
331 void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
332 void skx_set_mem_cfg(bool mem_cfg_2lm);
333 void skx_set_res_cfg(struct res_config *cfg);
334 void skx_init_mc_mapping(struct skx_dev *d);
335 void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc);
336
337 int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
338
339 int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list);
340
341 struct list_head *skx_get_edac_list(void);
342
343 int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm);
344 void skx_set_hi_lo(u64 tolm, u64 tohm);
345
346 int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
347 struct skx_imc *imc, int chan, int dimmno,
348 struct res_config *cfg);
349
350 int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
351 int chan, int dimmno, const char *mod_str);
352
353 int skx_register_mci(struct skx_imc *imc, struct device *dev, const char *dev_name,
354 const char *ctl_name, const char *mod_str,
355 get_dimm_config_f get_dimm_config,
356 struct res_config *cfg);
357
358 int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
359 void *data);
360
361 void skx_remove(void);
362
/*
 * Debug hooks. With CONFIG_EDAC_DEBUG the functions are only declared here
 * and defined elsewhere; without it they collapse to empty inline stubs so
 * callers need no #ifdef of their own.
 */
#ifdef CONFIG_EDAC_DEBUG
void skx_setup_debug(const char *name);
void skx_teardown_debug(void);
#else
static inline void skx_setup_debug(const char *name) {}
static inline void skx_teardown_debug(void) {}
#endif
370
371 #endif /* _SKX_COMM_EDAC_H */
372