// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */
5
6 #include "xe_gt_topology.h"
7
8 #include <generated/xe_wa_oob.h>
9 #include <linux/bitmap.h>
10 #include <linux/compiler.h>
11
12 #include "regs/xe_gt_regs.h"
13 #include "xe_assert.h"
14 #include "xe_gt.h"
15 #include "xe_gt_printk.h"
16 #include "xe_mmio.h"
17 #include "xe_wa.h"
18
/*
 * Read a DSS fuse mask from a series of 32-bit registers and assemble the
 * bits into @mask. @numregs consecutive registers from @regs are read; each
 * contributes 32 bits to the bitmap, lowest register first.
 */
static void load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs,
			  const struct xe_reg regs[])
{
	u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {};
	int i;

	/* Callers must not ask for more registers than the scratch array holds */
	xe_gt_assert(gt, numregs <= ARRAY_SIZE(fuse_val));

	for (i = 0; i < numregs; i++)
		fuse_val[i] = xe_mmio_read32(&gt->mmio, regs[i]);

	bitmap_from_arr32(mask, fuse_val, numregs * 32);
}
32
33 static void
load_eu_mask(struct xe_gt * gt,xe_eu_mask_t mask,enum xe_gt_eu_type * eu_type)34 load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type)
35 {
36 struct xe_device *xe = gt_to_xe(gt);
37 u32 reg_val = xe_mmio_read32(>->mmio, XELP_EU_ENABLE);
38 u32 val = 0;
39 int i;
40
41 BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1);
42
43 /*
44 * Pre-Xe_HP platforms inverted the bit meaning (disable instead
45 * of enable).
46 */
47 if (GRAPHICS_VERx100(xe) < 1250)
48 reg_val = ~reg_val & XELP_EU_MASK;
49
50 if (GRAPHICS_VERx100(xe) == 1260 || GRAPHICS_VER(xe) >= 20) {
51 /* SIMD16 EUs, one bit == one EU */
52 *eu_type = XE_GT_EU_TYPE_SIMD16;
53 val = reg_val;
54 } else {
55 /* SIMD8 EUs, one bit == 2 EU */
56 *eu_type = XE_GT_EU_TYPE_SIMD8;
57 for (i = 0; i < fls(reg_val); i++)
58 if (reg_val & BIT(i))
59 val |= 0x3 << 2 * i;
60 }
61
62 bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
63 }
64
65 /**
66 * gen_l3_mask_from_pattern - Replicate a bit pattern according to a mask
67 *
68 * It is used to compute the L3 bank masks in a generic format on
69 * various platforms where the internal representation of L3 node
70 * and masks from registers are different.
71 *
72 * @xe: device
73 * @dst: destination
74 * @pattern: pattern to replicate
75 * @patternbits: size of the pattern, in bits
76 * @mask: mask describing where to replicate the pattern
77 *
78 * Example 1:
79 * ----------
80 * @pattern = 0b1111
81 * └┬─┘
82 * @patternbits = 4 (bits)
83 * @mask = 0b0101
84 * ││││
85 * │││└────────────────── 0b1111 (=1×0b1111)
86 * ││└──────────── 0b0000 │ (=0×0b1111)
87 * │└────── 0b1111 │ │ (=1×0b1111)
88 * └ 0b0000 │ │ │ (=0×0b1111)
89 * │ │ │ │
90 * @dst = 0b0000 0b1111 0b0000 0b1111
91 *
92 * Example 2:
93 * ----------
94 * @pattern = 0b11111111
95 * └┬─────┘
96 * @patternbits = 8 (bits)
97 * @mask = 0b10
98 * ││
99 * ││
100 * ││
101 * │└────────── 0b00000000 (=0×0b11111111)
102 * └ 0b11111111 │ (=1×0b11111111)
103 * │ │
104 * @dst = 0b11111111 0b00000000
105 */
106 static void
gen_l3_mask_from_pattern(struct xe_device * xe,xe_l3_bank_mask_t dst,xe_l3_bank_mask_t pattern,int patternbits,unsigned long mask)107 gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst,
108 xe_l3_bank_mask_t pattern, int patternbits,
109 unsigned long mask)
110 {
111 unsigned long bit;
112
113 xe_assert(xe, find_last_bit(pattern, XE_MAX_L3_BANK_MASK_BITS) < patternbits ||
114 bitmap_empty(pattern, XE_MAX_L3_BANK_MASK_BITS));
115 xe_assert(xe, !mask || patternbits * (__fls(mask) + 1) <= XE_MAX_L3_BANK_MASK_BITS);
116 for_each_set_bit(bit, &mask, 32) {
117 xe_l3_bank_mask_t shifted_pattern = {};
118
119 bitmap_shift_left(shifted_pattern, pattern, bit * patternbits,
120 XE_MAX_L3_BANK_MASK_BITS);
121 bitmap_or(dst, dst, shifted_pattern, XE_MAX_L3_BANK_MASK_BITS);
122 }
123 }
124
125 static void
load_l3_bank_mask(struct xe_gt * gt,xe_l3_bank_mask_t l3_bank_mask)126 load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
127 {
128 struct xe_device *xe = gt_to_xe(gt);
129 struct xe_mmio *mmio = >->mmio;
130 u32 fuse3 = xe_mmio_read32(mmio, MIRROR_FUSE3);
131
132 /*
133 * PTL platforms with media version 30.00 do not provide proper values
134 * for the media GT's L3 bank registers. Skip the readout since we
135 * don't have any way to obtain real values.
136 *
137 * This may get re-described as an official workaround in the future,
138 * but there's no tracking number assigned yet so we use a custom
139 * OOB workaround descriptor.
140 */
141 if (XE_WA(gt, no_media_l3))
142 return;
143
144 if (GRAPHICS_VER(xe) >= 30) {
145 xe_l3_bank_mask_t per_node = {};
146 u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
147 u32 mirror_l3bank_enable = xe_mmio_read32(mmio, MIRROR_L3BANK_ENABLE);
148 u32 bank_val = REG_FIELD_GET(XE3_L3BANK_ENABLE, mirror_l3bank_enable);
149
150 bitmap_from_arr32(per_node, &bank_val, 32);
151 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 32,
152 meml3_en);
153 } else if (GRAPHICS_VER(xe) >= 20) {
154 xe_l3_bank_mask_t per_node = {};
155 u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
156 u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3);
157
158 bitmap_from_arr32(per_node, &bank_val, 32);
159 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
160 meml3_en);
161 } else if (GRAPHICS_VERx100(xe) >= 1270) {
162 xe_l3_bank_mask_t per_node = {};
163 xe_l3_bank_mask_t per_mask_bit = {};
164 u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
165 u32 fuse4 = xe_mmio_read32(mmio, XEHP_FUSE4);
166 u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4);
167
168 bitmap_set_value8(per_mask_bit, 0x3, 0);
169 gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 2, bank_val);
170 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
171 meml3_en);
172 } else if (xe->info.platform == XE_PVC) {
173 xe_l3_bank_mask_t per_node = {};
174 xe_l3_bank_mask_t per_mask_bit = {};
175 u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
176 u32 bank_val = REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK, fuse3);
177
178 bitmap_set_value8(per_mask_bit, 0xf, 0);
179 gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 4,
180 bank_val);
181 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 16,
182 meml3_en);
183 } else if (xe->info.platform == XE_DG2) {
184 xe_l3_bank_mask_t per_node = {};
185 u32 mask = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
186
187 bitmap_set_value8(per_node, 0xff, 0);
188 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 8, mask);
189 } else {
190 /* 1:1 register bit to mask bit (inverted register bits) */
191 u32 mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK, ~fuse3);
192
193 bitmap_from_arr32(l3_bank_mask, &mask, 32);
194 }
195 }
196
/*
 * Determine how many geometry and compute DSS fuse registers this platform
 * provides. The counts bound the reads done by load_dss_mask().
 */
static void
get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
{
	if (GRAPHICS_VER(xe) > 20) {
		*geometry_regs = 3;
		*compute_regs = 3;
	} else if (GRAPHICS_VERx100(xe) == 1260) {
		/* Xe_HPC: compute-only, no geometry DSS */
		*geometry_regs = 0;
		*compute_regs = 2;
	} else if (GRAPHICS_VERx100(xe) >= 1250) {
		*geometry_regs = 1;
		*compute_regs = 1;
	} else {
		/* Pre-Xe_HP: geometry only */
		*geometry_regs = 1;
		*compute_regs = 0;
	}
}
214
215 void
xe_gt_topology_init(struct xe_gt * gt)216 xe_gt_topology_init(struct xe_gt *gt)
217 {
218 static const struct xe_reg geometry_regs[] = {
219 XELP_GT_GEOMETRY_DSS_ENABLE,
220 XE2_GT_GEOMETRY_DSS_1,
221 XE2_GT_GEOMETRY_DSS_2,
222 };
223 static const struct xe_reg compute_regs[] = {
224 XEHP_GT_COMPUTE_DSS_ENABLE,
225 XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
226 XE2_GT_COMPUTE_DSS_2,
227 };
228 int num_geometry_regs, num_compute_regs;
229 struct xe_device *xe = gt_to_xe(gt);
230 struct drm_printer p;
231
232 get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs);
233
234 /*
235 * Register counts returned shouldn't exceed the number of registers
236 * passed as parameters below.
237 */
238 xe_gt_assert(gt, num_geometry_regs <= ARRAY_SIZE(geometry_regs));
239 xe_gt_assert(gt, num_compute_regs <= ARRAY_SIZE(compute_regs));
240
241 load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
242 num_geometry_regs, geometry_regs);
243 load_dss_mask(gt, gt->fuse_topo.c_dss_mask,
244 num_compute_regs, compute_regs);
245
246 load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, >->fuse_topo.eu_type);
247 load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);
248
249 p = xe_gt_dbg_printer(gt);
250 xe_gt_topology_dump(gt, &p);
251 }
252
eu_type_to_str(enum xe_gt_eu_type eu_type)253 static const char *eu_type_to_str(enum xe_gt_eu_type eu_type)
254 {
255 switch (eu_type) {
256 case XE_GT_EU_TYPE_SIMD16:
257 return "simd16";
258 case XE_GT_EU_TYPE_SIMD8:
259 return "simd8";
260 }
261
262 return NULL;
263 }
264
265 void
xe_gt_topology_dump(struct xe_gt * gt,struct drm_printer * p)266 xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
267 {
268 drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS,
269 gt->fuse_topo.g_dss_mask);
270 drm_printf(p, "dss mask (compute): %*pb\n", XE_MAX_DSS_FUSE_BITS,
271 gt->fuse_topo.c_dss_mask);
272
273 drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS,
274 gt->fuse_topo.eu_mask_per_dss);
275 drm_printf(p, "EU type: %s\n",
276 eu_type_to_str(gt->fuse_topo.eu_type));
277
278 drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
279 gt->fuse_topo.l3_bank_mask);
280 }
281
282 /*
283 * Used to obtain the index of the first DSS. Can start searching from the
284 * beginning of a specific dss group (e.g., gslice, cslice, etc.) if
285 * groupsize and groupnum are non-zero.
286 */
287 unsigned int
xe_dss_mask_group_ffs(const xe_dss_mask_t mask,int groupsize,int groupnum)288 xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum)
289 {
290 return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
291 }
292
293 /**
294 * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant
295 * @gt: GT to check
296 * @quad: Which quadrant of the DSS space to check
297 *
298 * Since Xe_HP platforms can have up to four CCS engines, those engines
299 * are each logically associated with a quarter of the possible DSS. If there
300 * are no DSS present in one of the four quadrants of the DSS space, the
301 * corresponding CCS engine is also not available for use.
302 *
303 * Returns false if all DSS in a quadrant of the GT are fused off, else true.
304 */
xe_gt_topology_has_dss_in_quadrant(struct xe_gt * gt,int quad)305 bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad)
306 {
307 struct xe_device *xe = gt_to_xe(gt);
308 xe_dss_mask_t all_dss;
309 int g_dss_regs, c_dss_regs, dss_per_quad, quad_first;
310
311 bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
312 XE_MAX_DSS_FUSE_BITS);
313
314 get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs);
315 dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4;
316
317 quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad);
318
319 return quad_first < (quad + 1) * dss_per_quad;
320 }
321
xe_gt_has_geometry_dss(struct xe_gt * gt,unsigned int dss)322 bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss)
323 {
324 return test_bit(dss, gt->fuse_topo.g_dss_mask);
325 }
326
xe_gt_has_compute_dss(struct xe_gt * gt,unsigned int dss)327 bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss)
328 {
329 return test_bit(dss, gt->fuse_topo.c_dss_mask);
330 }
331