1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2022 Intel Corporation
4 */
5
6 #include "xe_gt_topology.h"
7
8 #include <generated/xe_wa_oob.h>
9 #include <linux/bitmap.h>
10 #include <linux/compiler.h>
11
12 #include "regs/xe_gt_regs.h"
13 #include "xe_assert.h"
14 #include "xe_gt.h"
15 #include "xe_mmio.h"
16 #include "xe_wa.h"
17
18 static void
load_dss_mask(struct xe_gt * gt,xe_dss_mask_t mask,int numregs,...)19 load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
20 {
21 va_list argp;
22 u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {};
23 int i;
24
25 if (drm_WARN_ON(>_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS))
26 numregs = XE_MAX_DSS_FUSE_REGS;
27
28 va_start(argp, numregs);
29 for (i = 0; i < numregs; i++)
30 fuse_val[i] = xe_mmio_read32(>->mmio, va_arg(argp, struct xe_reg));
31 va_end(argp);
32
33 bitmap_from_arr32(mask, fuse_val, numregs * 32);
34 }
35
36 static void
load_eu_mask(struct xe_gt * gt,xe_eu_mask_t mask,enum xe_gt_eu_type * eu_type)37 load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type)
38 {
39 struct xe_device *xe = gt_to_xe(gt);
40 u32 reg_val = xe_mmio_read32(>->mmio, XELP_EU_ENABLE);
41 u32 val = 0;
42 int i;
43
44 BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1);
45
46 /*
47 * Pre-Xe_HP platforms inverted the bit meaning (disable instead
48 * of enable).
49 */
50 if (GRAPHICS_VERx100(xe) < 1250)
51 reg_val = ~reg_val & XELP_EU_MASK;
52
53 if (GRAPHICS_VERx100(xe) == 1260 || GRAPHICS_VER(xe) >= 20) {
54 /* SIMD16 EUs, one bit == one EU */
55 *eu_type = XE_GT_EU_TYPE_SIMD16;
56 val = reg_val;
57 } else {
58 /* SIMD8 EUs, one bit == 2 EU */
59 *eu_type = XE_GT_EU_TYPE_SIMD8;
60 for (i = 0; i < fls(reg_val); i++)
61 if (reg_val & BIT(i))
62 val |= 0x3 << 2 * i;
63 }
64
65 bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
66 }
67
68 /**
69 * gen_l3_mask_from_pattern - Replicate a bit pattern according to a mask
70 *
71 * It is used to compute the L3 bank masks in a generic format on
72 * various platforms where the internal representation of L3 node
73 * and masks from registers are different.
74 *
75 * @xe: device
76 * @dst: destination
77 * @pattern: pattern to replicate
78 * @patternbits: size of the pattern, in bits
79 * @mask: mask describing where to replicate the pattern
80 *
81 * Example 1:
82 * ----------
83 * @pattern = 0b1111
84 * └┬─┘
85 * @patternbits = 4 (bits)
86 * @mask = 0b0101
87 * ││││
88 * │││└────────────────── 0b1111 (=1×0b1111)
89 * ││└──────────── 0b0000 │ (=0×0b1111)
90 * │└────── 0b1111 │ │ (=1×0b1111)
91 * └ 0b0000 │ │ │ (=0×0b1111)
92 * │ │ │ │
93 * @dst = 0b0000 0b1111 0b0000 0b1111
94 *
95 * Example 2:
96 * ----------
97 * @pattern = 0b11111111
98 * └┬─────┘
99 * @patternbits = 8 (bits)
100 * @mask = 0b10
101 * ││
102 * ││
103 * ││
104 * │└────────── 0b00000000 (=0×0b11111111)
105 * └ 0b11111111 │ (=1×0b11111111)
106 * │ │
107 * @dst = 0b11111111 0b00000000
108 */
109 static void
gen_l3_mask_from_pattern(struct xe_device * xe,xe_l3_bank_mask_t dst,xe_l3_bank_mask_t pattern,int patternbits,unsigned long mask)110 gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst,
111 xe_l3_bank_mask_t pattern, int patternbits,
112 unsigned long mask)
113 {
114 unsigned long bit;
115
116 xe_assert(xe, find_last_bit(pattern, XE_MAX_L3_BANK_MASK_BITS) < patternbits ||
117 bitmap_empty(pattern, XE_MAX_L3_BANK_MASK_BITS));
118 xe_assert(xe, !mask || patternbits * (__fls(mask) + 1) <= XE_MAX_L3_BANK_MASK_BITS);
119 for_each_set_bit(bit, &mask, 32) {
120 xe_l3_bank_mask_t shifted_pattern = {};
121
122 bitmap_shift_left(shifted_pattern, pattern, bit * patternbits,
123 XE_MAX_L3_BANK_MASK_BITS);
124 bitmap_or(dst, dst, shifted_pattern, XE_MAX_L3_BANK_MASK_BITS);
125 }
126 }
127
128 static void
load_l3_bank_mask(struct xe_gt * gt,xe_l3_bank_mask_t l3_bank_mask)129 load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
130 {
131 struct xe_device *xe = gt_to_xe(gt);
132 struct xe_mmio *mmio = >->mmio;
133 u32 fuse3 = xe_mmio_read32(mmio, MIRROR_FUSE3);
134
135 /*
136 * PTL platforms with media version 30.00 do not provide proper values
137 * for the media GT's L3 bank registers. Skip the readout since we
138 * don't have any way to obtain real values.
139 *
140 * This may get re-described as an official workaround in the future,
141 * but there's no tracking number assigned yet so we use a custom
142 * OOB workaround descriptor.
143 */
144 if (XE_WA(gt, no_media_l3))
145 return;
146
147 if (GRAPHICS_VER(xe) >= 30) {
148 xe_l3_bank_mask_t per_node = {};
149 u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
150 u32 mirror_l3bank_enable = xe_mmio_read32(mmio, MIRROR_L3BANK_ENABLE);
151 u32 bank_val = REG_FIELD_GET(XE3_L3BANK_ENABLE, mirror_l3bank_enable);
152
153 bitmap_from_arr32(per_node, &bank_val, 32);
154 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 32,
155 meml3_en);
156 } else if (GRAPHICS_VER(xe) >= 20) {
157 xe_l3_bank_mask_t per_node = {};
158 u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
159 u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3);
160
161 bitmap_from_arr32(per_node, &bank_val, 32);
162 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
163 meml3_en);
164 } else if (GRAPHICS_VERx100(xe) >= 1270) {
165 xe_l3_bank_mask_t per_node = {};
166 xe_l3_bank_mask_t per_mask_bit = {};
167 u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
168 u32 fuse4 = xe_mmio_read32(mmio, XEHP_FUSE4);
169 u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4);
170
171 bitmap_set_value8(per_mask_bit, 0x3, 0);
172 gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 2, bank_val);
173 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
174 meml3_en);
175 } else if (xe->info.platform == XE_PVC) {
176 xe_l3_bank_mask_t per_node = {};
177 xe_l3_bank_mask_t per_mask_bit = {};
178 u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
179 u32 bank_val = REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK, fuse3);
180
181 bitmap_set_value8(per_mask_bit, 0xf, 0);
182 gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 4,
183 bank_val);
184 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 16,
185 meml3_en);
186 } else if (xe->info.platform == XE_DG2) {
187 xe_l3_bank_mask_t per_node = {};
188 u32 mask = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
189
190 bitmap_set_value8(per_node, 0xff, 0);
191 gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 8, mask);
192 } else {
193 /* 1:1 register bit to mask bit (inverted register bits) */
194 u32 mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK, ~fuse3);
195
196 bitmap_from_arr32(l3_bank_mask, &mask, 32);
197 }
198 }
199
/*
 * Report how many geometry and compute DSS fuse registers exist on this
 * platform generation.
 */
static void
get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
{
	/* Defaults match the oldest platforms: one geometry register only. */
	int geometry = 1;
	int compute = 0;

	if (GRAPHICS_VER(xe) > 20) {
		geometry = 3;
		compute = 3;
	} else if (GRAPHICS_VERx100(xe) == 1260) {
		/* Xe_HPC has no geometry pipeline fuses. */
		geometry = 0;
		compute = 2;
	} else if (GRAPHICS_VERx100(xe) >= 1250) {
		geometry = 1;
		compute = 1;
	}

	*geometry_regs = geometry;
	*compute_regs = compute;
}
217
218 void
xe_gt_topology_init(struct xe_gt * gt)219 xe_gt_topology_init(struct xe_gt *gt)
220 {
221 struct xe_device *xe = gt_to_xe(gt);
222 struct drm_printer p;
223 int num_geometry_regs, num_compute_regs;
224
225 get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs);
226
227 /*
228 * Register counts returned shouldn't exceed the number of registers
229 * passed as parameters below.
230 */
231 drm_WARN_ON(&xe->drm, num_geometry_regs > 3);
232 drm_WARN_ON(&xe->drm, num_compute_regs > 3);
233
234 load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
235 num_geometry_regs,
236 XELP_GT_GEOMETRY_DSS_ENABLE,
237 XE2_GT_GEOMETRY_DSS_1,
238 XE2_GT_GEOMETRY_DSS_2);
239 load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
240 XEHP_GT_COMPUTE_DSS_ENABLE,
241 XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
242 XE2_GT_COMPUTE_DSS_2);
243 load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, >->fuse_topo.eu_type);
244 load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);
245
246 p = drm_dbg_printer(>_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");
247
248 xe_gt_topology_dump(gt, &p);
249 }
250
eu_type_to_str(enum xe_gt_eu_type eu_type)251 static const char *eu_type_to_str(enum xe_gt_eu_type eu_type)
252 {
253 switch (eu_type) {
254 case XE_GT_EU_TYPE_SIMD16:
255 return "simd16";
256 case XE_GT_EU_TYPE_SIMD8:
257 return "simd8";
258 }
259
260 return NULL;
261 }
262
263 void
xe_gt_topology_dump(struct xe_gt * gt,struct drm_printer * p)264 xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
265 {
266 drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS,
267 gt->fuse_topo.g_dss_mask);
268 drm_printf(p, "dss mask (compute): %*pb\n", XE_MAX_DSS_FUSE_BITS,
269 gt->fuse_topo.c_dss_mask);
270
271 drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS,
272 gt->fuse_topo.eu_mask_per_dss);
273 drm_printf(p, "EU type: %s\n",
274 eu_type_to_str(gt->fuse_topo.eu_type));
275
276 drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
277 gt->fuse_topo.l3_bank_mask);
278 }
279
280 /*
281 * Used to obtain the index of the first DSS. Can start searching from the
282 * beginning of a specific dss group (e.g., gslice, cslice, etc.) if
283 * groupsize and groupnum are non-zero.
284 */
285 unsigned int
xe_dss_mask_group_ffs(const xe_dss_mask_t mask,int groupsize,int groupnum)286 xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum)
287 {
288 return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
289 }
290
xe_dss_mask_empty(const xe_dss_mask_t mask)291 bool xe_dss_mask_empty(const xe_dss_mask_t mask)
292 {
293 return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS);
294 }
295
296 /**
297 * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant
298 * @gt: GT to check
299 * @quad: Which quadrant of the DSS space to check
300 *
301 * Since Xe_HP platforms can have up to four CCS engines, those engines
302 * are each logically associated with a quarter of the possible DSS. If there
303 * are no DSS present in one of the four quadrants of the DSS space, the
304 * corresponding CCS engine is also not available for use.
305 *
306 * Returns false if all DSS in a quadrant of the GT are fused off, else true.
307 */
xe_gt_topology_has_dss_in_quadrant(struct xe_gt * gt,int quad)308 bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad)
309 {
310 struct xe_device *xe = gt_to_xe(gt);
311 xe_dss_mask_t all_dss;
312 int g_dss_regs, c_dss_regs, dss_per_quad, quad_first;
313
314 bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
315 XE_MAX_DSS_FUSE_BITS);
316
317 get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs);
318 dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4;
319
320 quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad);
321
322 return quad_first < (quad + 1) * dss_per_quad;
323 }
324
xe_gt_has_geometry_dss(struct xe_gt * gt,unsigned int dss)325 bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss)
326 {
327 return test_bit(dss, gt->fuse_topo.g_dss_mask);
328 }
329
xe_gt_has_compute_dss(struct xe_gt * gt,unsigned int dss)330 bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss)
331 {
332 return test_bit(dss, gt->fuse_topo.c_dss_mask);
333 }
334