// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_gt_topology.h"

#include <generated/xe_wa_oob.h>
#include <linux/bitmap.h>
#include <linux/compiler.h>

#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_gt.h"
#include "xe_mmio.h"
#include "xe_wa.h"

/*
 * Read @numregs consecutive 32-bit fuse registers (passed as variadic
 * 'struct xe_reg' arguments) and pack their raw contents into the DSS
 * bitmap @mask, first register in the lowest 32 bits.
 */
static void
load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
{
	va_list argp;
	u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {};
	int i;

	/* Clamp rather than overrun fuse_val[] if a caller passes too many. */
	if (drm_WARN_ON(&gt_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS))
		numregs = XE_MAX_DSS_FUSE_REGS;

	va_start(argp, numregs);
	for (i = 0; i < numregs; i++)
		fuse_val[i] = xe_mmio_read32(&gt->mmio, va_arg(argp, struct xe_reg));
	va_end(argp);

	bitmap_from_arr32(mask, fuse_val, numregs * 32);
}

/*
 * Decode the EU fuse register into a per-DSS EU bitmap (@mask) and report
 * via @eu_type whether the fuse bits describe SIMD8 or SIMD16 EUs.
 */
static void
load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 reg_val = xe_mmio_read32(&gt->mmio, XELP_EU_ENABLE);
	u32 val = 0;
	int i;

	/* Single-register decode below only holds while the mask fits in 32 bits. */
	BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1);

	/*
	 * Pre-Xe_HP platforms inverted the bit meaning (disable instead
	 * of enable).
	 */
	if (GRAPHICS_VERx100(xe) < 1250)
		reg_val = ~reg_val & XELP_EU_MASK;

	if (GRAPHICS_VERx100(xe) == 1260 || GRAPHICS_VER(xe) >= 20) {
		/* SIMD16 EUs, one bit == one EU */
		*eu_type = XE_GT_EU_TYPE_SIMD16;
		val = reg_val;
	} else {
		/* SIMD8 EUs, one bit == 2 EU */
		*eu_type = XE_GT_EU_TYPE_SIMD8;
		for (i = 0; i < fls(reg_val); i++)
			if (reg_val & BIT(i))
				val |= 0x3 << 2 * i;
	}

	bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
}

/**
 * gen_l3_mask_from_pattern - Replicate a bit pattern according to a mask
 *
 * It is used to compute the L3 bank masks in a generic format on
 * various platforms where the internal representation of L3 node
 * and masks from registers are different.
 *
 * @xe: device
 * @dst: destination
 * @pattern: pattern to replicate
 * @patternbits: size of the pattern, in bits
 * @mask: mask describing where to replicate the pattern
 *
 * Example 1:
 * ----------
 * @pattern =    0b1111
 *                 └┬─┘
 * @patternbits =    4 (bits)
 * @mask = 0b0101
 *           ││││
 *           │││└────────────────── 0b1111 (=1×0b1111)
 *           ││└──────────── 0b0000    │   (=0×0b1111)
 *           │└────── 0b1111   │       │   (=1×0b1111)
 *           └ 0b0000    │     │       │   (=0×0b1111)
 *                  │    │     │       │
 * @dst =      0b0000  0b1111 0b0000 0b1111
 *
 * Example 2:
 * ----------
 * @pattern =    0b11111111
 *                 └┬─────┘
 * @patternbits =    8 (bits)
 * @mask = 0b10
 *           ││
 *           ││
 *           ││
 *           │└────────── 0b00000000 (=0×0b11111111)
 *           └ 0b11111111      │     (=1×0b11111111)
 *                  │          │
 * @dst =      0b11111111 0b00000000
 */
static void
gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst,
			 xe_l3_bank_mask_t pattern, int patternbits,
			 unsigned long mask)
{
	unsigned long bit;

	/* The pattern must actually fit within @patternbits ... */
	xe_assert(xe, find_last_bit(pattern, XE_MAX_L3_BANK_MASK_BITS) < patternbits ||
		  bitmap_empty(pattern, XE_MAX_L3_BANK_MASK_BITS));
	/* ... and the highest replica must still fit in the destination bitmap. */
	xe_assert(xe, !mask || patternbits * (__fls(mask) + 1) <= XE_MAX_L3_BANK_MASK_BITS);
	for (each_set_bit_placeholder) {} /* see real loop below */
	for_each_set_bit(bit, &mask, 32) {
		xe_l3_bank_mask_t shifted_pattern = {};

		/* OR a copy of @pattern, shifted into this replica's slot, into @dst. */
		bitmap_shift_left(shifted_pattern, pattern, bit * patternbits,
				  XE_MAX_L3_BANK_MASK_BITS);
		bitmap_or(dst, dst, shifted_pattern, XE_MAX_L3_BANK_MASK_BITS);
	}
}

/*
 * Decode the platform-specific L3 fuse registers into a generic L3 bank
 * bitmap. Each platform family encodes banks differently, hence the
 * per-version branches below.
 */
static void
load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_mmio *mmio = &gt->mmio;
	u32 fuse3 = xe_mmio_read32(mmio, MIRROR_FUSE3);

	/*
	 * PTL platforms with media version 30.00 do not provide proper values
	 * for the media GT's L3 bank registers. Skip the readout since we
	 * don't have any way to obtain real values.
	 *
	 * This may get re-described as an official workaround in the future,
	 * but there's no tracking number assigned yet so we use a custom
	 * OOB workaround descriptor.
	 */
	if (XE_WA(gt, no_media_l3))
		return;

	if (GRAPHICS_VER(xe) >= 30) {
		xe_l3_bank_mask_t per_node = {};
		u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
		/* Xe3: bank enables live in a dedicated register, not FUSE3. */
		u32 mirror_l3bank_enable = xe_mmio_read32(mmio, MIRROR_L3BANK_ENABLE);
		u32 bank_val = REG_FIELD_GET(XE3_L3BANK_ENABLE, mirror_l3bank_enable);

		bitmap_from_arr32(per_node, &bank_val, 32);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 32,
					 meml3_en);
	} else if (GRAPHICS_VER(xe) >= 20) {
		xe_l3_bank_mask_t per_node = {};
		u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
		u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3);

		bitmap_from_arr32(per_node, &bank_val, 32);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
					 meml3_en);
	} else if (GRAPHICS_VERx100(xe) >= 1270) {
		xe_l3_bank_mask_t per_node = {};
		xe_l3_bank_mask_t per_mask_bit = {};
		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
		u32 fuse4 = xe_mmio_read32(mmio, XEHP_FUSE4);
		u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4);

		/* Each fuse bit represents a pair of banks (0x3). */
		bitmap_set_value8(per_mask_bit, 0x3, 0);
		gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 2, bank_val);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
					 meml3_en);
	} else if (xe->info.platform == XE_PVC) {
		xe_l3_bank_mask_t per_node = {};
		xe_l3_bank_mask_t per_mask_bit = {};
		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
		u32 bank_val = REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK, fuse3);

		/* PVC: each fuse bit represents four banks (0xf). */
		bitmap_set_value8(per_mask_bit, 0xf, 0);
		gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 4,
					 bank_val);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 16,
					 meml3_en);
	} else if (xe->info.platform == XE_DG2) {
		xe_l3_bank_mask_t per_node = {};
		u32 mask = REG_FIELD_GET(MEML3_EN_MASK, fuse3);

		/* DG2: each enabled node contributes eight banks. */
		bitmap_set_value8(per_node, 0xff, 0);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 8, mask);
	} else {
		/* 1:1 register bit to mask bit (inverted register bits) */
		u32 mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK, ~fuse3);

		bitmap_from_arr32(l3_bank_mask, &mask, 32);
	}
}

/*
 * Report how many 32-bit geometry and compute DSS fuse registers this
 * platform exposes; either count may be zero when the corresponding
 * DSS type does not exist on the platform.
 */
static void
get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
{
	if (GRAPHICS_VER(xe) > 20) {
		*geometry_regs = 3;
		*compute_regs = 3;
	} else if (GRAPHICS_VERx100(xe) == 1260) {
		/* PVC: compute-only, no geometry DSS fuses. */
		*geometry_regs = 0;
		*compute_regs = 2;
	} else if (GRAPHICS_VERx100(xe) >= 1250) {
		*geometry_regs = 1;
		*compute_regs = 1;
	} else {
		*geometry_regs = 1;
		*compute_regs = 0;
	}
}

/**
 * xe_gt_topology_init - Read fuse registers and populate the GT topology
 * @gt: the GT to initialize
 *
 * Fills gt->fuse_topo (geometry and compute DSS masks, per-DSS EU mask and
 * EU type, and the L3 bank mask) from the hardware fuse registers, then
 * dumps the resulting topology to the DRM driver debug log.
 */
void
xe_gt_topology_init(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct drm_printer p;
	int num_geometry_regs, num_compute_regs;

	get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs);

	/*
	 * Register counts returned shouldn't exceed the number of registers
	 * passed as parameters below.
	 */
	drm_WARN_ON(&xe->drm, num_geometry_regs > 3);
	drm_WARN_ON(&xe->drm, num_compute_regs > 3);

	/* Extra register arguments beyond the count are simply ignored. */
	load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
		      num_geometry_regs,
		      XELP_GT_GEOMETRY_DSS_ENABLE,
		      XE2_GT_GEOMETRY_DSS_1,
		      XE2_GT_GEOMETRY_DSS_2);
	load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
		      XEHP_GT_COMPUTE_DSS_ENABLE,
		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
		      XE2_GT_COMPUTE_DSS_2);
	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, &gt->fuse_topo.eu_type);
	load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);

	p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");

	xe_gt_topology_dump(gt, &p);
}

/* Human-readable name for an EU type, for the topology dump. */
static const char *eu_type_to_str(enum xe_gt_eu_type eu_type)
{
	switch (eu_type) {
	case XE_GT_EU_TYPE_SIMD16:
		return "simd16";
	case XE_GT_EU_TYPE_SIMD8:
		return "simd8";
	}

	/* Unreachable for valid enum values; keeps the compiler satisfied. */
	return NULL;
}

/* Print all fuse_topo masks and the EU type to @p, one field per line. */
void
xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
{
	drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS,
		   gt->fuse_topo.g_dss_mask);
	drm_printf(p, "dss mask (compute): %*pb\n", XE_MAX_DSS_FUSE_BITS,
		   gt->fuse_topo.c_dss_mask);

	drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS,
		   gt->fuse_topo.eu_mask_per_dss);
	drm_printf(p, "EU type: %s\n",
		   eu_type_to_str(gt->fuse_topo.eu_type));

	drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
		   gt->fuse_topo.l3_bank_mask);
}

/*
 * Used to obtain the index of the first DSS. Can start searching from the
 * beginning of a specific dss group (e.g., gslice, cslice, etc.) if
 * groupsize and groupnum are non-zero.
 */
unsigned int
xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum)
{
	/* Returns XE_MAX_DSS_FUSE_BITS when no bit is set at or past the group start. */
	return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
}

/* Return true if no DSS bit is set in @mask. */
bool xe_dss_mask_empty(const xe_dss_mask_t mask)
{
	return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS);
}

/**
 * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant
 * @gt: GT to check
 * @quad: Which quadrant of the DSS space to check
 *
 * Since Xe_HP platforms can have up to four CCS engines, those engines
 * are each logically associated with a quarter of the possible DSS. If there
 * are no DSS present in one of the four quadrants of the DSS space, the
 * corresponding CCS engine is also not available for use.
 *
 * Returns false if all DSS in a quadrant of the GT are fused off, else true.
 */
bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad)
{
	struct xe_device *xe = gt_to_xe(gt);
	xe_dss_mask_t all_dss;
	int g_dss_regs, c_dss_regs, dss_per_quad, quad_first;

	/* A DSS counts whether it is usable for geometry or for compute. */
	bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
		  XE_MAX_DSS_FUSE_BITS);

	get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs);
	/* Total DSS space = 32 bits per fuse register, split into 4 quadrants. */
	dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4;

	quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad);

	/* find_next_bit returns past-the-end when the quadrant is empty. */
	return quad_first < (quad + 1) * dss_per_quad;
}

/* Return true if DSS index @dss is present in the geometry DSS mask. */
bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss)
{
	return test_bit(dss, gt->fuse_topo.g_dss_mask);
}

/* Return true if DSS index @dss is present in the compute DSS mask. */
bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss)
{
	return test_bit(dss, gt->fuse_topo.c_dss_mask);
}