1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
3
4 #include <linux/ascii85.h>
5 #include "msm_gem.h"
6 #include "a6xx_gpu.h"
7 #include "a6xx_gmu.h"
8 #include "a6xx_gpu_state.h"
9 #include "a6xx_gmu.xml.h"
10
11 static const unsigned int *gen7_0_0_external_core_regs[] __always_unused;
12 static const unsigned int *gen7_2_0_external_core_regs[] __always_unused;
13 static const unsigned int *gen7_9_0_external_core_regs[] __always_unused;
14 static struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] __always_unused;
15 static const u32 gen7_9_0_cx_debugbus_blocks[] __always_unused;
16
17 #include "adreno_gen7_0_0_snapshot.h"
18 #include "adreno_gen7_2_0_snapshot.h"
19 #include "adreno_gen7_9_0_snapshot.h"
20
21 struct a6xx_gpu_state_obj {
22 const void *handle;
23 u32 *data;
24 u32 count; /* optional, used when the count is potentially read from hw */
25 };
26
27 struct a6xx_gpu_state {
28 struct msm_gpu_state base;
29
30 struct a6xx_gpu_state_obj *gmu_registers;
31 int nr_gmu_registers;
32
33 struct a6xx_gpu_state_obj *registers;
34 int nr_registers;
35
36 struct a6xx_gpu_state_obj *shaders;
37 int nr_shaders;
38
39 struct a6xx_gpu_state_obj *clusters;
40 int nr_clusters;
41
42 struct a6xx_gpu_state_obj *dbgahb_clusters;
43 int nr_dbgahb_clusters;
44
45 struct a6xx_gpu_state_obj *indexed_regs;
46 int nr_indexed_regs;
47
48 struct a6xx_gpu_state_obj *debugbus;
49 int nr_debugbus;
50
51 struct a6xx_gpu_state_obj *vbif_debugbus;
52
53 struct a6xx_gpu_state_obj *cx_debugbus;
54 int nr_cx_debugbus;
55
56 struct msm_gpu_state_bo *gmu_log;
57 struct msm_gpu_state_bo *gmu_hfi;
58 struct msm_gpu_state_bo *gmu_debug;
59
60 s32 hfi_queue_history[2][HFI_HISTORY_SZ];
61
62 struct list_head objs;
63
64 bool gpu_initialized;
65 };
66
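/*
 * Helpers to emit crashdumper script entries. Each entry is a pair of
 * 64-bit words: a read stores the destination iova followed by the
 * register offset (in the upper bits) and the number of dwords to copy,
 * a write stores the value to be written and sets bit 21 in the second
 * word to mark a single-dword register write, and a pair of zeros
 * terminates the script.
 */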
67 static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
68 {
69 in[0] = val;
70 in[1] = (((u64) reg) << 44 | (1 << 21) | 1);
71
72 return 2;
73 }
74
75 static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
76 {
77 in[0] = target;
78 in[1] = (((u64) reg) << 44 | dwords);
79
80 return 2;
81 }
82
83 static inline int CRASHDUMP_FINI(u64 *in)
84 {
85 in[0] = 0;
86 in[1] = 0;
87
88 return 2;
89 }
90
91 struct a6xx_crashdumper {
92 void *ptr;
93 struct drm_gem_object *bo;
94 u64 iova;
95 };
96
97 struct a6xx_state_memobj {
98 struct list_head node;
99 unsigned long long data[];
100 };
101
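/*
 * All snapshot allocations are chained on a6xx_state->objs so that they
 * can be released in a single pass by a6xx_gpu_state_destroy().
 */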
102 static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
103 {
104 struct a6xx_state_memobj *obj =
105 kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
106
107 if (!obj)
108 return NULL;
109
110 list_add_tail(&obj->node, &a6xx_state->objs);
111 return &obj->data;
112 }
113
114 static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
115 size_t size)
116 {
117 void *dst = state_kcalloc(a6xx_state, 1, size);
118
119 if (dst)
120 memcpy(dst, src, size);
121 return dst;
122 }
123
124 /*
125 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
126 * the rest for the data
127 */
128 #define A6XX_CD_DATA_OFFSET 8192
129 #define A6XX_CD_DATA_SIZE (SZ_1M - 8192)
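/* 8k of script space holds up to 512 two-qword crashdumper entries */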
130
131 static int a6xx_crashdumper_init(struct msm_gpu *gpu,
132 struct a6xx_crashdumper *dumper)
133 {
134 dumper->ptr = msm_gem_kernel_new(gpu->dev,
135 SZ_1M, MSM_BO_WC, gpu->aspace,
136 &dumper->bo, &dumper->iova);
137
138 if (!IS_ERR(dumper->ptr))
139 msm_gem_object_set_name(dumper->bo, "crashdump");
140
141 return PTR_ERR_OR_ZERO(dumper->ptr);
142 }
143
144 static int a6xx_crashdumper_run(struct msm_gpu *gpu,
145 struct a6xx_crashdumper *dumper)
146 {
147 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
148 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
149 u32 val;
150 int ret;
151
152 if (IS_ERR_OR_NULL(dumper->ptr))
153 return -EINVAL;
154
155 if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
156 return -EINVAL;
157
158 /* Make sure all pending memory writes are posted */
159 wmb();
160
161 gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE, dumper->iova);
162
163 gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
164
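/* Wait for the crashdumper to signal completion (bit 1 of the status register) */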
165 ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
166 val & 0x02, 100, 10000);
167
168 gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);
169
170 return ret;
171 }
172
173 /* read a value from the GX debug bus */
174 static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
175 u32 *data)
176 {
177 u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
178 A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
179
180 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
181 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
182 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
183 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);
184
185 /* Wait 1 us to make sure the data is flowing */
186 udelay(1);
187
188 data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
189 data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);
190
191 return 2;
192 }
193
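/*
 * The cx_dbgc region is accessed through a raw ioremap below; register
 * offsets are in dwords, so shift by two to get the byte offset for
 * readl/writel.
 */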
194 #define cxdbg_write(ptr, offset, val) \
195 writel((val), (ptr) + ((offset) << 2))
196
197 #define cxdbg_read(ptr, offset) \
198 readl((ptr) + ((offset) << 2))
199
200 /* read a value from the CX debug bus */
201 static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
202 u32 *data)
203 {
204 u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
205 A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
206
207 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
208 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
209 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
210 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
211
212 /* Wait 1 us to make sure the data is flowing */
213 udelay(1);
214
215 data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
216 data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
217
218 return 2;
219 }
220
221 /* Read a chunk of data from the VBIF debug bus */
222 static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
223 u32 reg, int count, u32 *data)
224 {
225 int i;
226
227 gpu_write(gpu, ctrl0, reg);
228
229 for (i = 0; i < count; i++) {
230 gpu_write(gpu, ctrl1, i);
231 data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
232 }
233
234 return count;
235 }
236
237 #define AXI_ARB_BLOCKS 2
238 #define XIN_AXI_BLOCKS 5
239 #define XIN_CORE_BLOCKS 4
240
241 #define VBIF_DEBUGBUS_BLOCK_SIZE \
242 ((16 * AXI_ARB_BLOCKS) + \
243 (18 * XIN_AXI_BLOCKS) + \
244 (12 * XIN_CORE_BLOCKS))
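/*
 * The captured VBIF data follows the read order below: AXI arbiter blocks
 * (16 dwords each), then XIN AXI blocks (18 dwords each), then XIN core
 * blocks (12 dwords each).
 */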
245
246 static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
247 struct a6xx_gpu_state *a6xx_state,
248 struct a6xx_gpu_state_obj *obj)
249 {
250 u32 clk, *ptr;
251 int i;
252
253 obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
254 sizeof(u32));
255 if (!obj->data)
256 return;
257
258 obj->handle = NULL;
259
260 /* Get the current clock setting */
261 clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);
262
263 /* Force on the bus so we can read it */
264 gpu_write(gpu, REG_A6XX_VBIF_CLKON,
265 clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);
266
267 /* We will read from BUS2 first, so disable BUS1 */
268 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);
269
270 /* Enable the VBIF bus for reading */
271 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);
272
273 ptr = obj->data;
274
275 for (i = 0; i < AXI_ARB_BLOCKS; i++)
276 ptr += vbif_debugbus_read(gpu,
277 REG_A6XX_VBIF_TEST_BUS2_CTRL0,
278 REG_A6XX_VBIF_TEST_BUS2_CTRL1,
279 1 << (i + 16), 16, ptr);
280
281 for (i = 0; i < XIN_AXI_BLOCKS; i++)
282 ptr += vbif_debugbus_read(gpu,
283 REG_A6XX_VBIF_TEST_BUS2_CTRL0,
284 REG_A6XX_VBIF_TEST_BUS2_CTRL1,
285 1 << i, 18, ptr);
286
287 /* Stop BUS2 so we can turn on BUS1 */
288 gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);
289
290 for (i = 0; i < XIN_CORE_BLOCKS; i++)
291 ptr += vbif_debugbus_read(gpu,
292 REG_A6XX_VBIF_TEST_BUS1_CTRL0,
293 REG_A6XX_VBIF_TEST_BUS1_CTRL1,
294 1 << i, 12, ptr);
295
296 /* Restore the VBIF clock setting */
297 gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
298 }
299
300 static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
301 struct a6xx_gpu_state *a6xx_state,
302 const struct a6xx_debugbus_block *block,
303 struct a6xx_gpu_state_obj *obj)
304 {
305 int i;
306 u32 *ptr;
307
308 obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
309 if (!obj->data)
310 return;
311
312 obj->handle = block;
313
314 for (ptr = obj->data, i = 0; i < block->count; i++)
315 ptr += debugbus_read(gpu, block->id, i, ptr);
316 }
317
318 static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
319 struct a6xx_gpu_state *a6xx_state,
320 const struct a6xx_debugbus_block *block,
321 struct a6xx_gpu_state_obj *obj)
322 {
323 int i;
324 u32 *ptr;
325
326 obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
327 if (!obj->data)
328 return;
329
330 obj->handle = block;
331
332 for (ptr = obj->data, i = 0; i < block->count; i++)
333 ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
334 }
335
336 static void a6xx_get_debugbus_blocks(struct msm_gpu *gpu,
337 struct a6xx_gpu_state *a6xx_state)
338 {
339 int nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
340 (a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);
341
342 if (adreno_is_a650_family(to_adreno_gpu(gpu)))
343 nr_debugbus_blocks += ARRAY_SIZE(a650_debugbus_blocks);
344
345 a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
346 sizeof(*a6xx_state->debugbus));
347
348 if (a6xx_state->debugbus) {
349 int i;
350
351 for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
352 a6xx_get_debugbus_block(gpu,
353 a6xx_state,
354 &a6xx_debugbus_blocks[i],
355 &a6xx_state->debugbus[i]);
356
357 a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
358
359 /*
360 * GBIF has the same debugbus as the other GPU blocks, so fall back
361 * to the default path if the GPU uses GBIF. Note that GBIF uses
362 * exactly the same block ID as VBIF.
363 */
364 if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
365 a6xx_get_debugbus_block(gpu, a6xx_state,
366 &a6xx_gbif_debugbus_block,
367 &a6xx_state->debugbus[i]);
368
369 a6xx_state->nr_debugbus += 1;
370 }
371
372
373 if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
374 for (i = 0; i < ARRAY_SIZE(a650_debugbus_blocks); i++)
375 a6xx_get_debugbus_block(gpu,
376 a6xx_state,
377 &a650_debugbus_blocks[i],
378 &a6xx_state->debugbus[i]);
379 }
380 }
381 }
382
383 static void a7xx_get_debugbus_blocks(struct msm_gpu *gpu,
384 struct a6xx_gpu_state *a6xx_state)
385 {
386 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
387 int debugbus_blocks_count, gbif_debugbus_blocks_count, total_debugbus_blocks;
388 const u32 *debugbus_blocks, *gbif_debugbus_blocks;
389 int i;
390
391 if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
392 debugbus_blocks = gen7_0_0_debugbus_blocks;
393 debugbus_blocks_count = ARRAY_SIZE(gen7_0_0_debugbus_blocks);
394 gbif_debugbus_blocks = a7xx_gbif_debugbus_blocks;
395 gbif_debugbus_blocks_count = ARRAY_SIZE(a7xx_gbif_debugbus_blocks);
396 } else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) {
397 debugbus_blocks = gen7_2_0_debugbus_blocks;
398 debugbus_blocks_count = ARRAY_SIZE(gen7_2_0_debugbus_blocks);
399 gbif_debugbus_blocks = a7xx_gbif_debugbus_blocks;
400 gbif_debugbus_blocks_count = ARRAY_SIZE(a7xx_gbif_debugbus_blocks);
401 } else {
402 BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
403 debugbus_blocks = gen7_9_0_debugbus_blocks;
404 debugbus_blocks_count = ARRAY_SIZE(gen7_9_0_debugbus_blocks);
405 gbif_debugbus_blocks = gen7_9_0_gbif_debugbus_blocks;
406 gbif_debugbus_blocks_count = ARRAY_SIZE(gen7_9_0_gbif_debugbus_blocks);
407 }
408
409 total_debugbus_blocks = debugbus_blocks_count + gbif_debugbus_blocks_count;
410
411 a6xx_state->debugbus = state_kcalloc(a6xx_state, total_debugbus_blocks,
412 sizeof(*a6xx_state->debugbus));
413
414 if (a6xx_state->debugbus) {
415 for (i = 0; i < debugbus_blocks_count; i++) {
416 a6xx_get_debugbus_block(gpu,
417 a6xx_state, &a7xx_debugbus_blocks[debugbus_blocks[i]],
418 &a6xx_state->debugbus[i]);
419 }
420
421 for (i = 0; i < gbif_debugbus_blocks_count; i++) {
422 a6xx_get_debugbus_block(gpu,
423 a6xx_state, &a7xx_debugbus_blocks[gbif_debugbus_blocks[i]],
424 &a6xx_state->debugbus[i + debugbus_blocks_count]);
425 }
426 }
427
428 }
429
430 static void a6xx_get_debugbus(struct msm_gpu *gpu,
431 struct a6xx_gpu_state *a6xx_state)
432 {
433 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
434 struct resource *res;
435 void __iomem *cxdbg = NULL;
436
437 /* Set up the GX debug bus */
438
439 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
440 A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
441
442 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
443 A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
444
445 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
446 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
447 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
448 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
449
450 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
451 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);
452
453 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
454 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
455 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
456 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
457
458 /* Set up the CX debug bus - it lives elsewhere in the system so do a
459 * temporary ioremap for the registers
460 */
461 res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
462 "cx_dbgc");
463
464 if (res)
465 cxdbg = ioremap(res->start, resource_size(res));
466
467 if (cxdbg) {
468 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
469 A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
470
471 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
472 A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
473
474 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
475 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
476 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
477 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
478
479 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
480 0x76543210);
481 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
482 0xFEDCBA98);
483
484 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
485 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
486 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
487 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
488 }
489
490 if (adreno_is_a7xx(adreno_gpu)) {
491 a7xx_get_debugbus_blocks(gpu, a6xx_state);
492 } else {
493 a6xx_get_debugbus_blocks(gpu, a6xx_state);
494 }
495
496 /* Dump the VBIF debugbus on applicable targets */
497 if (!a6xx_has_gbif(adreno_gpu)) {
498 a6xx_state->vbif_debugbus =
499 state_kcalloc(a6xx_state, 1,
500 sizeof(*a6xx_state->vbif_debugbus));
501
502 if (a6xx_state->vbif_debugbus)
503 a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
504 a6xx_state->vbif_debugbus);
505 }
506
507 if (cxdbg) {
508 unsigned nr_cx_debugbus_blocks;
509 const struct a6xx_debugbus_block *cx_debugbus_blocks;
510
511 if (adreno_is_a7xx(adreno_gpu)) {
512 BUG_ON(adreno_gpu->info->family > ADRENO_7XX_GEN3);
513 cx_debugbus_blocks = a7xx_cx_debugbus_blocks;
514 nr_cx_debugbus_blocks = ARRAY_SIZE(a7xx_cx_debugbus_blocks);
515 } else {
516 cx_debugbus_blocks = a6xx_cx_debugbus_blocks;
517 nr_cx_debugbus_blocks = ARRAY_SIZE(a6xx_cx_debugbus_blocks);
518 }
519
520 a6xx_state->cx_debugbus =
521 state_kcalloc(a6xx_state,
522 nr_cx_debugbus_blocks,
523 sizeof(*a6xx_state->cx_debugbus));
524
525 if (a6xx_state->cx_debugbus) {
526 int i;
527
528 for (i = 0; i < nr_cx_debugbus_blocks; i++)
529 a6xx_get_cx_debugbus_block(cxdbg,
530 a6xx_state,
531 &cx_debugbus_blocks[i],
532 &a6xx_state->cx_debugbus[i]);
533
534 a6xx_state->nr_cx_debugbus =
535 nr_cx_debugbus_blocks;
536 }
537
538 iounmap(cxdbg);
539 }
540 }
541
542 #define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
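/*
 * Register lists are stored as { start, end } pairs; RANGE() returns the
 * inclusive number of registers in the pair starting at index 'a'.
 */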
543
544 /* Read a data cluster from behind the AHB aperture */
545 static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
546 struct a6xx_gpu_state *a6xx_state,
547 const struct a6xx_dbgahb_cluster *dbgahb,
548 struct a6xx_gpu_state_obj *obj,
549 struct a6xx_crashdumper *dumper)
550 {
551 u64 *in = dumper->ptr;
552 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
553 size_t datasize;
554 int i, regcount = 0;
555
556 for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
557 int j;
558
559 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
560 (dbgahb->statetype + i * 2) << 8);
561
562 for (j = 0; j < dbgahb->count; j += 2) {
563 int count = RANGE(dbgahb->registers, j);
564 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
565 dbgahb->registers[j] - (dbgahb->base >> 2);
566
567 in += CRASHDUMP_READ(in, offset, count, out);
568
569 out += count * sizeof(u32);
570
571 if (i == 0)
572 regcount += count;
573 }
574 }
575
576 CRASHDUMP_FINI(in);
577
578 datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
579
580 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
581 return;
582
583 if (a6xx_crashdumper_run(gpu, dumper))
584 return;
585
586 obj->handle = dbgahb;
587 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
588 datasize);
589 }
590
591 static void a7xx_get_dbgahb_cluster(struct msm_gpu *gpu,
592 struct a6xx_gpu_state *a6xx_state,
593 const struct gen7_sptp_cluster_registers *dbgahb,
594 struct a6xx_gpu_state_obj *obj,
595 struct a6xx_crashdumper *dumper)
596 {
597 u64 *in = dumper->ptr;
598 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
599 size_t datasize;
600 int i, regcount = 0;
601
602 in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
603 A7XX_SP_READ_SEL_LOCATION(dbgahb->location_id) |
604 A7XX_SP_READ_SEL_PIPE(dbgahb->pipe_id) |
605 A7XX_SP_READ_SEL_STATETYPE(dbgahb->statetype));
606
607 for (i = 0; dbgahb->regs[i] != UINT_MAX; i += 2) {
608 int count = RANGE(dbgahb->regs, i);
609 u32 offset = REG_A7XX_SP_AHB_READ_APERTURE +
610 dbgahb->regs[i] - dbgahb->regbase;
611
612 in += CRASHDUMP_READ(in, offset, count, out);
613
614 out += count * sizeof(u32);
615 regcount += count;
616 }
617
618 CRASHDUMP_FINI(in);
619
620 datasize = regcount * sizeof(u32);
621
622 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
623 return;
624
625 if (a6xx_crashdumper_run(gpu, dumper))
626 return;
627
628 obj->handle = dbgahb;
629 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
630 datasize);
631 }
632
633 static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
634 struct a6xx_gpu_state *a6xx_state,
635 struct a6xx_crashdumper *dumper)
636 {
637 int i;
638
639 a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
640 ARRAY_SIZE(a6xx_dbgahb_clusters),
641 sizeof(*a6xx_state->dbgahb_clusters));
642
643 if (!a6xx_state->dbgahb_clusters)
644 return;
645
646 a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
647
648 for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
649 a6xx_get_dbgahb_cluster(gpu, a6xx_state,
650 &a6xx_dbgahb_clusters[i],
651 &a6xx_state->dbgahb_clusters[i], dumper);
652 }
653
654 static void a7xx_get_dbgahb_clusters(struct msm_gpu *gpu,
655 struct a6xx_gpu_state *a6xx_state,
656 struct a6xx_crashdumper *dumper)
657 {
658 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
659 int i;
660 const struct gen7_sptp_cluster_registers *dbgahb_clusters;
661 unsigned dbgahb_clusters_size;
662
663 if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
664 dbgahb_clusters = gen7_0_0_sptp_clusters;
665 dbgahb_clusters_size = ARRAY_SIZE(gen7_0_0_sptp_clusters);
666 } else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) {
667 dbgahb_clusters = gen7_2_0_sptp_clusters;
668 dbgahb_clusters_size = ARRAY_SIZE(gen7_2_0_sptp_clusters);
669 } else {
670 BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
671 dbgahb_clusters = gen7_9_0_sptp_clusters;
672 dbgahb_clusters_size = ARRAY_SIZE(gen7_9_0_sptp_clusters);
673 }
674
675 a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
676 dbgahb_clusters_size,
677 sizeof(*a6xx_state->dbgahb_clusters));
678
679 if (!a6xx_state->dbgahb_clusters)
680 return;
681
682 a6xx_state->nr_dbgahb_clusters = dbgahb_clusters_size;
683
684 for (i = 0; i < dbgahb_clusters_size; i++)
685 a7xx_get_dbgahb_cluster(gpu, a6xx_state,
686 &dbgahb_clusters[i],
687 &a6xx_state->dbgahb_clusters[i], dumper);
688 }
689
690 /* Read a data cluster from the CP aperture with the crashdumper */
691 static void a6xx_get_cluster(struct msm_gpu *gpu,
692 struct a6xx_gpu_state *a6xx_state,
693 const struct a6xx_cluster *cluster,
694 struct a6xx_gpu_state_obj *obj,
695 struct a6xx_crashdumper *dumper)
696 {
697 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
698 u64 *in = dumper->ptr;
699 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
700 size_t datasize;
701 int i, regcount = 0;
702 u32 id = cluster->id;
703
704 /* Skip registers that are not present on older generations */
705 if (!adreno_is_a660_family(adreno_gpu) &&
706 cluster->registers == a660_fe_cluster)
707 return;
708
709 if (adreno_is_a650_family(adreno_gpu) &&
710 cluster->registers == a6xx_ps_cluster)
711 id = CLUSTER_VPC_PS;
712
713 /* Some clusters need a selector register to be programmed too */
714 if (cluster->sel_reg)
715 in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);
716
717 for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
718 int j;
719
720 in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
721 (id << 8) | (i << 4) | i);
722
723 for (j = 0; j < cluster->count; j += 2) {
724 int count = RANGE(cluster->registers, j);
725
726 in += CRASHDUMP_READ(in, cluster->registers[j],
727 count, out);
728
729 out += count * sizeof(u32);
730
731 if (i == 0)
732 regcount += count;
733 }
734 }
735
736 CRASHDUMP_FINI(in);
737
738 datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
739
740 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
741 return;
742
743 if (a6xx_crashdumper_run(gpu, dumper))
744 return;
745
746 obj->handle = cluster;
747 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
748 datasize);
749 }
750
751 static void a7xx_get_cluster(struct msm_gpu *gpu,
752 struct a6xx_gpu_state *a6xx_state,
753 const struct gen7_cluster_registers *cluster,
754 struct a6xx_gpu_state_obj *obj,
755 struct a6xx_crashdumper *dumper)
756 {
757 u64 *in = dumper->ptr;
758 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
759 size_t datasize;
760 int i, regcount = 0;
761
762 /* Some clusters need a selector register to be programmed too */
763 if (cluster->sel)
764 in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val);
765
766 in += CRASHDUMP_WRITE(in, REG_A7XX_CP_APERTURE_CNTL_CD,
767 A7XX_CP_APERTURE_CNTL_CD_PIPE(cluster->pipe_id) |
768 A7XX_CP_APERTURE_CNTL_CD_CLUSTER(cluster->cluster_id) |
769 A7XX_CP_APERTURE_CNTL_CD_CONTEXT(cluster->context_id));
770
771 for (i = 0; cluster->regs[i] != UINT_MAX; i += 2) {
772 int count = RANGE(cluster->regs, i);
773
774 in += CRASHDUMP_READ(in, cluster->regs[i],
775 count, out);
776
777 out += count * sizeof(u32);
778 regcount += count;
779 }
780
781 CRASHDUMP_FINI(in);
782
783 datasize = regcount * sizeof(u32);
784
785 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
786 return;
787
788 if (a6xx_crashdumper_run(gpu, dumper))
789 return;
790
791 obj->handle = cluster;
792 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
793 datasize);
794 }
795
796 static void a6xx_get_clusters(struct msm_gpu *gpu,
797 struct a6xx_gpu_state *a6xx_state,
798 struct a6xx_crashdumper *dumper)
799 {
800 int i;
801
802 a6xx_state->clusters = state_kcalloc(a6xx_state,
803 ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));
804
805 if (!a6xx_state->clusters)
806 return;
807
808 a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
809
810 for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
811 a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
812 &a6xx_state->clusters[i], dumper);
813 }
814
815 static void a7xx_get_clusters(struct msm_gpu *gpu,
816 struct a6xx_gpu_state *a6xx_state,
817 struct a6xx_crashdumper *dumper)
818 {
819 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
820 int i;
821 const struct gen7_cluster_registers *clusters;
822 unsigned clusters_size;
823
824 if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
825 clusters = gen7_0_0_clusters;
826 clusters_size = ARRAY_SIZE(gen7_0_0_clusters);
827 } else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) {
828 clusters = gen7_2_0_clusters;
829 clusters_size = ARRAY_SIZE(gen7_2_0_clusters);
830 } else {
831 BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
832 clusters = gen7_9_0_clusters;
833 clusters_size = ARRAY_SIZE(gen7_9_0_clusters);
834 }
835
836 a6xx_state->clusters = state_kcalloc(a6xx_state,
837 clusters_size, sizeof(*a6xx_state->clusters));
838
839 if (!a6xx_state->clusters)
840 return;
841
842 a6xx_state->nr_clusters = clusters_size;
843
844 for (i = 0; i < clusters_size; i++)
845 a7xx_get_cluster(gpu, a6xx_state, &clusters[i],
846 &a6xx_state->clusters[i], dumper);
847 }
848
849 /* Read a shader / debug block from the HLSQ aperture with the crashdumper */
850 static void a6xx_get_shader_block(struct msm_gpu *gpu,
851 struct a6xx_gpu_state *a6xx_state,
852 const struct a6xx_shader_block *block,
853 struct a6xx_gpu_state_obj *obj,
854 struct a6xx_crashdumper *dumper)
855 {
856 u64 *in = dumper->ptr;
857 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
858 size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
859 int i;
860
861 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
862 return;
863
864 for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
865 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
866 (block->type << 8) | i);
867
868 in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
869 block->size, out);
870
871 out += block->size * sizeof(u32);
872 }
873
874 CRASHDUMP_FINI(in);
875
876 if (a6xx_crashdumper_run(gpu, dumper))
877 return;
878
879 obj->handle = block;
880 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
881 datasize);
882 }
883
884 static void a7xx_get_shader_block(struct msm_gpu *gpu,
885 struct a6xx_gpu_state *a6xx_state,
886 const struct gen7_shader_block *block,
887 struct a6xx_gpu_state_obj *obj,
888 struct a6xx_crashdumper *dumper)
889 {
890 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
891 u64 *in = dumper->ptr;
892 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
893 size_t datasize = block->size * block->num_sps * block->num_usptps * sizeof(u32);
894 int i, j;
895
896 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
897 return;
898
899 if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
900 gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 3);
901 }
902
903 for (i = 0; i < block->num_sps; i++) {
904 for (j = 0; j < block->num_usptps; j++) {
905 in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
906 A7XX_SP_READ_SEL_LOCATION(block->location) |
907 A7XX_SP_READ_SEL_PIPE(block->pipeid) |
908 A7XX_SP_READ_SEL_STATETYPE(block->statetype) |
909 A7XX_SP_READ_SEL_USPTP(j) |
910 A7XX_SP_READ_SEL_SPTP(i));
911
912 in += CRASHDUMP_READ(in, REG_A7XX_SP_AHB_READ_APERTURE,
913 block->size, out);
914
915 out += block->size * sizeof(u32);
916 }
917 }
918
919 CRASHDUMP_FINI(in);
920
921 if (a6xx_crashdumper_run(gpu, dumper))
922 goto out;
923
924 obj->handle = block;
925 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
926 datasize);
927
928 out:
929 if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
930 gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 0);
931 }
932 }
933
934 static void a6xx_get_shaders(struct msm_gpu *gpu,
935 struct a6xx_gpu_state *a6xx_state,
936 struct a6xx_crashdumper *dumper)
937 {
938 int i;
939
940 a6xx_state->shaders = state_kcalloc(a6xx_state,
941 ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));
942
943 if (!a6xx_state->shaders)
944 return;
945
946 a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
947
948 for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
949 a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
950 &a6xx_state->shaders[i], dumper);
951 }
952
953 static void a7xx_get_shaders(struct msm_gpu *gpu,
954 struct a6xx_gpu_state *a6xx_state,
955 struct a6xx_crashdumper *dumper)
956 {
957 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
958 const struct gen7_shader_block *shader_blocks;
959 unsigned num_shader_blocks;
960 int i;
961
962 if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
963 shader_blocks = gen7_0_0_shader_blocks;
964 num_shader_blocks = ARRAY_SIZE(gen7_0_0_shader_blocks);
965 } else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) {
966 shader_blocks = gen7_2_0_shader_blocks;
967 num_shader_blocks = ARRAY_SIZE(gen7_2_0_shader_blocks);
968 } else {
969 BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
970 shader_blocks = gen7_9_0_shader_blocks;
971 num_shader_blocks = ARRAY_SIZE(gen7_9_0_shader_blocks);
972 }
973
974 a6xx_state->shaders = state_kcalloc(a6xx_state,
975 num_shader_blocks, sizeof(*a6xx_state->shaders));
976
977 if (!a6xx_state->shaders)
978 return;
979
980 a6xx_state->nr_shaders = num_shader_blocks;
981
982 for (i = 0; i < num_shader_blocks; i++)
983 a7xx_get_shader_block(gpu, a6xx_state, &shader_blocks[i],
984 &a6xx_state->shaders[i], dumper);
985 }
986
987 /* Read registers from behind the HLSQ aperture with the crashdumper */
988 static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
989 struct a6xx_gpu_state *a6xx_state,
990 const struct a6xx_registers *regs,
991 struct a6xx_gpu_state_obj *obj,
992 struct a6xx_crashdumper *dumper)
993
994 {
995 u64 *in = dumper->ptr;
996 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
997 int i, regcount = 0;
998
999 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);
1000
1001 for (i = 0; i < regs->count; i += 2) {
1002 u32 count = RANGE(regs->registers, i);
1003 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
1004 regs->registers[i] - (regs->val0 >> 2);
1005
1006 in += CRASHDUMP_READ(in, offset, count, out);
1007
1008 out += count * sizeof(u32);
1009 regcount += count;
1010 }
1011
1012 CRASHDUMP_FINI(in);
1013
1014 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1015 return;
1016
1017 if (a6xx_crashdumper_run(gpu, dumper))
1018 return;
1019
1020 obj->handle = regs;
1021 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1022 regcount * sizeof(u32));
1023 }
1024
1025 /* Read a block of registers using the crashdumper */
1026 static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
1027 struct a6xx_gpu_state *a6xx_state,
1028 const struct a6xx_registers *regs,
1029 struct a6xx_gpu_state_obj *obj,
1030 struct a6xx_crashdumper *dumper)
1031
1032 {
1033 u64 *in = dumper->ptr;
1034 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
1035 int i, regcount = 0;
1036
1037 /* Skip unsupported registers on older generations */
1038 if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
1039 (regs->registers == a660_registers))
1040 return;
1041
1042 /* Some blocks might need to program a selector register first */
1043 if (regs->val0)
1044 in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);
1045
1046 for (i = 0; i < regs->count; i += 2) {
1047 u32 count = RANGE(regs->registers, i);
1048
1049 in += CRASHDUMP_READ(in, regs->registers[i], count, out);
1050
1051 out += count * sizeof(u32);
1052 regcount += count;
1053 }
1054
1055 CRASHDUMP_FINI(in);
1056
1057 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1058 return;
1059
1060 if (a6xx_crashdumper_run(gpu, dumper))
1061 return;
1062
1063 obj->handle = regs;
1064 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1065 regcount * sizeof(u32));
1066 }
1067
1068 static void a7xx_get_crashdumper_registers(struct msm_gpu *gpu,
1069 struct a6xx_gpu_state *a6xx_state,
1070 const struct gen7_reg_list *regs,
1071 struct a6xx_gpu_state_obj *obj,
1072 struct a6xx_crashdumper *dumper)
1073
1074 {
1075 u64 *in = dumper->ptr;
1076 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
1077 int i, regcount = 0;
1078
1079 /* Some blocks might need to program a selector register first */
1080 if (regs->sel)
1081 in += CRASHDUMP_WRITE(in, regs->sel->cd_reg, regs->sel->val);
1082
1083 for (i = 0; regs->regs[i] != UINT_MAX; i += 2) {
1084 u32 count = RANGE(regs->regs, i);
1085
1086 in += CRASHDUMP_READ(in, regs->regs[i], count, out);
1087
1088 out += count * sizeof(u32);
1089 regcount += count;
1090 }
1091
1092 CRASHDUMP_FINI(in);
1093
1094 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1095 return;
1096
1097 if (a6xx_crashdumper_run(gpu, dumper))
1098 return;
1099
1100 obj->handle = regs->regs;
1101 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1102 regcount * sizeof(u32));
1103 }
1104
1105
1106 /* Read a block of registers via AHB */
1107 static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
1108 struct a6xx_gpu_state *a6xx_state,
1109 const struct a6xx_registers *regs,
1110 struct a6xx_gpu_state_obj *obj)
1111 {
1112 int i, regcount = 0, index = 0;
1113
1114 /* Skip unsupported registers on older generations */
1115 if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
1116 (regs->registers == a660_registers))
1117 return;
1118
1119 for (i = 0; i < regs->count; i += 2)
1120 regcount += RANGE(regs->registers, i);
1121
1122 obj->handle = (const void *) regs;
1123 obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1124 if (!obj->data)
1125 return;
1126
1127 for (i = 0; i < regs->count; i += 2) {
1128 u32 count = RANGE(regs->registers, i);
1129 int j;
1130
1131 for (j = 0; j < count; j++)
1132 obj->data[index++] = gpu_read(gpu,
1133 regs->registers[i] + j);
1134 }
1135 }
1136
1137 static void a7xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
1138 struct a6xx_gpu_state *a6xx_state,
1139 const u32 *regs,
1140 struct a6xx_gpu_state_obj *obj)
1141 {
1142 int i, regcount = 0, index = 0;
1143
1144 for (i = 0; regs[i] != UINT_MAX; i += 2)
1145 regcount += RANGE(regs, i);
1146
1147 obj->handle = (const void *) regs;
1148 obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1149 if (!obj->data)
1150 return;
1151
1152 for (i = 0; regs[i] != UINT_MAX; i += 2) {
1153 u32 count = RANGE(regs, i);
1154 int j;
1155
1156 for (j = 0; j < count; j++)
1157 obj->data[index++] = gpu_read(gpu, regs[i] + j);
1158 }
1159 }
1160
1161 static void a7xx_get_ahb_gpu_reglist(struct msm_gpu *gpu,
1162 struct a6xx_gpu_state *a6xx_state,
1163 const struct gen7_reg_list *regs,
1164 struct a6xx_gpu_state_obj *obj)
1165 {
1166 if (regs->sel)
1167 gpu_write(gpu, regs->sel->host_reg, regs->sel->val);
1168
1169 a7xx_get_ahb_gpu_registers(gpu, a6xx_state, regs->regs, obj);
1170 }
1171
1172 /* Read a block of GMU registers */
1173 static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
1174 struct a6xx_gpu_state *a6xx_state,
1175 const struct a6xx_registers *regs,
1176 struct a6xx_gpu_state_obj *obj,
1177 bool rscc)
1178 {
1179 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1180 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1181 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1182 int i, regcount = 0, index = 0;
1183
1184 for (i = 0; i < regs->count; i += 2)
1185 regcount += RANGE(regs->registers, i);
1186
1187 obj->handle = (const void *) regs;
1188 obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1189 if (!obj->data)
1190 return;
1191
1192 for (i = 0; i < regs->count; i += 2) {
1193 u32 count = RANGE(regs->registers, i);
1194 int j;
1195
1196 for (j = 0; j < count; j++) {
1197 u32 offset = regs->registers[i] + j;
1198 u32 val;
1199
1200 if (rscc)
1201 val = gmu_read_rscc(gmu, offset);
1202 else
1203 val = gmu_read(gmu, offset);
1204
1205 obj->data[index++] = val;
1206 }
1207 }
1208 }
1209
1210 static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
1211 struct a6xx_gpu_state *a6xx_state)
1212 {
1213 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1214 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1215
1216 a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
1217 4, sizeof(*a6xx_state->gmu_registers));
1218
1219 if (!a6xx_state->gmu_registers)
1220 return;
1221
1222 a6xx_state->nr_gmu_registers = 4;
1223
1224 /* Get the CX GMU registers from AHB */
1225 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
1226 &a6xx_state->gmu_registers[0], false);
1227 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
1228 &a6xx_state->gmu_registers[1], true);
1229
1230 if (adreno_is_a621(adreno_gpu) || adreno_is_a623(adreno_gpu))
1231 _a6xx_get_gmu_registers(gpu, a6xx_state, &a621_gpucc_reg,
1232 &a6xx_state->gmu_registers[2], false);
1233 else
1234 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gpucc_reg,
1235 &a6xx_state->gmu_registers[2], false);
1236
1237 if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
1238 return;
1239
1240 /* Set the fence to ALLOW mode so we can access the registers */
1241 gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
1242
1243 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
1244 &a6xx_state->gmu_registers[3], false);
1245 }
1246
1247 static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
1248 struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
1249 {
1250 struct msm_gpu_state_bo *snapshot;
1251
1252 if (!bo->size)
1253 return NULL;
1254
1255 snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
1256 if (!snapshot)
1257 return NULL;
1258
1259 snapshot->iova = bo->iova;
1260 snapshot->size = bo->size;
1261 snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
1262 if (!snapshot->data)
1263 return NULL;
1264
1265 memcpy(snapshot->data, bo->virt, bo->size);
1266
1267 return snapshot;
1268 }
1269
1270 static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
1271 struct a6xx_gpu_state *a6xx_state)
1272 {
1273 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1274 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1275 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1276 unsigned i, j;
1277
1278 BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));
1279
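/* Copy each queue's history ring, starting at its current index so the oldest entry comes out first */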
1280 for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
1281 struct a6xx_hfi_queue *queue = &gmu->queues[i];
1282 for (j = 0; j < HFI_HISTORY_SZ; j++) {
1283 unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
1284 a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
1285 }
1286 }
1287 }
1288
1289 #define A6XX_REGLIST_SIZE 1
1290 #define A6XX_GBIF_REGLIST_SIZE 1
1291 static void a6xx_get_registers(struct msm_gpu *gpu,
1292 struct a6xx_gpu_state *a6xx_state,
1293 struct a6xx_crashdumper *dumper)
1294 {
1295 int i, count = A6XX_REGLIST_SIZE +
1296 ARRAY_SIZE(a6xx_reglist) +
1297 ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
1298 int index = 0;
1299 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1300
1301 a6xx_state->registers = state_kcalloc(a6xx_state,
1302 count, sizeof(*a6xx_state->registers));
1303
1304 if (!a6xx_state->registers)
1305 return;
1306
1307 a6xx_state->nr_registers = count;
1308
1309 a6xx_get_ahb_gpu_registers(gpu,
1310 a6xx_state, &a6xx_ahb_reglist,
1311 &a6xx_state->registers[index++]);
1312
1313 if (a6xx_has_gbif(adreno_gpu))
1314 a6xx_get_ahb_gpu_registers(gpu,
1315 a6xx_state, &a6xx_gbif_reglist,
1316 &a6xx_state->registers[index++]);
1317 else
1318 a6xx_get_ahb_gpu_registers(gpu,
1319 a6xx_state, &a6xx_vbif_reglist,
1320 &a6xx_state->registers[index++]);
1321 if (!dumper) {
1322 /*
1323 * We can't use the crashdumper when the SMMU is stalled,
1324 * because the GPU has no memory access until we resume
1325 * translation (but we don't want to do that until after
1326 * we have captured as much useful GPU state as possible).
1327 * So instead collect registers via the CPU:
1328 */
1329 for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
1330 a6xx_get_ahb_gpu_registers(gpu,
1331 a6xx_state, &a6xx_reglist[i],
1332 &a6xx_state->registers[index++]);
1333 return;
1334 }
1335
1336 for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
1337 a6xx_get_crashdumper_registers(gpu,
1338 a6xx_state, &a6xx_reglist[i],
1339 &a6xx_state->registers[index++],
1340 dumper);
1341
1342 for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
1343 a6xx_get_crashdumper_hlsq_registers(gpu,
1344 a6xx_state, &a6xx_hlsq_reglist[i],
1345 &a6xx_state->registers[index++],
1346 dumper);
1347 }
1348
1349 #define A7XX_PRE_CRASHDUMPER_SIZE 1
1350 #define A7XX_POST_CRASHDUMPER_SIZE 1
1351 static void a7xx_get_registers(struct msm_gpu *gpu,
1352 struct a6xx_gpu_state *a6xx_state,
1353 struct a6xx_crashdumper *dumper)
1354 {
1355 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1356 int i, count;
1357 int index = 0;
1358 const u32 *pre_crashdumper_regs;
1359 const struct gen7_reg_list *reglist;
1360
1361 if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
1362 reglist = gen7_0_0_reg_list;
1363 pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
1364 } else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) {
1365 reglist = gen7_2_0_reg_list;
1366 pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
1367 } else {
1368 BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
1369 reglist = gen7_9_0_reg_list;
1370 pre_crashdumper_regs = gen7_9_0_pre_crashdumper_gpu_registers;
1371 }
1372
1373 count = A7XX_PRE_CRASHDUMPER_SIZE + A7XX_POST_CRASHDUMPER_SIZE;
1374
1375 /* The downstream reglist contains registers in other memory regions
1376 * (cx_misc/cx_mem and cx_dbgc) and we need to plumb through their
1377 * offsets and map them to read them on the CPU. For now only read the
1378 * first region which is the main one.
1379 */
1380 if (dumper) {
1381 for (i = 0; reglist[i].regs; i++)
1382 count++;
1383 } else {
1384 count++;
1385 }
1386
1387 a6xx_state->registers = state_kcalloc(a6xx_state,
1388 count, sizeof(*a6xx_state->registers));
1389
1390 if (!a6xx_state->registers)
1391 return;
1392
1393 a6xx_state->nr_registers = count;
1394
1395 a7xx_get_ahb_gpu_registers(gpu, a6xx_state, pre_crashdumper_regs,
1396 &a6xx_state->registers[index++]);
1397
1398 if (!dumper) {
1399 a7xx_get_ahb_gpu_reglist(gpu,
1400 a6xx_state, &reglist[0],
1401 &a6xx_state->registers[index++]);
1402 return;
1403 }
1404
1405 for (i = 0; reglist[i].regs; i++)
1406 a7xx_get_crashdumper_registers(gpu,
1407 a6xx_state, &reglist[i],
1408 &a6xx_state->registers[index++],
1409 dumper);
1410 }
1411
1412 static void a7xx_get_post_crashdumper_registers(struct msm_gpu *gpu,
1413 struct a6xx_gpu_state *a6xx_state)
1414 {
1415 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1416 const u32 *regs;
1417
1418 BUG_ON(adreno_gpu->info->family > ADRENO_7XX_GEN3);
1419 regs = gen7_0_0_post_crashdumper_registers;
1420
1421 a7xx_get_ahb_gpu_registers(gpu,
1422 a6xx_state, regs,
1423 &a6xx_state->registers[a6xx_state->nr_registers - 1]);
1424 }
1425
1426 static u32 a6xx_get_cp_roq_size(struct msm_gpu *gpu)
1427 {
1428 /* The value at [16:31] is in 4dword units. Convert it to dwords: (val >> 16) << 2 == val >> 14 */
1429 return gpu_read(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2) >> 14;
1430 }
1431
1432 static u32 a7xx_get_cp_roq_size(struct msm_gpu *gpu)
1433 {
1434 /*
1435 * The value at CP_ROQ_THRESHOLDS_2[20:31] is in 4dword units.
1436 * That register however is not directly accessible from APSS on A7xx.
1437 * Program the SQE_UCODE_DBG_ADDR with offset=0x70d3 and read the value.
1438 */
1439 gpu_write(gpu, REG_A6XX_CP_SQE_UCODE_DBG_ADDR, 0x70d3);
1440
1441 return 4 * (gpu_read(gpu, REG_A6XX_CP_SQE_UCODE_DBG_DATA) >> 20);
1442 }
1443
1444 /* Read a block of data from an indexed register pair */
1445 static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
1446 struct a6xx_gpu_state *a6xx_state,
1447 const struct a6xx_indexed_registers *indexed,
1448 struct a6xx_gpu_state_obj *obj)
1449 {
1450 u32 count = indexed->count;
1451 int i;
1452
1453 obj->handle = (const void *) indexed;
1454 if (indexed->count_fn)
1455 count = indexed->count_fn(gpu);
1456
1457 obj->data = state_kcalloc(a6xx_state, count, sizeof(u32));
1458 obj->count = count;
1459 if (!obj->data)
1460 return;
1461
1462 /* All the indexed banks start at address 0 */
1463 gpu_write(gpu, indexed->addr, 0);
1464
1465 /* Read the data - each read increments the internal address by 1 */
1466 for (i = 0; i < count; i++)
1467 obj->data[i] = gpu_read(gpu, indexed->data);
1468 }
1469
1470 static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
1471 struct a6xx_gpu_state *a6xx_state)
1472 {
1473 u32 mempool_size;
1474 int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
1475 int i;
1476
1477 a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
1478 sizeof(*a6xx_state->indexed_regs));
1479 if (!a6xx_state->indexed_regs)
1480 return;
1481
1482 for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
1483 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
1484 &a6xx_state->indexed_regs[i]);
1485
1486 if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
1487 u32 val;
1488
1489 val = gpu_read(gpu, REG_A6XX_CP_CHICKEN_DBG);
1490 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val | 4);
1491
1492 /* Get the contents of the CP mempool */
1493 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
1494 &a6xx_state->indexed_regs[i]);
1495
1496 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val);
1497 a6xx_state->nr_indexed_regs = count;
1498 return;
1499 }
1500
1501 /* Set the CP mempool size to 0 to stabilize it while dumping */
1502 mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
1503 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
1504
1505 /* Get the contents of the CP mempool */
1506 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
1507 &a6xx_state->indexed_regs[i]);
1508
1509 /*
1510 * Offset 0x2000 in the mempool is the size - copy the saved size over
1511 * so the data is consistent
1512 */
1513 a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
1514
1515 /* Restore the size in the hardware */
1516 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
1517
1518 a6xx_state->nr_indexed_regs = count;
1519 }
1520
1521 static void a7xx_get_indexed_registers(struct msm_gpu *gpu,
1522 struct a6xx_gpu_state *a6xx_state)
1523 {
1524 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1525 const struct a6xx_indexed_registers *indexed_regs;
1526 int i, indexed_count, mempool_count;
1527
1528 if (adreno_gpu->info->family <= ADRENO_7XX_GEN2) {
1529 indexed_regs = a7xx_indexed_reglist;
1530 indexed_count = ARRAY_SIZE(a7xx_indexed_reglist);
1531 } else {
1532 BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
1533 indexed_regs = gen7_9_0_cp_indexed_reg_list;
1534 indexed_count = ARRAY_SIZE(gen7_9_0_cp_indexed_reg_list);
1535 }
1536
1537 mempool_count = ARRAY_SIZE(a7xx_cp_bv_mempool_indexed);
1538
1539 a6xx_state->indexed_regs = state_kcalloc(a6xx_state,
1540 indexed_count + mempool_count,
1541 sizeof(*a6xx_state->indexed_regs));
1542 if (!a6xx_state->indexed_regs)
1543 return;
1544
1545 a6xx_state->nr_indexed_regs = indexed_count + mempool_count;
1546
1547 /* First read the common regs */
1548 for (i = 0; i < indexed_count; i++)
1549 a6xx_get_indexed_regs(gpu, a6xx_state, &indexed_regs[i],
1550 &a6xx_state->indexed_regs[i]);
1551
1552 gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, 0, BIT(2));
1553 gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, 0, BIT(2));
1554
1555 /* Get the contents of the CP_BV mempool */
1556 for (i = 0; i < mempool_count; i++)
1557 a6xx_get_indexed_regs(gpu, a6xx_state, &a7xx_cp_bv_mempool_indexed[i],
1558 &a6xx_state->indexed_regs[indexed_count + i]);
1559
1560 gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(2), 0);
1561 gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, BIT(2), 0);
1562 return;
1563 }
1564
1565 struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
1566 {
1567 struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
1568 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1569 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1570 struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
1571 GFP_KERNEL);
1572 bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
1573 A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);
1574
1575 if (!a6xx_state)
1576 return ERR_PTR(-ENOMEM);
1577
1578 INIT_LIST_HEAD(&a6xx_state->objs);
1579
1580 /* Get the generic state from the adreno core */
1581 adreno_gpu_state_get(gpu, &a6xx_state->base);
1582
1583 if (!adreno_has_gmu_wrapper(adreno_gpu)) {
1584 a6xx_get_gmu_registers(gpu, a6xx_state);
1585
1586 a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
1587 a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
1588 a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);
1589
1590 a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);
1591 }
1592
1593 /* If GX isn't on the rest of the data isn't going to be accessible */
1594 if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
1595 return &a6xx_state->base;
1596
1597 /* Get the banks of indexed registers */
1598 if (adreno_is_a7xx(adreno_gpu))
1599 a7xx_get_indexed_registers(gpu, a6xx_state);
1600 else
1601 a6xx_get_indexed_registers(gpu, a6xx_state);
1602
1603 /*
1604 * Try to initialize the crashdumper, if we are not dumping state
1605 * with the SMMU stalled. The crashdumper needs memory access to
1606 * write out GPU state, so we need to skip this when the SMMU is
1607 * stalled in response to an iova fault
1608 */
1609 if (!stalled && !gpu->needs_hw_init &&
1610 !a6xx_crashdumper_init(gpu, &_dumper)) {
1611 dumper = &_dumper;
1612 }
1613
1614 if (adreno_is_a7xx(adreno_gpu)) {
1615 a7xx_get_registers(gpu, a6xx_state, dumper);
1616
1617 if (dumper) {
1618 a7xx_get_shaders(gpu, a6xx_state, dumper);
1619 a7xx_get_clusters(gpu, a6xx_state, dumper);
1620 a7xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1621
1622 msm_gem_kernel_put(dumper->bo, gpu->aspace);
1623 }
1624
1625 a7xx_get_post_crashdumper_registers(gpu, a6xx_state);
1626 } else {
1627 a6xx_get_registers(gpu, a6xx_state, dumper);
1628
1629 if (dumper) {
1630 a6xx_get_shaders(gpu, a6xx_state, dumper);
1631 a6xx_get_clusters(gpu, a6xx_state, dumper);
1632 a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1633
1634 msm_gem_kernel_put(dumper->bo, gpu->aspace);
1635 }
1636 }
1637
1638 if (snapshot_debugbus)
1639 a6xx_get_debugbus(gpu, a6xx_state);
1640
1641 a6xx_state->gpu_initialized = !gpu->needs_hw_init;
1642
1643 return &a6xx_state->base;
1644 }
1645
1646 static void a6xx_gpu_state_destroy(struct kref *kref)
1647 {
1648 struct a6xx_state_memobj *obj, *tmp;
1649 struct msm_gpu_state *state = container_of(kref,
1650 struct msm_gpu_state, ref);
1651 struct a6xx_gpu_state *a6xx_state = container_of(state,
1652 struct a6xx_gpu_state, base);
1653
1654 if (a6xx_state->gmu_log)
1655 kvfree(a6xx_state->gmu_log->data);
1656
1657 if (a6xx_state->gmu_hfi)
1658 kvfree(a6xx_state->gmu_hfi->data);
1659
1660 if (a6xx_state->gmu_debug)
1661 kvfree(a6xx_state->gmu_debug->data);
1662
1663 list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
1664 list_del(&obj->node);
1665 kvfree(obj);
1666 }
1667
1668 adreno_gpu_state_destroy(state);
1669 kfree(a6xx_state);
1670 }
1671
1672 int a6xx_gpu_state_put(struct msm_gpu_state *state)
1673 {
1674 if (IS_ERR_OR_NULL(state))
1675 return 1;
1676
1677 return kref_put(&state->ref, a6xx_gpu_state_destroy);
1678 }
1679
1680 static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
1681 struct drm_printer *p)
1682 {
1683 int i, index = 0;
1684
1685 if (!data)
1686 return;
1687
1688 for (i = 0; i < count; i += 2) {
1689 u32 count = RANGE(registers, i);
1690 u32 offset = registers[i];
1691 int j;
1692
1693 for (j = 0; j < count; index++, offset++, j++) {
1694 if (data[index] == 0xdeafbead)
1695 continue;
1696
1697 drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n",
1698 offset << 2, data[index]);
1699 }
1700 }
1701 }
1702
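/*
 * Like a6xx_show_registers(), but a7xx register tables are terminated with
 * UINT_MAX rather than carrying an explicit count, and the caller picks the
 * indentation so nested cluster dumps line up.
 */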
1703 static void a7xx_show_registers_indented(const u32 *registers, u32 *data,
1704 struct drm_printer *p, unsigned indent)
1705 {
1706 int i, index = 0;
1707
1708 for (i = 0; registers[i] != UINT_MAX; i += 2) {
1709 u32 count = RANGE(registers, i);
1710 u32 offset = registers[i];
1711 int j;
1712
1713 for (j = 0; j < count; index++, offset++, j++) {
1714 int k;
1715
1716 if (data[index] == 0xdeafbead)
1717 continue;
1718
1719 for (k = 0; k < indent; k++)
1720 drm_printf(p, " ");
1721 drm_printf(p, "- { offset: 0x%06x, value: 0x%08x }\n",
1722 offset << 2, data[index]);
1723 }
1724 }
1725 }
1726
1727 static void a7xx_show_registers(const u32 *registers, u32 *data, struct drm_printer *p)
1728 {
1729 a7xx_show_registers_indented(registers, data, p, 1);
1730 }
1731
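/*
 * Print a buffer as ascii85, trimming trailing zero dwords and skipping the
 * block entirely if it contains no data.
 */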
1732 static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
1733 {
1734 char out[ASCII85_BUFSZ];
1735 long i, l, datalen = 0;
1736
1737 for (i = 0; i < len >> 2; i++) {
1738 if (data[i])
1739 datalen = (i + 1) << 2;
1740 }
1741
1742 if (datalen == 0)
1743 return;
1744
1745 drm_puts(p, " data: !!ascii85 |\n");
1746 drm_puts(p, " ");
1747
1749 l = ascii85_encode_len(datalen);
1750
1751 for (i = 0; i < l; i++)
1752 drm_puts(p, ascii85_encode(data[i], out));
1753
1754 drm_puts(p, "\n");
1755 }
1756
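/* Print a label string immediately followed by a name and a newline */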
1757 static void print_name(struct drm_printer *p, const char *fmt, const char *name)
1758 {
1759 drm_puts(p, fmt);
1760 drm_puts(p, name);
1761 drm_puts(p, "\n");
1762 }
1763
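/*
 * Print an a6xx shader block: its type, then the size and ascii85 encoded
 * contents of each shader bank.
 */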
1764 static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
1765 struct drm_printer *p)
1766 {
1767 const struct a6xx_shader_block *block = obj->handle;
1768 int i;
1769
1770 if (!obj->handle)
1771 return;
1772
1773 print_name(p, " - type: ", block->name);
1774
1775 for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
1776 drm_printf(p, " - bank: %d\n", i);
1777 drm_printf(p, " size: %d\n", block->size);
1778
1779 if (!obj->data)
1780 continue;
1781
1782 print_ascii85(p, block->size << 2,
1783 obj->data + (block->size * i));
1784 }
1785 }
1786
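/*
 * Print an a7xx shader block, walking the per-SP / per-uSPTP copies that the
 * crashdumper captured back to back in the data buffer.
 */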
1787 static void a7xx_show_shader(struct a6xx_gpu_state_obj *obj,
1788 struct drm_printer *p)
1789 {
1790 const struct gen7_shader_block *block = obj->handle;
1791 int i, j;
1792 u32 *data = obj->data;
1793
1794 if (!obj->handle)
1795 return;
1796
1797 print_name(p, " - type: ", a7xx_statetype_names[block->statetype]);
1798 print_name(p, " - pipe: ", a7xx_pipe_names[block->pipeid]);
1799
1800 for (i = 0; i < block->num_sps; i++) {
1801 drm_printf(p, " - sp: %d\n", i);
1802
1803 for (j = 0; j < block->num_usptps; j++) {
1804 drm_printf(p, " - usptp: %d\n", j);
1805 drm_printf(p, " size: %d\n", block->size);
1806
1807 if (!obj->data)
1808 continue;
1809
1810 print_ascii85(p, block->size << 2, data);
1811
1812 data += block->size;
1813 }
1814 }
1815 }
1816
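/*
 * Print per-context cluster registers: the data holds one copy of every
 * register range for each hardware context.
 */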
1817 static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
1818 struct drm_printer *p)
1819 {
1820 int ctx, index = 0;
1821
1822 for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
1823 int j;
1824
1825 drm_printf(p, " - context: %d\n", ctx);
1826
1827 for (j = 0; j < size; j += 2) {
1828 u32 count = RANGE(registers, j);
1829 u32 offset = registers[j];
1830 int k;
1831
1832 for (k = 0; k < count; index++, offset++, k++) {
1833 if (data[index] == 0xdeafbead)
1834 continue;
1835
1836 drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n",
1837 offset << 2, data[index]);
1838 }
1839 }
1840 }
1841 }
1842
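/* Print one a6xx DBGAHB cluster: its name followed by the per-context data */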
1843 static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1844 struct drm_printer *p)
1845 {
1846 const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
1847
1848 if (dbgahb) {
1849 print_name(p, " - cluster-name: ", dbgahb->name);
1850 a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
1851 obj->data, p);
1852 }
1853 }
1854
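/* Print one a6xx register cluster: its name followed by the per-context data */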
1855 static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1856 struct drm_printer *p)
1857 {
1858 const struct a6xx_cluster *cluster = obj->handle;
1859
1860 if (cluster) {
1861 print_name(p, " - cluster-name: ", cluster->name);
1862 a6xx_show_cluster_data(cluster->registers, cluster->count,
1863 obj->data, p);
1864 }
1865 }
1866
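/* Print one a7xx SP/TP (DBGAHB) cluster along with its pipe and context */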
1867 static void a7xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1868 struct drm_printer *p)
1869 {
1870 const struct gen7_sptp_cluster_registers *dbgahb = obj->handle;
1871
1872 if (dbgahb) {
1873 print_name(p, " - pipe: ", a7xx_pipe_names[dbgahb->pipe_id]);
1874 print_name(p, " - cluster-name: ", a7xx_cluster_names[dbgahb->cluster_id]);
1875 drm_printf(p, " - context: %d\n", dbgahb->context_id);
1876 a7xx_show_registers_indented(dbgahb->regs, obj->data, p, 4);
1877 }
1878 }
1879
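/* Print one a7xx register cluster with its pipe, name and selected context */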
1880 static void a7xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1881 struct drm_printer *p)
1882 {
1883 const struct gen7_cluster_registers *cluster = obj->handle;
1884
1885 if (cluster) {
1886 int context = (cluster->context_id == STATE_FORCE_CTXT_1) ? 1 : 0;
1887
1888 print_name(p, " - pipe: ", a7xx_pipe_names[cluster->pipe_id]);
1889 print_name(p, " - cluster-name: ", a7xx_cluster_names[cluster->cluster_id]);
1890 drm_printf(p, " - context: %d\n", context);
1891 a7xx_show_registers_indented(cluster->regs, obj->data, p, 4);
1892 }
1893 }
1894
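/* Print an indexed register block: its name, dword count and ascii85 encoded data */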
1895 static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
1896 struct drm_printer *p)
1897 {
1898 const struct a6xx_indexed_registers *indexed = obj->handle;
1899
1900 if (!indexed)
1901 return;
1902
1903 print_name(p, " - regs-name: ", indexed->name);
1904 drm_printf(p, " dwords: %d\n", obj->count);
1905
1906 print_ascii85(p, obj->count << 2, obj->data);
1907 }
1908
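/* Print one debugbus block: its name, its size and its ascii85 encoded contents */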
1909 static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
1910 u32 *data, struct drm_printer *p)
1911 {
1912 if (block) {
1913 print_name(p, " - debugbus-block: ", block->name);
1914
1915 /*
1916 * count for regular debugbus data is in quadwords,
1917 * but print the size in dwords for consistency
1918 */
1919 drm_printf(p, " count: %d\n", block->count << 1);
1920
1921 print_ascii85(p, block->count << 3, data);
1922 }
1923 }
1924
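/* Print every captured debugbus dump: the core blocks, the VBIF data and the CX blocks */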
1925 static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
1926 struct drm_printer *p)
1927 {
1928 int i;
1929
1930 for (i = 0; i < a6xx_state->nr_debugbus; i++) {
1931 struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
1932
1933 a6xx_show_debugbus_block(obj->handle, obj->data, p);
1934 }
1935
1936 if (a6xx_state->vbif_debugbus) {
1937 struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
1938
1939 drm_puts(p, " - debugbus-block: A6XX_DBGBUS_VBIF\n");
1940 drm_printf(p, " count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
1941
1942 /* vbif debugbus data is in dwords. Confusing, huh? */
1943 print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
1944 }
1945
1946 for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
1947 struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
1948
1949 a6xx_show_debugbus_block(obj->handle, obj->data, p);
1950 }
1951 }
1952
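/*
 * Top-level printer for a captured GPU state: write everything out through the
 * drm_printer, using the a6xx or a7xx helpers depending on the GPU generation.
 */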
1953 void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1954 struct drm_printer *p)
1955 {
1956 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1957 struct a6xx_gpu_state *a6xx_state = container_of(state,
1958 struct a6xx_gpu_state, base);
1959 int i;
1960
1961 if (IS_ERR_OR_NULL(state))
1962 return;
1963
1964 drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);
1965
1966 adreno_show(gpu, state, p);
1967
1968 drm_puts(p, "gmu-log:\n");
1969 if (a6xx_state->gmu_log) {
1970 struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;
1971
1972 drm_printf(p, " iova: 0x%016llx\n", gmu_log->iova);
1973 drm_printf(p, " size: %zu\n", gmu_log->size);
1974 adreno_show_object(p, &gmu_log->data, gmu_log->size,
1975 &gmu_log->encoded);
1976 }
1977
1978 drm_puts(p, "gmu-hfi:\n");
1979 if (a6xx_state->gmu_hfi) {
1980 struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
1981 unsigned i, j;
1982
1983 drm_printf(p, " iova: 0x%016llx\n", gmu_hfi->iova);
1984 drm_printf(p, " size: %zu\n", gmu_hfi->size);
1985 for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
1986 drm_printf(p, " queue-history[%u]:", i);
1987 for (j = 0; j < HFI_HISTORY_SZ; j++) {
1988 drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
1989 }
1990 drm_printf(p, "\n");
1991 }
1992 adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
1993 &gmu_hfi->encoded);
1994 }
1995
1996 drm_puts(p, "gmu-debug:\n");
1997 if (a6xx_state->gmu_debug) {
1998 struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;
1999
2000 drm_printf(p, " iova: 0x%016llx\n", gmu_debug->iova);
2001 drm_printf(p, " size: %zu\n", gmu_debug->size);
2002 adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
2003 &gmu_debug->encoded);
2004 }
2005
2006 drm_puts(p, "registers:\n");
2007 for (i = 0; i < a6xx_state->nr_registers; i++) {
2008 struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
2009
2010 if (!obj->handle)
2011 continue;
2012
2013 if (adreno_is_a7xx(adreno_gpu)) {
2014 a7xx_show_registers(obj->handle, obj->data, p);
2015 } else {
2016 const struct a6xx_registers *regs = obj->handle;
2017
2018 a6xx_show_registers(regs->registers, obj->data, regs->count, p);
2019 }
2020 }
2021
2022 drm_puts(p, "registers-gmu:\n");
2023 for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
2024 struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
2025 const struct a6xx_registers *regs = obj->handle;
2026
2027 if (!obj->handle)
2028 continue;
2029
2030 a6xx_show_registers(regs->registers, obj->data, regs->count, p);
2031 }
2032
2033 drm_puts(p, "indexed-registers:\n");
2034 for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
2035 a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);
2036
2037 drm_puts(p, "shader-blocks:\n");
2038 for (i = 0; i < a6xx_state->nr_shaders; i++) {
2039 if (adreno_is_a7xx(adreno_gpu))
2040 a7xx_show_shader(&a6xx_state->shaders[i], p);
2041 else
2042 a6xx_show_shader(&a6xx_state->shaders[i], p);
2043 }
2044
2045 drm_puts(p, "clusters:\n");
2046 for (i = 0; i < a6xx_state->nr_clusters; i++) {
2047 if (adreno_is_a7xx(adreno_gpu))
2048 a7xx_show_cluster(&a6xx_state->clusters[i], p);
2049 else
2050 a6xx_show_cluster(&a6xx_state->clusters[i], p);
2051 }
2052
2053 for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) {
2054 if (adreno_is_a7xx(adreno_gpu))
2055 a7xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
2056 else
2057 a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
2058 }
2059
2060 drm_puts(p, "debugbus:\n");
2061 a6xx_show_debugbus(a6xx_state, p);
2062 }
2063