1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */
3
4
5 #include "msm_gem.h"
6 #include "msm_mmu.h"
7 #include "msm_gpu_trace.h"
8 #include "a6xx_gpu.h"
9 #include "a6xx_gmu.xml.h"
10
11 #include <linux/bitfield.h>
12 #include <linux/devfreq.h>
13 #include <linux/firmware/qcom/qcom_scm.h>
14 #include <linux/pm_domain.h>
15 #include <linux/soc/qcom/llcc-qcom.h>
16
17 #define GPU_PAS_ID 13
18
/*
 * Route host register accesses to the given pipe/slice via
 * CP_APERTURE_CNTL_HOST.  The write is skipped when the aperture is already
 * configured that way.  Caller must hold a6xx_gpu->aperture_lock.
 */
static void a8xx_aperture_slice_set(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice)
{
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(to_adreno_gpu(gpu));
	u32 cntl = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe) |
		   A8XX_CP_APERTURE_CNTL_HOST_SLICEID(slice);

	if (cntl == a6xx_gpu->cached_aperture)
		return;

	gpu_write(gpu, REG_A8XX_CP_APERTURE_CNTL_HOST, cntl);
	a6xx_gpu->cached_aperture = cntl;
}
34
a8xx_aperture_acquire(struct msm_gpu * gpu,enum adreno_pipe pipe,unsigned long * flags)35 static void a8xx_aperture_acquire(struct msm_gpu *gpu, enum adreno_pipe pipe, unsigned long *flags)
36 {
37 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
38 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
39
40 spin_lock_irqsave(&a6xx_gpu->aperture_lock, *flags);
41
42 a8xx_aperture_slice_set(gpu, pipe, 0);
43 }
44
a8xx_aperture_release(struct msm_gpu * gpu,unsigned long flags)45 static void a8xx_aperture_release(struct msm_gpu *gpu, unsigned long flags)
46 {
47 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
48 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
49
50 spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags);
51 }
52
a8xx_aperture_clear(struct msm_gpu * gpu)53 static void a8xx_aperture_clear(struct msm_gpu *gpu)
54 {
55 unsigned long flags;
56
57 a8xx_aperture_acquire(gpu, PIPE_NONE, &flags);
58 a8xx_aperture_release(gpu, flags);
59 }
60
/* Write a single pipe-scoped register under the aperture lock. */
static void a8xx_write_pipe(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 offset, u32 data)
{
	unsigned long irqflags;

	a8xx_aperture_acquire(gpu, pipe, &irqflags);
	gpu_write(gpu, offset, data);
	a8xx_aperture_release(gpu, irqflags);
}
69
/* Read a pipe/slice-scoped register under the aperture lock. */
static u32 a8xx_read_pipe_slice(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice, u32 offset)
{
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(to_adreno_gpu(gpu));
	unsigned long irqflags;
	u32 value;

	spin_lock_irqsave(&a6xx_gpu->aperture_lock, irqflags);
	a8xx_aperture_slice_set(gpu, pipe, slice);
	value = gpu_read(gpu, offset);
	spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, irqflags);

	return value;
}
84
/*
 * Discover which GPU slices are enabled and cache the mask in
 * a6xx_gpu->slice_mask.  On parts supporting partial slice configurations the
 * actual slice count is folded into the chip ID and the exposed GPU name is
 * regenerated to match.  No-op for pre-a8xx families or if already discovered.
 */
void a8xx_gpu_get_slice_info(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	const struct a6xx_info *info = adreno_gpu->info->a6xx;
	struct device *dev = &gpu->pdev->dev;
	u32 slice_mask;

	/* Slicing only exists on the a8xx families */
	if (adreno_gpu->info->family < ADRENO_8XX_GEN1)
		return;

	/* Already discovered on a previous init */
	if (a6xx_gpu->slice_mask)
		return;

	slice_mask = GENMASK(info->max_slices - 1, 0);

	/* GEN1 doesn't support partial slice configurations */
	if (adreno_gpu->info->family == ADRENO_8XX_GEN1) {
		a6xx_gpu->slice_mask = slice_mask;
		return;
	}

	/* Mask off slices the hardware reports as disabled */
	slice_mask &= a6xx_llc_read(a6xx_gpu,
			REG_A8XX_CX_MISC_SLICE_ENABLE_FINAL);

	a6xx_gpu->slice_mask = slice_mask;

	/* Chip ID depends on the number of slices available. So update it */
	adreno_gpu->chip_id |= FIELD_PREP(GENMASK(7, 4), hweight32(slice_mask));

	/* Update the gpu-name to reflect the slice config: */
	const char *name = devm_kasprintf(dev, GFP_KERNEL,
			"%"ADRENO_CHIPID_FMT,
			ADRENO_CHIPID_ARGS(adreno_gpu->chip_id));
	/* On allocation failure keep the old name rather than losing it */
	if (name) {
		devm_kfree(dev, adreno_gpu->base.name);
		adreno_gpu->base.name = name;
	}
}
124
/*
 * Return the index of the lowest enabled slice.  Assumes slice_mask is
 * non-zero (populated by a8xx_gpu_get_slice_info()); ffs() returns 0 for an
 * empty mask, which would yield an invalid index of -1.
 */
static u32 a8xx_get_first_slice(struct a6xx_gpu *a6xx_gpu)
{
	return ffs(a6xx_gpu->slice_mask) - 1;
}
129
/*
 * Single-shot idle probe used by a8xx_idle()'s spin_until() loop.  Idle means:
 * the GMU reports idle, RBBM_STATUS shows nothing busy except the CX master
 * AHB path (busy by virtue of the CPU reading these registers), and no
 * hang-detect interrupt is pending.
 */
static inline bool _a8xx_check_idle(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Check that the GMU is idle */
	if (!a6xx_gmu_isidle(&a6xx_gpu->gmu))
		return false;

	/* Check that the CX master is idle */
	if (gpu_read(gpu, REG_A8XX_RBBM_STATUS) &
			~A8XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
		return false;

	return !(gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS) &
		A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
}
147
/*
 * Wait for @ring to drain and the GPU to go fully idle.  Returns false and
 * logs the relevant status registers if either step times out.
 */
static bool a8xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a8xx_check_idle(gpu))) {
		DRM_ERROR(
			"%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A8XX_RBBM_STATUS),
			gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
		return false;
	}

	return true;
}
167
a8xx_flush(struct msm_gpu * gpu,struct msm_ringbuffer * ring)168 void a8xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
169 {
170 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
171 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
172 uint32_t wptr;
173 unsigned long flags;
174
175 spin_lock_irqsave(&ring->preempt_lock, flags);
176
177 /* Copy the shadow to the actual register */
178 ring->cur = ring->next;
179
180 /* Make sure to wrap wptr if we need to */
181 wptr = get_wptr(ring);
182
183 /* Update HW if this is the current ring and we are not in preempt*/
184 if (!a6xx_in_preempt(a6xx_gpu)) {
185 if (a6xx_gpu->cur_ring == ring)
186 a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
187 else
188 ring->restore_wptr = true;
189 } else {
190 ring->restore_wptr = true;
191 }
192
193 spin_unlock_irqrestore(&ring->preempt_lock, flags);
194 }
195
/*
 * Enable or disable hardware clock gating.  The CGC mode/delay/hysteresis
 * configuration lives on the GMU side; the GPU side then triggers a global
 * load and a P2S handshake to latch the new settings.
 */
static void a8xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	u32 val;

	/* X285-specific PC clock gating configuration */
	if (adreno_is_x285(adreno_gpu) && state)
		gpu_write(gpu, REG_A8XX_RBBM_CGC_0_PC, 0x00000702);

	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
		  state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0);
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL,
		  state ? 0x110111 : 0);
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL,
		  state ? 0x55555 : 0);

	/* Kick off the global load of the CGC configuration */
	gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 1);
	gpu_write(gpu, REG_A8XX_RBBM_CGC_GLOBAL_LOAD_CMD, !!state);

	if (state) {
		gpu_write(gpu, REG_A8XX_RBBM_CGC_P2S_TRIG_CMD, 1);

		/* Wait for the P2S transfer to complete before clearing GLOBAL */
		if (gpu_poll_timeout(gpu, REG_A8XX_RBBM_CGC_P2S_STATUS, val,
				     val & A8XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) {
			dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n");
			return;
		}

		gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 0);
	} else {
		/*
		 * GMU enables clk gating in GBIF during boot up. So,
		 * override that here when hwcg feature is disabled
		 */
		gpu_rmw(gpu, REG_A8XX_GBIF_CX_CONFIG, BIT(0), 0);
	}
}
234
a8xx_set_cp_protect(struct msm_gpu * gpu)235 static void a8xx_set_cp_protect(struct msm_gpu *gpu)
236 {
237 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
238 const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect;
239 u32 cntl, final_cfg;
240 unsigned int i;
241
242 cntl = A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_PROT_EN |
243 A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_FAULT_ON_VIOL_EN |
244 A8XX_CP_PROTECT_CNTL_PIPE_LAST_SPAN_INF_RANGE |
245 A8XX_CP_PROTECT_CNTL_PIPE_HALT_SQE_RANGE__MASK;
246 /*
247 * Enable access protection to privileged registers, fault on an access
248 * protect violation and select the last span to protect from the start
249 * address all the way to the end of the register address space
250 */
251 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl);
252 a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl);
253
254 a8xx_aperture_clear(gpu);
255
256 for (i = 0; i < protect->count; i++) {
257 /* Intentionally skip writing to some registers */
258 if (protect->regs[i]) {
259 gpu_write(gpu, REG_A8XX_CP_PROTECT_GLOBAL(i), protect->regs[i]);
260 final_cfg = protect->regs[i];
261 }
262 }
263
264 /*
265 * Last span feature is only supported on PIPE specific register.
266 * So update those here
267 */
268 a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg);
269 a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg);
270
271 a8xx_aperture_clear(gpu);
272 }
273
/*
 * Program the UBWC (bandwidth compression) configuration into the GRAS, RB,
 * SP and TPL1 blocks.  Feature bits and the mode value are derived from the
 * SoC's UBWC encoder version; the highest-bank-bit is split into hi/lo fields
 * as required by the hardware registers.
 */
static void a8xx_set_ubwc_config(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config;
	u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2);
	u32 level3_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL3);
	bool rgba8888_lossless = false, fp16compoptdis = false;
	bool yuvnotcomptofc = false, min_acc_len_64b = false;
	bool rgb565_predicator = false, amsbc = false;
	bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg);
	u32 ubwc_version = cfg->ubwc_enc_version;
	u32 hbb, hbb_hi, hbb_lo, mode = 1;
	u8 uavflagprd_inv = 2;

	/* Pick the feature set matching the SoC's UBWC encoder version */
	switch (ubwc_version) {
	case UBWC_6_0:
		yuvnotcomptofc = true;
		mode = 5;
		break;
	case UBWC_5_0:
		amsbc = true;
		rgb565_predicator = true;
		mode = 4;
		break;
	case UBWC_4_0:
		amsbc = true;
		rgb565_predicator = true;
		fp16compoptdis = true;
		rgba8888_lossless = true;
		mode = 2;
		break;
	case UBWC_3_0:
		amsbc = true;
		mode = 1;
		break;
	default:
		/* Keep the conservative defaults initialized above */
		dev_err(&gpu->pdev->dev, "Unknown UBWC version: 0x%x\n", ubwc_version);
		break;
	}

	/*
	 * We subtract 13 from the highest bank bit (13 is the minimum value
	 * allowed by hw) and write the lowest two bits of the remaining value
	 * as hbb_lo and the one above it as hbb_hi to the hardware.
	 */
	WARN_ON(cfg->highest_bank_bit < 13);
	hbb = cfg->highest_bank_bit - 13;
	hbb_hi = hbb >> 2;
	hbb_lo = hbb & 3;

	a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_GRAS_NC_MODE_CNTL,
			hbb << 5 |
			level3_swizzling_dis << 4 |
			level2_swizzling_dis << 3);

	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_GRAS_NC_MODE_CNTL,
			hbb << 5 |
			level3_swizzling_dis << 4 |
			level2_swizzling_dis << 3);

	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CCU_NC_MODE_CNTL,
			yuvnotcomptofc << 6 |
			level3_swizzling_dis << 5 |
			level2_swizzling_dis << 4 |
			hbb_hi << 3 |
			hbb_lo << 1);

	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CMP_NC_MODE_CNTL,
			mode << 15 |
			yuvnotcomptofc << 6 |
			rgba8888_lossless << 4 |
			fp16compoptdis << 3 |
			rgb565_predicator << 2 |
			amsbc << 1 |
			min_acc_len_64b);

	a8xx_aperture_clear(gpu);

	/* SP and TPL1 mode registers are global, not pipe-scoped */
	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
		  level3_swizzling_dis << 13 |
		  level2_swizzling_dis << 12 |
		  hbb_hi << 10 |
		  uavflagprd_inv << 4 |
		  min_acc_len_64b << 3 |
		  hbb_lo << 1 | ubwc_mode);

	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL,
		  level3_swizzling_dis << 7 |
		  level2_swizzling_dis << 6 |
		  hbb_hi << 4 |
		  min_acc_len_64b << 3 |
		  hbb_lo << 1 | ubwc_mode);
}
367
/*
 * Program the non-context (global) register list for every pipe it applies
 * to.  The value destined for RB_GC_GMEM_PROTECT is reported back through
 * *gmem_protect instead of only being written here, so hw_init() can
 * re-program it after the GPU leaves secure mode.
 */
static void a8xx_nonctxt_config(struct msm_gpu *gpu, u32 *gmem_protect)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct a6xx_info *info = adreno_gpu->info->a6xx;
	const struct adreno_reglist_pipe *regs = info->nonctxt_reglist;
	unsigned int pipe_id, i;
	unsigned long flags;

	for (pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) {
		/* We don't have support for LPAC yet */
		if (pipe_id == PIPE_LPAC)
			continue;

		a8xx_aperture_acquire(gpu, pipe_id, &flags);

		/* The list is terminated by an entry with a zero offset */
		for (i = 0; regs[i].offset; i++) {
			if (!(BIT(pipe_id) & regs[i].pipe))
				continue;

			if (regs[i].offset == REG_A8XX_RB_GC_GMEM_PROTECT)
				*gmem_protect = regs[i].value;

			gpu_write(gpu, regs[i].offset, regs[i].value);
		}

		a8xx_aperture_release(gpu, flags);
	}

	a8xx_aperture_clear(gpu);
}
398
/*
 * Populate the shared CPU/GPU power-up register list buffer that the CP
 * restores after IFPC power collapse and on preemption.  Each static list
 * entry is written as an (offset, current value) pair; the dynamic list uses
 * (aperture, offset, value) triplets.  See the layout comment below.
 */
static void a8xx_patch_pwrup_reglist(struct msm_gpu *gpu)
{
	const struct adreno_reglist_pipe_list *dyn_pwrup_reglist;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	const struct adreno_reglist_list *reglist;
	void *ptr = a6xx_gpu->pwrup_reglist_ptr;
	struct cpu_gpu_lock *lock = ptr;
	u32 *dest = (u32 *)&lock->regs[0];
	u32 dyn_pwrup_reglist_count = 0;
	int i;

	/* Reset the CPU/GPU handshake lock before rewriting the lists */
	lock->gpu_req = lock->cpu_req = lock->turn = 0;

	reglist = adreno_gpu->info->a6xx->ifpc_reglist;
	if (reglist) {
		lock->ifpc_list_len = reglist->count;

		/*
		 * For each entry in each of the lists, write the offset and the current
		 * register value into the GPU buffer
		 */
		for (i = 0; i < reglist->count; i++) {
			*dest++ = reglist->regs[i];
			*dest++ = gpu_read(gpu, reglist->regs[i]);
		}
	}

	reglist = adreno_gpu->info->a6xx->pwrup_reglist;
	if (reglist) {
		lock->preemption_list_len = reglist->count;

		for (i = 0; i < reglist->count; i++) {
			*dest++ = reglist->regs[i];
			*dest++ = gpu_read(gpu, reglist->regs[i]);
		}
	}

	/*
	 * The overall register list is composed of
	 * 1. Static IFPC-only registers
	 * 2. Static IFPC + preemption registers
	 * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects)
	 *
	 * The first two lists are static. Size of these lists are stored as
	 * number of pairs in ifpc_list_len and preemption_list_len
	 * respectively. With concurrent binning, Some of the perfcounter
	 * registers being virtualized, CP needs to know the pipe id to program
	 * the aperture inorder to restore the same. Thus, third list is a
	 * dynamic list with triplets as
	 * (<aperture, shifted 12 bits> <address> <data>), and the length is
	 * stored as number for triplets in dynamic_list_len.
	 */
	dyn_pwrup_reglist = adreno_gpu->info->a6xx->dyn_pwrup_reglist;
	if (!dyn_pwrup_reglist)
		goto done;

	for (u32 pipe_id = PIPE_BR; pipe_id <= PIPE_DDE_BV; pipe_id++) {
		for (i = 0; i < dyn_pwrup_reglist->count; i++) {
			if (!(dyn_pwrup_reglist->regs[i].pipe & BIT(pipe_id)))
				continue;
			*dest++ = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe_id);
			*dest++ = dyn_pwrup_reglist->regs[i].offset;
			/* Snapshot the current value via the first enabled slice */
			*dest++ = a8xx_read_pipe_slice(gpu,
					pipe_id,
					a8xx_get_first_slice(a6xx_gpu),
					dyn_pwrup_reglist->regs[i].offset);
			dyn_pwrup_reglist_count++;
		}
	}

	lock->dynamic_list_len = dyn_pwrup_reglist_count;

done:
	a8xx_aperture_clear(gpu);
}
475
/*
 * Submit an initial yield sequence on ring 0 so the CP programs its
 * preemption records and parks on the ring floor.  No-op when preemption is
 * unavailable (single ring).  Returns -EINVAL if the GPU fails to idle.
 */
static int a8xx_preempt_start(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	if (gpu->nr_rings <= 1)
		return 0;

	/* Turn CP protection off */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, NULL);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	/* Generate interrupt on preemption completion */
	OUT_RING(ring, 0x00);

	a6xx_flush(gpu, ring);

	return a8xx_idle(gpu, ring) ? 0 : -EINVAL;
}
503
/*
 * Send CP_ME_INIT to bring up the CP microcode, pointing it at the power-up
 * register list buffer.  Returns -EINVAL if the GPU fails to idle afterwards.
 */
static int a8xx_cp_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];
	u32 mask;

	/* Disable concurrent binning before sending CP init */
	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, BIT(27));

	OUT_PKT7(ring, CP_ME_INIT, 7);

	/* Use multiple HW contexts */
	mask = BIT(0);

	/* Enable error detection */
	mask |= BIT(1);

	/* Set default reset state */
	mask |= BIT(3);

	/* Disable save/restore of performance counters across preemption */
	mask |= BIT(6);

	/* Enable the register init list with the spinlock */
	mask |= BIT(8);

	OUT_RING(ring, mask);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Operation mode mask */
	OUT_RING(ring, 0x00000002);

	/* Lo address */
	OUT_RING(ring, lower_32_bits(a6xx_gpu->pwrup_reglist_iova));
	/* Hi address */
	OUT_RING(ring, upper_32_bits(a6xx_gpu->pwrup_reglist_iova));

	/* Enable dyn pwrup list with triplets (offset, value, pipe) */
	OUT_RING(ring, BIT(31));

	a6xx_flush(gpu, ring);
	return a8xx_idle(gpu, ring) ? 0 : -EINVAL;
}
554
/* RBBM_INT_0 interrupt sources the driver enables and services */
#define A8XX_INT_MASK \
	(A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
	 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
	 A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \
	 A6XX_RBBM_INT_0_MASK_CP_SW | \
	 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	 A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \
	 A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \
	 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
	 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
	 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \
	 A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \
	 A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)

/* CP accesses granted elevated (APRIV) privileges on every pipe */
#define A8XX_APRIV_MASK \
	(A8XX_CP_APRIV_CNTL_PIPE_ICACHE | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBFETCH | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBPRIVLEVEL | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBRPWB)

/* BR additionally gets privileged CD read/write access */
#define A8XX_BR_APRIV_MASK \
	(A8XX_APRIV_MASK | \
	 A8XX_CP_APRIV_CNTL_PIPE_CDREAD | \
	 A8XX_CP_APRIV_CNTL_PIPE_CDWRITE)

/* Global CP fault interrupts (HW and SW faults, per CP engine) */
#define A8XX_CP_GLOBAL_INT_MASK \
	(A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTLPAC | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE0 | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE1 | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBR | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBV | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTLPAC | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE0 | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE1 | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBR | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBV)

/* Per-pipe CP software-fault interrupt sources */
#define A8XX_CP_INTERRUPT_STATUS_MASK_PIPE \
	(A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFRBWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB1WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB2WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB3WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFSDSWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFMRBWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFVSDWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_OPCODEERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VSDPARITYERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_REGISTERPROTECTIONERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_ILLEGALINSTRUCTION | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_SMMUFAULT | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPCLIENT | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPTYPE | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPREAD | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_RTWROVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTWROVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTREFCNTOVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTCLRRESMISS)

/* Per-pipe CP hardware-fault interrupt sources */
#define A8XX_CP_HW_FAULT_STATUS_MASK_PIPE \
	(A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFRBFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB1FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB2FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB3FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFSDSFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFMRBFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFVSDFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_SQEREADBURSTOVF | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_EVENTENGINEOVF | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_UCODEERROR)
631
/*
 * Full hardware bring-up, called with the GMU lock held (see a8xx_hw_init()).
 * Clears halts, programs global state (UCHE, UBWC, protect lists, interrupt
 * masks), loads the SQE firmware, runs CP_ME_INIT, drops out of secure mode
 * and finally yields the ringbuffer.  The ordering of register writes below
 * follows the hardware bring-up sequence and must not be rearranged.
 */
static int hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	unsigned int pipe_id, i;
	u32 gmem_protect = 0;
	u64 gmem_range_min;
	int ret;

	/* Keep the GPU powered while we touch its registers */
	ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
	if (ret)
		return ret;

	/* Clear the cached value to force aperture configuration next time */
	a6xx_gpu->cached_aperture = UINT_MAX;
	a8xx_aperture_clear(gpu);

	/* Clear GBIF halt in case GX domain was not collapsed */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
	gpu_read(gpu, REG_A6XX_GBIF_HALT);

	gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 0);
	gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT);

	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);

	/*
	 * Disable the trusted memory range - we don't actually supported secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);

	/* Setup GMEM Range in UCHE */
	gmem_range_min = SZ_64M;
	/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_GC_GMEM_RANGE_MIN, gmem_range_min);
	gpu_write64(gpu, REG_A8XX_SP_HLSQ_GC_GMEM_RANGE_MIN, gmem_range_min);

	/* Setup UCHE Trap region */
	gpu_write64(gpu, REG_A8XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_CNTL, 0x1);
	gpu_write(gpu, REG_A8XX_RBBM_SLICE_PERFCTR_CNTL, 0x1);

	/* Turn on the IFPC counter (countable 4 on XOCLK1) */
	gmu_write(&a6xx_gpu->gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_1,
		  FIELD_PREP(GENMASK(7, 0), 0x4));

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A8XX_CP_PERFCTR_CP_SEL(0), 1);

	a8xx_set_ubwc_config(gpu);

	/* Set weights for bicubic filtering */
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), 0x3fe05ff4);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), 0x3fa0ebee);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), 0x3f5193ed);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), 0x3f0243f0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(5), 0x00000000);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(6), 0x3fd093e8);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(7), 0x3f4133dc);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(8), 0x3ea1dfdb);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(9), 0x3e0283e0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(10), 0x0000ac2b);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(11), 0x0000f01d);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(12), 0x00114412);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(13), 0x0021980a);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(14), 0x0051ec05);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(15), 0x0000380e);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(16), 0x3ff09001);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(17), 0x3fc10bfa);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(18), 0x3f9193f7);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(19), 0x3f7227f7);

	gpu_write(gpu, REG_A8XX_UCHE_CLIENT_PF, BIT(7) | 0x1);

	/* gmem_protect is filled in here, re-programmed after secure exit below */
	a8xx_nonctxt_config(gpu, &gmem_protect);

	/* Enable fault detection */
	gpu_write(gpu, REG_A8XX_RBBM_INTERFACE_HANG_INT_CNTL, BIT(30) | 0xcfffff);
	gpu_write(gpu, REG_A8XX_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, BIT(30));

	/* Set up the CX GMU counter 0 to count busy ticks */
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);

	/* Enable the power counter */
	gmu_rmw(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_0, 0xff, BIT(5));
	gmu_write(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);

	/* Protect registers from the CP */
	a8xx_set_cp_protect(gpu);

	/* Enable the GMEM save/restore feature for preemption */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE_ENABLE, 1);

	/* Per-pipe APRIV and fault mask setup (LPAC is not supported yet) */
	for (pipe_id = PIPE_BR; pipe_id <= PIPE_DDE_BV; pipe_id++) {
		u32 apriv_mask = A8XX_APRIV_MASK;
		unsigned long flags;

		if (pipe_id == PIPE_LPAC)
			continue;

		/* BR additionally needs privileged CD read/write access */
		if (pipe_id == PIPE_BR)
			apriv_mask = A8XX_BR_APRIV_MASK;

		a8xx_aperture_acquire(gpu, pipe_id, &flags);
		gpu_write(gpu, REG_A8XX_CP_APRIV_CNTL_PIPE, apriv_mask);
		gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_PIPE,
			  A8XX_CP_INTERRUPT_STATUS_MASK_PIPE);
		gpu_write(gpu, REG_A8XX_CP_HW_FAULT_STATUS_MASK_PIPE,
			  A8XX_CP_HW_FAULT_STATUS_MASK_PIPE);
		a8xx_aperture_release(gpu, flags);
	}

	a8xx_aperture_clear(gpu);

	/* Enable interrupts */
	gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, A8XX_CP_GLOBAL_INT_MASK);
	gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, A8XX_INT_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		goto out;

	gpu_write64(gpu, REG_A8XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
	if (a6xx_gpu->aqe_iova)
		gpu_write64(gpu, REG_A8XX_CP_AQE_INSTR_BASE_0, a6xx_gpu->aqe_iova);

	/* Set the ringbuffer address */
	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
	gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);

	/* Configure the RPTR shadow if needed: */
	gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR, shadowptr(a6xx_gpu, gpu->rb[0]));
	gpu_write64(gpu, REG_A8XX_CP_RB_RPTR_ADDR_BV, rbmemptr(gpu->rb[0], bv_rptr));

	a8xx_preempt_hw_init(gpu);

	for (i = 0; i < gpu->nr_rings; i++)
		a6xx_gpu->shadow[i] = 0;

	/* Always come up on rb 0 */
	a6xx_gpu->cur_ring = gpu->rb[0];

	for (i = 0; i < gpu->nr_rings; i++)
		gpu->rb[i]->cur_ctx_seqno = 0;

	/* Enable the SQE to start the CP engine */
	gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 1);

	ret = a8xx_cp_init(gpu);
	if (ret)
		goto out;

	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no resource but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a6xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		a6xx_flush(gpu, gpu->rb[0]);
		if (!a8xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else if (ret == -ENODEV) {
		/*
		 * This device does not use zap shader (but print a warning
		 * just in case someone got their dt wrong.. hopefully they
		 * have a debug UART to realize the error of their ways...
		 * if you mess this up you are about to crash horribly)
		 */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
		ret = 0;
	} else {
		return ret;
	}

	/*
	 * GMEM_PROTECT register should be programmed after GPU is transitioned to
	 * non-secure mode
	 */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_GC_GMEM_PROTECT, gmem_protect);
	WARN_ON(!gmem_protect);
	a8xx_aperture_clear(gpu);

	/* Snapshot the power-up register list once per driver load */
	if (!a6xx_gpu->pwrup_reglist_emitted) {
		a8xx_patch_pwrup_reglist(gpu);
		a6xx_gpu->pwrup_reglist_emitted = true;
	}

	/* Enable hardware clockgating */
	a8xx_set_hwcg(gpu, true);
out:
	/* Last step - yield the ringbuffer */
	a8xx_preempt_start(gpu);

	/*
	 * Tell the GMU that we are done touching the GPU and it can start power
	 * management
	 */
	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);

	/* Keep perfcounters powered if a sysprof session is active */
	if (!ret && (refcount_read(&gpu->sysprof_active) > 1)) {
		ret = a6xx_gmu_set_oob(gmu, GMU_OOB_PERFCOUNTER_SET);
		if (!ret)
			set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status);
	}

	return ret;
}
860
a8xx_hw_init(struct msm_gpu * gpu)861 int a8xx_hw_init(struct msm_gpu *gpu)
862 {
863 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
864 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
865 int ret;
866
867 mutex_lock(&a6xx_gpu->gmu.lock);
868 ret = hw_init(gpu);
869 mutex_unlock(&a6xx_gpu->gmu.lock);
870
871 return ret;
872 }
873
a8xx_dump(struct msm_gpu * gpu)874 static void a8xx_dump(struct msm_gpu *gpu)
875 {
876 DRM_DEV_INFO(&gpu->pdev->dev, "status: %08x\n", gpu_read(gpu, REG_A8XX_RBBM_STATUS));
877 adreno_dump(gpu);
878 }
879
/*
 * Recover a hung GPU by forcing a full power collapse of the CX power domain
 * and re-initializing the hardware.  The runtime-PM refcounts held by active
 * submits are temporarily dropped so the domain can actually power off, then
 * restored afterwards.
 */
void a8xx_recover(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int active_submits;

	adreno_dump_info(gpu);

	if (hang_debug)
		a8xx_dump(gpu);

	/*
	 * To handle recovery specific sequences during the rpm suspend we are
	 * about to trigger
	 */
	a6xx_gpu->hung = true;

	/* Halt SQE first */
	gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 3);

	pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);

	/* active_submit won't change until we make a submission */
	mutex_lock(&gpu->active_lock);
	active_submits = gpu->active_submits;

	/*
	 * Temporarily clear active_submits count to silence a WARN() in the
	 * runtime suspend cb
	 */
	gpu->active_submits = 0;

	/* Arrange to be notified when the CX power domain actually collapses */
	reinit_completion(&gmu->pd_gate);
	dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb);
	dev_pm_genpd_synced_poweroff(gmu->cxpd);

	/* Drop the rpm refcount from active submits */
	if (active_submits)
		pm_runtime_put(&gpu->pdev->dev);

	/* And the final one from recover worker */
	pm_runtime_put_sync(&gpu->pdev->dev);

	if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000)))
		DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n");

	dev_pm_genpd_remove_notifier(gmu->cxpd);

	pm_runtime_use_autosuspend(&gpu->pdev->dev);

	/* Re-take the refcounts dropped above, powering the GPU back up */
	if (active_submits)
		pm_runtime_get(&gpu->pdev->dev);

	pm_runtime_get_sync(&gpu->pdev->dev);

	gpu->active_submits = active_submits;
	mutex_unlock(&gpu->active_lock);

	msm_gpu_hw_init(gpu);
	a6xx_gpu->hung = false;
}
942
/*
 * Translate a UCHE fault into a client name.  The mid value from FSYNR1
 * selects which client table applies (3 = BR/BV, 8 = LPAC); the client
 * index itself is read from the UCHE block.
 */
static const char *a8xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
{
	static const char * const uche_clients[] = {
		"BR_VFD", "BR_SP", "BR_VSC", "BR_VPC", "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP",
		"BV_VFD", "BV_SP", "BV_VSC", "BV_VPC", "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP",
		"STCHE",
	};
	static const char * const uche_clients_lpac[] = {
		"-", "SP_LPAC", "-", "-", "HLSQ_LPAC", "-", "-", "TP_LPAC",
	};
	u32 client = gpu_read(gpu, REG_A8XX_UCHE_CLIENT_PF) & GENMASK(6, 0);

	switch (mid) {
	case 3:
		/* mid=3 refers to BR or BV */
		if (client < ARRAY_SIZE(uche_clients))
			return uche_clients[client];
		return "UCHE";
	case 8:
		/* mid=8 refers to LPAC */
		if (client < ARRAY_SIZE(uche_clients_lpac))
			return uche_clients_lpac[client];
		return "UCHE_LPAC";
	default:
		return "Unknown";
	}
}
981
/*
 * Map the fault id (from FSYNR1) to a human-readable block name.  Ids 3
 * and 8 are UCHE faults and are decoded further via the UCHE client id.
 */
static const char *a8xx_fault_block(struct msm_gpu *gpu, u32 id)
{
	if (id == 0x3 || id == 0x8)
		return a8xx_uche_fault_block(gpu, id);

	switch (id) {
	case 0x0:
		return "CP";
	case 0x1:
		return "UCHE: Unknown";
	case 0x2:
		return "UCHE_LPAC: Unknown";
	case 0x4:
		return "CCU";
	case 0x5:
		return "Flag cache";
	case 0x6:
		return "PREFETCH";
	case 0x7:
		return "GMU";
	case 0x9:
		return "UCHE_HPAC";
	default:
		return "Unknown";
	}
}
1008
a8xx_fault_handler(void * arg,unsigned long iova,int flags,void * data)1009 int a8xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
1010 {
1011 struct msm_gpu *gpu = arg;
1012 struct adreno_smmu_fault_info *info = data;
1013 const char *block = "unknown";
1014
1015 u32 scratch[] = {
1016 gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(0)),
1017 gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(1)),
1018 gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(2)),
1019 gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(3)),
1020 };
1021
1022 if (info)
1023 block = a8xx_fault_block(gpu, info->fsynr1 & 0xff);
1024
1025 return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
1026 }
1027
a8xx_cp_hw_err_irq(struct msm_gpu * gpu)1028 static void a8xx_cp_hw_err_irq(struct msm_gpu *gpu)
1029 {
1030 u32 status = gpu_read(gpu, REG_A8XX_CP_INTERRUPT_STATUS_GLOBAL);
1031 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1032 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1033 u32 slice = a8xx_get_first_slice(a6xx_gpu);
1034 u32 hw_fault_mask = GENMASK(6, 0);
1035 u32 sw_fault_mask = GENMASK(22, 16);
1036 u32 pipe = 0;
1037
1038 dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Global INT status: 0x%x\n", status);
1039
1040 if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR |
1041 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR))
1042 pipe |= BIT(PIPE_BR);
1043
1044 if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV |
1045 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV))
1046 pipe |= BIT(PIPE_BV);
1047
1048 if (!pipe) {
1049 dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Unknown pipe\n");
1050 goto out;
1051 }
1052
1053 for (unsigned int pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) {
1054 if (!(BIT(pipe_id) & pipe))
1055 continue;
1056
1057 if (hw_fault_mask & status) {
1058 status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
1059 REG_A8XX_CP_HW_FAULT_STATUS_PIPE);
1060 dev_err_ratelimited(&gpu->pdev->dev,
1061 "CP HW FAULT pipe: %u status: 0x%x\n", pipe_id, status);
1062 }
1063
1064 if (sw_fault_mask & status) {
1065 status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
1066 REG_A8XX_CP_INTERRUPT_STATUS_PIPE);
1067 dev_err_ratelimited(&gpu->pdev->dev,
1068 "CP SW FAULT pipe: %u status: 0x%x\n", pipe_id, status);
1069
1070 if (status & BIT(8)) {
1071 a8xx_write_pipe(gpu, pipe_id, REG_A8XX_CP_SQE_STAT_ADDR_PIPE, 1);
1072 status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
1073 REG_A8XX_CP_SQE_STAT_DATA_PIPE);
1074 dev_err_ratelimited(&gpu->pdev->dev,
1075 "CP Opcode error, opcode=0x%x\n", status);
1076 }
1077
1078 if (status & BIT(10)) {
1079 status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
1080 REG_A8XX_CP_PROTECT_STATUS_PIPE);
1081 dev_err_ratelimited(&gpu->pdev->dev,
1082 "CP REG PROTECT error, status=0x%x\n", status);
1083 }
1084 }
1085 }
1086
1087 out:
1088 /* Turn off interrupts to avoid triggering recovery again */
1089 a8xx_aperture_clear(gpu);
1090 gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, 0);
1091 gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, 0);
1092
1093 kthread_queue_work(gpu->worker, &gpu->recover_work);
1094 }
1095
/*
 * Read one dword from the SQE ucode debug space: program the debug address
 * register, then read the value back through the data register.  Caller is
 * expected to hold the aperture for the pipe of interest.
 */
static u32 gpu_periph_read(struct msm_gpu *gpu, u32 dbg_offset)
{
	gpu_write(gpu, REG_A8XX_CP_SQE_UCODE_DBG_ADDR_PIPE, dbg_offset);

	return gpu_read(gpu, REG_A8XX_CP_SQE_UCODE_DBG_DATA_PIPE);
}
1102
gpu_periph_read64(struct msm_gpu * gpu,u32 dbg_offset)1103 static u64 gpu_periph_read64(struct msm_gpu *gpu, u32 dbg_offset)
1104 {
1105 u64 lo, hi;
1106
1107 lo = gpu_periph_read(gpu, dbg_offset);
1108 hi = gpu_periph_read(gpu, dbg_offset + 1);
1109
1110 return (hi << 32) | lo;
1111 }
1112
/* SQE debug-space offsets for the IB1/IB2/IB3 base/size/offset registers */
#define CP_PERIPH_IB1_BASE_LO	0x7005
#define CP_PERIPH_IB1_BASE_HI	0x7006
#define CP_PERIPH_IB1_SIZE	0x7007
#define CP_PERIPH_IB1_OFFSET	0x7008
#define CP_PERIPH_IB2_BASE_LO	0x7009
#define CP_PERIPH_IB2_BASE_HI	0x700a
#define CP_PERIPH_IB2_SIZE	0x700b
#define CP_PERIPH_IB2_OFFSET	0x700c
#define CP_PERIPH_IB3_BASE_LO	0x700d
#define CP_PERIPH_IB3_BASE_HI	0x700e
#define CP_PERIPH_IB3_SIZE	0x700f
#define CP_PERIPH_IB3_OFFSET	0x7010
1125
/*
 * RBBM hang-detect interrupt: dump RB/IB state for the BR and BV pipes and
 * schedule recovery.  Bails early if the "hang" is really an SMMU stall,
 * which the fault handler deals with instead.
 */
static void a8xx_fault_detect_irq(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
	unsigned long flags;

	/*
	 * If stalled on SMMU fault, we could trip the GPU's hang detection,
	 * but the fault handler will trigger the devcore dump, and we want
	 * to otherwise resume normally rather than killing the submit, so
	 * just bail.
	 */
	if (gpu_read(gpu, REG_A8XX_RBBM_MISC_STATUS) & A8XX_RBBM_MISC_STATUS_SMMU_STALLED_ON_FAULT)
		return;

	/*
	 * Force the GPU to stay on until after we finish
	 * collecting information
	 */
	if (!adreno_has_gmu_wrapper(adreno_gpu))
		gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);

	DRM_DEV_ERROR(&gpu->pdev->dev,
		      "gpu fault ring %d fence %x status %8.8X gfx_status %8.8X\n",
		      ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
		      gpu_read(gpu, REG_A8XX_RBBM_STATUS), gpu_read(gpu, REG_A8XX_RBBM_GFX_STATUS));

	/* BR pipe state - the periph reads must happen under the BR aperture */
	a8xx_aperture_acquire(gpu, PIPE_BR, &flags);

	DRM_DEV_ERROR(&gpu->pdev->dev,
		      "BR: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
		      gpu_read(gpu, REG_A8XX_RBBM_GFX_BR_STATUS),
		      gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
		      gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
		      gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET),
		      gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET),
		      gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET));

	a8xx_aperture_release(gpu, flags);
	/* BV pipe state */
	a8xx_aperture_acquire(gpu, PIPE_BV, &flags);

	/*
	 * NOTE(review): the BV dump reads REG_A8XX_CP_RB_RPTR_BV for the
	 * read pointer but the shared REG_A6XX_CP_RB_WPTR for the write
	 * pointer - confirm there is no BV-specific WPTR register.
	 */
	DRM_DEV_ERROR(&gpu->pdev->dev,
		      "BV: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
		      gpu_read(gpu, REG_A8XX_RBBM_GFX_BV_STATUS),
		      gpu_read(gpu, REG_A8XX_CP_RB_RPTR_BV),
		      gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
		      gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET),
		      gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET),
		      gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET));

	a8xx_aperture_release(gpu, flags);
	a8xx_aperture_clear(gpu);

	/* Turn off the hangcheck timer to keep it from bothering us */
	timer_delete(&gpu->hangcheck_timer);

	kthread_queue_work(gpu->worker, &gpu->recover_work);
}
1191
/*
 * SW fuse violation interrupt: log the violation, mask further fuse
 * interrupts, and trigger recovery for the fatal feature classes
 * (raytracing, LPAC).
 */
static void a8xx_sw_fuse_violation_irq(struct msm_gpu *gpu)
{
	u32 status;

	status = gpu_read(gpu, REG_A8XX_RBBM_SW_FUSE_INT_STATUS);
	/* Mask the interrupt so a repeated violation doesn't storm */
	gpu_write(gpu, REG_A8XX_RBBM_SW_FUSE_INT_MASK, 0);

	dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status);

	/*
	 * Ignore FASTBLEND violations, because the HW will silently fall back
	 * to legacy blending.
	 */
	if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
		      A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) {
		timer_delete(&gpu->hangcheck_timer);

		kthread_queue_work(gpu->worker, &gpu->recover_work);
	}
}
1212
/*
 * Top-level GPU interrupt handler: ack everything up front, then dispatch
 * each RBBM_INT_0 status bit to its handler.  With disable_err_irq set
 * (debug knob), all error sources are ignored and only the retire
 * (cache-flush timestamp) interrupt is serviced.
 */
irqreturn_t a8xx_irq(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	u32 status = gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS);

	/* Ack before handling so a new edge isn't lost while we work */
	gpu_write(gpu, REG_A8XX_RBBM_INT_CLEAR_CMD, status);

	if (priv->disable_err_irq)
		status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
		a8xx_fault_detect_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR) {
		u32 rl0, rl1;

		rl0 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_0);
		rl1 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_1);
		dev_err_ratelimited(&gpu->pdev->dev,
				    "CP | AHB bus error RL_ERROR_0: %x, RL_ERROR_1: %x\n", rl0, rl1);
	}

	if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a8xx_cp_hw_err_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");

	if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");

	if (status & A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)
		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Trap interrupt\n");

	if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
		a8xx_sw_fuse_violation_irq(gpu);

	/* Retire completed submits; a retire may also open a preempt window */
	if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		msm_gpu_retire(gpu);
		a8xx_preempt_trigger(gpu);
	}

	if (status & A6XX_RBBM_INT_0_MASK_CP_SW)
		a8xx_preempt_irq(gpu);

	return IRQ_HANDLED;
}
1263
/*
 * Activate the system-cache (LLC) slices for the GPU and program the
 * slice id (SCID) into the GBIF system-cache control registers.  The GPU
 * slice activation failing is not fatal - the registers are simply left
 * unprogrammed; the page-table-walker slice is activated best-effort.
 */
void a8xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
{
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;

	if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
		u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);

		/* SCID is a 6-bit field */
		gpu_scid &= GENMASK(5, 0);

		/*
		 * The same SCID is replicated into each 6-bit field of the
		 * CNTL registers; presumably one field per GBIF requester -
		 * TODO confirm field meanings against the register database.
		 */
		gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1,
			  FIELD_PREP(GENMASK(29, 24), gpu_scid) |
			  FIELD_PREP(GENMASK(23, 18), gpu_scid) |
			  FIELD_PREP(GENMASK(17, 12), gpu_scid) |
			  FIELD_PREP(GENMASK(11, 6), gpu_scid) |
			  FIELD_PREP(GENMASK(5, 0), gpu_scid));

		gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0,
			  FIELD_PREP(GENMASK(27, 22), gpu_scid) |
			  FIELD_PREP(GENMASK(21, 16), gpu_scid) |
			  FIELD_PREP(GENMASK(15, 10), gpu_scid) |
			  BIT(8));
	}

	llcc_slice_activate(a6xx_gpu->htw_llc_slice);
}
1290
/* GBIF/VBIF halt request and ack bit masks used during bus quiescing */
#define GBIF_CLIENT_HALT_MASK		BIT(0)
#define GBIF_ARB_HALT_MASK		BIT(1)
#define VBIF_XIN_HALT_CTRL0_MASK	GENMASK(3, 0)
#define VBIF_RESET_ACK_MASK		0xF0
#define GPR0_GBIF_HALT_REQUEST		0x1E0
1296
/*
 * Quiesce the GPU's bus interface before reset/power-down: optionally halt
 * the GX-side traffic, then halt new client requests and finally all AXI
 * requests on GBIF, polling the ack bits at each step.  The halt order is
 * required by the hardware; do not reorder.
 */
void a8xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off)
{
	struct msm_gpu *gpu = &adreno_gpu->base;

	if (gx_off) {
		/* Halt the gx side of GBIF */
		gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 1);
		spin_until(gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT_ACK) & 1);
	}

	/* Halt new client requests on GBIF */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
		    (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);

	/* Halt all AXI requests on GBIF */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
		    (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);

	/* The GBIF halt needs to be explicitly cleared */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
}
1320
/*
 * Read the GMU 64-bit always-on counter.  The high word is read before and
 * after the low word; a mismatch means the low word rolled over between
 * reads, so retry until both high-word samples agree.
 */
u64 a8xx_gmu_get_timestamp(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u64 count_hi, count_lo, temp;

	do {
		count_hi = gmu_read(&a6xx_gpu->gmu, REG_A8XX_GMU_ALWAYS_ON_COUNTER_H);
		count_lo = gmu_read(&a6xx_gpu->gmu, REG_A8XX_GMU_ALWAYS_ON_COUNTER_L);
		temp = gmu_read(&a6xx_gpu->gmu, REG_A8XX_GMU_ALWAYS_ON_COUNTER_H);
	} while (unlikely(count_hi != temp));

	return (count_hi << 32) | count_lo;
}
1335
a8xx_gpu_busy(struct msm_gpu * gpu,unsigned long * out_sample_rate)1336 u64 a8xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
1337 {
1338 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1339 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1340 u64 busy_cycles;
1341
1342 /* 19.2MHz */
1343 *out_sample_rate = 19200000;
1344
1345 busy_cycles = gmu_read64(&a6xx_gpu->gmu,
1346 REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
1347 REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
1348
1349 return busy_cycles;
1350 }
1351
a8xx_progress(struct msm_gpu * gpu,struct msm_ringbuffer * ring)1352 bool a8xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1353 {
1354 return true;
1355 }
1356