// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */

#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_gpu_trace.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.xml.h"

#include <linux/bitfield.h>
#include <linux/devfreq.h>
#include <linux/firmware/qcom/qcom_scm.h>
#include <linux/pm_domain.h>
#include <linux/soc/qcom/llcc-qcom.h>

#define GPU_PAS_ID 13

static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Check that the GMU is idle */
	if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_isidle(&a6xx_gpu->gmu))
		return false;

	/* Check that the CX master is idle */
	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
			~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
		return false;

	return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) &
		A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
}

static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a6xx_check_idle(gpu))) {
		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A6XX_RBBM_STATUS),
			gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
		return false;
	}

	return true;
}

static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
	if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
		OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
		OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
	}
}

static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	update_shadow_rptr(gpu, ring);

	spin_lock_irqsave(&ring->preempt_lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	/* Update HW if this is the current ring and we are not in preempt */
	if (!a6xx_in_preempt(a6xx_gpu)) {
		if (a6xx_gpu->cur_ring == ring)
			gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
		else
			ring->restore_wptr = true;
	} else {
		ring->restore_wptr = true;
	}

	spin_unlock_irqrestore(&ring->preempt_lock, flags);
}

static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
		u64 iova)
{
	OUT_PKT7(ring, CP_REG_TO_MEM, 3);
	OUT_RING(ring, CP_REG_TO_MEM_0_REG(counter) |
		CP_REG_TO_MEM_0_CNT(2) |
		CP_REG_TO_MEM_0_64B);
	OUT_RING(ring, lower_32_bits(iova));
	OUT_RING(ring, upper_32_bits(iova));
}

static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
		struct msm_ringbuffer *ring, struct msm_gem_submit *submit)
{
	bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
	struct msm_file_private *ctx = submit->queue->ctx;
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	phys_addr_t ttbr;
	u32 asid;
	u64 memptr = rbmemptr(ring, ttbr0);

	if (ctx->seqno == ring->cur_ctx_seqno)
		return;

	if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid))
		return;

	if (adreno_gpu->info->family >= ADRENO_7XX_GEN1) {
		/* Wait for previous submit to complete before continuing: */
		OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
		OUT_RING(ring, 0);
		OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
		OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
		OUT_RING(ring, submit->seqno - 1);

		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
		OUT_RING(ring, CP_SET_THREAD_BOTH);

		/* Reset state used to synchronize BR and BV */
		OUT_PKT7(ring, CP_RESET_CONTEXT_STATE, 1);
		OUT_RING(ring,
			 CP_RESET_CONTEXT_STATE_0_CLEAR_ON_CHIP_TS |
			 CP_RESET_CONTEXT_STATE_0_CLEAR_RESOURCE_TABLE |
			 CP_RESET_CONTEXT_STATE_0_CLEAR_BV_BR_COUNTER |
			 CP_RESET_CONTEXT_STATE_0_RESET_GLOBAL_LOCAL_TS);

		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
		OUT_RING(ring, CP_SET_THREAD_BR);
	}

	if (!sysprof) {
		if (!adreno_is_a7xx(adreno_gpu)) {
			/* Turn off protected mode to write to special registers */
			OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
			OUT_RING(ring, 0);
		}

		OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
		OUT_RING(ring, 1);
	}

	/* Execute the table update */
	OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr)));

	OUT_RING(ring,
		CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) |
		CP_SMMU_TABLE_UPDATE_1_ASID(asid));
	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0));
	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0));

	/*
	 * Write the new TTBR0 to the memstore. This is good for debugging.
	 * Needed for preemption
	 */
	OUT_PKT7(ring, CP_MEM_WRITE, 5);
	OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr)));
	OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr)));
	OUT_RING(ring, lower_32_bits(ttbr));
	OUT_RING(ring, upper_32_bits(ttbr));
	OUT_RING(ring, ctx->seqno);

	/*
	 * Sync both threads after switching pagetables and enable BR only
	 * to make sure BV doesn't race ahead while BR is still switching
	 * pagetables.
	 */
	if (adreno_is_a7xx(&a6xx_gpu->base)) {
		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
		OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
	}

	/*
	 * And finally, trigger a uche flush to be sure there isn't anything
	 * lingering in that part of the GPU
	 */

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CACHE_INVALIDATE);

	if (!sysprof) {
		/*
		 * Wait for SRAM clear after the pgtable update, so the
		 * two can happen in parallel:
		 */
		OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
		OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
		OUT_RING(ring, CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
				REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS));
		OUT_RING(ring, CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0));
		OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
		OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
		OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));

		if (!adreno_is_a7xx(adreno_gpu)) {
			/* Re-enable protected mode: */
			OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
			OUT_RING(ring, 1);
		}
	}
}

static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i, ibs = 0;

	adreno_check_and_reenable_stall(adreno_gpu);

	a6xx_set_pagetable(a6xx_gpu, ring, submit);

	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
		rbmemptr_stats(ring, index, cpcycles_start));

	/*
	 * For PM4 the GMU register offsets are calculated from the base of the
	 * GPU registers so we need to add 0x1a800 to the register value on A630
	 * to get the right value from PM4.
	 */
	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
		rbmemptr_stats(ring, index, alwayson_start));

	/* Invalidate CCU depth and color */
	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_DEPTH));

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_COLOR));

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
			ibs++;
			break;
		}

		/*
		 * Periodically update shadow-wptr if needed, so that we
		 * can see partial progress of submits with large # of
		 * cmds.. otherwise we could needlessly stall waiting for
		 * ringbuffer state, simply due to looking at a shadow
		 * rptr value that has not been updated
		 */
		if ((ibs % 32) == 0)
			update_shadow_rptr(gpu, ring);
	}

	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
		rbmemptr_stats(ring, index, cpcycles_end));
	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
		rbmemptr_stats(ring, index, alwayson_end));

	/* Write the fence to the scratch register */
	OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
	OUT_RING(ring, submit->seqno);

	/*
	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
	 * timestamp is written to the memory and then triggers the interrupt
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
		CP_EVENT_WRITE_0_IRQ);
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	trace_msm_gpu_submit_flush(submit,
		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER));

	a6xx_flush(gpu, ring);
}

static void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring,
		struct a6xx_gpu *a6xx_gpu, struct msm_gpu_submitqueue *queue)
{
	u64 preempt_postamble;

	OUT_PKT7(ring, CP_SET_PSEUDO_REG, 12);

	OUT_RING(ring, SMMU_INFO);
	/* don't save SMMU, we write the record from the kernel instead */
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	/* privileged and non secure buffer save */
	OUT_RING(ring, NON_SECURE_SAVE_ADDR);
	OUT_RING(ring, lower_32_bits(
		a6xx_gpu->preempt_iova[ring->id]));
	OUT_RING(ring, upper_32_bits(
		a6xx_gpu->preempt_iova[ring->id]));

	/* user context buffer save, seems to be unused by fw */
	OUT_RING(ring, NON_PRIV_SAVE_ADDR);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	OUT_RING(ring, COUNTER);
	/* seems OK to set to 0 to disable it */
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	/* Emit postamble to clear perfcounters */
	preempt_postamble = a6xx_gpu->preempt_postamble_iova;

	OUT_PKT7(ring, CP_SET_AMBLE, 3);
	OUT_RING(ring, lower_32_bits(preempt_postamble));
	OUT_RING(ring, upper_32_bits(preempt_postamble));
	OUT_RING(ring, CP_SET_AMBLE_2_DWORDS(
			a6xx_gpu->preempt_postamble_len) |
		CP_SET_AMBLE_2_TYPE(KMD_AMBLE_TYPE));
}

static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i, ibs = 0;

	adreno_check_and_reenable_stall(adreno_gpu);

	/*
	 * Toggle concurrent binning for pagetable switch and set the thread to
	 * BR since only it can execute the pagetable switch packets.
	 */
	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);

	a6xx_set_pagetable(a6xx_gpu, ring, submit);

	/*
	 * If preemption is enabled, then set the pseudo register for the save
	 * sequence
	 */
	if (gpu->nr_rings > 1)
		a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue);

	get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0),
		rbmemptr_stats(ring, index, cpcycles_start));
	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
		rbmemptr_stats(ring, index, alwayson_start));

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BOTH);

	OUT_PKT7(ring, CP_SET_MARKER, 1);
	OUT_RING(ring, 0x101); /* IFPC disable */

	if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
		OUT_PKT7(ring, CP_SET_MARKER, 1);
		OUT_RING(ring, 0x00d); /* IB1LIST start */
	}

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
			ibs++;
			break;
		}

		/*
		 * Periodically update shadow-wptr if needed, so that we
		 * can see partial progress of submits with large # of
		 * cmds.. otherwise we could needlessly stall waiting for
		 * ringbuffer state, simply due to looking at a shadow
		 * rptr value that has not been updated
		 */
		if ((ibs % 32) == 0)
			update_shadow_rptr(gpu, ring);
	}

	if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
		OUT_PKT7(ring, CP_SET_MARKER, 1);
		OUT_RING(ring, 0x00e); /* IB1LIST end */
	}

	get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0),
		rbmemptr_stats(ring, index, cpcycles_end));
	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
		rbmemptr_stats(ring, index, alwayson_end));

	/* Write the fence to the scratch register */
	OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
	OUT_RING(ring, submit->seqno);

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BR);

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CCU_INVALIDATE_DEPTH);

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, CCU_INVALIDATE_COLOR);

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BV);

	/*
	 * Make sure the timestamp is committed once BV pipe is
	 * completely done with this submission.
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CACHE_CLEAN | BIT(27));
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
	OUT_RING(ring, submit->seqno);

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BR);

	/*
	 * This makes sure that BR doesn't race ahead and commit
	 * timestamp to memstore while BV is still processing
	 * this submission.
	 */
	OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
	OUT_RING(ring, 0);
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
	OUT_RING(ring, submit->seqno);

	a6xx_gpu->last_seqno[ring->id] = submit->seqno;

	/* write the ringbuffer timestamp */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CACHE_CLEAN | CP_EVENT_WRITE_0_IRQ | BIT(27));
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, CP_SET_THREAD_BOTH);

	OUT_PKT7(ring, CP_SET_MARKER, 1);
	OUT_RING(ring, 0x100); /* IFPC enable */

	/* If preemption is enabled */
	if (gpu->nr_rings > 1) {
		/* Yield the floor on command completion */
		OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);

		/*
		 * If dword[2:1] are non zero, they specify an address for
		 * the CP to write the value of dword[3] to on preemption
		 * complete. Write 0 to skip the write
		 */
		OUT_RING(ring, 0x00);
		OUT_RING(ring, 0x00);
		/* Data value - not used if the address above is 0 */
		OUT_RING(ring, 0x01);
		/* generate interrupt on preemption completion */
		OUT_RING(ring, 0x00);
	}

	trace_msm_gpu_submit_flush(submit,
		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER));

	a6xx_flush(gpu, ring);

	/* Check to see if we need to start preemption */
	a6xx_preempt_trigger(gpu);
}

static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	const struct adreno_reglist *reg;
	unsigned int i;
	u32 cgc_delay, cgc_hyst;
	u32 val, clock_cntl_on;

	if (!(adreno_gpu->info->a6xx->hwcg || adreno_is_a7xx(adreno_gpu)))
		return;

	if (adreno_is_a630(adreno_gpu))
		clock_cntl_on = 0x8aa8aa02;
	else if (adreno_is_a610(adreno_gpu))
		clock_cntl_on = 0xaaa8aa82;
	else if (adreno_is_a702(adreno_gpu))
		clock_cntl_on = 0xaaaaaa82;
	else
		clock_cntl_on = 0x8aa8aa82;

	cgc_delay = adreno_is_a615_family(adreno_gpu) ? 0x111 : 0x10111;
	cgc_hyst = adreno_is_a615_family(adreno_gpu) ? 0x555 : 0x5555;

	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
		  state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0);
	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL,
		  state ? cgc_delay : 0);
	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL,
		  state ? cgc_hyst : 0);

	if (!adreno_gpu->info->a6xx->hwcg) {
		gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 1);
		gpu_write(gpu, REG_A7XX_RBBM_CGC_GLOBAL_LOAD_CMD, state ? 1 : 0);

		if (state) {
			gpu_write(gpu, REG_A7XX_RBBM_CGC_P2S_TRIG_CMD, 1);

			if (gpu_poll_timeout(gpu, REG_A7XX_RBBM_CGC_P2S_STATUS, val,
					     val & A7XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) {
				dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n");
				return;
			}

			gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 0);
		}

		return;
	}

	val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL);

	/* Don't re-program the registers if they are already correct */
	if ((!state && !val) || (state && (val == clock_cntl_on)))
		return;

	/* Disable SP clock before programming HWCG registers */
	if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu))
		gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);

	for (i = 0; (reg = &adreno_gpu->info->a6xx->hwcg[i], reg->offset); i++)
		gpu_write(gpu, reg->offset, state ? reg->value : 0);

	/* Enable SP clock */
	if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu))
		gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);

	gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
}

static void a6xx_set_cp_protect(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect;
	unsigned i;

	/*
	 * Enable access protection to privileged registers, fault on an access
	 * protect violation and select the last span to protect from the start
	 * address all the way to the end of the register address space
	 */
	gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL,
		  A6XX_CP_PROTECT_CNTL_ACCESS_PROT_EN |
		  A6XX_CP_PROTECT_CNTL_ACCESS_FAULT_ON_VIOL_EN |
		  A6XX_CP_PROTECT_CNTL_LAST_SPAN_INF_RANGE);

	for (i = 0; i < protect->count - 1; i++) {
		/* Intentionally skip writing to some registers */
		if (protect->regs[i])
			gpu_write(gpu, REG_A6XX_CP_PROTECT(i), protect->regs[i]);
	}
	/* last CP_PROTECT to have "infinite" length on the last entry */
	gpu_write(gpu, REG_A6XX_CP_PROTECT(protect->count_max - 1), protect->regs[i]);
}

static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
{
	gpu->ubwc_config.rgb565_predicator = 0;
	gpu->ubwc_config.uavflagprd_inv = 0;
	gpu->ubwc_config.min_acc_len = 0;
	gpu->ubwc_config.ubwc_swizzle = 0x6;
	gpu->ubwc_config.macrotile_mode = 0;
	gpu->ubwc_config.highest_bank_bit = 15;

	if (adreno_is_a610(gpu)) {
		gpu->ubwc_config.highest_bank_bit = 13;
		gpu->ubwc_config.min_acc_len = 1;
		gpu->ubwc_config.ubwc_swizzle = 0x7;
	}

	if (adreno_is_a618(gpu))
		gpu->ubwc_config.highest_bank_bit = 14;

	if (adreno_is_a619(gpu))
		/* TODO: Should be 14 but causes corruption at e.g. 1920x1200 on DP */
		gpu->ubwc_config.highest_bank_bit = 13;

	if (adreno_is_a619_holi(gpu))
		gpu->ubwc_config.highest_bank_bit = 13;

	if (adreno_is_a621(gpu)) {
		gpu->ubwc_config.highest_bank_bit = 13;
		gpu->ubwc_config.amsbc = 1;
		gpu->ubwc_config.uavflagprd_inv = 2;
	}

	if (adreno_is_a623(gpu)) {
		gpu->ubwc_config.highest_bank_bit = 16;
		gpu->ubwc_config.amsbc = 1;
		gpu->ubwc_config.rgb565_predicator = 1;
		gpu->ubwc_config.uavflagprd_inv = 2;
		gpu->ubwc_config.macrotile_mode = 1;
	}

	if (adreno_is_a640_family(gpu))
		gpu->ubwc_config.amsbc = 1;

	if (adreno_is_a680(gpu))
		gpu->ubwc_config.macrotile_mode = 1;

	if (adreno_is_a650(gpu) ||
	    adreno_is_a660(gpu) ||
	    adreno_is_a690(gpu) ||
	    adreno_is_a730(gpu) ||
	    adreno_is_a740_family(gpu)) {
		/* TODO: get ddr type from bootloader and use 2 for LPDDR4 */
		gpu->ubwc_config.highest_bank_bit = 16;
		gpu->ubwc_config.amsbc = 1;
		gpu->ubwc_config.rgb565_predicator = 1;
		gpu->ubwc_config.uavflagprd_inv = 2;
		gpu->ubwc_config.macrotile_mode = 1;
	}

	if (adreno_is_a663(gpu)) {
		gpu->ubwc_config.highest_bank_bit = 13;
		gpu->ubwc_config.amsbc = 1;
		gpu->ubwc_config.rgb565_predicator = 1;
		gpu->ubwc_config.uavflagprd_inv = 2;
		gpu->ubwc_config.macrotile_mode = 1;
		gpu->ubwc_config.ubwc_swizzle = 0x4;
	}

	if (adreno_is_7c3(gpu)) {
		gpu->ubwc_config.highest_bank_bit = 14;
		gpu->ubwc_config.amsbc = 1;
		gpu->ubwc_config.uavflagprd_inv = 2;
		gpu->ubwc_config.macrotile_mode = 1;
	}

	if (adreno_is_a702(gpu)) {
		gpu->ubwc_config.highest_bank_bit = 14;
		gpu->ubwc_config.min_acc_len = 1;
	}
}

static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	/*
	 * We subtract 13 from the highest bank bit (13 is the minimum value
	 * allowed by hw) and write the lowest two bits of the remaining value
	 * as hbb_lo and the one above it as hbb_hi to the hardware.
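	 * For example, highest_bank_bit == 16 gives hbb == 3, which splits
	 * into hbb_hi == 0 and hbb_lo == 3.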
	 */
	BUG_ON(adreno_gpu->ubwc_config.highest_bank_bit < 13);
	u32 hbb = adreno_gpu->ubwc_config.highest_bank_bit - 13;
	u32 hbb_hi = hbb >> 2;
	u32 hbb_lo = hbb & 3;
	u32 ubwc_mode = adreno_gpu->ubwc_config.ubwc_swizzle & 1;
	u32 level2_swizzling_dis = !(adreno_gpu->ubwc_config.ubwc_swizzle & 2);

	gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
		  level2_swizzling_dis << 12 |
		  adreno_gpu->ubwc_config.rgb565_predicator << 11 |
		  hbb_hi << 10 | adreno_gpu->ubwc_config.amsbc << 4 |
		  adreno_gpu->ubwc_config.min_acc_len << 3 |
		  hbb_lo << 1 | ubwc_mode);

	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL,
		  level2_swizzling_dis << 6 | hbb_hi << 4 |
		  adreno_gpu->ubwc_config.min_acc_len << 3 |
		  hbb_lo << 1 | ubwc_mode);

	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
		  level2_swizzling_dis << 12 | hbb_hi << 10 |
		  adreno_gpu->ubwc_config.uavflagprd_inv << 4 |
		  adreno_gpu->ubwc_config.min_acc_len << 3 |
		  hbb_lo << 1 | ubwc_mode);

	if (adreno_is_a7xx(adreno_gpu))
		gpu_write(gpu, REG_A7XX_GRAS_NC_MODE_CNTL,
			  FIELD_PREP(GENMASK(8, 5), hbb_lo));

	gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL,
		  adreno_gpu->ubwc_config.min_acc_len << 23 | hbb_lo << 21);

	gpu_write(gpu, REG_A6XX_RBBM_NC_MODE_CNTL,
		  adreno_gpu->ubwc_config.macrotile_mode);
}

static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	const struct adreno_reglist_list *reglist;
	void *ptr = a6xx_gpu->pwrup_reglist_ptr;
	struct cpu_gpu_lock *lock = ptr;
	u32 *dest = (u32 *)&lock->regs[0];
	int i;

	reglist = adreno_gpu->info->a6xx->pwrup_reglist;

	lock->gpu_req = lock->cpu_req = lock->turn = 0;
	lock->ifpc_list_len = 0;
	lock->preemption_list_len = reglist->count;

	/*
	 * For each entry in each of the lists, write the offset and the current
	 * register value into the GPU buffer
	 */
	for (i = 0; i < reglist->count; i++) {
		*dest++ = reglist->regs[i];
		*dest++ = gpu_read(gpu, reglist->regs[i]);
	}

	/*
	 * The overall register list is composed of
	 * 1. Static IFPC-only registers
	 * 2. Static IFPC + preemption registers
	 * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects)
	 *
	 * The first two lists are static. Their sizes are stored as the
	 * number of pairs in ifpc_list_len and preemption_list_len
	 * respectively. With concurrent binning, some of the perfcounter
	 * registers are virtualized, so the CP needs to know the pipe id to
	 * program the aperture in order to restore them. Thus, the third
	 * list is a dynamic list of triplets,
	 * (<aperture, shifted 12 bits> <address> <data>), and the length is
	 * stored as the number of triplets in dynamic_list_len.
	 */
	lock->dynamic_list_len = 0;
}

static int a7xx_preempt_start(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	if (gpu->nr_rings <= 1)
		return 0;

	/* Turn CP protection off */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, NULL);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	/* Generate interrupt on preemption completion */
	OUT_RING(ring, 0x00);

	a6xx_flush(gpu, ring);

	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static int a6xx_cp_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT7(ring, CP_ME_INIT, 8);

	OUT_RING(ring, 0x0000002f);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Don't enable header dump */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	/* No workarounds enabled */
	OUT_RING(ring, 0x00000000);

	/* Pad rest of the cmds with 0's */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	a6xx_flush(gpu, ring);
	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static int a7xx_cp_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];
	u32 mask;

	/* Disable concurrent binning before sending CP init */
	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, BIT(27));

	OUT_PKT7(ring, CP_ME_INIT, 7);

	/* Use multiple HW contexts */
	mask = BIT(0);

	/* Enable error detection */
	mask |= BIT(1);

	/* Set default reset state */
	mask |= BIT(3);

	/* Disable save/restore of performance counters across preemption */
	mask |= BIT(6);

	/* Enable the register init list with the spinlock */
	mask |= BIT(8);

	OUT_RING(ring, mask);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Operation mode mask */
	OUT_RING(ring, 0x00000002);

	/* *Don't* send a power up reg list for concurrent binning (TODO) */
	/* Lo address */
	OUT_RING(ring, lower_32_bits(a6xx_gpu->pwrup_reglist_iova));
	/* Hi address */
	OUT_RING(ring, upper_32_bits(a6xx_gpu->pwrup_reglist_iova));
	/* BIT(31) set => read the regs from the list */
	OUT_RING(ring, BIT(31));

	a6xx_flush(gpu, ring);
	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
}

/*
 * Check that the microcode version is new enough to include several key
 * security fixes. Return true if the ucode is safe.
 */
static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
		struct drm_gem_object *obj)
{
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;
	const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE];
	u32 *buf = msm_gem_get_vaddr(obj);
	bool ret = false;

	if (IS_ERR(buf))
		return false;

	/* A7xx is safe! */
	if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu))
		return true;

	/*
	 * Targets up to a640 (a618, a630 and a640) need to check for a
	 * microcode version that is patched to support the whereami opcode or
	 * one that is new enough to include it by default.
	 *
	 * a650 tier targets don't need whereami but still need to be
	 * equal to or newer than 0.95 for other security fixes
	 *
	 * a660 targets have all the critical security fixes from the start
	 */
	if (!strcmp(sqe_name, "a630_sqe.fw")) {
		/*
		 * If the lowest nibble is 0xa that is an indication that this
		 * microcode has been patched. The actual version is in dword
		 * [3] but we only care about the patchlevel which is the lowest
		 * nibble of dword [3]
		 *
		 * Otherwise check that the firmware is greater than or equal
		 * to 1.90 which was the first version that had this fix built
		 * in
		 */
		if ((((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) ||
			(buf[0] & 0xfff) >= 0x190) {
			a6xx_gpu->has_whereami = true;
			ret = true;
			goto out;
		}

		DRM_DEV_ERROR(&gpu->pdev->dev,
			"a630 SQE ucode is too old. Have version %x need at least %x\n",
			buf[0] & 0xfff, 0x190);
	} else if (!strcmp(sqe_name, "a650_sqe.fw")) {
		if ((buf[0] & 0xfff) >= 0x095) {
			ret = true;
			goto out;
		}

		DRM_DEV_ERROR(&gpu->pdev->dev,
			"a650 SQE ucode is too old. Have version %x need at least %x\n",
			buf[0] & 0xfff, 0x095);
	} else if (!strcmp(sqe_name, "a660_sqe.fw")) {
		ret = true;
	} else {
		DRM_DEV_ERROR(&gpu->pdev->dev,
			"unknown GPU, add it to a6xx_ucode_check_version()!!\n");
	}
out:
	msm_gem_put_vaddr(obj);
	return ret;
}

static int a6xx_ucode_load(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	if (!a6xx_gpu->sqe_bo) {
		a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova);

		if (IS_ERR(a6xx_gpu->sqe_bo)) {
			int ret = PTR_ERR(a6xx_gpu->sqe_bo);

			a6xx_gpu->sqe_bo = NULL;
			DRM_DEV_ERROR(&gpu->pdev->dev,
				"Could not allocate SQE ucode: %d\n", ret);

			return ret;
		}

		msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw");
		if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) {
			msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
			drm_gem_object_put(a6xx_gpu->sqe_bo);

			a6xx_gpu->sqe_bo = NULL;
			return -EPERM;
		}
	}

	/*
	 * Expanded APRIV and targets that support WHERE_AM_I both need a
	 * privileged buffer to store the RPTR shadow
	 */
	if ((adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) &&
	    !a6xx_gpu->shadow_bo) {
		a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
						      sizeof(u32) * gpu->nr_rings,
						      MSM_BO_WC | MSM_BO_MAP_PRIV,
						      gpu->aspace, &a6xx_gpu->shadow_bo,
						      &a6xx_gpu->shadow_iova);

		if (IS_ERR(a6xx_gpu->shadow))
			return PTR_ERR(a6xx_gpu->shadow);

		msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow");
	}

	a6xx_gpu->pwrup_reglist_ptr = msm_gem_kernel_new(gpu->dev, PAGE_SIZE,
							 MSM_BO_WC  | MSM_BO_MAP_PRIV,
							 gpu->aspace, &a6xx_gpu->pwrup_reglist_bo,
							 &a6xx_gpu->pwrup_reglist_iova);

	if (IS_ERR(a6xx_gpu->pwrup_reglist_ptr))
		return PTR_ERR(a6xx_gpu->pwrup_reglist_ptr);

	msm_gem_object_set_name(a6xx_gpu->pwrup_reglist_bo, "pwrup_reglist");

	return 0;
}
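
/*
 * The zap shader only needs to be loaded into the secure world once, so
 * remember whether that already succeeded and skip the SCM call on
 * subsequent GPU power-ups.
 */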
static int a6xx_zap_shader_init(struct msm_gpu *gpu)
{
	static bool loaded;
	int ret;

	if (loaded)
		return 0;

	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);

	loaded = !ret;
	return ret;
}

#define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
	  A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
	  A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	  A6XX_RBBM_INT_0_MASK_CP_IB2 | \
	  A6XX_RBBM_INT_0_MASK_CP_IB1 | \
	  A6XX_RBBM_INT_0_MASK_CP_RB | \
	  A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	  A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
	  A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
	  A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	  A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)

#define A7XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
	  A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
	  A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \
	  A6XX_RBBM_INT_0_MASK_CP_SW | \
	  A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	  A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \
	  A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \
	  A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	  A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
	  A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
	  A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	  A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \
	  A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \
	  A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)

#define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \
	  A6XX_CP_APRIV_CNTL_RBFETCH | \
	  A6XX_CP_APRIV_CNTL_RBPRIVLEVEL | \
	  A6XX_CP_APRIV_CNTL_RBRPWB)

#define A7XX_BR_APRIVMASK (A7XX_APRIV_MASK | \
	  A6XX_CP_APRIV_CNTL_CDREAD | \
	  A6XX_CP_APRIV_CNTL_CDWRITE)

static int hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	u64 gmem_range_min;
	unsigned int i;
	int ret;

	if (!adreno_has_gmu_wrapper(adreno_gpu)) {
		/* Make sure the GMU keeps the GPU on while we set it up */
		ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
		if (ret)
			return ret;
	}

	/* Clear GBIF halt in case GX domain was not collapsed */
	if (adreno_is_a619_holi(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
		gpu_read(gpu, REG_A6XX_GBIF_HALT);

		gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, 0);
		gpu_read(gpu, REG_A6XX_RBBM_GPR0_CNTL);
	} else if (a6xx_has_gbif(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
		gpu_read(gpu, REG_A6XX_GBIF_HALT);

		gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0);
		gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT);
	}

	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);

	if (adreno_is_a619_holi(adreno_gpu))
		a6xx_sptprac_enable(gmu);

	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	if (!adreno_is_a7xx(adreno_gpu)) {
		/* Turn on 64 bit addressing for all blocks */
		gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
	}

	/* enable hardware clockgating */
	a6xx_set_hwcg(gpu, true);

	/* VBIF/GBIF start */
	if (adreno_is_a610_family(adreno_gpu) ||
	    adreno_is_a640_family(adreno_gpu) ||
	    adreno_is_a650_family(adreno_gpu) ||
	    adreno_is_a7xx(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620);
		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620);
		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620);
		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620);
		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL,
			  adreno_is_a7xx(adreno_gpu) ? 0x2120212 : 0x3);
	} else {
		gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3);
	}

	if (adreno_is_a630(adreno_gpu))
		gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);

	if (adreno_is_a7xx(adreno_gpu))
		gpu_write(gpu, REG_A6XX_UCHE_GBIF_GX_CONFIG, 0x10240e0);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);

	/* Disable L2 bypass in the UCHE */
	if (adreno_is_a7xx(adreno_gpu)) {
		gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
	} else {
		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, adreno_gpu->uche_trap_base + 0xfc0);
		gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
	}

	if (!(adreno_is_a650_family(adreno_gpu) ||
	      adreno_is_a702(adreno_gpu) ||
	      adreno_is_a730(adreno_gpu))) {
		gmem_range_min = adreno_is_a740_family(adreno_gpu) ? SZ_16M : SZ_1M;

		/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN, gmem_range_min);

		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX,
			    gmem_range_min + adreno_gpu->info->gmem - 1);
	}

	if (adreno_is_a7xx(adreno_gpu))
		gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, BIT(23));
	else {
		gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804);
		gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4);
	}

	if (adreno_is_a640_family(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140);
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
	} else if (adreno_is_a610_family(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x00800060);
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x40201b16);
	} else if (!adreno_is_a7xx(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0);
		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
	}

	if (adreno_is_a660_family(adreno_gpu))
		gpu_write(gpu, REG_A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);

	/* Setting the mem pool size */
	if (adreno_is_a610(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 48);
		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 47);
	} else if (adreno_is_a702(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 64);
		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 63);
	} else if (!adreno_is_a7xx(adreno_gpu))
		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128);

	/* Set the default primFifo threshold values */
	if (adreno_gpu->info->a6xx->prim_fifo_threshold)
		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL,
			  adreno_gpu->info->a6xx->prim_fifo_threshold);

	/* Set the AHB default slave response to "ERROR" */
	gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1);

	if (adreno_is_a7xx(adreno_gpu)) {
		/* Turn on the IFPC counter (countable 4 on XOCLK4) */
		gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1,
			  FIELD_PREP(GENMASK(7, 0), 0x4));
	}

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL(0), PERF_CP_ALWAYS_COUNT);

	a6xx_set_ubwc_config(gpu);

	/* Enable fault detection */
	if (adreno_is_a730(adreno_gpu) ||
	    adreno_is_a740_family(adreno_gpu))
		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0xcfffff);
	else if (adreno_is_a690(adreno_gpu))
		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x4fffff);
	else if (adreno_is_a619(adreno_gpu))
		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3fffff);
	else if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu))
		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3ffff);
	else
		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x1fffff);

	gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, BIT(7) | 0x1);

	/* Set weights for bicubic filtering */
	if (adreno_is_a650_family(adreno_gpu) || adreno_is_x185(adreno_gpu)) {
		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0);
		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1,
			  0x3fe05ff4);
		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2,
			  0x3fa0ebee);
		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3,
			  0x3f5193ed);
		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4,
			  0x3f0243f0);
	}

	/* Set up the CX GMU counter 0 to count busy ticks */
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);

	/* Enable the power counter */
	gmu_rmw(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, 0xff, BIT(5));
	gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);

	/* Protect registers from the CP */
	a6xx_set_cp_protect(gpu);

	if (adreno_is_a660_family(adreno_gpu)) {
		if (adreno_is_a690(adreno_gpu))
			gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x00028801);
		else
			gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1);
		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0);
	} else if (adreno_is_a702(adreno_gpu)) {
		/* Something to do with the HLSQ cluster */
		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(24));
	}

	if (adreno_is_a690(adreno_gpu))
		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x90);
	/* Set dualQ + disable afull for A660 GPU */
	else if (adreno_is_a660(adreno_gpu) || adreno_is_a663(adreno_gpu))
		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906);
	else if (adreno_is_a7xx(adreno_gpu))
		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG,
			  FIELD_PREP(GENMASK(19, 16), 6) |
			  FIELD_PREP(GENMASK(15, 12), 6) |
			  FIELD_PREP(GENMASK(11, 8), 9) |
			  BIT(3) | BIT(2) |
			  FIELD_PREP(GENMASK(1, 0), 2));

	/* Enable expanded apriv for targets that support it */
	if (gpu->hw_apriv) {
		if (adreno_is_a7xx(adreno_gpu)) {
			gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
				  A7XX_BR_APRIVMASK);
			gpu_write(gpu, REG_A7XX_CP_BV_APRIV_CNTL,
				  A7XX_APRIV_MASK);
			gpu_write(gpu, REG_A7XX_CP_LPAC_APRIV_CNTL,
				  A7XX_APRIV_MASK);
		} else
			gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
				  BIT(6) | BIT(5) | BIT(3) | BIT(2) | BIT(1));
	}

	if (adreno_is_a750(adreno_gpu)) {
		/* Disable ubwc merged UFC request feature */
		gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(19), BIT(19));

		/* Enable TP flaghint and other performance settings */
		gpu_write(gpu, REG_A6XX_TPL1_DBG_ECO_CNTL1, 0xc0700);
	} else if (adreno_is_a7xx(adreno_gpu)) {
		/* Disable non-ubwc read reqs from passing write reqs */
		gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(11), BIT(11));
	}

	/* Enable interrupts */
	gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK,
		  adreno_is_a7xx(adreno_gpu) ? A7XX_INT_MASK : A6XX_INT_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		goto out;

	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);

	/* Set the ringbuffer address */
	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);

	/* Targets that support extended APRIV can use the RPTR shadow from
	 * hardware but all the other ones need to disable the feature. Targets
	 * that support the WHERE_AM_I opcode can use that instead
	 */
	if (adreno_gpu->base.hw_apriv)
		gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);
	else
		gpu_write(gpu, REG_A6XX_CP_RB_CNTL,
			  MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Configure the RPTR shadow if needed: */
	if (a6xx_gpu->shadow_bo) {
		gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR,
			    shadowptr(a6xx_gpu, gpu->rb[0]));
		for (unsigned int i = 0; i < gpu->nr_rings; i++)
			a6xx_gpu->shadow[i] = 0;
	}

	/* ..which means "always" on A7xx, also for BV shadow */
	if (adreno_is_a7xx(adreno_gpu)) {
		gpu_write64(gpu, REG_A7XX_CP_BV_RB_RPTR_ADDR,
			    rbmemptr(gpu->rb[0], bv_rptr));
	}

	a6xx_preempt_hw_init(gpu);

	/* Always come up on rb 0 */
	a6xx_gpu->cur_ring = gpu->rb[0];

	for (i = 0; i < gpu->nr_rings; i++)
		gpu->rb[i]->cur_ctx_seqno = 0;

	/* Enable the SQE to start the CP engine */
	gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);

	if (adreno_is_a7xx(adreno_gpu) && !a6xx_gpu->pwrup_reglist_emitted) {
		a7xx_patch_pwrup_reglist(gpu);
		a6xx_gpu->pwrup_reglist_emitted = true;
	}

	ret = adreno_is_a7xx(adreno_gpu) ? a7xx_cp_init(gpu) : a6xx_cp_init(gpu);
	if (ret)
		goto out;

	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a6xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		a6xx_flush(gpu, gpu->rb[0]);
		if (!a6xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else if (ret == -ENODEV) {
		/*
		 * This device does not use zap shader (but print a warning
		 * just in case someone got their dt wrong.. hopefully they
		 * have a debug UART to realize the error of their ways...
		 * if you mess this up you are about to crash horribly)
		 */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
		ret = 0;
	} else {
		return ret;
	}

out:
	if (adreno_has_gmu_wrapper(adreno_gpu))
		return ret;

	/* Last step - yield the ringbuffer */
	a7xx_preempt_start(gpu);

	/*
	 * Tell the GMU that we are done touching the GPU and it can start power
	 * management
	 */
	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);

	if (a6xx_gpu->gmu.legacy) {
		/* Take the GMU out of its special boot mode */
		a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER);
	}

	return ret;
}

static int a6xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int ret;

	mutex_lock(&a6xx_gpu->gmu.lock);
	ret = hw_init(gpu);
	mutex_unlock(&a6xx_gpu->gmu.lock);

	return ret;
}

static void a6xx_dump(struct msm_gpu *gpu)
{
	DRM_DEV_INFO(&gpu->pdev->dev, "status:   %08x\n",
			gpu_read(gpu, REG_A6XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static void a6xx_recover(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int i, active_submits;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++)
		DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i)));

	if (hang_debug)
		a6xx_dump(gpu);

	/*
	 * To handle recovery specific sequences during the rpm suspend we are
	 * about to trigger
	 */
	a6xx_gpu->hung = true;

	/* Halt SQE first */
	gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);

	pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);

	/* active_submits won't change until we make a submission */
	mutex_lock(&gpu->active_lock);
	active_submits = gpu->active_submits;

	/*
	 * Temporarily clear active_submits count to silence a WARN() in the
	 * runtime suspend cb
	 */
	gpu->active_submits = 0;

	if (adreno_has_gmu_wrapper(adreno_gpu)) {
		/* Drain the outstanding traffic on memory buses */
		a6xx_bus_clear_pending_transactions(adreno_gpu, true);

		/* Reset the GPU to a clean state */
		a6xx_gpu_sw_reset(gpu, true);
		a6xx_gpu_sw_reset(gpu, false);
	}

	reinit_completion(&gmu->pd_gate);
	dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb);
	dev_pm_genpd_synced_poweroff(gmu->cxpd);

	/* Drop the rpm refcount from active submits */
	if (active_submits)
		pm_runtime_put(&gpu->pdev->dev);

	/* And the final one from recover worker */
	pm_runtime_put_sync(&gpu->pdev->dev);

	if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000)))
		DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n");

	dev_pm_genpd_remove_notifier(gmu->cxpd);

	pm_runtime_use_autosuspend(&gpu->pdev->dev);

	if (active_submits)
		pm_runtime_get(&gpu->pdev->dev);

	pm_runtime_get_sync(&gpu->pdev->dev);

	gpu->active_submits = active_submits;
	mutex_unlock(&gpu->active_lock);

	msm_gpu_hw_init(gpu);
	a6xx_gpu->hung = false;
}
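
/*
 * Map the UCHE client that triggered an SMMU fault, identified by the mid
 * field from FSYNR1 and the client id latched in UCHE_CLIENT_PF, to a
 * human-readable block name for the fault message.
 */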
static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	static const char *uche_clients[7] = {
		"VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ",
	};
	u32 val;

	if (adreno_is_a7xx(adreno_gpu)) {
		if (mid != 1 && mid != 2 && mid != 3 && mid != 8)
			return "UNKNOWN";
	} else {
		if (mid < 1 || mid > 3)
			return "UNKNOWN";
	}

	/*
	 * The source of the data depends on the mid ID read from FSYNR1
	 * and the client ID read from the UCHE block
	 */
	val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF);

	if (adreno_is_a7xx(adreno_gpu)) {
		/* Bit 3 for mid=3 indicates BR or BV */
		static const char *uche_clients_a7xx[16] = {
			"BR_VFD", "BR_SP", "BR_VSC", "BR_VPC",
			"BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP",
			"BV_VFD", "BV_SP", "BV_VSC", "BV_VPC",
			"BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP",
		};

		/* LPAC has the same clients as BR and BV, but because it is
		 * compute-only some of them do not exist and there are holes
		 * in the array.
		 */
		static const char *uche_clients_lpac_a7xx[8] = {
			"-", "LPAC_SP", "-", "-",
			"LPAC_HLSQ", "-", "-", "LPAC_TP",
		};

		val &= GENMASK(6, 0);

		/* mid=3 refers to BR or BV */
		if (mid == 3) {
			if (val < ARRAY_SIZE(uche_clients_a7xx))
				return uche_clients_a7xx[val];
			else
				return "UCHE";
		}

		/* mid=8 refers to LPAC */
		if (mid == 8) {
			if (val < ARRAY_SIZE(uche_clients_lpac_a7xx))
				return uche_clients_lpac_a7xx[val];
			else
				return "UCHE_LPAC";
		}

		/* mid=2 is a catchall for everything else in LPAC */
		if (mid == 2)
			return "UCHE_LPAC";

		/* mid=1 is a catchall for everything else in BR/BV */
		return "UCHE";
	} else if (adreno_is_a660_family(adreno_gpu)) {
		static const char *uche_clients_a660[8] = {
			"VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ", "TP",
		};

		static const char *uche_clients_a660_not[8] = {
			"not VFD", "not SP", "not VSC", "not VPC",
			"not HLSQ", "not PC", "not LRZ", "not TP",
		};

		val &= GENMASK(6, 0);

		if (mid == 3 && val < ARRAY_SIZE(uche_clients_a660))
			return uche_clients_a660[val];

		if (mid == 1 && val < ARRAY_SIZE(uche_clients_a660_not))
			return uche_clients_a660_not[val];

		return "UCHE";
	} else {
		/* mid = 3 is most precise and refers to only one block per client */
		if (mid == 3)
			return uche_clients[val & 7];

		/* For mid=2 the source is TP or VFD except when the client id is 0 */
		if (mid == 2)
			return ((val & 7) == 0) ? "TP" : "TP|VFD";

		/* For mid=1 just return "UCHE" as a catchall for everything else */
		return "UCHE";
	}
}

static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	if (id == 0)
		return "CP";
	else if (id == 4)
		return "CCU";
	else if (id == 6)
		return "CDP Prefetch";
	else if (id == 7)
		return "GMU";
	else if (id == 5 && adreno_is_a7xx(adreno_gpu))
		return "Flag cache";

	return a6xx_uche_fault_block(gpu, id);
}

static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
{
	struct msm_gpu *gpu = arg;
	struct adreno_smmu_fault_info *info = data;
	const char *block = "unknown";

	u32 scratch[] = {
		gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
		gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
		gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
		gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)),
	};

	if (info)
		block = a6xx_fault_block(gpu, info->fsynr1 & 0xff);

	return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
}

static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS);

	if (status & A6XX_CP_INT_CP_OPCODE_ERROR) {
		u32 val;

		gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1);
		val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA);
		dev_err_ratelimited(&gpu->pdev->dev,
			"CP | opcode error | possible opcode=0x%8.8X\n",
			val);
	}

	if (status & A6XX_CP_INT_CP_UCODE_ERROR)
		dev_err_ratelimited(&gpu->pdev->dev,
			"CP ucode error interrupt\n");

	if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR)
		dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n",
			gpu_read(gpu, REG_A6XX_CP_HW_FAULT));

	if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
		u32 val = gpu_read(gpu, REG_A6XX_CP_PROTECT_STATUS);

		dev_err_ratelimited(&gpu->pdev->dev,
			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
			val & (1 << 20) ? "READ" : "WRITE",
			(val & 0x3ffff), val);
	}

	if (status & A6XX_CP_INT_CP_AHB_ERROR && !adreno_is_a7xx(to_adreno_gpu(gpu)))
		dev_err_ratelimited(&gpu->pdev->dev, "CP AHB error interrupt\n");

	if (status & A6XX_CP_INT_CP_VSD_PARITY_ERROR)
		dev_err_ratelimited(&gpu->pdev->dev, "CP VSD decoder parity error\n");

	if (status & A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR)
		dev_err_ratelimited(&gpu->pdev->dev, "CP illegal instruction error\n");
}

static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);

	/*
	 * If stalled on SMMU fault, we could trip the GPU's hang detection,
	 * but the fault handler will trigger the devcore dump, and we want
	 * to otherwise resume normally rather than killing the submit, so
	 * just bail.
	 */
	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT)
		return;

	/*
	 * Force the GPU to stay on until after we finish
	 * collecting information
	 */
	if (!adreno_has_gmu_wrapper(adreno_gpu))
		gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);

	DRM_DEV_ERROR(&gpu->pdev->dev,
		"gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
		ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
		gpu_read(gpu, REG_A6XX_RBBM_STATUS),
		gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
		gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
		gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));

	/* Turn off the hangcheck timer to keep it from bothering us */
	timer_delete(&gpu->hangcheck_timer);

	kthread_queue_work(gpu->worker, &gpu->recover_work);
}

static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu)
{
	u32 status;

	status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS);
	gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0);

	dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status);

	/*
	 * Ignore FASTBLEND violations, because the HW will silently fall back
	 * to legacy blending.
	 */
	if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
		      A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) {
		timer_delete(&gpu->hangcheck_timer);

		kthread_queue_work(gpu->worker, &gpu->recover_work);
	}
}

static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS);

	gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status);

	if (priv->disable_err_irq)
		status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
		a6xx_fault_detect_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR)
		dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB bus error\n");

	if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a6xx_cp_hw_err_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");

	if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");

	if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
		a7xx_sw_fuse_violation_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		msm_gpu_retire(gpu);
		a6xx_preempt_trigger(gpu);
	}

	if (status & A6XX_RBBM_INT_0_MASK_CP_SW)
		a6xx_preempt_irq(gpu);

	return IRQ_HANDLED;
}

static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
{
	llcc_slice_deactivate(a6xx_gpu->llc_slice);
	llcc_slice_deactivate(a6xx_gpu->htw_llc_slice);
}

static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
{
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;
	u32 cntl1_regval = 0;

	if (IS_ERR(a6xx_gpu->llc_mmio))
		return;

	if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
(!llcc_slice_activate(a6xx_gpu->llc_slice)) { 1810 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); 1811 1812 gpu_scid &= 0x1f; 1813 cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) | 1814 (gpu_scid << 15) | (gpu_scid << 20); 1815 1816 /* On A660, the SCID programming for UCHE traffic is done in 1817 * A6XX_GBIF_SCACHE_CNTL0[14:10] 1818 */ 1819 if (adreno_is_a660_family(adreno_gpu)) 1820 gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) | 1821 (1 << 8), (gpu_scid << 10) | (1 << 8)); 1822 } 1823 1824 /* 1825 * For targets with a MMU500, activate the slice but don't program the 1826 * register. The XBL will take care of that. 1827 */ 1828 if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) { 1829 if (!a6xx_gpu->have_mmu500) { 1830 u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice); 1831 1832 gpuhtw_scid &= 0x1f; 1833 cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid); 1834 } 1835 } 1836 1837 if (!cntl1_regval) 1838 return; 1839 1840 /* 1841 * Program the slice IDs for the various GPU blocks and GPU MMU 1842 * pagetables 1843 */ 1844 if (!a6xx_gpu->have_mmu500) { 1845 a6xx_llc_write(a6xx_gpu, 1846 REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval); 1847 1848 /* 1849 * Program cacheability overrides to not allocate cache 1850 * lines on a write miss 1851 */ 1852 a6xx_llc_rmw(a6xx_gpu, 1853 REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03); 1854 return; 1855 } 1856 1857 gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval); 1858 } 1859 1860 static void a7xx_llc_activate(struct a6xx_gpu *a6xx_gpu) 1861 { 1862 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 1863 struct msm_gpu *gpu = &adreno_gpu->base; 1864 1865 if (IS_ERR(a6xx_gpu->llc_mmio)) 1866 return; 1867 1868 if (!llcc_slice_activate(a6xx_gpu->llc_slice)) { 1869 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); 1870 1871 gpu_scid &= GENMASK(4, 0); 1872 1873 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, 1874 FIELD_PREP(GENMASK(29, 25), gpu_scid) | 1875 FIELD_PREP(GENMASK(24, 20), gpu_scid) | 1876 FIELD_PREP(GENMASK(19, 15), gpu_scid) | 1877 FIELD_PREP(GENMASK(14, 10), gpu_scid) | 1878 FIELD_PREP(GENMASK(9, 5), gpu_scid) | 1879 FIELD_PREP(GENMASK(4, 0), gpu_scid)); 1880 1881 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, 1882 FIELD_PREP(GENMASK(14, 10), gpu_scid) | 1883 BIT(8)); 1884 } 1885 1886 llcc_slice_activate(a6xx_gpu->htw_llc_slice); 1887 } 1888 1889 static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu) 1890 { 1891 /* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */ 1892 if (adreno_has_gmu_wrapper(&a6xx_gpu->base)) 1893 return; 1894 1895 llcc_slice_putd(a6xx_gpu->llc_slice); 1896 llcc_slice_putd(a6xx_gpu->htw_llc_slice); 1897 } 1898 1899 static void a6xx_llc_slices_init(struct platform_device *pdev, 1900 struct a6xx_gpu *a6xx_gpu, bool is_a7xx) 1901 { 1902 struct device_node *phandle; 1903 1904 /* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */ 1905 if (adreno_has_gmu_wrapper(&a6xx_gpu->base)) 1906 return; 1907 1908 /* 1909 * There is a different programming path for A6xx targets with an 1910 * mmu500 attached, so detect if that is the case 1911 */ 1912 phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0); 1913 a6xx_gpu->have_mmu500 = (phandle && 1914 of_device_is_compatible(phandle, "arm,mmu-500")); 1915 of_node_put(phandle); 1916 1917 if (is_a7xx || !a6xx_gpu->have_mmu500) 1918 a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem"); 1919 else 1920 a6xx_gpu->llc_mmio = NULL; 1921 1922 a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU); 1923 
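/* LLCC_GPUHTW is a separate system-cache slice for the SMMU hardware page-table walker; it is activated alongside the main GPU slice in a6xx_llc_activate()/a7xx_llc_activate(). */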
a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW); 1924 1925 if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice)) 1926 a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); 1927 } 1928 1929 static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) 1930 { 1931 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; 1932 struct msm_gpu *gpu = &adreno_gpu->base; 1933 u32 fuse_val; 1934 int ret; 1935 1936 if (adreno_is_a750(adreno_gpu)) { 1937 /* 1938 * Assume that if qcom scm isn't available, that whatever 1939 * replacement allows writing the fuse register ourselves. 1940 * Users of alternative firmware need to make sure this 1941 * register is writeable or indicate that it's not somehow. 1942 * Print a warning because if you mess this up you're about to 1943 * crash horribly. 1944 */ 1945 if (!qcom_scm_is_available()) { 1946 dev_warn_once(gpu->dev->dev, 1947 "SCM is not available, poking fuse register\n"); 1948 a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, 1949 A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | 1950 A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | 1951 A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); 1952 adreno_gpu->has_ray_tracing = true; 1953 return 0; 1954 } 1955 1956 ret = qcom_scm_gpu_init_regs(QCOM_SCM_GPU_ALWAYS_EN_REQ | 1957 QCOM_SCM_GPU_TSENSE_EN_REQ); 1958 if (ret) 1959 return ret; 1960 1961 /* 1962 * On a750 raytracing may be disabled by the firmware, find out 1963 * whether that's the case. The scm call above sets the fuse 1964 * register. 1965 */ 1966 fuse_val = a6xx_llc_read(a6xx_gpu, 1967 REG_A7XX_CX_MISC_SW_FUSE_VALUE); 1968 adreno_gpu->has_ray_tracing = 1969 !!(fuse_val & A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING); 1970 } else if (adreno_is_a740(adreno_gpu)) { 1971 /* Raytracing is always enabled on a740 */ 1972 adreno_gpu->has_ray_tracing = true; 1973 } 1974 1975 return 0; 1976 } 1977 1978 1979 #define GBIF_CLIENT_HALT_MASK BIT(0) 1980 #define GBIF_ARB_HALT_MASK BIT(1) 1981 #define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0) 1982 #define VBIF_RESET_ACK_MASK 0xF0 1983 #define GPR0_GBIF_HALT_REQUEST 0x1E0 1984 1985 void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off) 1986 { 1987 struct msm_gpu *gpu = &adreno_gpu->base; 1988 1989 if (adreno_is_a619_holi(adreno_gpu)) { 1990 gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, GPR0_GBIF_HALT_REQUEST); 1991 spin_until((gpu_read(gpu, REG_A6XX_RBBM_VBIF_GX_RESET_STATUS) & 1992 (VBIF_RESET_ACK_MASK)) == VBIF_RESET_ACK_MASK); 1993 } else if (!a6xx_has_gbif(adreno_gpu)) { 1994 gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, VBIF_XIN_HALT_CTRL0_MASK); 1995 spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) & 1996 (VBIF_XIN_HALT_CTRL0_MASK)) == VBIF_XIN_HALT_CTRL0_MASK); 1997 gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0); 1998 1999 return; 2000 } 2001 2002 if (gx_off) { 2003 /* Halt the gx side of GBIF */ 2004 gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1); 2005 spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1); 2006 } 2007 2008 /* Halt new client requests on GBIF */ 2009 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK); 2010 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & 2011 (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK); 2012 2013 /* Halt all AXI requests on GBIF */ 2014 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK); 2015 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & 2016 (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK); 2017 2018 /* The GBIF halt needs to be explicitly cleared */ 2019 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0); 2020 } 2021 2022 void a6xx_gpu_sw_reset(struct msm_gpu 
*gpu, bool assert) 2023 { 2024 /* 11nm chips (e.g. ones with A610) have hw issues with the reset line! */ 2025 if (adreno_is_a610(to_adreno_gpu(gpu))) 2026 return; 2027 2028 gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, assert); 2029 /* Perform a bogus read and add a brief delay to ensure ordering. */ 2030 gpu_read(gpu, REG_A6XX_RBBM_SW_RESET_CMD); 2031 udelay(1); 2032 2033 /* The reset line needs to be asserted for at least 100 us */ 2034 if (assert) 2035 udelay(100); 2036 } 2037 2038 static int a6xx_gmu_pm_resume(struct msm_gpu *gpu) 2039 { 2040 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2041 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2042 int ret; 2043 2044 gpu->needs_hw_init = true; 2045 2046 trace_msm_gpu_resume(0); 2047 2048 mutex_lock(&a6xx_gpu->gmu.lock); 2049 ret = a6xx_gmu_resume(a6xx_gpu); 2050 mutex_unlock(&a6xx_gpu->gmu.lock); 2051 if (ret) 2052 return ret; 2053 2054 msm_devfreq_resume(gpu); 2055 2056 adreno_is_a7xx(adreno_gpu) ? a7xx_llc_activate(a6xx_gpu) : a6xx_llc_activate(a6xx_gpu); 2057 2058 return ret; 2059 } 2060 2061 static int a6xx_pm_resume(struct msm_gpu *gpu) 2062 { 2063 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2064 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2065 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 2066 unsigned long freq = gpu->fast_rate; 2067 struct dev_pm_opp *opp; 2068 int ret; 2069 2070 gpu->needs_hw_init = true; 2071 2072 trace_msm_gpu_resume(0); 2073 2074 mutex_lock(&a6xx_gpu->gmu.lock); 2075 2076 opp = dev_pm_opp_find_freq_ceil(&gpu->pdev->dev, &freq); 2077 if (IS_ERR(opp)) { 2078 ret = PTR_ERR(opp); 2079 goto err_set_opp; 2080 } 2081 dev_pm_opp_put(opp); 2082 2083 /* Set the core clock and bus bw, having VDD scaling in mind */ 2084 dev_pm_opp_set_opp(&gpu->pdev->dev, opp); 2085 2086 pm_runtime_resume_and_get(gmu->dev); 2087 pm_runtime_resume_and_get(gmu->gxpd); 2088 2089 ret = clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks); 2090 if (ret) 2091 goto err_bulk_clk; 2092 2093 if (adreno_is_a619_holi(adreno_gpu)) 2094 a6xx_sptprac_enable(gmu); 2095 2096 /* If anything goes south, tear the GPU down piece by piece.. 
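* The error labels below unwind the bring-up steps above in reverse order:
* drop the GX and GMU runtime PM references, then clear the OPP vote.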
*/ 2097 if (ret) { 2098 err_bulk_clk: 2099 pm_runtime_put(gmu->gxpd); 2100 pm_runtime_put(gmu->dev); 2101 dev_pm_opp_set_opp(&gpu->pdev->dev, NULL); 2102 } 2103 err_set_opp: 2104 mutex_unlock(&a6xx_gpu->gmu.lock); 2105 2106 if (!ret) 2107 msm_devfreq_resume(gpu); 2108 2109 return ret; 2110 } 2111 2112 static int a6xx_gmu_pm_suspend(struct msm_gpu *gpu) 2113 { 2114 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2115 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2116 int i, ret; 2117 2118 trace_msm_gpu_suspend(0); 2119 2120 a6xx_llc_deactivate(a6xx_gpu); 2121 2122 msm_devfreq_suspend(gpu); 2123 2124 mutex_lock(&a6xx_gpu->gmu.lock); 2125 ret = a6xx_gmu_stop(a6xx_gpu); 2126 mutex_unlock(&a6xx_gpu->gmu.lock); 2127 if (ret) 2128 return ret; 2129 2130 if (a6xx_gpu->shadow_bo) 2131 for (i = 0; i < gpu->nr_rings; i++) 2132 a6xx_gpu->shadow[i] = 0; 2133 2134 gpu->suspend_count++; 2135 2136 return 0; 2137 } 2138 2139 static int a6xx_pm_suspend(struct msm_gpu *gpu) 2140 { 2141 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2142 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2143 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 2144 int i; 2145 2146 trace_msm_gpu_suspend(0); 2147 2148 msm_devfreq_suspend(gpu); 2149 2150 mutex_lock(&a6xx_gpu->gmu.lock); 2151 2152 /* Drain the outstanding traffic on memory buses */ 2153 a6xx_bus_clear_pending_transactions(adreno_gpu, true); 2154 2155 if (adreno_is_a619_holi(adreno_gpu)) 2156 a6xx_sptprac_disable(gmu); 2157 2158 clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks); 2159 2160 pm_runtime_put_sync(gmu->gxpd); 2161 dev_pm_opp_set_opp(&gpu->pdev->dev, NULL); 2162 pm_runtime_put_sync(gmu->dev); 2163 2164 mutex_unlock(&a6xx_gpu->gmu.lock); 2165 2166 if (a6xx_gpu->shadow_bo) 2167 for (i = 0; i < gpu->nr_rings; i++) 2168 a6xx_gpu->shadow[i] = 0; 2169 2170 gpu->suspend_count++; 2171 2172 return 0; 2173 } 2174 2175 static int a6xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value) 2176 { 2177 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2178 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2179 2180 mutex_lock(&a6xx_gpu->gmu.lock); 2181 2182 /* Force the GPU power on so we can read this register */ 2183 a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); 2184 2185 *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER); 2186 2187 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); 2188 2189 mutex_unlock(&a6xx_gpu->gmu.lock); 2190 2191 return 0; 2192 } 2193 2194 static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) 2195 { 2196 *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER); 2197 return 0; 2198 } 2199 2200 static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu) 2201 { 2202 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2203 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2204 2205 return a6xx_gpu->cur_ring; 2206 } 2207 2208 static void a6xx_destroy(struct msm_gpu *gpu) 2209 { 2210 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2211 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2212 2213 if (a6xx_gpu->sqe_bo) { 2214 msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace); 2215 drm_gem_object_put(a6xx_gpu->sqe_bo); 2216 } 2217 2218 if (a6xx_gpu->shadow_bo) { 2219 msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->aspace); 2220 drm_gem_object_put(a6xx_gpu->shadow_bo); 2221 } 2222 2223 a6xx_llc_slices_destroy(a6xx_gpu); 2224 2225 a6xx_gmu_remove(a6xx_gpu); 2226 2227 adreno_gpu_cleanup(adreno_gpu); 2228 2229 kfree(a6xx_gpu); 2230 } 2231 2232 static u64 a6xx_gpu_busy(struct 
msm_gpu *gpu, unsigned long *out_sample_rate) 2233 { 2234 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2235 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2236 u64 busy_cycles; 2237 2238 /* 19.2MHz */ 2239 *out_sample_rate = 19200000; 2240 2241 busy_cycles = gmu_read64(&a6xx_gpu->gmu, 2242 REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L, 2243 REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H); 2244 2245 return busy_cycles; 2246 } 2247 2248 static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp, 2249 bool suspended) 2250 { 2251 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2252 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2253 2254 mutex_lock(&a6xx_gpu->gmu.lock); 2255 a6xx_gmu_set_freq(gpu, opp, suspended); 2256 mutex_unlock(&a6xx_gpu->gmu.lock); 2257 } 2258 2259 static struct msm_gem_address_space * 2260 a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev) 2261 { 2262 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2263 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2264 unsigned long quirks = 0; 2265 2266 /* 2267 * This allows GPU to set the bus attributes required to use system 2268 * cache on behalf of the iommu page table walker. 2269 */ 2270 if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice) && 2271 !device_iommu_capable(&pdev->dev, IOMMU_CAP_CACHE_COHERENCY)) 2272 quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA; 2273 2274 return adreno_iommu_create_address_space(gpu, pdev, quirks); 2275 } 2276 2277 static struct msm_gem_address_space * 2278 a6xx_create_private_address_space(struct msm_gpu *gpu) 2279 { 2280 struct msm_mmu *mmu; 2281 2282 mmu = msm_iommu_pagetable_create(gpu->aspace->mmu); 2283 2284 if (IS_ERR(mmu)) 2285 return ERR_CAST(mmu); 2286 2287 return msm_gem_address_space_create(mmu, 2288 "gpu", ADRENO_VM_START, 2289 adreno_private_address_space_size(gpu)); 2290 } 2291 2292 static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 2293 { 2294 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 2295 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 2296 2297 if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) 2298 return a6xx_gpu->shadow[ring->id]; 2299 2300 return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR); 2301 } 2302 2303 static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) 2304 { 2305 struct msm_cp_state cp_state = { 2306 .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE), 2307 .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE), 2308 .ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE), 2309 .ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE), 2310 }; 2311 bool progress; 2312 2313 /* 2314 * Adjust the remaining data to account for what has already been 2315 * fetched from memory, but not yet consumed by the SQE. 2316 * 2317 * This is not *technically* correct, the amount buffered could 2318 * exceed the IB size due to hw prefetching ahead, but: 2319 * 2320 * (1) We aren't trying to find the exact position, just whether 2321 * progress has been made 2322 * (2) The CP_REG_TO_MEM at the end of a submit should be enough 2323 * to prevent prefetching into an unrelated submit. (And 2324 * either way, at some point the ROQ will be full.) 
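* The ROQ_AVAIL_IB1/IB2 reads below keep only the upper 16 bits of each
* register, which is where the amount of IB data currently buffered in the
* ROQ is reported.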
2325 */ 2326 cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB1) >> 16; 2327 cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB2) >> 16; 2328 2329 progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state)); 2330 2331 ring->last_cp_state = cp_state; 2332 2333 return progress; 2334 } 2335 2336 static u32 fuse_to_supp_hw(const struct adreno_info *info, u32 fuse) 2337 { 2338 if (!info->speedbins) 2339 return UINT_MAX; 2340 2341 for (int i = 0; info->speedbins[i].fuse != SHRT_MAX; i++) 2342 if (info->speedbins[i].fuse == fuse) 2343 return BIT(info->speedbins[i].speedbin); 2344 2345 return UINT_MAX; 2346 } 2347 2348 static int a6xx_set_supported_hw(struct device *dev, const struct adreno_info *info) 2349 { 2350 u32 supp_hw; 2351 u32 speedbin; 2352 int ret; 2353 2354 ret = adreno_read_speedbin(dev, &speedbin); 2355 /* 2356 * -ENOENT means that the platform doesn't support speedbin which is 2357 * fine 2358 */ 2359 if (ret == -ENOENT) { 2360 return 0; 2361 } else if (ret) { 2362 dev_err_probe(dev, ret, 2363 "failed to read speed-bin. Some OPPs may not be supported by hardware\n"); 2364 return ret; 2365 } 2366 2367 supp_hw = fuse_to_supp_hw(info, speedbin); 2368 2369 if (supp_hw == UINT_MAX) { 2370 DRM_DEV_ERROR(dev, 2371 "missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n", 2372 speedbin); 2373 supp_hw = BIT(0); /* Default */ 2374 } 2375 2376 ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1); 2377 if (ret) 2378 return ret; 2379 2380 return 0; 2381 } 2382 2383 static const struct adreno_gpu_funcs funcs = { 2384 .base = { 2385 .get_param = adreno_get_param, 2386 .set_param = adreno_set_param, 2387 .hw_init = a6xx_hw_init, 2388 .ucode_load = a6xx_ucode_load, 2389 .pm_suspend = a6xx_gmu_pm_suspend, 2390 .pm_resume = a6xx_gmu_pm_resume, 2391 .recover = a6xx_recover, 2392 .submit = a6xx_submit, 2393 .active_ring = a6xx_active_ring, 2394 .irq = a6xx_irq, 2395 .destroy = a6xx_destroy, 2396 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2397 .show = a6xx_show, 2398 #endif 2399 .gpu_busy = a6xx_gpu_busy, 2400 .gpu_get_freq = a6xx_gmu_get_freq, 2401 .gpu_set_freq = a6xx_gpu_set_freq, 2402 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2403 .gpu_state_get = a6xx_gpu_state_get, 2404 .gpu_state_put = a6xx_gpu_state_put, 2405 #endif 2406 .create_address_space = a6xx_create_address_space, 2407 .create_private_address_space = a6xx_create_private_address_space, 2408 .get_rptr = a6xx_get_rptr, 2409 .progress = a6xx_progress, 2410 }, 2411 .get_timestamp = a6xx_gmu_get_timestamp, 2412 }; 2413 2414 static const struct adreno_gpu_funcs funcs_gmuwrapper = { 2415 .base = { 2416 .get_param = adreno_get_param, 2417 .set_param = adreno_set_param, 2418 .hw_init = a6xx_hw_init, 2419 .ucode_load = a6xx_ucode_load, 2420 .pm_suspend = a6xx_pm_suspend, 2421 .pm_resume = a6xx_pm_resume, 2422 .recover = a6xx_recover, 2423 .submit = a6xx_submit, 2424 .active_ring = a6xx_active_ring, 2425 .irq = a6xx_irq, 2426 .destroy = a6xx_destroy, 2427 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2428 .show = a6xx_show, 2429 #endif 2430 .gpu_busy = a6xx_gpu_busy, 2431 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2432 .gpu_state_get = a6xx_gpu_state_get, 2433 .gpu_state_put = a6xx_gpu_state_put, 2434 #endif 2435 .create_address_space = a6xx_create_address_space, 2436 .create_private_address_space = a6xx_create_private_address_space, 2437 .get_rptr = a6xx_get_rptr, 2438 .progress = a6xx_progress, 2439 }, 2440 .get_timestamp = a6xx_get_timestamp, 2441 }; 2442 2443 static const struct adreno_gpu_funcs funcs_a7xx = { 2444 
.base = { 2445 .get_param = adreno_get_param, 2446 .set_param = adreno_set_param, 2447 .hw_init = a6xx_hw_init, 2448 .ucode_load = a6xx_ucode_load, 2449 .pm_suspend = a6xx_gmu_pm_suspend, 2450 .pm_resume = a6xx_gmu_pm_resume, 2451 .recover = a6xx_recover, 2452 .submit = a7xx_submit, 2453 .active_ring = a6xx_active_ring, 2454 .irq = a6xx_irq, 2455 .destroy = a6xx_destroy, 2456 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2457 .show = a6xx_show, 2458 #endif 2459 .gpu_busy = a6xx_gpu_busy, 2460 .gpu_get_freq = a6xx_gmu_get_freq, 2461 .gpu_set_freq = a6xx_gpu_set_freq, 2462 #if defined(CONFIG_DRM_MSM_GPU_STATE) 2463 .gpu_state_get = a6xx_gpu_state_get, 2464 .gpu_state_put = a6xx_gpu_state_put, 2465 #endif 2466 .create_address_space = a6xx_create_address_space, 2467 .create_private_address_space = a6xx_create_private_address_space, 2468 .get_rptr = a6xx_get_rptr, 2469 .progress = a6xx_progress, 2470 }, 2471 .get_timestamp = a6xx_gmu_get_timestamp, 2472 }; 2473 2474 struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) 2475 { 2476 struct msm_drm_private *priv = dev->dev_private; 2477 struct platform_device *pdev = priv->gpu_pdev; 2478 struct adreno_platform_config *config = pdev->dev.platform_data; 2479 struct device_node *node; 2480 struct a6xx_gpu *a6xx_gpu; 2481 struct adreno_gpu *adreno_gpu; 2482 struct msm_gpu *gpu; 2483 extern int enable_preemption; 2484 bool is_a7xx; 2485 int ret; 2486 2487 a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL); 2488 if (!a6xx_gpu) 2489 return ERR_PTR(-ENOMEM); 2490 2491 adreno_gpu = &a6xx_gpu->base; 2492 gpu = &adreno_gpu->base; 2493 2494 mutex_init(&a6xx_gpu->gmu.lock); 2495 2496 adreno_gpu->registers = NULL; 2497 2498 /* Check if there is a GMU phandle and set it up */ 2499 node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0); 2500 /* FIXME: How do we gracefully handle this? 
*/ 2501 BUG_ON(!node); 2502 2503 adreno_gpu->gmu_is_wrapper = of_device_is_compatible(node, "qcom,adreno-gmu-wrapper"); 2504 2505 adreno_gpu->base.hw_apriv = 2506 !!(config->info->quirks & ADRENO_QUIRK_HAS_HW_APRIV); 2507 2508 /* gpu->info only gets assigned in adreno_gpu_init() */ 2509 is_a7xx = config->info->family == ADRENO_7XX_GEN1 || 2510 config->info->family == ADRENO_7XX_GEN2 || 2511 config->info->family == ADRENO_7XX_GEN3; 2512 2513 a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx); 2514 2515 ret = a6xx_set_supported_hw(&pdev->dev, config->info); 2516 if (ret) { 2517 a6xx_llc_slices_destroy(a6xx_gpu); 2518 kfree(a6xx_gpu); 2519 return ERR_PTR(ret); 2520 } 2521 2522 if ((enable_preemption == 1) || (enable_preemption == -1 && 2523 (config->info->quirks & ADRENO_QUIRK_PREEMPTION))) 2524 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_a7xx, 4); 2525 else if (is_a7xx) 2526 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_a7xx, 1); 2527 else if (adreno_has_gmu_wrapper(adreno_gpu)) 2528 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_gmuwrapper, 1); 2529 else 2530 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); 2531 if (ret) { 2532 a6xx_destroy(&(a6xx_gpu->base.base)); 2533 return ERR_PTR(ret); 2534 } 2535 2536 /* 2537 * For now only clamp to idle freq for devices where this is known not 2538 * to cause power supply issues: 2539 */ 2540 if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu)) 2541 priv->gpu_clamp_to_idle = true; 2542 2543 if (adreno_has_gmu_wrapper(adreno_gpu)) 2544 ret = a6xx_gmu_wrapper_init(a6xx_gpu, node); 2545 else 2546 ret = a6xx_gmu_init(a6xx_gpu, node); 2547 of_node_put(node); 2548 if (ret) { 2549 a6xx_destroy(&(a6xx_gpu->base.base)); 2550 return ERR_PTR(ret); 2551 } 2552 2553 if (adreno_is_a7xx(adreno_gpu)) { 2554 ret = a7xx_cx_mem_init(a6xx_gpu); 2555 if (ret) { 2556 a6xx_destroy(&(a6xx_gpu->base.base)); 2557 return ERR_PTR(ret); 2558 } 2559 } 2560 2561 adreno_gpu->uche_trap_base = 0x1fffffffff000ull; 2562 2563 if (gpu->aspace) 2564 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, 2565 a6xx_fault_handler); 2566 2567 a6xx_calc_ubwc_config(adreno_gpu); 2568 /* Set up the preemption specific bits and pieces for each ringbuffer */ 2569 a6xx_preempt_init(gpu); 2570 2571 return gpu; 2572 } 2573
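/*
 * Illustrative sketch, not part of the driver: fuse_to_supp_hw() and
 * a6xx_set_supported_hw() above expect the per-SoC catalog entry to carry a
 * speedbin table terminated by a SHRT_MAX fuse value. The element type is
 * assumed to be the driver's speedbin struct and the numbers are invented,
 * purely to show the expected shape:
 *
 *	static const struct adreno_speedbin example_speedbins[] = {
 *		{ .fuse = 0,        .speedbin = 0 },
 *		{ .fuse = 169,      .speedbin = 1 },
 *		{ .fuse = SHRT_MAX, .speedbin = 0 },	<- sentinel, ends the fuse_to_supp_hw() walk
 *	};
 *
 * The BIT(speedbin) mask returned for the fuse read from the SoC is handed to
 * devm_pm_opp_set_supported_hw(), so only OPP table entries whose
 * opp-supported-hw property matches that bin stay enabled.
 */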