1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright 2013 Advanced Micro Devices, Inc.
4 * Copyright 2025 Valve Corporation
5 * Copyright 2025 Alexandre Demers
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * The above copyright notice and this permission notice (including the
25 * next paragraph) shall be included in all copies or substantial portions
26 * of the Software.
27 *
28 * Authors: Christian König <christian.koenig@amd.com>
29 * Timur Kristóf <timur.kristof@gmail.com>
30 * Alexandre Demers <alexandre.f.demers@gmail.com>
31 */
32
33 #include <linux/firmware.h>
34
35 #include "amdgpu.h"
36 #include "amdgpu_vce.h"
37 #include "amdgpu_gart.h"
38 #include "sid.h"
39 #include "vce_v1_0.h"
40 #include "vce/vce_1_0_d.h"
41 #include "vce/vce_1_0_sh_mask.h"
42 #include "oss/oss_1_0_d.h"
43 #include "oss/oss_1_0_sh_mask.h"
44
/* VCPU cache window sizes in bytes: firmware, stack, and per-handle data. */
#define VCE_V1_0_FW_SIZE	(256 * 1024)
#define VCE_V1_0_STACK_SIZE	(64 * 1024)
/* 7808 bytes of data per VCE handle, plus one extra slot. */
#define VCE_V1_0_DATA_SIZE	(7808 * (AMDGPU_MAX_VCE_HANDLES + 1))
/* VCE_STATUS bit the VCPU sets once its firmware reports as loaded. */
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

/*
 * First GART page (and the corresponding GPU address) available for
 * mapping the VCPU BO, placed right after the pages reserved for the
 * buffer-transfer windows.
 */
#define VCE_V1_0_GART_PAGE_START \
	(AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS)
#define VCE_V1_0_GART_ADDR_START \
	(VCE_V1_0_GART_PAGE_START * AMDGPU_GPU_PAGE_SIZE)

static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev);
57
/*
 * Layout of the signature blob embedded in the VCE 1.0 firmware image.
 * Fields are little-endian (callers use le32_to_cpu() when reading them).
 * One val[] entry matches the running ASIC by chip_id; its nonce, sigval
 * and keyselect are used for the hardware firmware-validation handshake.
 */
struct vce_v1_0_fw_signature {
	int32_t offset;
	uint32_t length;
	int32_t number;			/* count of valid entries in val[] */
	struct {
		uint32_t chip_id;	/* ASIC id this entry applies to */
		uint32_t keyselect;	/* key select value for the HW */
		uint32_t nonce[4];
		uint32_t sigval[4];
	} val[8];
};
69
70 /**
71 * vce_v1_0_ring_get_rptr - get read pointer
72 *
73 * @ring: amdgpu_ring pointer
74 *
75 * Returns the current hardware read pointer
76 */
vce_v1_0_ring_get_rptr(struct amdgpu_ring * ring)77 static uint64_t vce_v1_0_ring_get_rptr(struct amdgpu_ring *ring)
78 {
79 struct amdgpu_device *adev = ring->adev;
80
81 if (ring->me == 0)
82 return RREG32(mmVCE_RB_RPTR);
83 else
84 return RREG32(mmVCE_RB_RPTR2);
85 }
86
87 /**
88 * vce_v1_0_ring_get_wptr - get write pointer
89 *
90 * @ring: amdgpu_ring pointer
91 *
92 * Returns the current hardware write pointer
93 */
vce_v1_0_ring_get_wptr(struct amdgpu_ring * ring)94 static uint64_t vce_v1_0_ring_get_wptr(struct amdgpu_ring *ring)
95 {
96 struct amdgpu_device *adev = ring->adev;
97
98 if (ring->me == 0)
99 return RREG32(mmVCE_RB_WPTR);
100 else
101 return RREG32(mmVCE_RB_WPTR2);
102 }
103
104 /**
105 * vce_v1_0_ring_set_wptr - set write pointer
106 *
107 * @ring: amdgpu_ring pointer
108 *
109 * Commits the write pointer to the hardware
110 */
vce_v1_0_ring_set_wptr(struct amdgpu_ring * ring)111 static void vce_v1_0_ring_set_wptr(struct amdgpu_ring *ring)
112 {
113 struct amdgpu_device *adev = ring->adev;
114
115 if (ring->me == 0)
116 WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
117 else
118 WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
119 }
120
vce_v1_0_lmi_clean(struct amdgpu_device * adev)121 static int vce_v1_0_lmi_clean(struct amdgpu_device *adev)
122 {
123 int i, j;
124
125 for (i = 0; i < 10; ++i) {
126 for (j = 0; j < 100; ++j) {
127 if (RREG32(mmVCE_LMI_STATUS) & 0x337f)
128 return 0;
129
130 mdelay(10);
131 }
132 }
133
134 return -ETIMEDOUT;
135 }
136
/*
 * vce_v1_0_firmware_loaded - wait for the VCPU to report its FW as loaded
 *
 * Polls VCE_STATUS for the FW-loaded bit for up to ~1 second. If the bit
 * does not appear, soft-resets the ECPU and tries again, up to 10 attempts
 * total, before giving up with -ETIMEDOUT.
 */
static int vce_v1_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			if (RREG32(mmVCE_STATUS) & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		dev_err(adev->dev, "VCE not responding, trying to reset the ECPU\n");

		/* Pulse the ECPU soft reset, then give the core time to restart. */
		WREG32_P(mmVCE_SOFT_RESET,
			 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(mmVCE_SOFT_RESET, 0,
			 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
	}

	return -ETIMEDOUT;
}
161
vce_v1_0_init_cg(struct amdgpu_device * adev)162 static void vce_v1_0_init_cg(struct amdgpu_device *adev)
163 {
164 u32 tmp;
165
166 tmp = RREG32(mmVCE_CLOCK_GATING_A);
167 tmp |= VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK;
168 WREG32(mmVCE_CLOCK_GATING_A, tmp);
169
170 tmp = RREG32(mmVCE_CLOCK_GATING_B);
171 tmp |= 0x1e;
172 tmp &= ~0xe100e1;
173 WREG32(mmVCE_CLOCK_GATING_B, tmp);
174
175 tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
176 tmp &= ~0xff9ff000;
177 WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
178
179 tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
180 tmp &= ~0x3ff;
181 WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
182 }
183
184 /**
185 * vce_v1_0_load_fw_signature - load firmware signature into VCPU BO
186 *
187 * @adev: amdgpu_device pointer
188 *
189 * The VCE1 firmware validation mechanism needs a firmware signature.
190 * This function finds the signature appropriate for the current
191 * ASIC and writes that into the VCPU BO.
192 */
vce_v1_0_load_fw_signature(struct amdgpu_device * adev)193 static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev)
194 {
195 const struct common_firmware_header *hdr;
196 struct vce_v1_0_fw_signature *sign;
197 unsigned int ucode_offset;
198 uint32_t chip_id;
199 u32 *cpu_addr;
200 int i;
201
202 hdr = (const struct common_firmware_header *)adev->vce.fw->data;
203 ucode_offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
204 cpu_addr = adev->vce.cpu_addr;
205
206 sign = (void *)adev->vce.fw->data + ucode_offset;
207
208 switch (adev->asic_type) {
209 case CHIP_TAHITI:
210 chip_id = 0x01000014;
211 break;
212 case CHIP_VERDE:
213 chip_id = 0x01000015;
214 break;
215 case CHIP_PITCAIRN:
216 chip_id = 0x01000016;
217 break;
218 default:
219 dev_err(adev->dev, "asic_type %#010x was not found!", adev->asic_type);
220 return -EINVAL;
221 }
222
223 for (i = 0; i < le32_to_cpu(sign->number); ++i) {
224 if (le32_to_cpu(sign->val[i].chip_id) == chip_id)
225 break;
226 }
227
228 if (i == le32_to_cpu(sign->number)) {
229 dev_err(adev->dev, "chip_id 0x%x for %s was not found in VCE firmware",
230 chip_id, amdgpu_asic_name[adev->asic_type]);
231 return -EINVAL;
232 }
233
234 cpu_addr += (256 - 64) / 4;
235 memcpy_toio(&cpu_addr[0], &sign->val[i].nonce[0], 16);
236 cpu_addr[4] = cpu_to_le32(le32_to_cpu(sign->length) + 64);
237
238 memset_io(&cpu_addr[5], 0, 44);
239 memcpy_toio(&cpu_addr[16], &sign[1], hdr->ucode_size_bytes - sizeof(*sign));
240
241 cpu_addr += (le32_to_cpu(sign->length) + 64) / 4;
242 memcpy_toio(&cpu_addr[0], &sign->val[i].sigval[0], 16);
243
244 adev->vce.keyselect = le32_to_cpu(sign->val[i].keyselect);
245
246 return 0;
247 }
248
vce_v1_0_wait_for_fw_validation(struct amdgpu_device * adev)249 static int vce_v1_0_wait_for_fw_validation(struct amdgpu_device *adev)
250 {
251 int i;
252
253 dev_dbg(adev->dev, "VCE keyselect: %d", adev->vce.keyselect);
254 WREG32(mmVCE_LMI_FW_START_KEYSEL, adev->vce.keyselect);
255
256 for (i = 0; i < 10; ++i) {
257 mdelay(10);
258 if (RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__DONE_MASK)
259 break;
260 }
261
262 if (!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__DONE_MASK)) {
263 dev_err(adev->dev, "VCE FW validation timeout\n");
264 return -ETIMEDOUT;
265 }
266
267 if (!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__PASS_MASK)) {
268 dev_err(adev->dev, "VCE FW validation failed\n");
269 return -EINVAL;
270 }
271
272 for (i = 0; i < 10; ++i) {
273 mdelay(10);
274 if (!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__BUSY_MASK))
275 break;
276 }
277
278 if (RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__BUSY_MASK) {
279 dev_err(adev->dev, "VCE FW busy timeout\n");
280 return -ETIMEDOUT;
281 }
282
283 return 0;
284 }
285
/*
 * vce_v1_0_mc_resume - program memory controller and VCPU cache windows
 *
 * Configures the LMI, disables clock gating during setup, programs the
 * three VCPU cache windows (firmware, stack, data) and kicks off the
 * firmware validation handshake. Returns 0 on success or the error from
 * vce_v1_0_wait_for_fw_validation().
 */
static int vce_v1_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset;
	uint32_t size;

	/*
	 * When the keyselect is already set, don't perturb VCE FW.
	 * Validation seems to always fail the second time.
	 */
	if (RREG32(mmVCE_LMI_FW_START_KEYSEL)) {
		dev_dbg(adev->dev, "keyselect already set: 0x%x (on CPU: 0x%x)\n",
			RREG32(mmVCE_LMI_FW_START_KEYSEL), adev->vce.keyselect);

		/* Only clear bit 8 of VCE_LMI_CTRL2, leaving the FW alone. */
		WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
		return 0;
	}

	/* Disable clock gating while the VCPU is being set up. */
	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
	WREG32(mmVCE_CLOCK_GATING_B, 0);

	WREG32_P(mmVCE_LMI_FW_PERIODIC_CTRL, 0x4, ~0x4);

	WREG32(mmVCE_LMI_CTRL, 0x00398000);

	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
	/* Zero both swap control registers and the LMI VM control. */
	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
	WREG32(mmVCE_LMI_VM_CTRL, 0);

	WREG32(mmVCE_VCPU_SCRATCH7, AMDGPU_MAX_VCE_HANDLES);

	/*
	 * Program the three VCPU cache windows: firmware, stack, data.
	 * NOTE(review): offset is u32 while adev->vce.gpu_addr is 64-bit;
	 * this relies on vce_v1_0_ensure_vcpu_bo_32bit_addr() having placed
	 * the VCPU BO below 4 GB.
	 */
	offset = adev->vce.gpu_addr + AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V1_0_FW_SIZE;
	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

	offset += size;
	size = VCE_V1_0_STACK_SIZE;
	WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE1, size);

	offset += size;
	size = VCE_V1_0_DATA_SIZE;
	WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE2, size);

	/* Clear bit 8 of VCE_LMI_CTRL2 before starting validation. */
	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);

	return vce_v1_0_wait_for_fw_validation(adev);
}
338
339 /**
340 * vce_v1_0_is_idle() - Check idle status of VCE1 IP block
341 *
342 * @ip_block: amdgpu_ip_block pointer
343 *
344 * Check whether VCE is busy according to VCE_STATUS.
345 * Also check whether the SRBM thinks VCE is busy, although
346 * SRBM_STATUS.VCE_BUSY seems to be bogus because it
347 * appears to mirror the VCE_STATUS.VCPU_REPORT_FW_LOADED bit.
348 */
vce_v1_0_is_idle(struct amdgpu_ip_block * ip_block)349 static bool vce_v1_0_is_idle(struct amdgpu_ip_block *ip_block)
350 {
351 struct amdgpu_device *adev = ip_block->adev;
352 bool busy =
353 (RREG32(mmVCE_STATUS) & (VCE_STATUS__JOB_BUSY_MASK | VCE_STATUS__UENC_BUSY_MASK)) ||
354 (RREG32(mmSRBM_STATUS2) & SRBM_STATUS2__VCE_BUSY_MASK);
355
356 return !busy;
357 }
358
vce_v1_0_wait_for_idle(struct amdgpu_ip_block * ip_block)359 static int vce_v1_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
360 {
361 struct amdgpu_device *adev = ip_block->adev;
362 unsigned int i;
363
364 for (i = 0; i < adev->usec_timeout; i++) {
365 udelay(1);
366 if (vce_v1_0_is_idle(ip_block))
367 return 0;
368 }
369 return -ETIMEDOUT;
370 }
371
/**
 * vce_v1_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block: resume the memory controller, program
 * both ring buffers, enable the VCPU clock, pulse the ECPU/FME soft
 * resets and wait for the firmware to report as loaded.
 *
 * Returns 0 on success or a negative error code on failure.
 */
static int vce_v1_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	/* Mark VCE as busy in VCE_STATUS (bit 0) during startup. */
	WREG32_P(mmVCE_STATUS, 1, ~1);

	r = vce_v1_0_mc_resume(adev);
	if (r)
		return r;

	/* Program pointers, base address and size for ring 0. */
	ring = &adev->vce.ring[0];
	WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
	WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
	WREG32(mmVCE_RB_BASE_LO, lower_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);

	/* Same for ring 1, via the *2 register set. */
	ring = &adev->vce.ring[1];
	WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
	WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
	WREG32(mmVCE_RB_BASE_LO2, lower_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

	/* Enable the VCPU clock. */
	WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
		 ~VCE_VCPU_CNTL__CLK_EN_MASK);

	/* Assert ECPU and FME soft reset, hold, then release. */
	WREG32_P(mmVCE_SOFT_RESET,
		 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
		 VCE_SOFT_RESET__FME_SOFT_RESET_MASK,
		 ~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
		   VCE_SOFT_RESET__FME_SOFT_RESET_MASK));

	mdelay(100);

	WREG32_P(mmVCE_SOFT_RESET, 0,
		 ~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
		   VCE_SOFT_RESET__FME_SOFT_RESET_MASK));

	r = vce_v1_0_firmware_loaded(adev);

	/* Clear VCE_STATUS, otherwise SRBM thinks VCE1 is busy. */
	WREG32(mmVCE_STATUS, 0);

	if (r) {
		dev_err(adev->dev, "VCE not responding, giving up\n");
		return r;
	}

	return 0;
}
431
/*
 * vce_v1_0_stop - stop the VCE block
 *
 * Waits for the LMI and the block to go idle (warning but continuing on
 * timeout), stalls the UMC/register bus, disables the VCPU clock, holds
 * the ECPU/FME in soft reset and clears VCE_STATUS.
 *
 * Returns 0 on success, -EINVAL if the VCE IP block cannot be found.
 */
static int vce_v1_0_stop(struct amdgpu_device *adev)
{
	struct amdgpu_ip_block *ip_block;
	int status;
	int i;

	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE);
	if (!ip_block)
		return -EINVAL;

	if (vce_v1_0_lmi_clean(adev))
		dev_warn(adev->dev, "VCE not idle\n");

	if (vce_v1_0_wait_for_idle(ip_block))
		dev_warn(adev->dev, "VCE busy: VCE_STATUS=0x%x, SRBM_STATUS2=0x%x\n",
			 RREG32(mmVCE_STATUS), RREG32(mmSRBM_STATUS2));

	/* Stall UMC and register bus before resetting VCPU */
	WREG32_P(mmVCE_LMI_CTRL2, 1 << 8, ~(1 << 8));

	/* Wait up to 100 ms for the stall to take effect (bits 0x240). */
	for (i = 0; i < 100; ++i) {
		status = RREG32(mmVCE_LMI_STATUS);
		if (status & 0x240)
			break;
		mdelay(1);
	}

	/* Gate the VCPU clock. */
	WREG32_P(mmVCE_VCPU_CNTL, 0, ~VCE_VCPU_CNTL__CLK_EN_MASK);

	/* Hold ECPU and FME in soft reset. */
	WREG32_P(mmVCE_SOFT_RESET,
		 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
		 VCE_SOFT_RESET__FME_SOFT_RESET_MASK,
		 ~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
		   VCE_SOFT_RESET__FME_SOFT_RESET_MASK));

	/* Clear VCE_STATUS so the SRBM no longer reports VCE as busy. */
	WREG32(mmVCE_STATUS, 0);

	return 0;
}
471
/*
 * vce_v1_0_enable_mgcg - enable or disable medium-grain clock gating
 *
 * @adev: amdgpu_device pointer
 * @enable: true to enable MGCG (only honored when the cg_flags allow it)
 */
static void vce_v1_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
{
	u32 data;

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)) {
		data = RREG32(mmVCE_CLOCK_GATING_A);
		WREG32(mmVCE_CLOCK_GATING_A,
		       data | VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data &= ~0x1ff000;
		data |= 0xff800000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data & ~0x3ff);
	} else {
		/* Invert every bit manipulation from the enable path. */
		data = RREG32(mmVCE_CLOCK_GATING_A);
		WREG32(mmVCE_CLOCK_GATING_A,
		       data & ~VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0x1ff000;
		data &= ~0xff800000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data | 0x3ff);
	}
}
504
vce_v1_0_early_init(struct amdgpu_ip_block * ip_block)505 static int vce_v1_0_early_init(struct amdgpu_ip_block *ip_block)
506 {
507 struct amdgpu_device *adev = ip_block->adev;
508 int r;
509
510 r = amdgpu_vce_early_init(adev);
511 if (r)
512 return r;
513
514 adev->vce.num_rings = 2;
515
516 vce_v1_0_set_ring_funcs(adev);
517 vce_v1_0_set_irq_funcs(adev);
518
519 return 0;
520 }
521
/**
 * vce_v1_0_ensure_vcpu_bo_32bit_addr() - ensure the VCPU BO has a 32-bit address
 *
 * @adev: amdgpu_device pointer
 *
 * Due to various hardware limitations, the VCE1 requires
 * the VCPU BO to be in the low 32 bit address range.
 * Ensure that the VCPU BO has a 32-bit GPU address,
 * or return an error code when that isn't possible.
 *
 * To accommodate that, we put GART to the LOW address range
 * and reserve some GART pages where we map the VCPU BO,
 * so that it gets a 32-bit address.
 */
static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev)
{
	u64 gpu_addr = amdgpu_bo_gpu_offset(adev->vce.vcpu_bo);
	u64 bo_size = amdgpu_bo_size(adev->vce.vcpu_bo);
	/* Highest start address that keeps the whole BO below 4 GB. */
	u64 max_vcpu_bo_addr = 0xffffffff - bo_size;
	u64 num_pages = ALIGN(bo_size, AMDGPU_GPU_PAGE_SIZE) / AMDGPU_GPU_PAGE_SIZE;
	u64 pa = amdgpu_gmc_vram_pa(adev, adev->vce.vcpu_bo);
	u64 flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_VALID;

	/*
	 * Check if the VCPU BO already has a 32-bit address.
	 * Eg. if MC is configured to put VRAM in the low address range.
	 */
	if (gpu_addr <= max_vcpu_bo_addr)
		return 0;

	/* Check if we can map the VCPU BO in GART to a 32-bit address. */
	if (adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START > max_vcpu_bo_addr)
		return -EINVAL;

	/* Map the BO's VRAM pages into the reserved GART range. */
	amdgpu_gart_map_vram_range(adev, pa, VCE_V1_0_GART_PAGE_START,
				   num_pages, flags, adev->gart.ptr);
	adev->vce.gpu_addr = adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START;
	if (adev->vce.gpu_addr > max_vcpu_bo_addr)
		return -EINVAL;

	return 0;
}
564
vce_v1_0_sw_init(struct amdgpu_ip_block * ip_block)565 static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
566 {
567 struct amdgpu_device *adev = ip_block->adev;
568 struct amdgpu_ring *ring;
569 int r, i;
570
571 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 167, &adev->vce.irq);
572 if (r)
573 return r;
574
575 r = amdgpu_vce_sw_init(adev, VCE_V1_0_FW_SIZE +
576 VCE_V1_0_STACK_SIZE + VCE_V1_0_DATA_SIZE);
577 if (r)
578 return r;
579
580 r = amdgpu_vce_resume(adev);
581 if (r)
582 return r;
583 r = vce_v1_0_load_fw_signature(adev);
584 if (r)
585 return r;
586 r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
587 if (r)
588 return r;
589
590 for (i = 0; i < adev->vce.num_rings; i++) {
591 enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
592
593 ring = &adev->vce.ring[i];
594 sprintf(ring->name, "vce%d", i);
595 r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
596 hw_prio, NULL);
597 if (r)
598 return r;
599 }
600
601 return r;
602 }
603
vce_v1_0_sw_fini(struct amdgpu_ip_block * ip_block)604 static int vce_v1_0_sw_fini(struct amdgpu_ip_block *ip_block)
605 {
606 struct amdgpu_device *adev = ip_block->adev;
607 int r;
608
609 r = amdgpu_vce_suspend(adev);
610 if (r)
611 return r;
612
613 return amdgpu_vce_sw_fini(adev);
614 }
615
616 /**
617 * vce_v1_0_hw_init - start and test VCE block
618 *
619 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
620 *
621 * Initialize the hardware, boot up the VCPU and do some testing
622 */
vce_v1_0_hw_init(struct amdgpu_ip_block * ip_block)623 static int vce_v1_0_hw_init(struct amdgpu_ip_block *ip_block)
624 {
625 struct amdgpu_device *adev = ip_block->adev;
626 int i, r;
627
628 if (adev->pm.dpm_enabled)
629 amdgpu_dpm_enable_vce(adev, true);
630 else
631 amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
632
633 for (i = 0; i < adev->vce.num_rings; i++) {
634 r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
635 if (r)
636 return r;
637 }
638
639 dev_info(adev->dev, "VCE initialized successfully.\n");
640
641 return 0;
642 }
643
vce_v1_0_hw_fini(struct amdgpu_ip_block * ip_block)644 static int vce_v1_0_hw_fini(struct amdgpu_ip_block *ip_block)
645 {
646 int r;
647
648 r = vce_v1_0_stop(ip_block->adev);
649 if (r)
650 return r;
651
652 cancel_delayed_work_sync(&ip_block->adev->vce.idle_work);
653 return 0;
654 }
655
/*
 * vce_v1_0_suspend - suspend the VCE block
 *
 * Gates clocks/power (or disables DPM), halts the hardware and saves
 * common VCE state. Returns 0 on success or a negative error code.
 */
static int vce_v1_0_suspend(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int r;

	/*
	 * Proper cleanups before halting the HW engine:
	 * - cancel the delayed idle work
	 * - enable powergating
	 * - enable clockgating
	 * - disable dpm
	 *
	 * TODO: to align with the VCN implementation, move the
	 * jobs for clockgating/powergating/dpm setting to
	 * ->set_powergating_state().
	 */
	cancel_delayed_work_sync(&adev->vce.idle_work);

	if (adev->pm.dpm_enabled) {
		amdgpu_dpm_enable_vce(adev, false);
	} else {
		amdgpu_asic_set_vce_clocks(adev, 0, 0);
		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
						       AMD_PG_STATE_GATE);
		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
						       AMD_CG_STATE_GATE);
	}

	r = vce_v1_0_hw_fini(ip_block);
	if (r) {
		dev_err(adev->dev, "vce_v1_0_hw_fini() failed with error %i", r);
		return r;
	}

	return amdgpu_vce_suspend(adev);
}
692
vce_v1_0_resume(struct amdgpu_ip_block * ip_block)693 static int vce_v1_0_resume(struct amdgpu_ip_block *ip_block)
694 {
695 struct amdgpu_device *adev = ip_block->adev;
696 int r;
697
698 r = amdgpu_vce_resume(adev);
699 if (r)
700 return r;
701 r = vce_v1_0_load_fw_signature(adev);
702 if (r)
703 return r;
704 r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
705 if (r)
706 return r;
707
708 return vce_v1_0_hw_init(ip_block);
709 }
710
vce_v1_0_set_interrupt_state(struct amdgpu_device * adev,struct amdgpu_irq_src * source,unsigned int type,enum amdgpu_interrupt_state state)711 static int vce_v1_0_set_interrupt_state(struct amdgpu_device *adev,
712 struct amdgpu_irq_src *source,
713 unsigned int type,
714 enum amdgpu_interrupt_state state)
715 {
716 uint32_t val = 0;
717
718 if (state == AMDGPU_IRQ_STATE_ENABLE)
719 val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
720
721 WREG32_P(mmVCE_SYS_INT_EN, val,
722 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
723 return 0;
724 }
725
vce_v1_0_process_interrupt(struct amdgpu_device * adev,struct amdgpu_irq_src * source,struct amdgpu_iv_entry * entry)726 static int vce_v1_0_process_interrupt(struct amdgpu_device *adev,
727 struct amdgpu_irq_src *source,
728 struct amdgpu_iv_entry *entry)
729 {
730 dev_dbg(adev->dev, "IH: VCE\n");
731 switch (entry->src_data[0]) {
732 case 0:
733 case 1:
734 amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
735 break;
736 default:
737 dev_err(adev->dev, "Unhandled interrupt: %d %d\n",
738 entry->src_id, entry->src_data[0]);
739 break;
740 }
741
742 return 0;
743 }
744
vce_v1_0_set_clockgating_state(struct amdgpu_ip_block * ip_block,enum amd_clockgating_state state)745 static int vce_v1_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
746 enum amd_clockgating_state state)
747 {
748 struct amdgpu_device *adev = ip_block->adev;
749
750 vce_v1_0_init_cg(adev);
751 vce_v1_0_enable_mgcg(adev, state == AMD_CG_STATE_GATE);
752
753 return 0;
754 }
755
vce_v1_0_set_powergating_state(struct amdgpu_ip_block * ip_block,enum amd_powergating_state state)756 static int vce_v1_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
757 enum amd_powergating_state state)
758 {
759 struct amdgpu_device *adev = ip_block->adev;
760
761 /*
762 * This doesn't actually powergate the VCE block.
763 * That's done in the dpm code via the SMC. This
764 * just re-inits the block as necessary. The actual
765 * gating still happens in the dpm code. We should
766 * revisit this when there is a cleaner line between
767 * the smc and the hw blocks
768 */
769 if (state == AMD_PG_STATE_GATE)
770 return vce_v1_0_stop(adev);
771 else
772 return vce_v1_0_start(adev);
773 }
774
/* IP-block lifecycle callbacks for the VCE 1.0 block. */
static const struct amd_ip_funcs vce_v1_0_ip_funcs = {
	.name = "vce_v1_0",
	.early_init = vce_v1_0_early_init,
	.sw_init = vce_v1_0_sw_init,
	.sw_fini = vce_v1_0_sw_fini,
	.hw_init = vce_v1_0_hw_init,
	.hw_fini = vce_v1_0_hw_fini,
	.suspend = vce_v1_0_suspend,
	.resume = vce_v1_0_resume,
	.is_idle = vce_v1_0_is_idle,
	.wait_for_idle = vce_v1_0_wait_for_idle,
	.set_clockgating_state = vce_v1_0_set_clockgating_state,
	.set_powergating_state = vce_v1_0_set_powergating_state,
};
789
/*
 * Ring callbacks shared by both VCE 1.0 rings. Generic amdgpu_vce_*
 * helpers handle command submission; only the pointer accessors are
 * version-specific.
 */
static const struct amdgpu_ring_funcs vce_v1_0_ring_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0xf,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.get_rptr = vce_v1_0_ring_get_rptr,
	.get_wptr = vce_v1_0_ring_get_wptr,
	.set_wptr = vce_v1_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs,
	.emit_frame_size = 6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
	.emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
	.emit_ib = amdgpu_vce_ring_emit_ib,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};
811
vce_v1_0_set_ring_funcs(struct amdgpu_device * adev)812 static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev)
813 {
814 int i;
815
816 for (i = 0; i < adev->vce.num_rings; i++) {
817 adev->vce.ring[i].funcs = &vce_v1_0_ring_funcs;
818 adev->vce.ring[i].me = i;
819 }
820 };
821
/* Interrupt source callbacks: state toggling and IV-ring processing. */
static const struct amdgpu_irq_src_funcs vce_v1_0_irq_funcs = {
	.set = vce_v1_0_set_interrupt_state,
	.process = vce_v1_0_process_interrupt,
};
826
vce_v1_0_set_irq_funcs(struct amdgpu_device * adev)827 static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev)
828 {
829 adev->vce.irq.num_types = 1;
830 adev->vce.irq.funcs = &vce_v1_0_irq_funcs;
831 };
832
/* Registration record for the VCE 1.0 IP block (version 1.0, rev 0). */
const struct amdgpu_ip_block_version vce_v1_0_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 1,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v1_0_ip_funcs,
};
840