1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright 2025 Advanced Micro Devices, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 */
24 #include "amdgpu_smu.h"
25 #include "amdgpu_reset.h"
26 #include "amdgpu_ras_mp1_v13_0.h"
27
/*
 * RAS-level message IDs. The helpers in this file compare their @msg
 * argument against these values to pick the "Ce" flavour of the
 * corresponding SMU message instead of the default one.
 */
#define RAS_MP1_MSG_QueryValidMcaCeCount	0x3A
#define RAS_MP1_MSG_McaBankCeDumpDW		0x3B
30
/**
 * mp1_v13_0_get_valid_bank_count - ask the SMU how many valid banks exist
 * @ras_core: RAS core context; its ->dev member is used as the amdgpu device
 * @msg: RAS_MP1_MSG_QueryValidMcaCeCount selects SMU_MSG_QueryValidMcaCeCount,
 *       any other value selects SMU_MSG_QueryValidMcaCount
 * @count: output; receives the value returned by the SMU
 *
 * The SMU message is only sent while the read side of the reset-domain
 * semaphore is held. The semaphore is taken with a trylock, so this function
 * does not wait for it.
 *
 * Return: 0 on success, -EINVAL if @count is NULL,
 * -RAS_CORE_GPU_IN_MODE1_RESET if the semaphore could not be taken, or the
 * error reported by amdgpu_smu_ras_send_msg(). On any non-zero result
 * *@count is set to 0.
 */
static int mp1_v13_0_get_valid_bank_count(struct ras_core_context *ras_core,
					  u32 msg, u32 *count)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev;
	u32 smu_msg = SMU_MSG_QueryValidMcaCount;
	int err;

	if (!count)
		return -EINVAL;

	/* Override the default message when the CE variant is requested. */
	if (msg == RAS_MP1_MSG_QueryValidMcaCeCount)
		smu_msg = SMU_MSG_QueryValidMcaCeCount;

	if (!down_read_trylock(&adev->reset_domain->sem)) {
		err = -RAS_CORE_GPU_IN_MODE1_RESET;
		goto out;
	}

	err = amdgpu_smu_ras_send_msg(adev, smu_msg, 0, count);
	up_read(&adev->reset_domain->sem);

out:
	/* Never hand a stale or partial count back to the caller on failure. */
	if (err)
		*count = 0;

	return err;
}
56
/**
 * mp1_v13_0_dump_valid_bank - read one 64-bit bank register through the SMU
 * @ras_core: RAS core context; its ->dev member is used as the amdgpu device
 * @msg: RAS_MP1_MSG_McaBankCeDumpDW selects SMU_MSG_McaBankCeDumpDW,
 *       any other value selects SMU_MSG_McaBankDumpDW
 * @idx: bank index; only the low 16 bits are sent (bits 31:16 of the
 *       SMU parameter)
 * @reg_idx: register index inside the bank; converted to a byte offset of
 *           @reg_idx * 8
 * @val: output; receives the assembled 64-bit register value
 *
 * The register is fetched as two 32-bit reads at byte offsets
 * (@reg_idx * 8) and (@reg_idx * 8 + 4). The first read supplies the low
 * dword and the second the high dword of *@val. Both reads happen while the
 * read side of the reset-domain semaphore is held; the semaphore is taken
 * with a trylock, so this function does not wait for it.
 *
 * Return: 0 on success, -EINVAL if @val is NULL,
 * -RAS_CORE_GPU_IN_MODE1_RESET if the semaphore could not be taken, or the
 * error reported by amdgpu_smu_ras_send_msg(). *@val is left untouched on
 * failure.
 */
static int mp1_v13_0_dump_valid_bank(struct ras_core_context *ras_core,
				     u32 msg, u32 idx, u32 reg_idx, u64 *val)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev;
	u32 smu_msg = (msg == RAS_MP1_MSG_McaBankCeDumpDW) ?
			SMU_MSG_McaBankCeDumpDW : SMU_MSG_McaBankDumpDW;
	u32 data[2] = {0, 0};
	u32 offset = reg_idx * 8;
	u32 param;
	unsigned int i;
	int ret = 0;

	/* Same argument validation as mp1_v13_0_get_valid_bank_count(). */
	if (!val)
		return -EINVAL;

	if (!down_read_trylock(&adev->reset_domain->sem))
		return -RAS_CORE_GPU_IN_MODE1_RESET;

	for (i = 0; i < ARRAY_SIZE(data); i++) {
		/* [31:16] bank index, [15:0] dword-aligned byte offset. */
		param = ((idx & 0xffff) << 16) | ((offset + (i << 2)) & 0xfffc);
		ret = amdgpu_smu_ras_send_msg(adev, smu_msg, param, &data[i]);
		if (ret) {
			RAS_DEV_ERR(adev, "ACA failed to read register[%u], offset:0x%x\n",
				    reg_idx, offset);
			break;
		}
	}
	up_read(&adev->reset_domain->sem);

	/* Only publish the value once both halves were read successfully. */
	if (!ret)
		*val = ((u64)data[1] << 32) | data[0];

	return ret;
}
89
90 const struct ras_mp1_sys_func amdgpu_ras_mp1_sys_func_v13_0 = {
91 .mp1_get_valid_bank_count = mp1_v13_0_get_valid_bank_count,
92 .mp1_dump_valid_bank = mp1_v13_0_dump_valid_bank,
93 };
94
95