xref: /linux/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mp1_v13_0.c (revision 6dfafbd0299a60bfb5d5e277fdf100037c7ded07) !
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright 2025 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  *
23  */
24 #include "amdgpu_smu.h"
25 #include "amdgpu_reset.h"
26 #include "amdgpu_ras_mp1_v13_0.h"
27 
/*
 * RAS MP1 message ids. The functions in this file compare their 'msg'
 * argument against these values to pick the "Ce" flavour of the matching
 * SMU message; any other value selects the non-"Ce" flavour.
 */
#define RAS_MP1_MSG_QueryValidMcaCeCount	0x3A
#define RAS_MP1_MSG_McaBankCeDumpDW		0x3B
30 
/**
 * mp1_v13_0_get_valid_bank_count - query a valid-bank count from the SMU
 * @ras_core: RAS core context; its ->dev member is used as the amdgpu device
 * @msg: RAS MP1 message id. RAS_MP1_MSG_QueryValidMcaCeCount maps to
 *       SMU_MSG_QueryValidMcaCeCount, every other value maps to
 *       SMU_MSG_QueryValidMcaCount.
 * @count: output location for the value returned by the SMU; set to 0
 *         whenever a non-zero status is returned
 *
 * The SMU message is only sent while a read lock on the device's reset
 * domain semaphore is held. The lock is taken with a trylock, so this
 * function does not wait for it.
 *
 * Return: -EINVAL if @count is NULL, -RAS_CORE_GPU_IN_MODE1_RESET if the
 * read lock could not be taken, otherwise the status returned by
 * amdgpu_smu_ras_send_msg().
 */
static int mp1_v13_0_get_valid_bank_count(struct ras_core_context *ras_core,
					  u32 msg, u32 *count)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev;
	u32 smu_msg;
	int status;

	if (!count)
		return -EINVAL;

	/* Translate the RAS-level message id into the SMU message id. */
	if (msg == RAS_MP1_MSG_QueryValidMcaCeCount)
		smu_msg = SMU_MSG_QueryValidMcaCeCount;
	else
		smu_msg = SMU_MSG_QueryValidMcaCount;

	if (!down_read_trylock(&adev->reset_domain->sem)) {
		status = -RAS_CORE_GPU_IN_MODE1_RESET;
		goto out;
	}

	status = amdgpu_smu_ras_send_msg(adev, smu_msg, 0, count);
	up_read(&adev->reset_domain->sem);

out:
	/* Never hand a stale or partial count back on failure. */
	if (status)
		*count = 0;

	return status;
}
56 
/**
 * mp1_v13_0_dump_valid_bank - read one 64-bit bank register through the SMU
 * @ras_core: RAS core context; its ->dev member is used as the amdgpu device
 * @msg: RAS MP1 message id. RAS_MP1_MSG_McaBankCeDumpDW maps to
 *       SMU_MSG_McaBankCeDumpDW, every other value maps to
 *       SMU_MSG_McaBankDumpDW.
 * @idx: index placed in bits 31:16 of the SMU message parameter (only the
 *       low 16 bits are used); presumably the bank index
 * @reg_idx: register index; converted to a byte offset of @reg_idx * 8
 * @val: output location for the assembled 64-bit value; written only on
 *       success
 *
 * The value is fetched as two 32-bit reads at byte offsets @reg_idx * 8 and
 * @reg_idx * 8 + 4, low dword first. Both reads happen under a read lock on
 * the device's reset domain semaphore, which is taken with a trylock.
 *
 * Return: -EINVAL if @val is NULL, -RAS_CORE_GPU_IN_MODE1_RESET if the read
 * lock could not be taken, otherwise the status returned by
 * amdgpu_smu_ras_send_msg() (0 when both dwords were read).
 */
static int mp1_v13_0_dump_valid_bank(struct ras_core_context *ras_core,
				     u32 msg, u32 idx, u32 reg_idx, u64 *val)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev;
	uint32_t data[2] = {0, 0};
	uint32_t param;
	uint32_t offset;
	unsigned int i;
	int ret = 0;
	u32 smu_msg = (msg == RAS_MP1_MSG_McaBankCeDumpDW) ?
			     SMU_MSG_McaBankCeDumpDW : SMU_MSG_McaBankDumpDW;

	/* Reject a missing output pointer, as the bank-count query does. */
	if (!val)
		return -EINVAL;

	if (!down_read_trylock(&adev->reset_domain->sem))
		return -RAS_CORE_GPU_IN_MODE1_RESET;

	/* Each register is 8 bytes wide and is read one dword at a time. */
	offset = reg_idx * 8;
	for (i = 0; i < ARRAY_SIZE(data); i++) {
		/* param layout: [31:16] idx, [15:0] dword-aligned byte offset */
		param = ((idx & 0xffff) << 16) | ((offset + (i << 2)) & 0xfffc);
		ret = amdgpu_smu_ras_send_msg(adev, smu_msg, param, &data[i]);
		if (ret) {
			RAS_DEV_ERR(adev, "ACA failed to read register[%u], offset:0x%x\n",
				reg_idx, offset);
			break;
		}
	}
	up_read(&adev->reset_domain->sem);

	/* Only publish the value when both halves were read successfully. */
	if (!ret)
		*val = ((uint64_t)data[1] << 32) | data[0];

	return ret;
}
89 
90 const struct ras_mp1_sys_func amdgpu_ras_mp1_sys_func_v13_0 = {
91 	.mp1_get_valid_bank_count = mp1_v13_0_get_valid_bank_count,
92 	.mp1_dump_valid_bank = mp1_v13_0_dump_valid_bank,
93 };
94 
95