1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright 2020-2022 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8 #include "gaudi2P.h"
9 #include "gaudi2_masks.h"
10 #include "../include/gaudi2/gaudi2_special_blocks.h"
11 #include "../include/hw_ip/mmu/mmu_general.h"
12 #include "../include/hw_ip/mmu/mmu_v2_0.h"
13 #include "../include/gaudi2/gaudi2_packets.h"
14 #include "../include/gaudi2/gaudi2_reg_map.h"
15 #include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
16 #include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"
17
18 #include <linux/module.h>
19 #include <linux/pci.h>
20 #include <linux/hwmon.h>
21 #include <linux/iommu.h>
22
23 #define GAUDI2_DMA_POOL_BLK_SIZE SZ_256 /* 256 bytes */
24
25 #define GAUDI2_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
26
27 #define GAUDI2_RESET_POLL_TIMEOUT_USEC 500000 /* 500ms */
28 #define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC 25000 /* 25s */
29 #define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC 25000 /* 25s */
30 #define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC 3000000 /* 3s */
31 #define GAUDI2_RESET_POLL_CNT 3
32 #define GAUDI2_RESET_WAIT_MSEC 1 /* 1ms */
33 #define GAUDI2_CPU_RESET_WAIT_MSEC 100 /* 100ms */
34 #define GAUDI2_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
35 #define GAUDI2_CB_POOL_CB_CNT 512
36 #define GAUDI2_CB_POOL_CB_SIZE SZ_128K /* 128KB */
37 #define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
38 #define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC 25000000 /* 25s */
39 #define GAUDI2_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
40 #define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
41
42 #define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT 3
43
44 /*
45 * since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs
46 * and the code relies on that value (for array size etc..) we define another value
47 * for MAX faulty TPCs which reflects the cluster binning requirements
48 */
49 #define MAX_CLUSTER_BINNING_FAULTY_TPCS 1
50 #define MAX_FAULTY_XBARS 1
51 #define MAX_FAULTY_EDMAS 1
52 #define MAX_FAULTY_DECODERS 1
53
54 #define GAUDI2_TPC_FULL_MASK 0x1FFFFFF
55 #define GAUDI2_HIF_HMMU_FULL_MASK 0xFFFF
56 #define GAUDI2_DECODER_FULL_MASK 0x3FF
57
58 #define GAUDI2_NA_EVENT_CAUSE 0xFF
59 #define GAUDI2_NUM_OF_QM_ERR_CAUSE 18
60 #define GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE 25
61 #define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE 3
62 #define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE 14
63 #define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE 3
64 #define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE 2
65 #define GAUDI2_NUM_OF_ROT_ERR_CAUSE 22
66 #define GAUDI2_NUM_OF_TPC_INTR_CAUSE 31
67 #define GAUDI2_NUM_OF_DEC_ERR_CAUSE 25
68 #define GAUDI2_NUM_OF_MME_ERR_CAUSE 16
69 #define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE 7
70 #define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE 8
71 #define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE 19
72 #define GAUDI2_NUM_OF_HBM_SEI_CAUSE 9
73 #define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE 3
74 #define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE 3
75 #define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE 2
76 #define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE 2
77 #define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE 2
78 #define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE 5
79
80 #define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 10)
81 #define GAUDI2_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 200)
82 #define GAUDI2_ARB_WDT_TIMEOUT (0x1000000)
83
84 #define GAUDI2_VDEC_TIMEOUT_USEC 10000 /* 10ms */
85 #define GAUDI2_PLDM_VDEC_TIMEOUT_USEC (GAUDI2_VDEC_TIMEOUT_USEC * 100)
86
87 #define KDMA_TIMEOUT_USEC USEC_PER_SEC
88
89 #define IS_DMA_IDLE(dma_core_sts0) \
90 (!((dma_core_sts0) & (DCORE0_EDMA0_CORE_STS0_BUSY_MASK)))
91
92 #define IS_DMA_HALTED(dma_core_sts1) \
93 ((dma_core_sts1) & (DCORE0_EDMA0_CORE_STS1_IS_HALT_MASK))
94
95 #define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
96
97 #define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))
98
99 #define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
100 ((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
101 (((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
102 (((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))
103
104 #define PCIE_DEC_EN_MASK 0x300
105 #define DEC_WORK_STATE_IDLE 0
106 #define DEC_WORK_STATE_PEND 3
107 #define IS_DEC_IDLE(dec_swreg15) \
108 (((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
109 ((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_PEND)
110
111 /* HBM MMU address scrambling parameters */
112 #define GAUDI2_HBM_MMU_SCRM_MEM_SIZE SZ_8M
113 #define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT 26
114 #define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT 0
115 #define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK DRAM_VA_HINT_MASK
116 #define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR 16
117 #define MMU_RANGE_INV_VA_LSB_SHIFT 12
118 #define MMU_RANGE_INV_VA_MSB_SHIFT 44
119 #define MMU_RANGE_INV_EN_SHIFT 0
120 #define MMU_RANGE_INV_ASID_EN_SHIFT 1
121 #define MMU_RANGE_INV_ASID_SHIFT 2
122
123 /* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in PMMU because it has
124 * a 2 entries FIFO, and hence it is not enabled for it.
125 */
126 #define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
127 #define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)
128
129 #define GAUDI2_MAX_STRING_LEN 64
130
131 #define GAUDI2_VDEC_MSIX_ENTRIES (GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
132 GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)
133
134 #define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)
135
136 /* RAZWI initiator coordinates */
137 #define RAZWI_GET_AXUSER_XY(x) \
138 ((x & 0xF8001FF0) >> 4)
139
140 #define RAZWI_GET_AXUSER_LOW_XY(x) \
141 ((x & 0x00001FF0) >> 4)
142
143 #define RAZWI_INITIATOR_AXUER_L_X_SHIFT 0
144 #define RAZWI_INITIATOR_AXUER_L_X_MASK 0x1F
145 #define RAZWI_INITIATOR_AXUER_L_Y_SHIFT 5
146 #define RAZWI_INITIATOR_AXUER_L_Y_MASK 0xF
147
148 #define RAZWI_INITIATOR_AXUER_H_X_SHIFT 23
149 #define RAZWI_INITIATOR_AXUER_H_X_MASK 0x1F
150
151 #define RAZWI_INITIATOR_ID_X_Y_LOW(x, y) \
152 ((((y) & RAZWI_INITIATOR_AXUER_L_Y_MASK) << RAZWI_INITIATOR_AXUER_L_Y_SHIFT) | \
153 (((x) & RAZWI_INITIATOR_AXUER_L_X_MASK) << RAZWI_INITIATOR_AXUER_L_X_SHIFT))
154
155 #define RAZWI_INITIATOR_ID_X_HIGH(x) \
156 (((x) & RAZWI_INITIATOR_AXUER_H_X_MASK) << RAZWI_INITIATOR_AXUER_H_X_SHIFT)
157
158 #define RAZWI_INITIATOR_ID_X_Y(xl, yl, xh) \
159 (RAZWI_INITIATOR_ID_X_Y_LOW(xl, yl) | RAZWI_INITIATOR_ID_X_HIGH(xh))
160
161 #define PSOC_RAZWI_ENG_STR_SIZE 128
162 #define PSOC_RAZWI_MAX_ENG_PER_RTR 5
163
164 /* HW scrambles only bits 0-25 */
165 #define HW_UNSCRAMBLED_BITS_MASK GENMASK_ULL(63, 26)
166
167 struct gaudi2_razwi_info {
168 u32 axuser_xy;
169 u32 rtr_ctrl;
170 u16 eng_id;
171 char *eng_name;
172 };
173
174 static struct gaudi2_razwi_info common_razwi_info[] = {
175 {RAZWI_INITIATOR_ID_X_Y(2, 4, 0), mmDCORE0_RTR0_CTRL_BASE,
176 GAUDI2_DCORE0_ENGINE_ID_DEC_0, "DEC0"},
177 {RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
178 GAUDI2_DCORE0_ENGINE_ID_DEC_1, "DEC1"},
179 {RAZWI_INITIATOR_ID_X_Y(17, 4, 18), mmDCORE1_RTR7_CTRL_BASE,
180 GAUDI2_DCORE1_ENGINE_ID_DEC_0, "DEC2"},
181 {RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
182 GAUDI2_DCORE1_ENGINE_ID_DEC_1, "DEC3"},
183 {RAZWI_INITIATOR_ID_X_Y(2, 11, 0), mmDCORE2_RTR0_CTRL_BASE,
184 GAUDI2_DCORE2_ENGINE_ID_DEC_0, "DEC4"},
185 {RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
186 GAUDI2_DCORE2_ENGINE_ID_DEC_1, "DEC5"},
187 {RAZWI_INITIATOR_ID_X_Y(17, 11, 18), mmDCORE3_RTR7_CTRL_BASE,
188 GAUDI2_DCORE3_ENGINE_ID_DEC_0, "DEC6"},
189 {RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
190 GAUDI2_DCORE3_ENGINE_ID_DEC_1, "DEC7"},
191 {RAZWI_INITIATOR_ID_X_Y(2, 4, 6), mmDCORE0_RTR0_CTRL_BASE,
192 GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC8"},
193 {RAZWI_INITIATOR_ID_X_Y(2, 4, 7), mmDCORE0_RTR0_CTRL_BASE,
194 GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC9"},
195 {RAZWI_INITIATOR_ID_X_Y(3, 4, 2), mmDCORE0_RTR1_CTRL_BASE,
196 GAUDI2_DCORE0_ENGINE_ID_TPC_0, "TPC0"},
197 {RAZWI_INITIATOR_ID_X_Y(3, 4, 4), mmDCORE0_RTR1_CTRL_BASE,
198 GAUDI2_DCORE0_ENGINE_ID_TPC_1, "TPC1"},
199 {RAZWI_INITIATOR_ID_X_Y(4, 4, 2), mmDCORE0_RTR2_CTRL_BASE,
200 GAUDI2_DCORE0_ENGINE_ID_TPC_2, "TPC2"},
201 {RAZWI_INITIATOR_ID_X_Y(4, 4, 4), mmDCORE0_RTR2_CTRL_BASE,
202 GAUDI2_DCORE0_ENGINE_ID_TPC_3, "TPC3"},
203 {RAZWI_INITIATOR_ID_X_Y(5, 4, 2), mmDCORE0_RTR3_CTRL_BASE,
204 GAUDI2_DCORE0_ENGINE_ID_TPC_4, "TPC4"},
205 {RAZWI_INITIATOR_ID_X_Y(5, 4, 4), mmDCORE0_RTR3_CTRL_BASE,
206 GAUDI2_DCORE0_ENGINE_ID_TPC_5, "TPC5"},
207 {RAZWI_INITIATOR_ID_X_Y(16, 4, 14), mmDCORE1_RTR6_CTRL_BASE,
208 GAUDI2_DCORE1_ENGINE_ID_TPC_0, "TPC6"},
209 {RAZWI_INITIATOR_ID_X_Y(16, 4, 16), mmDCORE1_RTR6_CTRL_BASE,
210 GAUDI2_DCORE1_ENGINE_ID_TPC_1, "TPC7"},
211 {RAZWI_INITIATOR_ID_X_Y(15, 4, 14), mmDCORE1_RTR5_CTRL_BASE,
212 GAUDI2_DCORE1_ENGINE_ID_TPC_2, "TPC8"},
213 {RAZWI_INITIATOR_ID_X_Y(15, 4, 16), mmDCORE1_RTR5_CTRL_BASE,
214 GAUDI2_DCORE1_ENGINE_ID_TPC_3, "TPC9"},
215 {RAZWI_INITIATOR_ID_X_Y(14, 4, 14), mmDCORE1_RTR4_CTRL_BASE,
216 GAUDI2_DCORE1_ENGINE_ID_TPC_4, "TPC10"},
217 {RAZWI_INITIATOR_ID_X_Y(14, 4, 16), mmDCORE1_RTR4_CTRL_BASE,
218 GAUDI2_DCORE1_ENGINE_ID_TPC_5, "TPC11"},
219 {RAZWI_INITIATOR_ID_X_Y(5, 11, 2), mmDCORE2_RTR3_CTRL_BASE,
220 GAUDI2_DCORE2_ENGINE_ID_TPC_0, "TPC12"},
221 {RAZWI_INITIATOR_ID_X_Y(5, 11, 4), mmDCORE2_RTR3_CTRL_BASE,
222 GAUDI2_DCORE2_ENGINE_ID_TPC_1, "TPC13"},
223 {RAZWI_INITIATOR_ID_X_Y(4, 11, 2), mmDCORE2_RTR2_CTRL_BASE,
224 GAUDI2_DCORE2_ENGINE_ID_TPC_2, "TPC14"},
225 {RAZWI_INITIATOR_ID_X_Y(4, 11, 4), mmDCORE2_RTR2_CTRL_BASE,
226 GAUDI2_DCORE2_ENGINE_ID_TPC_3, "TPC15"},
227 {RAZWI_INITIATOR_ID_X_Y(3, 11, 2), mmDCORE2_RTR1_CTRL_BASE,
228 GAUDI2_DCORE2_ENGINE_ID_TPC_4, "TPC16"},
229 {RAZWI_INITIATOR_ID_X_Y(3, 11, 4), mmDCORE2_RTR1_CTRL_BASE,
230 GAUDI2_DCORE2_ENGINE_ID_TPC_5, "TPC17"},
231 {RAZWI_INITIATOR_ID_X_Y(14, 11, 14), mmDCORE3_RTR4_CTRL_BASE,
232 GAUDI2_DCORE3_ENGINE_ID_TPC_0, "TPC18"},
233 {RAZWI_INITIATOR_ID_X_Y(14, 11, 16), mmDCORE3_RTR4_CTRL_BASE,
234 GAUDI2_DCORE3_ENGINE_ID_TPC_1, "TPC19"},
235 {RAZWI_INITIATOR_ID_X_Y(15, 11, 14), mmDCORE3_RTR5_CTRL_BASE,
236 GAUDI2_DCORE3_ENGINE_ID_TPC_2, "TPC20"},
237 {RAZWI_INITIATOR_ID_X_Y(15, 11, 16), mmDCORE3_RTR5_CTRL_BASE,
238 GAUDI2_DCORE3_ENGINE_ID_TPC_3, "TPC21"},
239 {RAZWI_INITIATOR_ID_X_Y(16, 11, 14), mmDCORE3_RTR6_CTRL_BASE,
240 GAUDI2_DCORE3_ENGINE_ID_TPC_4, "TPC22"},
241 {RAZWI_INITIATOR_ID_X_Y(16, 11, 16), mmDCORE3_RTR6_CTRL_BASE,
242 GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC23"},
243 {RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
244 GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC24"},
245 {RAZWI_INITIATOR_ID_X_Y(17, 4, 8), mmDCORE1_RTR7_CTRL_BASE,
246 GAUDI2_ENGINE_ID_NIC0_0, "NIC0"},
247 {RAZWI_INITIATOR_ID_X_Y(17, 4, 10), mmDCORE1_RTR7_CTRL_BASE,
248 GAUDI2_ENGINE_ID_NIC0_1, "NIC1"},
249 {RAZWI_INITIATOR_ID_X_Y(17, 4, 12), mmDCORE1_RTR7_CTRL_BASE,
250 GAUDI2_ENGINE_ID_NIC1_0, "NIC2"},
251 {RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
252 GAUDI2_ENGINE_ID_NIC1_1, "NIC3"},
253 {RAZWI_INITIATOR_ID_X_Y(17, 4, 15), mmDCORE1_RTR7_CTRL_BASE,
254 GAUDI2_ENGINE_ID_NIC2_0, "NIC4"},
255 {RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
256 GAUDI2_ENGINE_ID_NIC2_1, "NIC5"},
257 {RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
258 GAUDI2_ENGINE_ID_NIC3_0, "NIC6"},
259 {RAZWI_INITIATOR_ID_X_Y(2, 11, 6), mmDCORE2_RTR0_CTRL_BASE,
260 GAUDI2_ENGINE_ID_NIC3_1, "NIC7"},
261 {RAZWI_INITIATOR_ID_X_Y(2, 11, 8), mmDCORE2_RTR0_CTRL_BASE,
262 GAUDI2_ENGINE_ID_NIC4_0, "NIC8"},
263 {RAZWI_INITIATOR_ID_X_Y(17, 11, 12), mmDCORE3_RTR7_CTRL_BASE,
264 GAUDI2_ENGINE_ID_NIC4_1, "NIC9"},
265 {RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
266 GAUDI2_ENGINE_ID_NIC5_0, "NIC10"},
267 {RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
268 GAUDI2_ENGINE_ID_NIC5_1, "NIC11"},
269 {RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
270 GAUDI2_ENGINE_ID_PDMA_0, "PDMA0"},
271 {RAZWI_INITIATOR_ID_X_Y(2, 4, 3), mmDCORE0_RTR0_CTRL_BASE,
272 GAUDI2_ENGINE_ID_PDMA_1, "PDMA1"},
273 {RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
274 GAUDI2_ENGINE_ID_SIZE, "PMMU"},
275 {RAZWI_INITIATOR_ID_X_Y(2, 4, 5), mmDCORE0_RTR0_CTRL_BASE,
276 GAUDI2_ENGINE_ID_SIZE, "PCIE"},
277 {RAZWI_INITIATOR_ID_X_Y(17, 4, 16), mmDCORE1_RTR7_CTRL_BASE,
278 GAUDI2_ENGINE_ID_ARC_FARM, "ARC_FARM"},
279 {RAZWI_INITIATOR_ID_X_Y(17, 4, 17), mmDCORE1_RTR7_CTRL_BASE,
280 GAUDI2_ENGINE_ID_KDMA, "KDMA"},
281 {RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF1_RTR_CTRL_BASE,
282 GAUDI2_DCORE0_ENGINE_ID_EDMA_0, "EDMA0"},
283 {RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE,
284 GAUDI2_DCORE0_ENGINE_ID_EDMA_1, "EDMA1"},
285 {RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF1_RTR_CTRL_BASE,
286 GAUDI2_DCORE1_ENGINE_ID_EDMA_0, "EDMA2"},
287 {RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF0_RTR_CTRL_BASE,
288 GAUDI2_DCORE1_ENGINE_ID_EDMA_1, "EDMA3"},
289 {RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
290 GAUDI2_DCORE2_ENGINE_ID_EDMA_0, "EDMA4"},
291 {RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
292 GAUDI2_DCORE2_ENGINE_ID_EDMA_1, "EDMA5"},
293 {RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
294 GAUDI2_DCORE3_ENGINE_ID_EDMA_0, "EDMA6"},
295 {RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
296 GAUDI2_DCORE3_ENGINE_ID_EDMA_1, "EDMA7"},
297 {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
298 GAUDI2_ENGINE_ID_SIZE, "HMMU0"},
299 {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
300 GAUDI2_ENGINE_ID_SIZE, "HMMU1"},
301 {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
302 GAUDI2_ENGINE_ID_SIZE, "HMMU2"},
303 {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
304 GAUDI2_ENGINE_ID_SIZE, "HMMU3"},
305 {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
306 GAUDI2_ENGINE_ID_SIZE, "HMMU4"},
307 {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
308 GAUDI2_ENGINE_ID_SIZE, "HMMU5"},
309 {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
310 GAUDI2_ENGINE_ID_SIZE, "HMMU6"},
311 {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
312 GAUDI2_ENGINE_ID_SIZE, "HMMU7"},
313 {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
314 GAUDI2_ENGINE_ID_SIZE, "HMMU8"},
315 {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
316 GAUDI2_ENGINE_ID_SIZE, "HMMU9"},
317 {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
318 GAUDI2_ENGINE_ID_SIZE, "HMMU10"},
319 {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
320 GAUDI2_ENGINE_ID_SIZE, "HMMU11"},
321 {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
322 GAUDI2_ENGINE_ID_SIZE, "HMMU12"},
323 {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
324 GAUDI2_ENGINE_ID_SIZE, "HMMU13"},
325 {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
326 GAUDI2_ENGINE_ID_SIZE, "HMMU14"},
327 {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
328 GAUDI2_ENGINE_ID_SIZE, "HMMU15"},
329 {RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
330 GAUDI2_ENGINE_ID_ROT_0, "ROT0"},
331 {RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
332 GAUDI2_ENGINE_ID_ROT_1, "ROT1"},
333 {RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
334 GAUDI2_ENGINE_ID_PSOC, "CPU"},
335 {RAZWI_INITIATOR_ID_X_Y(17, 11, 11), mmDCORE3_RTR7_CTRL_BASE,
336 GAUDI2_ENGINE_ID_PSOC, "PSOC"}
337 };
338
339 static struct gaudi2_razwi_info mme_razwi_info[] = {
340 /* MME X high coordinate is N/A, hence using only low coordinates */
341 {RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
342 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP0"},
343 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
344 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP1"},
345 {RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
346 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_WR"},
347 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
348 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_RD"},
349 {RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
350 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE0"},
351 {RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
352 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE1"},
353 {RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
354 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE2"},
355 {RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
356 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE3"},
357 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
358 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE4"},
359 {RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
360 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP0"},
361 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
362 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP1"},
363 {RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
364 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_WR"},
365 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
366 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_RD"},
367 {RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
368 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE0"},
369 {RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
370 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE1"},
371 {RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
372 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE2"},
373 {RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
374 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE3"},
375 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
376 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE4"},
377 {RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
378 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP0"},
379 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
380 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP1"},
381 {RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
382 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_WR"},
383 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
384 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_RD"},
385 {RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
386 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE0"},
387 {RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
388 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE1"},
389 {RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
390 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE2"},
391 {RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
392 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE3"},
393 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
394 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE4"},
395 {RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
396 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP0"},
397 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
398 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP1"},
399 {RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
400 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_WR"},
401 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
402 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_RD"},
403 {RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
404 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE0"},
405 {RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
406 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE1"},
407 {RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
408 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE2"},
409 {RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
410 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE3"},
411 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
412 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE4"}
413 };
414
415 enum hl_pmmu_fatal_cause {
416 LATENCY_RD_OUT_FIFO_OVERRUN,
417 LATENCY_WR_OUT_FIFO_OVERRUN,
418 };
419
420 enum hl_pcie_drain_ind_cause {
421 LBW_AXI_DRAIN_IND,
422 HBW_AXI_DRAIN_IND
423 };
424
425 static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
426 [HBM_ID0] = 0xFFFC,
427 [HBM_ID1] = 0xFFCF,
428 [HBM_ID2] = 0xF7F7,
429 [HBM_ID3] = 0x7F7F,
430 [HBM_ID4] = 0xFCFF,
431 [HBM_ID5] = 0xCFFF,
432 };
433
434 static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
435 [0] = HBM_ID0,
436 [1] = HBM_ID1,
437 [2] = HBM_ID4,
438 [3] = HBM_ID5,
439 };
440
441 static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
442 [EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
443 [EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
444 [EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
445 [EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
446 [EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
447 [EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
448 [EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
449 [EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
450 };
451
452 static const int gaudi2_qman_async_event_id[] = {
453 [GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
454 [GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
455 [GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
456 [GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
457 [GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
458 [GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
459 [GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
460 [GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
461 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
462 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
463 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
464 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
465 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
466 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
467 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
468 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
469 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
470 [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
471 [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
472 [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
473 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
474 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
475 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
476 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
477 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
478 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
479 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
480 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
481 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
482 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
483 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
484 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
485 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
486 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
487 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
488 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
489 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
490 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
491 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
492 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
493 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
494 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
495 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
496 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
497 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
498 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
499 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
500 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
501 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
502 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
503 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
504 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
505 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
506 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
507 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
508 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
509 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
510 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
511 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
512 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
513 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
514 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
515 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
516 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
517 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
518 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
519 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
520 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
521 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
522 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
523 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
524 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
525 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
526 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
527 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
528 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
529 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
530 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
531 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
532 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
533 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
534 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
535 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
536 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
537 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
538 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
539 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
540 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
541 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
542 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
543 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
544 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
545 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
546 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
547 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
548 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
549 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
550 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
551 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
552 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
553 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
554 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
555 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
556 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
557 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
558 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
559 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
560 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
561 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
562 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
563 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
564 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
565 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
566 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
567 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
568 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
569 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
570 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
571 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
572 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
573 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
574 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
575 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
576 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
577 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
578 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
579 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
580 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
581 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
582 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
583 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
584 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
585 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
586 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
587 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
588 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
589 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
590 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
591 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
592 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
593 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
594 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
595 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
596 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
597 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
598 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
599 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
600 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
601 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
602 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
603 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
604 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
605 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
606 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
607 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
608 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
609 [GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
610 [GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
611 [GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
612 [GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
613 [GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
614 [GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
615 [GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
616 [GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
617 [GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
618 [GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
619 [GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
620 [GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
621 [GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
622 [GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
623 [GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
624 [GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
625 [GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
626 [GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
627 [GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
628 [GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
629 [GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
630 [GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
631 [GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
632 [GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
633 [GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
634 [GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
635 [GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
636 [GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
637 [GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
638 [GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
639 [GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
640 [GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
641 [GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
642 [GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
643 [GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
644 [GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
645 [GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
646 [GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
647 [GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
648 [GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
649 [GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
650 [GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
651 [GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
652 [GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
653 [GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
654 [GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
655 [GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
656 [GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
657 [GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
658 [GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
659 [GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
660 [GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
661 [GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
662 [GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
663 [GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
664 [GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
665 [GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
666 [GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
667 [GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
668 [GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
669 [GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
670 [GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
671 [GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
672 [GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
673 [GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
674 [GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
675 [GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
676 [GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
677 [GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
678 [GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
679 [GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
680 [GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
681 [GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
682 [GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
683 [GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
684 [GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
685 [GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
686 [GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
687 [GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
688 [GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
689 [GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
690 [GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
691 [GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
692 [GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
693 [GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
694 [GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
695 [GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
696 [GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
697 [GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
698 [GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
699 [GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
700 [GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
701 [GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
702 [GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
703 [GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
704 [GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
705 [GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
706 [GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
707 [GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
708 [GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
709 [GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
710 [GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
711 [GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
712 [GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
713 };
714
715 static const int gaudi2_dma_core_async_event_id[] = {
716 [DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
717 [DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
718 [DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
719 [DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
720 [DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
721 [DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
722 [DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
723 [DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
724 [DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
725 [DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
726 [DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
727 };
728
729 static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
730 "qman sei intr",
731 "arc sei intr"
732 };
733
734 static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
735 "AXI_TERMINATOR WR",
736 "AXI_TERMINATOR RD",
737 "AXI SPLIT SEI Status"
738 };
739
740 static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
741 "cbu_bresp_sei_intr_cause",
742 "cbu_rresp_sei_intr_cause",
743 "lbu_bresp_sei_intr_cause",
744 "lbu_rresp_sei_intr_cause",
745 "cbu_axi_split_intr_cause",
746 "lbu_axi_split_intr_cause",
747 "arc_ip_excptn_sei_intr_cause",
748 "dmi_bresp_sei_intr_cause",
749 "aux2apb_err_sei_intr_cause",
750 "cfg_lbw_wr_terminated_intr_cause",
751 "cfg_lbw_rd_terminated_intr_cause",
752 "cfg_dccm_wr_terminated_intr_cause",
753 "cfg_dccm_rd_terminated_intr_cause",
754 "cfg_hbw_rd_terminated_intr_cause"
755 };
756
757 static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
758 "msix_vcd_hbw_sei",
759 "msix_l2c_hbw_sei",
760 "msix_nrm_hbw_sei",
761 "msix_abnrm_hbw_sei",
762 "msix_vcd_lbw_sei",
763 "msix_l2c_lbw_sei",
764 "msix_nrm_lbw_sei",
765 "msix_abnrm_lbw_sei",
766 "apb_vcd_lbw_sei",
767 "apb_l2c_lbw_sei",
768 "apb_nrm_lbw_sei",
769 "apb_abnrm_lbw_sei",
770 "dec_sei",
771 "dec_apb_sei",
772 "trc_apb_sei",
773 "lbw_mstr_if_sei",
774 "axi_split_bresp_err_sei",
775 "hbw_axi_wr_viol_sei",
776 "hbw_axi_rd_viol_sei",
777 "lbw_axi_wr_viol_sei",
778 "lbw_axi_rd_viol_sei",
779 "vcd_spi",
780 "l2c_spi",
781 "nrm_spi",
782 "abnrm_spi",
783 };
784
785 static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
786 "PQ AXI HBW error",
787 "CQ AXI HBW error",
788 "CP AXI HBW error",
789 "CP error due to undefined OPCODE",
790 "CP encountered STOP OPCODE",
791 "CP AXI LBW error",
792 "CP WRREG32 or WRBULK returned error",
793 "N/A",
794 "FENCE 0 inc over max value and clipped",
795 "FENCE 1 inc over max value and clipped",
796 "FENCE 2 inc over max value and clipped",
797 "FENCE 3 inc over max value and clipped",
798 "FENCE 0 dec under min value and clipped",
799 "FENCE 1 dec under min value and clipped",
800 "FENCE 2 dec under min value and clipped",
801 "FENCE 3 dec under min value and clipped",
802 "CPDMA Up overflow",
803 "PQC L2H error"
804 };
805
806 static const char * const gaudi2_lower_qman_error_cause[GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE] = {
807 "RSVD0",
808 "CQ AXI HBW error",
809 "CP AXI HBW error",
810 "CP error due to undefined OPCODE",
811 "CP encountered STOP OPCODE",
812 "CP AXI LBW error",
813 "CP WRREG32 or WRBULK returned error",
814 "N/A",
815 "FENCE 0 inc over max value and clipped",
816 "FENCE 1 inc over max value and clipped",
817 "FENCE 2 inc over max value and clipped",
818 "FENCE 3 inc over max value and clipped",
819 "FENCE 0 dec under min value and clipped",
820 "FENCE 1 dec under min value and clipped",
821 "FENCE 2 dec under min value and clipped",
822 "FENCE 3 dec under min value and clipped",
823 "CPDMA Up overflow",
824 "RSVD17",
825 "CQ_WR_IFIFO_CI_ERR",
826 "CQ_WR_CTL_CI_ERR",
827 "ARC_CQF_RD_ERR",
828 "ARC_CQ_WR_IFIFO_CI_ERR",
829 "ARC_CQ_WR_CTL_CI_ERR",
830 "ARC_AXI_ERR",
831 "CP_SWITCH_WDT_ERR"
832 };
833
834 static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
835 "Choice push while full error",
836 "Choice Q watchdog error",
837 "MSG AXI LBW returned with error"
838 };
839
840 static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
841 "qm_axi_err",
842 "qm_trace_fence_events",
843 "qm_sw_err",
844 "qm_cp_sw_stop",
845 "lbw_mstr_rresp_err",
846 "lbw_mstr_bresp_err",
847 "lbw_msg_slverr",
848 "hbw_msg_slverr",
849 "wbc_slverr",
850 "hbw_mstr_rresp_err",
851 "hbw_mstr_bresp_err",
852 "sb_resp_intr",
853 "mrsb_resp_intr",
854 "core_dw_status_0",
855 "core_dw_status_1",
856 "core_dw_status_2",
857 "core_dw_status_3",
858 "core_dw_status_4",
859 "core_dw_status_5",
860 "core_dw_status_6",
861 "core_dw_status_7",
862 "async_arc2cpu_sei_intr",
863 };
864
865 static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
866 "tpc_address_exceed_slm",
867 "tpc_div_by_0",
868 "tpc_spu_mac_overflow",
869 "tpc_spu_addsub_overflow",
870 "tpc_spu_abs_overflow",
871 "tpc_spu_fma_fp_dst_nan",
872 "tpc_spu_fma_fp_dst_inf",
873 "tpc_spu_convert_fp_dst_nan",
874 "tpc_spu_convert_fp_dst_inf",
875 "tpc_spu_fp_dst_denorm",
876 "tpc_vpu_mac_overflow",
877 "tpc_vpu_addsub_overflow",
878 "tpc_vpu_abs_overflow",
879 "tpc_vpu_convert_fp_dst_nan",
880 "tpc_vpu_convert_fp_dst_inf",
881 "tpc_vpu_fma_fp_dst_nan",
882 "tpc_vpu_fma_fp_dst_inf",
883 "tpc_vpu_fp_dst_denorm",
884 "tpc_assertions",
885 "tpc_illegal_instruction",
886 "tpc_pc_wrap_around",
887 "tpc_qm_sw_err",
888 "tpc_hbw_rresp_err",
889 "tpc_hbw_bresp_err",
890 "tpc_lbw_rresp_err",
891 "tpc_lbw_bresp_err",
892 "st_unlock_already_locked",
893 "invalid_lock_access",
894 "LD_L protection violation",
895 "ST_L protection violation",
896 "D$ L0CS mismatch",
897 };
898
899 static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
900 "agu_resp_intr",
901 "qman_axi_err",
902 "wap sei (wbc axi err)",
903 "arc sei",
904 "cfg access error",
905 "qm_sw_err",
906 "sbte_dbg_intr_0",
907 "sbte_dbg_intr_1",
908 "sbte_dbg_intr_2",
909 "sbte_dbg_intr_3",
910 "sbte_dbg_intr_4",
911 "sbte_prtn_intr_0",
912 "sbte_prtn_intr_1",
913 "sbte_prtn_intr_2",
914 "sbte_prtn_intr_3",
915 "sbte_prtn_intr_4",
916 };
917
918 static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
919 "WBC ERR RESP_0",
920 "WBC ERR RESP_1",
921 "AP SOURCE POS INF",
922 "AP SOURCE NEG INF",
923 "AP SOURCE NAN",
924 "AP RESULT POS INF",
925 "AP RESULT NEG INF",
926 };
927
928 static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
929 "HBW Read returned with error RRESP",
930 "HBW write returned with error BRESP",
931 "LBW write returned with error BRESP",
932 "descriptor_fifo_overflow",
933 "KDMA SB LBW Read returned with error",
934 "KDMA WBC LBW Write returned with error",
935 "TRANSPOSE ENGINE DESC FIFO OVERFLOW",
936 "WRONG CFG FOR COMMIT IN LIN DMA"
937 };
938
939 static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
940 "HBW/LBW Read returned with error RRESP",
941 "HBW/LBW write returned with error BRESP",
942 "LBW write returned with error BRESP",
943 "descriptor_fifo_overflow",
944 "KDMA SB LBW Read returned with error",
945 "KDMA WBC LBW Write returned with error",
946 "TRANSPOSE ENGINE DESC FIFO OVERFLOW",
947 "WRONG CFG FOR COMMIT IN LIN DMA"
948 };
949
950 struct gaudi2_sm_sei_cause_data {
951 const char *cause_name;
952 const char *log_name;
953 };
954
955 static const struct gaudi2_sm_sei_cause_data
956 gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
957 {"calculated SO value overflow/underflow", "SOB ID"},
958 {"payload address of monitor is not aligned to 4B", "monitor addr"},
959 {"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id"},
960 };
961
962 static const char * const
963 gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
964 "LATENCY_RD_OUT_FIFO_OVERRUN",
965 "LATENCY_WR_OUT_FIFO_OVERRUN",
966 };
967
968 static const char * const
969 gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
970 "LATENCY_RD_OUT_FIFO_OVERRUN",
971 "LATENCY_WR_OUT_FIFO_OVERRUN",
972 };
973
974 static const char * const
975 gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
976 "AXI drain HBW",
977 "AXI drain LBW",
978 };
979
980 static const char * const
981 gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
982 "HBW error response",
983 "LBW error response",
984 "TLP is blocked by RR"
985 };
986
987 static const int gaudi2_queue_id_to_engine_id[] = {
988 [GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_ENGINE_ID_PDMA_0,
989 [GAUDI2_QUEUE_ID_PDMA_1_0...GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_ENGINE_ID_PDMA_1,
990 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] =
991 GAUDI2_DCORE0_ENGINE_ID_EDMA_0,
992 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] =
993 GAUDI2_DCORE0_ENGINE_ID_EDMA_1,
994 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] =
995 GAUDI2_DCORE1_ENGINE_ID_EDMA_0,
996 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] =
997 GAUDI2_DCORE1_ENGINE_ID_EDMA_1,
998 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] =
999 GAUDI2_DCORE2_ENGINE_ID_EDMA_0,
1000 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] =
1001 GAUDI2_DCORE2_ENGINE_ID_EDMA_1,
1002 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] =
1003 GAUDI2_DCORE3_ENGINE_ID_EDMA_0,
1004 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] =
1005 GAUDI2_DCORE3_ENGINE_ID_EDMA_1,
1006 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3] =
1007 GAUDI2_DCORE0_ENGINE_ID_MME,
1008 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3] =
1009 GAUDI2_DCORE1_ENGINE_ID_MME,
1010 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3] =
1011 GAUDI2_DCORE2_ENGINE_ID_MME,
1012 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3] =
1013 GAUDI2_DCORE3_ENGINE_ID_MME,
1014 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0...GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] =
1015 GAUDI2_DCORE0_ENGINE_ID_TPC_0,
1016 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0...GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] =
1017 GAUDI2_DCORE0_ENGINE_ID_TPC_1,
1018 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0...GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] =
1019 GAUDI2_DCORE0_ENGINE_ID_TPC_2,
1020 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0...GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] =
1021 GAUDI2_DCORE0_ENGINE_ID_TPC_3,
1022 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0...GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] =
1023 GAUDI2_DCORE0_ENGINE_ID_TPC_4,
1024 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0...GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] =
1025 GAUDI2_DCORE0_ENGINE_ID_TPC_5,
1026 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0...GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] =
1027 GAUDI2_DCORE0_ENGINE_ID_TPC_6,
1028 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0...GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] =
1029 GAUDI2_DCORE1_ENGINE_ID_TPC_0,
1030 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0...GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] =
1031 GAUDI2_DCORE1_ENGINE_ID_TPC_1,
1032 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0...GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] =
1033 GAUDI2_DCORE1_ENGINE_ID_TPC_2,
1034 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0...GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] =
1035 GAUDI2_DCORE1_ENGINE_ID_TPC_3,
1036 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0...GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] =
1037 GAUDI2_DCORE1_ENGINE_ID_TPC_4,
1038 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0...GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] =
1039 GAUDI2_DCORE1_ENGINE_ID_TPC_5,
1040 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0...GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] =
1041 GAUDI2_DCORE2_ENGINE_ID_TPC_0,
1042 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0...GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] =
1043 GAUDI2_DCORE2_ENGINE_ID_TPC_1,
1044 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0...GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] =
1045 GAUDI2_DCORE2_ENGINE_ID_TPC_2,
1046 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0...GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] =
1047 GAUDI2_DCORE2_ENGINE_ID_TPC_3,
1048 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0...GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] =
1049 GAUDI2_DCORE2_ENGINE_ID_TPC_4,
1050 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0...GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] =
1051 GAUDI2_DCORE2_ENGINE_ID_TPC_5,
1052 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0...GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] =
1053 GAUDI2_DCORE3_ENGINE_ID_TPC_0,
1054 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0...GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] =
1055 GAUDI2_DCORE3_ENGINE_ID_TPC_1,
1056 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0...GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] =
1057 GAUDI2_DCORE3_ENGINE_ID_TPC_2,
1058 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0...GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] =
1059 GAUDI2_DCORE3_ENGINE_ID_TPC_3,
1060 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0...GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] =
1061 GAUDI2_DCORE3_ENGINE_ID_TPC_4,
1062 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0...GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] =
1063 GAUDI2_DCORE3_ENGINE_ID_TPC_5,
1064 [GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_ENGINE_ID_NIC0_0,
1065 [GAUDI2_QUEUE_ID_NIC_1_0...GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_ENGINE_ID_NIC0_1,
1066 [GAUDI2_QUEUE_ID_NIC_2_0...GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_ENGINE_ID_NIC1_0,
1067 [GAUDI2_QUEUE_ID_NIC_3_0...GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_ENGINE_ID_NIC1_1,
1068 [GAUDI2_QUEUE_ID_NIC_4_0...GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_ENGINE_ID_NIC2_0,
1069 [GAUDI2_QUEUE_ID_NIC_5_0...GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_ENGINE_ID_NIC2_1,
1070 [GAUDI2_QUEUE_ID_NIC_6_0...GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_ENGINE_ID_NIC3_0,
1071 [GAUDI2_QUEUE_ID_NIC_7_0...GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_ENGINE_ID_NIC3_1,
1072 [GAUDI2_QUEUE_ID_NIC_8_0...GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_ENGINE_ID_NIC4_0,
1073 [GAUDI2_QUEUE_ID_NIC_9_0...GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_ENGINE_ID_NIC4_1,
1074 [GAUDI2_QUEUE_ID_NIC_10_0...GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_ENGINE_ID_NIC5_0,
1075 [GAUDI2_QUEUE_ID_NIC_11_0...GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_ENGINE_ID_NIC5_1,
1076 [GAUDI2_QUEUE_ID_NIC_12_0...GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_ENGINE_ID_NIC6_0,
1077 [GAUDI2_QUEUE_ID_NIC_13_0...GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_ENGINE_ID_NIC6_1,
1078 [GAUDI2_QUEUE_ID_NIC_14_0...GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_ENGINE_ID_NIC7_0,
1079 [GAUDI2_QUEUE_ID_NIC_15_0...GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_ENGINE_ID_NIC7_1,
1080 [GAUDI2_QUEUE_ID_NIC_16_0...GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_ENGINE_ID_NIC8_0,
1081 [GAUDI2_QUEUE_ID_NIC_17_0...GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_ENGINE_ID_NIC8_1,
1082 [GAUDI2_QUEUE_ID_NIC_18_0...GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_ENGINE_ID_NIC9_0,
1083 [GAUDI2_QUEUE_ID_NIC_19_0...GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_ENGINE_ID_NIC9_1,
1084 [GAUDI2_QUEUE_ID_NIC_20_0...GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_ENGINE_ID_NIC10_0,
1085 [GAUDI2_QUEUE_ID_NIC_21_0...GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_ENGINE_ID_NIC10_1,
1086 [GAUDI2_QUEUE_ID_NIC_22_0...GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_ENGINE_ID_NIC11_0,
1087 [GAUDI2_QUEUE_ID_NIC_23_0...GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_ENGINE_ID_NIC11_1,
1088 [GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_ENGINE_ID_ROT_0,
1089 [GAUDI2_QUEUE_ID_ROT_1_0...GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_ENGINE_ID_ROT_1,
1090 };
1091
1092 const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
1093 [GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
1094 [GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
1095 [GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
1096 [GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
1097 [GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
1098 [GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
1099 [GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
1100 [GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
1101 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
1102 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
1103 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
1104 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
1105 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
1106 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
1107 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
1108 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
1109 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
1110 [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
1111 [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
1112 [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
1113 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
1114 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
1115 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
1116 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
1117 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
1118 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
1119 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
1120 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
1121 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
1122 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
1123 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
1124 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
1125 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
1126 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
1127 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
1128 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
1129 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
1130 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
1131 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
1132 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
1133 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
1134 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
1135 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
1136 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
1137 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
1138 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
1139 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
1140 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
1141 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
1142 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
1143 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
1144 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
1145 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
1146 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
1147 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
1148 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
1149 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
1150 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
1151 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
1152 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
1153 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
1154 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
1155 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
1156 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
1157 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
1158 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
1159 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
1160 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
1161 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
1162 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
1163 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
1164 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
1165 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
1166 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
1167 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
1168 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
1169 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
1170 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
1171 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
1172 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
1173 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
1174 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
1175 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
1176 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
1177 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
1178 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
1179 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
1180 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
1181 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
1182 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
1183 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
1184 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
1185 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
1186 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
1187 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
1188 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
1189 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
1190 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
1191 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
1192 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
1193 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
1194 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
1195 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
1196 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
1197 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
1198 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
1199 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
1200 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
1201 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
1202 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
1203 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
1204 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
1205 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
1206 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
1207 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
1208 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
1209 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
1210 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
1211 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
1212 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
1213 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
1214 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
1215 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
1216 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
1217 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
1218 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
1219 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
1220 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
1221 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
1222 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
1223 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
1224 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
1225 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
1226 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
1227 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
1228 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
1229 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
1230 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
1231 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
1232 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
1233 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
1234 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
1235 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
1236 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
1237 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
1238 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
1239 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
1240 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
1241 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
1242 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
1243 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
1244 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
1245 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
1246 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
1247 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
1248 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
1249 [GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
1250 [GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
1251 [GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
1252 [GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
1253 [GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
1254 [GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
1255 [GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
1256 [GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
1257 [GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
1258 [GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
1259 [GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
1260 [GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
1261 [GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
1262 [GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
1263 [GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
1264 [GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
1265 [GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
1266 [GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
1267 [GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
1268 [GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
1269 [GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
1270 [GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
1271 [GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
1272 [GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
1273 [GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
1274 [GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
1275 [GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
1276 [GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
1277 [GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
1278 [GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
1279 [GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
1280 [GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
1281 [GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
1282 [GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
1283 [GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
1284 [GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
1285 [GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
1286 [GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
1287 [GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
1288 [GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
1289 [GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
1290 [GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
1291 [GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
1292 [GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
1293 [GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
1294 [GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
1295 [GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
1296 [GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
1297 [GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
1298 [GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
1299 [GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
1300 [GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
1301 [GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
1302 [GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
1303 [GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
1304 [GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
1305 [GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
1306 [GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
1307 [GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
1308 [GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
1309 [GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
1310 [GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
1311 [GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
1312 [GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
1313 [GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
1314 [GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
1315 [GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
1316 [GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
1317 [GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
1318 [GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
1319 [GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
1320 [GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
1321 [GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
1322 [GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
1323 [GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
1324 [GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
1325 [GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
1326 [GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
1327 [GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
1328 [GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
1329 [GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
1330 [GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
1331 [GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
1332 [GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
1333 [GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
1334 [GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
1335 [GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
1336 [GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
1337 [GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
1338 [GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
1339 [GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
1340 [GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
1341 [GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
1342 [GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
1343 [GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
1344 [GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
1345 [GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
1346 [GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
1347 [GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
1348 [GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
1349 [GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
1350 [GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
1351 [GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
1352 [GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
1353 };
1354
1355 static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
1356 [CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
1357 [CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
1358 [CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
1359 [CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
1360 [CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
1361 [CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
1362 [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
1363 [CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
1364 [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
1365 [CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE,
1366 [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE,
1367 [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE,
1368 [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE,
1369 [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE,
1370 [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE,
1371 [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE,
1372 [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE,
1373 [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE,
1374 [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE,
1375 [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE,
1376 [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE,
1377 [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE,
1378 [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE,
1379 [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE,
1380 [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE,
1381 [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE,
1382 [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE,
1383 [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE,
1384 [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE,
1385 [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE,
1386 [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE,
1387 [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE,
1388 [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE,
1389 [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE,
1390 [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE,
1391 [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE,
1392 [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE,
1393 [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE,
1394 [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE,
1395 [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE,
1396 [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE,
1397 [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE,
1398 [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE,
1399 [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE,
1400 [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE,
1401 [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE,
1402 [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE,
1403 [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE,
1404 [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE,
1405 [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE,
1406 [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE,
1407 [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE,
1408 [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE,
1409 [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE,
1410 [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE,
1411 [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE,
1412 [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE,
1413 [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE,
1414 [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE,
1415 [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE,
1416 [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE,
1417 [CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE,
1418 [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE,
1419 [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE,
1420 [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE,
1421 [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE,
1422 [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE,
1423 [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE,
1424 [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE,
1425 };
1426
1427 static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = {
1428 [CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE,
1429 [CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE,
1430 [CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE,
1431 [CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE,
1432 [CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE,
1433 [CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE,
1434 [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE,
1435 [CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE,
1436 [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE,
1437 [CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE,
1438 [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE,
1439 [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE,
1440 [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE,
1441 [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE,
1442 [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE,
1443 [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE,
1444 [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE,
1445 [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE,
1446 [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE,
1447 [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE,
1448 [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE,
1449 [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE,
1450 [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE,
1451 [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE,
1452 [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE,
1453 [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE,
1454 [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE,
1455 [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE,
1456 [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE,
1457 [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE,
1458 [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE,
1459 [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE,
1460 [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE,
1461 [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE,
1462 [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE,
1463 [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE,
1464 [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE,
1465 [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE,
1466 [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE,
1467 [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE,
1468 [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE,
1469 [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE,
1470 [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE,
1471 [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE,
1472 [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE,
1473 [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE,
1474 [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE,
1475 [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE,
1476 [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE,
1477 [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE,
1478 [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE,
1479 [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE,
1480 [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE,
1481 [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE,
1482 [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE,
1483 [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE,
1484 [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE,
1485 [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE,
1486 [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE,
1487 [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE,
1488 [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE,
1489 [CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE,
1490 [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE,
1491 [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE,
1492 [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE,
1493 [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE,
1494 [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE,
1495 [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE,
1496 [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE,
1497 };
1498
1499 const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = {
1500 [MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE,
1501 [MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE,
1502 [MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE,
1503 [MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE,
1504 };
1505
1506 static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = {
1507 [GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0,
1508 [GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0,
1509 [GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0,
1510 [GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0,
1511 [GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1,
1512 [GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1,
1513 [GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1,
1514 [GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1,
1515 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0,
1516 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0,
1517 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0,
1518 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0,
1519 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1,
1520 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1,
1521 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1,
1522 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1,
1523 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0,
1524 [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0,
1525 [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0,
1526 [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0,
1527 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0,
1528 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0,
1529 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0,
1530 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0,
1531 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1,
1532 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1,
1533 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1,
1534 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1,
1535 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2,
1536 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2,
1537 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2,
1538 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2,
1539 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3,
1540 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3,
1541 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3,
1542 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3,
1543 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4,
1544 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4,
1545 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4,
1546 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4,
1547 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5,
1548 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5,
1549 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5,
1550 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5,
1551 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24,
1552 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24,
1553 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24,
1554 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24,
1555 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2,
1556 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2,
1557 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2,
1558 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2,
1559 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3,
1560 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3,
1561 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3,
1562 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3,
1563 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4,
1564 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4,
1565 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4,
1566 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4,
1567 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6,
1568 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6,
1569 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6,
1570 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6,
1571 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7,
1572 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7,
1573 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7,
1574 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7,
1575 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8,
1576 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8,
1577 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8,
1578 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8,
1579 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9,
1580 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9,
1581 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9,
1582 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9,
1583 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10,
1584 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10,
1585 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10,
1586 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10,
1587 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11,
1588 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11,
1589 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11,
1590 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11,
1591 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4,
1592 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4,
1593 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4,
1594 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4,
1595 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5,
1596 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5,
1597 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5,
1598 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5,
1599 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1,
1600 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1,
1601 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1,
1602 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1,
1603 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12,
1604 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12,
1605 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12,
1606 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12,
1607 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13,
1608 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13,
1609 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13,
1610 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13,
1611 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14,
1612 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14,
1613 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14,
1614 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14,
1615 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15,
1616 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15,
1617 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15,
1618 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15,
1619 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16,
1620 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16,
1621 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16,
1622 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16,
1623 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17,
1624 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17,
1625 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17,
1626 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17,
1627 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6,
1628 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6,
1629 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6,
1630 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6,
1631 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7,
1632 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7,
1633 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7,
1634 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7,
1635 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5,
1636 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5,
1637 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5,
1638 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5,
1639 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18,
1640 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18,
1641 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18,
1642 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18,
1643 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19,
1644 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19,
1645 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19,
1646 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19,
1647 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20,
1648 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20,
1649 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20,
1650 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20,
1651 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21,
1652 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21,
1653 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21,
1654 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21,
1655 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22,
1656 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22,
1657 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22,
1658 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22,
1659 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23,
1660 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23,
1661 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23,
1662 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23,
1663 [GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0,
1664 [GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0,
1665 [GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0,
1666 [GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0,
1667 [GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1,
1668 [GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1,
1669 [GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1,
1670 [GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1,
1671 [GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2,
1672 [GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2,
1673 [GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2,
1674 [GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2,
1675 [GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3,
1676 [GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3,
1677 [GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3,
1678 [GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3,
1679 [GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4,
1680 [GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4,
1681 [GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4,
1682 [GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4,
1683 [GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5,
1684 [GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5,
1685 [GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5,
1686 [GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5,
1687 [GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6,
1688 [GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6,
1689 [GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6,
1690 [GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6,
1691 [GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7,
1692 [GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7,
1693 [GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7,
1694 [GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7,
1695 [GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8,
1696 [GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8,
1697 [GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8,
1698 [GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8,
1699 [GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9,
1700 [GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9,
1701 [GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9,
1702 [GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9,
1703 [GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10,
1704 [GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10,
1705 [GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10,
1706 [GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10,
1707 [GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11,
1708 [GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11,
1709 [GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11,
1710 [GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11,
1711 [GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12,
1712 [GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12,
1713 [GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12,
1714 [GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12,
1715 [GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13,
1716 [GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13,
1717 [GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13,
1718 [GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13,
1719 [GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14,
1720 [GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14,
1721 [GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14,
1722 [GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14,
1723 [GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15,
1724 [GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15,
1725 [GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15,
1726 [GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15,
1727 [GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16,
1728 [GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16,
1729 [GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16,
1730 [GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16,
1731 [GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17,
1732 [GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17,
1733 [GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17,
1734 [GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17,
1735 [GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18,
1736 [GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18,
1737 [GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18,
1738 [GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18,
1739 [GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19,
1740 [GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19,
1741 [GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19,
1742 [GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19,
1743 [GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20,
1744 [GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20,
1745 [GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20,
1746 [GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20,
1747 [GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21,
1748 [GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21,
1749 [GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21,
1750 [GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21,
1751 [GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22,
1752 [GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22,
1753 [GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22,
1754 [GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22,
1755 [GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23,
1756 [GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23,
1757 [GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23,
1758 [GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23,
1759 [GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0,
1760 [GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0,
1761 [GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0,
1762 [GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0,
1763 [GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1,
1764 [GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1,
1765 [GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1,
1766 [GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1
1767 };
1768
1769 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = {
1770 [DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE,
1771 [DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE,
1772 [DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE,
1773 [DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE,
1774 [DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE,
1775 [DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE,
1776 [DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE,
1777 [DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE,
1778 [DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE,
1779 [DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE,
1780 [DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE
1781 };
1782
1783 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = {
1784 [MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE,
1785 [MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE,
1786 [MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE,
1787 [MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE
1788 };
1789
1790 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
1791 [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE,
1792 [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE,
1793 [TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE,
1794 [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE,
1795 [TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE,
1796 [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE,
1797 [TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE,
1798 [TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE,
1799 [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE,
1800 [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE,
1801 [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE,
1802 [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE,
1803 [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE,
1804 [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE,
1805 [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE,
1806 [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE,
1807 [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE,
1808 [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE,
1809 [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE,
1810 [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE,
1811 [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE,
1812 [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE,
1813 [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE,
1814 [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE,
1815 [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
1816 };
1817
1818 static const u32 gaudi2_tpc_eml_cfg_blocks_bases[TPC_ID_SIZE] = {
1819 [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_EML_CFG_BASE,
1820 [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_EML_CFG_BASE,
1821 [TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_EML_CFG_BASE,
1822 [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_EML_CFG_BASE,
1823 [TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_EML_CFG_BASE,
1824 [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_EML_CFG_BASE,
1825 [TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_EML_CFG_BASE,
1826 [TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_EML_CFG_BASE,
1827 [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_EML_CFG_BASE,
1828 [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_EML_CFG_BASE,
1829 [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_EML_CFG_BASE,
1830 [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_EML_CFG_BASE,
1831 [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_EML_CFG_BASE,
1832 [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_EML_CFG_BASE,
1833 [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_EML_CFG_BASE,
1834 [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_EML_CFG_BASE,
1835 [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_EML_CFG_BASE,
1836 [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_EML_CFG_BASE,
1837 [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_EML_CFG_BASE,
1838 [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_EML_CFG_BASE,
1839 [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_EML_CFG_BASE,
1840 [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_EML_CFG_BASE,
1841 [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_EML_CFG_BASE,
1842 [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_EML_CFG_BASE,
1843 [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_EML_CFG_BASE,
1844 };
1845
1846 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
1847 [ROTATOR_ID_0] = mmROT0_BASE,
1848 [ROTATOR_ID_1] = mmROT1_BASE
1849 };
1850
1851 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = {
1852 [TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0,
1853 [TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0,
1854 [TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0,
1855 [TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0,
1856 [TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0,
1857 [TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0,
1858 [TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0,
1859 [TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0,
1860 [TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0,
1861 [TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0,
1862 [TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0,
1863 [TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
1864 [TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
1865 [TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
1866 [TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
1867 [TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
1868 [TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
1869 [TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
1870 [TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
1871 [TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
1872 [TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
1873 [TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
1874 [TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
1875 [TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
1876 [TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
1877 };
1878
1879 static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
1880 [ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
1881 [ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
1882 };
1883
1884 static const u32 gaudi2_tpc_engine_id_to_tpc_id[] = {
1885 [GAUDI2_DCORE0_ENGINE_ID_TPC_0] = TPC_ID_DCORE0_TPC0,
1886 [GAUDI2_DCORE0_ENGINE_ID_TPC_1] = TPC_ID_DCORE0_TPC1,
1887 [GAUDI2_DCORE0_ENGINE_ID_TPC_2] = TPC_ID_DCORE0_TPC2,
1888 [GAUDI2_DCORE0_ENGINE_ID_TPC_3] = TPC_ID_DCORE0_TPC3,
1889 [GAUDI2_DCORE0_ENGINE_ID_TPC_4] = TPC_ID_DCORE0_TPC4,
1890 [GAUDI2_DCORE0_ENGINE_ID_TPC_5] = TPC_ID_DCORE0_TPC5,
1891 [GAUDI2_DCORE1_ENGINE_ID_TPC_0] = TPC_ID_DCORE1_TPC0,
1892 [GAUDI2_DCORE1_ENGINE_ID_TPC_1] = TPC_ID_DCORE1_TPC1,
1893 [GAUDI2_DCORE1_ENGINE_ID_TPC_2] = TPC_ID_DCORE1_TPC2,
1894 [GAUDI2_DCORE1_ENGINE_ID_TPC_3] = TPC_ID_DCORE1_TPC3,
1895 [GAUDI2_DCORE1_ENGINE_ID_TPC_4] = TPC_ID_DCORE1_TPC4,
1896 [GAUDI2_DCORE1_ENGINE_ID_TPC_5] = TPC_ID_DCORE1_TPC5,
1897 [GAUDI2_DCORE2_ENGINE_ID_TPC_0] = TPC_ID_DCORE2_TPC0,
1898 [GAUDI2_DCORE2_ENGINE_ID_TPC_1] = TPC_ID_DCORE2_TPC1,
1899 [GAUDI2_DCORE2_ENGINE_ID_TPC_2] = TPC_ID_DCORE2_TPC2,
1900 [GAUDI2_DCORE2_ENGINE_ID_TPC_3] = TPC_ID_DCORE2_TPC3,
1901 [GAUDI2_DCORE2_ENGINE_ID_TPC_4] = TPC_ID_DCORE2_TPC4,
1902 [GAUDI2_DCORE2_ENGINE_ID_TPC_5] = TPC_ID_DCORE2_TPC5,
1903 [GAUDI2_DCORE3_ENGINE_ID_TPC_0] = TPC_ID_DCORE3_TPC0,
1904 [GAUDI2_DCORE3_ENGINE_ID_TPC_1] = TPC_ID_DCORE3_TPC1,
1905 [GAUDI2_DCORE3_ENGINE_ID_TPC_2] = TPC_ID_DCORE3_TPC2,
1906 [GAUDI2_DCORE3_ENGINE_ID_TPC_3] = TPC_ID_DCORE3_TPC3,
1907 [GAUDI2_DCORE3_ENGINE_ID_TPC_4] = TPC_ID_DCORE3_TPC4,
1908 [GAUDI2_DCORE3_ENGINE_ID_TPC_5] = TPC_ID_DCORE3_TPC5,
1909 /* the PCI TPC is placed last (mapped liked HW) */
1910 [GAUDI2_DCORE0_ENGINE_ID_TPC_6] = TPC_ID_DCORE0_TPC6,
1911 };
1912
1913 static const u32 gaudi2_mme_engine_id_to_mme_id[] = {
1914 [GAUDI2_DCORE0_ENGINE_ID_MME] = MME_ID_DCORE0,
1915 [GAUDI2_DCORE1_ENGINE_ID_MME] = MME_ID_DCORE1,
1916 [GAUDI2_DCORE2_ENGINE_ID_MME] = MME_ID_DCORE2,
1917 [GAUDI2_DCORE3_ENGINE_ID_MME] = MME_ID_DCORE3,
1918 };
1919
1920 static const u32 gaudi2_edma_engine_id_to_edma_id[] = {
1921 [GAUDI2_ENGINE_ID_PDMA_0] = DMA_CORE_ID_PDMA0,
1922 [GAUDI2_ENGINE_ID_PDMA_1] = DMA_CORE_ID_PDMA1,
1923 [GAUDI2_DCORE0_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA0,
1924 [GAUDI2_DCORE0_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA1,
1925 [GAUDI2_DCORE1_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA2,
1926 [GAUDI2_DCORE1_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA3,
1927 [GAUDI2_DCORE2_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA4,
1928 [GAUDI2_DCORE2_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA5,
1929 [GAUDI2_DCORE3_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA6,
1930 [GAUDI2_DCORE3_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA7,
1931 [GAUDI2_ENGINE_ID_KDMA] = DMA_CORE_ID_KDMA,
1932 };
1933
1934 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1935 GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
1936 GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
1937 GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
1938 GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0,
1939 GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
1940 GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0,
1941 GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0,
1942 GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0,
1943 };
1944
1945 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = {
1946 "gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal",
1947 "gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal",
1948 "gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal",
1949 "gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal",
1950 "gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal",
1951 "gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal",
1952 "gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal",
1953 "gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal",
1954 "gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal",
1955 "gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
1956 };
1957
1958 enum rtr_id {
1959 DCORE0_RTR0,
1960 DCORE0_RTR1,
1961 DCORE0_RTR2,
1962 DCORE0_RTR3,
1963 DCORE0_RTR4,
1964 DCORE0_RTR5,
1965 DCORE0_RTR6,
1966 DCORE0_RTR7,
1967 DCORE1_RTR0,
1968 DCORE1_RTR1,
1969 DCORE1_RTR2,
1970 DCORE1_RTR3,
1971 DCORE1_RTR4,
1972 DCORE1_RTR5,
1973 DCORE1_RTR6,
1974 DCORE1_RTR7,
1975 DCORE2_RTR0,
1976 DCORE2_RTR1,
1977 DCORE2_RTR2,
1978 DCORE2_RTR3,
1979 DCORE2_RTR4,
1980 DCORE2_RTR5,
1981 DCORE2_RTR6,
1982 DCORE2_RTR7,
1983 DCORE3_RTR0,
1984 DCORE3_RTR1,
1985 DCORE3_RTR2,
1986 DCORE3_RTR3,
1987 DCORE3_RTR4,
1988 DCORE3_RTR5,
1989 DCORE3_RTR6,
1990 DCORE3_RTR7,
1991 };
1992
1993 static const u32 gaudi2_tpc_initiator_hbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
1994 DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
1995 DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
1996 DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
1997 DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
1998 DCORE0_RTR0
1999 };
2000
2001 static const u32 gaudi2_tpc_initiator_lbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
2002 DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2,
2003 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5,
2004 DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, DCORE2_RTR0, DCORE2_RTR0,
2005 DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, DCORE3_RTR7, DCORE3_RTR7,
2006 DCORE0_RTR0
2007 };
2008
2009 static const u32 gaudi2_dec_initiator_hbw_rtr_id[NUMBER_OF_DEC] = {
2010 DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
2011 DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
2012 };
2013
2014 static const u32 gaudi2_dec_initiator_lbw_rtr_id[NUMBER_OF_DEC] = {
2015 DCORE0_RTR1, DCORE0_RTR1, DCORE1_RTR6, DCORE1_RTR6, DCORE2_RTR1, DCORE2_RTR1,
2016 DCORE3_RTR6, DCORE3_RTR6, DCORE0_RTR0, DCORE0_RTR0
2017 };
2018
2019 static const u32 gaudi2_nic_initiator_hbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
2020 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
2021 DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
2022 };
2023
2024 static const u32 gaudi2_nic_initiator_lbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
2025 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
2026 DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
2027 };
2028
2029 static const u32 gaudi2_edma_initiator_hbw_sft[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
2030 mmSFT0_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
2031 mmSFT0_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
2032 mmSFT1_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
2033 mmSFT1_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
2034 mmSFT2_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
2035 mmSFT2_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
2036 mmSFT3_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
2037 mmSFT3_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE
2038 };
2039
2040 static const u32 gaudi2_pdma_initiator_hbw_rtr_id[NUM_OF_PDMA] = {
2041 DCORE0_RTR0, DCORE0_RTR0
2042 };
2043
2044 static const u32 gaudi2_pdma_initiator_lbw_rtr_id[NUM_OF_PDMA] = {
2045 DCORE0_RTR2, DCORE0_RTR2
2046 };
2047
2048 static const u32 gaudi2_rot_initiator_hbw_rtr_id[NUM_OF_ROT] = {
2049 DCORE2_RTR0, DCORE3_RTR7
2050 };
2051
2052 static const u32 gaudi2_rot_initiator_lbw_rtr_id[NUM_OF_ROT] = {
2053 DCORE2_RTR2, DCORE3_RTR5
2054 };
2055
2056 struct mme_initiators_rtr_id {
2057 u32 wap0;
2058 u32 wap1;
2059 u32 write;
2060 u32 read;
2061 u32 sbte0;
2062 u32 sbte1;
2063 u32 sbte2;
2064 u32 sbte3;
2065 u32 sbte4;
2066 };
2067
2068 enum mme_initiators {
2069 MME_WAP0 = 0,
2070 MME_WAP1,
2071 MME_WRITE,
2072 MME_READ,
2073 MME_SBTE0,
2074 MME_SBTE1,
2075 MME_SBTE2,
2076 MME_SBTE3,
2077 MME_SBTE4,
2078 MME_INITIATORS_MAX
2079 };
2080
2081 static const struct mme_initiators_rtr_id
2082 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = {
2083 { .wap0 = 5, .wap1 = 7, .write = 6, .read = 7,
2084 .sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6},
2085 { .wap0 = 10, .wap1 = 8, .write = 9, .read = 8,
2086 .sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8},
2087 { .wap0 = 21, .wap1 = 23, .write = 22, .read = 23,
2088 .sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23},
2089 { .wap0 = 30, .wap1 = 28, .write = 29, .read = 30,
2090 .sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28},
2091 };
2092
2093 enum razwi_event_sources {
2094 RAZWI_TPC,
2095 RAZWI_MME,
2096 RAZWI_EDMA,
2097 RAZWI_PDMA,
2098 RAZWI_NIC,
2099 RAZWI_DEC,
2100 RAZWI_ROT,
2101 RAZWI_ARC_FARM
2102 };
2103
2104 struct hbm_mc_error_causes {
2105 u32 mask;
2106 char cause[50];
2107 };
2108
2109 static struct hl_special_block_info gaudi2_special_blocks[] = GAUDI2_SPECIAL_BLOCKS;
2110
2111 /* Special blocks iterator is currently used to configure security protection bits,
2112 * and read global errors. Most HW blocks are addressable and those who aren't (N/A)-
2113 * must be skipped. Following configurations are commonly used for both PB config
2114 * and global error reading, since currently they both share the same settings.
2115 * Once it changes, we must remember to use separate configurations for either one.
2116 */
2117 static int gaudi2_iterator_skip_block_types[] = {
2118 GAUDI2_BLOCK_TYPE_PLL,
2119 GAUDI2_BLOCK_TYPE_EU_BIST,
2120 GAUDI2_BLOCK_TYPE_HBM,
2121 GAUDI2_BLOCK_TYPE_XFT
2122 };
2123
2124 static struct range gaudi2_iterator_skip_block_ranges[] = {
2125 /* Skip all PSOC blocks except for PSOC_GLOBAL_CONF */
2126 {mmPSOC_I2C_M0_BASE, mmPSOC_EFUSE_BASE},
2127 {mmPSOC_BTL_BASE, mmPSOC_MSTR_IF_RR_SHRD_HBW_BASE},
2128 /* Skip all CPU blocks except for CPU_IF */
2129 {mmCPU_CA53_CFG_BASE, mmCPU_CA53_CFG_BASE},
2130 {mmCPU_TIMESTAMP_BASE, mmCPU_MSTR_IF_RR_SHRD_HBW_BASE}
2131 };
2132
2133 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = {
2134 {HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"},
2135 {HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"},
2136 {HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"},
2137 {HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"},
2138 {HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"},
2139 };
2140
2141 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = {
2142 [HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even",
2143 [HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd",
2144 [HBM_SEI_READ_ERR] = "SEI read data error",
2145 [HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error",
2146 [HBM_SEI_CATTRIP] = "SEI CATTRIP asserted",
2147 [HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail",
2148 [HBM_SEI_DFI] = "SEI DFI error",
2149 [HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read",
2150 [HBM_SEI_BIST_FAIL] = "SEI BIST fail"
2151 };
2152
2153 struct mmu_spi_sei_cause {
2154 char cause[50];
2155 int clear_bit;
2156 };
2157
2158 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = {
2159 {"page fault", 1}, /* INTERRUPT_CLR[1] */
2160 {"page access", 1}, /* INTERRUPT_CLR[1] */
2161 {"bypass ddr", 2}, /* INTERRUPT_CLR[2] */
2162 {"multi hit", 2}, /* INTERRUPT_CLR[2] */
2163 {"mmu rei0", -1}, /* no clear register bit */
2164 {"mmu rei1", -1}, /* no clear register bit */
2165 {"stlb rei0", -1}, /* no clear register bit */
2166 {"stlb rei1", -1}, /* no clear register bit */
2167 {"rr privileged write hit", 2}, /* INTERRUPT_CLR[2] */
2168 {"rr privileged read hit", 2}, /* INTERRUPT_CLR[2] */
2169 {"rr secure write hit", 2}, /* INTERRUPT_CLR[2] */
2170 {"rr secure read hit", 2}, /* INTERRUPT_CLR[2] */
2171 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */
2172 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */
2173 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */
2174 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */
2175 {"slave error", 16}, /* INTERRUPT_CLR[16] */
2176 {"dec error", 17}, /* INTERRUPT_CLR[17] */
2177 {"burst fifo full", 2} /* INTERRUPT_CLR[2] */
2178 };
2179
2180 struct gaudi2_cache_invld_params {
2181 u64 start_va;
2182 u64 end_va;
2183 u32 inv_start_val;
2184 u32 flags;
2185 bool range_invalidation;
2186 };
2187
2188 struct gaudi2_tpc_idle_data {
2189 struct engines_data *e;
2190 unsigned long *mask;
2191 bool *is_idle;
2192 const char *tpc_fmt;
2193 };
2194
2195 struct gaudi2_tpc_mmu_data {
2196 u32 rw_asid;
2197 };
2198
2199 static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0};
2200
2201 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val);
2202 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id);
2203 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id);
2204 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id);
2205 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
2206 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
2207 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
2208 bool is_memset);
2209 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2210 struct engines_data *e);
2211 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2212 struct engines_data *e);
2213 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2214 struct engines_data *e);
2215 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
2216 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr);
2217
gaudi2_init_scrambler_hbm(struct hl_device * hdev)2218 static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
2219 {
2220
2221 }
2222
gaudi2_get_signal_cb_size(struct hl_device * hdev)2223 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev)
2224 {
2225 return sizeof(struct packet_msg_short);
2226 }
2227
gaudi2_get_wait_cb_size(struct hl_device * hdev)2228 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev)
2229 {
2230 return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence);
2231 }
2232
gaudi2_iterate_tpcs(struct hl_device * hdev,struct iterate_module_ctx * ctx)2233 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
2234 {
2235 struct asic_fixed_properties *prop = &hdev->asic_prop;
2236 int dcore, inst, tpc_seq;
2237 u32 offset;
2238
2239 /* init the return code */
2240 ctx->rc = 0;
2241
2242 for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
2243 for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
2244 tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
2245
2246 if (!(prop->tpc_enabled_mask & BIT(tpc_seq)))
2247 continue;
2248
2249 offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);
2250
2251 ctx->fn(hdev, dcore, inst, offset, ctx);
2252 if (ctx->rc) {
2253 dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
2254 dcore, inst);
2255 return;
2256 }
2257 }
2258 }
2259
2260 if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6)))
2261 return;
2262
2263 /* special check for PCI TPC (DCORE0_TPC6) */
2264 offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
2265 ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
2266 if (ctx->rc)
2267 dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
2268 }
2269
gaudi2_host_phys_addr_valid(u64 addr)2270 static bool gaudi2_host_phys_addr_valid(u64 addr)
2271 {
2272 if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1))
2273 return true;
2274
2275 return false;
2276 }
2277
set_number_of_functional_hbms(struct hl_device * hdev)2278 static int set_number_of_functional_hbms(struct hl_device *hdev)
2279 {
2280 struct asic_fixed_properties *prop = &hdev->asic_prop;
2281 u8 faulty_hbms = hweight64(hdev->dram_binning);
2282
2283 /* check if all HBMs should be used */
2284 if (!faulty_hbms) {
2285 dev_dbg(hdev->dev, "All HBM are in use (no binning)\n");
2286 prop->num_functional_hbms = GAUDI2_HBM_NUM;
2287 return 0;
2288 }
2289
2290 /*
2291 * check for error condition in which number of binning
2292 * candidates is higher than the maximum supported by the
2293 * driver (in which case binning mask shall be ignored and driver will
2294 * set the default)
2295 */
2296 if (faulty_hbms > MAX_FAULTY_HBMS) {
2297 dev_err(hdev->dev,
2298 "HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n",
2299 MAX_FAULTY_HBMS, hdev->dram_binning);
2300 return -EINVAL;
2301 }
2302
2303 /*
2304 * by default, number of functional HBMs in Gaudi2 is always
2305 * GAUDI2_HBM_NUM - 1.
2306 */
2307 prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
2308 return 0;
2309 }
2310
gaudi2_set_dram_properties(struct hl_device * hdev)2311 static int gaudi2_set_dram_properties(struct hl_device *hdev)
2312 {
2313 struct asic_fixed_properties *prop = &hdev->asic_prop;
2314 u32 basic_hbm_page_size;
2315 int rc;
2316
2317 rc = set_number_of_functional_hbms(hdev);
2318 if (rc)
2319 return -EINVAL;
2320
2321 /*
2322 * Due to HW bug in which TLB size is x16 smaller than expected we use a workaround
2323 * in which we are using x16 bigger page size to be able to populate the entire
2324 * HBM mappings in the TLB
2325 */
2326 basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
2327 prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
2328 prop->device_mem_alloc_default_page_size = prop->dram_page_size;
2329 prop->dram_size = prop->num_functional_hbms * SZ_16G;
2330 prop->dram_base_address = DRAM_PHYS_BASE;
2331 prop->dram_end_address = prop->dram_base_address + prop->dram_size;
2332 prop->dram_supports_virtual_memory = true;
2333
2334 prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
2335 prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
2336 prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
2337 prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;
2338
2339 /* Since the DRAM page size differs from the DMMU page size, we need to allocate
2340 * DRAM memory in units of dram_page_size and map this memory in
2341 * units of the DMMU page size. We overcome this size mismatch using a
2342 * scrambling routine which takes a DRAM page and converts it to a DMMU
2343 * page.
2344 * We therefore:
2345 * 1. partition the virtual address space into whole DRAM pages
2346 * (suppose we get n such pages)
2347 * 2. limit the amount of virtual address space we got from 1 above to
2348 * a multiple of 64M, as we don't want the scrambled address to cross
2349 * the DRAM virtual address space
2350 * ( m = (n * DRAM_page_size) / DMMU_page_size)
2351 * 3. determine the end address accordingly:
2352 * end_addr = start_addr + m * 48M
2353 *
2354 * The DRAM address MSBs (63:48) are not part of the roundup calculation.
2355 */
2356 prop->dmmu.start_addr = prop->dram_base_address +
2357 (prop->dram_page_size *
2358 DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
2359
2360 prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
2361 div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
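/*
 * In other words (following the scheme described above): start_addr is placed
 * right above the physical DRAM range, rounded up to a whole number of DRAM
 * pages, and end_addr covers one DRAM page for every DMMU page that fits
 * between start_addr and VA_HBM_SPACE_END.
 */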
2362
2363 return 0;
2364 }
2365
2366 static int gaudi2_set_fixed_properties(struct hl_device *hdev)
2367 {
2368 struct asic_fixed_properties *prop = &hdev->asic_prop;
2369 struct hw_queue_properties *q_props;
2370 u32 num_sync_stream_queues = 0;
2371 int i;
2372
2373 prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
2374 prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
2375 GFP_KERNEL);
2376
2377 if (!prop->hw_queues_props)
2378 return -ENOMEM;
2379
2380 q_props = prop->hw_queues_props;
2381
2382 for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
2383 q_props[i].type = QUEUE_TYPE_HW;
2384 q_props[i].driver_only = 0;
2385
2386 if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
2387 q_props[i].supports_sync_stream = 0;
2388 } else {
2389 q_props[i].supports_sync_stream = 1;
2390 num_sync_stream_queues++;
2391 }
2392
2393 q_props[i].cb_alloc_flags = CB_ALLOC_USER;
2394 }
2395
2396 q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
2397 q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
2398 q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;
2399
2400 prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
2401 prop->cfg_base_address = CFG_BASE;
2402 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
2403 prop->host_base_address = HOST_PHYS_BASE_0;
2404 prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
2405 prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
2406 prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
2407 prop->user_dec_intr_count = NUMBER_OF_DEC;
2408 prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
2409 prop->completion_mode = HL_COMPLETION_MODE_CS;
2410 prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
2411 prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;
2412
2413 prop->sram_base_address = SRAM_BASE_ADDR;
2414 prop->sram_size = SRAM_SIZE;
2415 prop->sram_end_address = prop->sram_base_address + prop->sram_size;
2416 prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;
2417
2418 prop->hints_range_reservation = true;
2419
2420 prop->rotator_enabled_mask = BIT(NUM_OF_ROT) - 1;
2421
2422 if (hdev->pldm)
2423 prop->mmu_pgt_size = 0x800000; /* 8MB */
2424 else
2425 prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;
2426
2427 prop->mmu_pte_size = HL_PTE_SIZE;
2428 prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
2429 prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
2430
2431 prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
2432 prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
2433 prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
2434 prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
2435 prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
2436 prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
2437 prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
2438 prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
2439 prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
2440 prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
2441 prop->dmmu.page_size = PAGE_SIZE_1GB;
2442 prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
2443 prop->dmmu.last_mask = LAST_MASK;
2444 prop->dmmu.host_resident = 1;
2445 prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
2446 prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
2447
2448 /*
2449 * This is done in order to be able to validate the FW descriptor (i.e. to validate that
2450 * the addresses and the space allocated for the FW image do not cross memory bounds).
2451 * For this reason we set the DRAM size to the minimum possible, and later it will
2452 * be modified according to what is reported in the cpucp info packet.
2453 */
2454 prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G;
2455
2456 hdev->pmmu_huge_range = true;
2457 prop->pmmu.host_resident = 1;
2458 prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
2459 prop->pmmu.last_mask = LAST_MASK;
2460 prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
2461 prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
2462
2463 prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
2464 prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
2465 prop->hints_host_hpage_reserved_va_range.start_addr =
2466 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
2467 prop->hints_host_hpage_reserved_va_range.end_addr =
2468 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END;
2469
2470 if (PAGE_SIZE == SZ_64K) {
2471 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K;
2472 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K;
2473 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K;
2474 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K;
2475 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K;
2476 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K;
2477 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K;
2478 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K;
2479 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K;
2480 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K;
2481 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K;
2482 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K;
2483 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2484 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2485 prop->pmmu.page_size = PAGE_SIZE_64KB;
2486
2487 /* shifts and masks are the same in PMMU and HPMMU */
2488 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2489 prop->pmmu_huge.page_size = PAGE_SIZE_16MB;
2490 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2491 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2492 } else {
2493 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K;
2494 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K;
2495 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K;
2496 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K;
2497 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K;
2498 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K;
2499 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K;
2500 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K;
2501 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K;
2502 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K;
2503 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K;
2504 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K;
2505 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2506 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2507 prop->pmmu.page_size = PAGE_SIZE_4KB;
2508
2509 /* shifts and masks are the same in PMMU and HPMMU */
2510 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2511 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
2512 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2513 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2514 }
2515
2516 prop->max_num_of_engines = GAUDI2_ENGINE_ID_SIZE;
2517 prop->num_engine_cores = CPU_ID_MAX;
2518 prop->cfg_size = CFG_SIZE;
2519 prop->max_asid = MAX_ASID;
2520 prop->num_of_events = GAUDI2_EVENT_SIZE;
2521
2522 prop->supports_engine_modes = true;
2523
2524 prop->dc_power_default = DC_POWER_DEFAULT;
2525
2526 prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
2527 prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE;
2528 prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
2529 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
2530
2531 strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2532
2533 prop->mme_master_slave_mode = 1;
2534
2535 prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
2536 (num_sync_stream_queues * HL_RSVD_SOBS);
2537
2538 prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
2539 (num_sync_stream_queues * HL_RSVD_MONS);
2540
2541 prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
2542 prop->tpc_interrupt_id = GAUDI2_IRQ_NUM_TPC_ASSERT;
2543 prop->eq_interrupt_id = GAUDI2_IRQ_NUM_EVENT_QUEUE;
2544
2545 prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
2546
2547 prop->fw_cpu_boot_dev_sts0_valid = false;
2548 prop->fw_cpu_boot_dev_sts1_valid = false;
2549 prop->hard_reset_done_by_fw = false;
2550 prop->gic_interrupts_enable = true;
2551
2552 prop->server_type = HL_SERVER_TYPE_UNKNOWN;
2553
2554 prop->max_dec = NUMBER_OF_DEC;
2555
2556 prop->clk_pll_index = HL_GAUDI2_MME_PLL;
2557
2558 prop->dma_mask = 64;
2559
2560 prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0;
2561
2562 return 0;
2563 }
2564
2565 static int gaudi2_pci_bars_map(struct hl_device *hdev)
2566 {
2567 static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"};
2568 bool is_wc[3] = {false, false, true};
2569 int rc;
2570
2571 rc = hl_pci_bars_map(hdev, name, is_wc);
2572 if (rc)
2573 return rc;
2574
2575 hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR);
2576
2577 return 0;
2578 }
2579
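/*
 * Re-points the DRAM BAR (inbound region 2) at 'addr'. Returns the previous BAR
 * base so that a caller can restore it later, or U64_MAX if the iATU is owned
 * by the FW or the region could not be configured.
 */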
2580 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
2581 {
2582 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2583 struct hl_inbound_pci_region pci_region;
2584 u64 old_addr = addr;
2585 int rc;
2586
2587 if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr))
2588 return old_addr;
2589
2590 if (hdev->asic_prop.iatu_done_by_fw)
2591 return U64_MAX;
2592
2593 /* Inbound Region 2 - Bar 4 - Point to DRAM */
2594 pci_region.mode = PCI_BAR_MATCH_MODE;
2595 pci_region.bar = DRAM_BAR_ID;
2596 pci_region.addr = addr;
2597 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
2598 if (rc)
2599 return U64_MAX;
2600
2601 if (gaudi2) {
2602 old_addr = gaudi2->dram_bar_cur_addr;
2603 gaudi2->dram_bar_cur_addr = addr;
2604 }
2605
2606 return old_addr;
2607 }
2608
2609 static int gaudi2_init_iatu(struct hl_device *hdev)
2610 {
2611 struct hl_inbound_pci_region inbound_region;
2612 struct hl_outbound_pci_region outbound_region;
2613 u32 bar_addr_low, bar_addr_high;
2614 int rc;
2615
2616 if (hdev->asic_prop.iatu_done_by_fw)
2617 return 0;
2618
2619 /* Temporary inbound Region 0 - Bar 0 - Point to CFG
2620 * We must map this region in BAR match mode in order to
2621 * fetch BAR physical base address
2622 */
2623 inbound_region.mode = PCI_BAR_MATCH_MODE;
2624 inbound_region.bar = SRAM_CFG_BAR_ID;
2625 /* Base address must be aligned to Bar size which is 256 MB */
2626 inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF;
2627 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2628 if (rc)
2629 return rc;
2630
2631 /* Fetch physical BAR address */
2632 bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF);
2633 bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF;
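/* The low bits of a PCI BAR register hold the type/prefetchable flags, hence the ~0xF mask */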
2634
2635 hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low;
2636
2637 /* Inbound Region 0 - Bar 0 - Point to CFG */
2638 inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2639 inbound_region.bar = SRAM_CFG_BAR_ID;
2640 inbound_region.offset_in_bar = 0;
2641 inbound_region.addr = STM_FLASH_BASE_ADDR;
2642 inbound_region.size = CFG_REGION_SIZE;
2643 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2644 if (rc)
2645 return rc;
2646
2647 /* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */
2648 inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2649 inbound_region.bar = SRAM_CFG_BAR_ID;
2650 inbound_region.offset_in_bar = CFG_REGION_SIZE;
2651 inbound_region.addr = BAR0_RSRVD_BASE_ADDR;
2652 inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE;
2653 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
2654 if (rc)
2655 return rc;
2656
2657 /* Inbound Region 2 - Bar 4 - Point to DRAM */
2658 inbound_region.mode = PCI_BAR_MATCH_MODE;
2659 inbound_region.bar = DRAM_BAR_ID;
2660 inbound_region.addr = DRAM_PHYS_BASE;
2661 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
2662 if (rc)
2663 return rc;
2664
2665 /* Outbound Region 0 - Point to Host */
2666 outbound_region.addr = HOST_PHYS_BASE_0;
2667 outbound_region.size = HOST_PHYS_SIZE_0;
2668 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
2669
2670 return rc;
2671 }
2672
2673 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev)
2674 {
2675 return RREG32(mmHW_STATE);
2676 }
2677
2678 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev)
2679 {
2680 struct asic_fixed_properties *prop = &hdev->asic_prop;
2681
2682 /*
2683 * check for error condition in which number of binning candidates
2684 * is higher than the maximum supported by the driver
2685 */
2686 if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) {
2687 dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n",
2688 MAX_CLUSTER_BINNING_FAULTY_TPCS,
2689 hdev->tpc_binning);
2690 return -EINVAL;
2691 }
2692
2693 prop->tpc_binning_mask = hdev->tpc_binning;
2694 prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK;
2695
2696 return 0;
2697 }
2698
2699 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev)
2700 {
2701 struct asic_fixed_properties *prop = &hdev->asic_prop;
2702 struct hw_queue_properties *q_props = prop->hw_queues_props;
2703 u64 tpc_binning_mask;
2704 u8 subst_idx = 0;
2705 int i, rc;
2706
2707 rc = gaudi2_tpc_binning_init_prop(hdev);
2708 if (rc)
2709 return rc;
2710
2711 tpc_binning_mask = prop->tpc_binning_mask;
2712
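/*
 * Each faulty TPC in the mask is paired with a designated substitute:
 * DCORE0_TPC6 for the first binned TPC and DCORE3_TPC5 for the second.
 * The substitute is cleared from the enabled mask and its four queues are
 * marked as binned.
 */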
2713 for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) {
2714 u8 subst_seq, binned, qid_base;
2715
2716 if (tpc_binning_mask == 0)
2717 break;
2718
2719 if (subst_idx == 0) {
2720 subst_seq = TPC_ID_DCORE0_TPC6;
2721 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
2722 } else {
2723 subst_seq = TPC_ID_DCORE3_TPC5;
2724 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0;
2725 }
2726
2727
2728 /* clear bit from mask */
2729 binned = __ffs(tpc_binning_mask);
2730 /*
2731 * Coverity complains about a possible out-of-bounds access in
2732 * clear_bit
2733 */
2734 if (binned >= TPC_ID_SIZE) {
2735 dev_err(hdev->dev,
2736 "Invalid binned TPC (binning mask: %llx)\n",
2737 tpc_binning_mask);
2738 return -EINVAL;
2739 }
2740 clear_bit(binned, (unsigned long *)&tpc_binning_mask);
2741
2742 /* also clear replacing TPC bit from enabled mask */
2743 clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);
2744
2745 /* bin the substitute TPC's queues */
2746 q_props[qid_base].binned = 1;
2747 q_props[qid_base + 1].binned = 1;
2748 q_props[qid_base + 2].binned = 1;
2749 q_props[qid_base + 3].binned = 1;
2750
2751 subst_idx++;
2752 }
2753
2754 return 0;
2755 }
2756
2757 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
2758 {
2759 struct asic_fixed_properties *prop = &hdev->asic_prop;
2760 u8 num_faulty;
2761
2762 num_faulty = hweight32(hdev->decoder_binning);
2763
2764 /*
2765 * check for error condition in which number of binning candidates
2766 * is higher than the maximum supported by the driver
2767 */
2768 if (num_faulty > MAX_FAULTY_DECODERS) {
2769 dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n",
2770 hdev->decoder_binning);
2771 return -EINVAL;
2772 }
2773
2774 prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);
2775
2776 if (prop->decoder_binning_mask)
2777 prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
2778 else
2779 prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;
2780
2781 return 0;
2782 }
2783
2784 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
2785 {
2786 struct asic_fixed_properties *prop = &hdev->asic_prop;
2787
2788 /* check if we should override default binning */
2789 if (!hdev->dram_binning) {
2790 prop->dram_binning_mask = 0;
2791 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
2792 return;
2793 }
2794
2795 /* set DRAM binning constraints */
2796 prop->faulty_dram_cluster_map |= hdev->dram_binning;
2797 prop->dram_binning_mask = hdev->dram_binning;
2798 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
2799 }
2800
2801 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
2802 {
2803 struct asic_fixed_properties *prop = &hdev->asic_prop;
2804 struct hw_queue_properties *q_props;
2805 u8 seq, num_faulty;
2806
2807 num_faulty = hweight32(hdev->edma_binning);
2808
2809 /*
2810 * check for error condition in which number of binning candidates
2811 * is higher than the maximum supported by the driver
2812 */
2813 if (num_faulty > MAX_FAULTY_EDMAS) {
2814 dev_err(hdev->dev,
2815 "EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n",
2816 hdev->edma_binning);
2817 return -EINVAL;
2818 }
2819
2820 if (!hdev->edma_binning) {
2821 prop->edma_binning_mask = 0;
2822 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
2823 return 0;
2824 }
2825
2826 seq = __ffs((unsigned long)hdev->edma_binning);
2827
2828 /* set binning constraints */
2829 prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
2830 prop->edma_binning_mask = hdev->edma_binning;
2831 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);
2832
2833 /* bin substitute EDMA's queue */
2834 q_props = prop->hw_queues_props;
2835 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
2836 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
2837 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
2838 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;
2839
2840 return 0;
2841 }
2842
2843 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
2844 {
2845 struct asic_fixed_properties *prop = &hdev->asic_prop;
2846 u8 num_faulty, seq;
2847
2848 /* check if we should override default binning */
2849 if (!xbar_edge_iso_mask) {
2850 prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
2851 return 0;
2852 }
2853
2854 /*
2855 * note that this mask can be set to a value other than 0 only after the cpucp packet
2856 * (i.e. only the FW can set a redundancy value). For the user it will always be 0.
2857 */
2858 num_faulty = hweight32(xbar_edge_iso_mask);
2859
2860 /*
2861 * check for error condition in which number of binning candidates
2862 * is higher than the maximum supported by the driver
2863 */
2864 if (num_faulty > MAX_FAULTY_XBARS) {
2865 dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n",
2866 MAX_FAULTY_XBARS);
2867 return -EINVAL;
2868 }
2869
2870 seq = __ffs((unsigned long)xbar_edge_iso_mask);
2871
2872 /* set binning constraints */
2873 prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
2874 prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;
2875
2876 return 0;
2877 }
2878
2879 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
2880 {
2881 int rc;
2882
2883 /*
2884 * mark all clusters as good; each component will "fail" a cluster
2885 * based on eFuse/user values.
2886 * If more than a single cluster is faulty, the chip is unusable.
2887 */
2888 hdev->asic_prop.faulty_dram_cluster_map = 0;
2889
2890 gaudi2_set_dram_binning_masks(hdev);
2891
2892 rc = gaudi2_set_edma_binning_masks(hdev);
2893 if (rc)
2894 return rc;
2895
2896 rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
2897 if (rc)
2898 return rc;
2899
2900
2901 /* always initially set to full mask */
2902 hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;
2903
2904 return 0;
2905 }
2906
2907 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
2908 {
2909 struct asic_fixed_properties *prop = &hdev->asic_prop;
2910 int rc;
2911
2912 rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
2913 if (rc)
2914 return rc;
2915
2916 /* if we have DRAM binning reported by FW we should perform cluster config */
2917 if (prop->faulty_dram_cluster_map) {
2918 u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
2919
2920 prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
2921 }
2922
2923 return 0;
2924 }
2925
2926 static int gaudi2_set_binning_masks(struct hl_device *hdev)
2927 {
2928 int rc;
2929
2930 rc = gaudi2_set_cluster_binning_masks(hdev);
2931 if (rc)
2932 return rc;
2933
2934 rc = gaudi2_set_tpc_binning_masks(hdev);
2935 if (rc)
2936 return rc;
2937
2938 rc = gaudi2_set_dec_binning_masks(hdev);
2939 if (rc)
2940 return rc;
2941
2942 return 0;
2943 }
2944
2945 static int gaudi2_cpucp_info_get(struct hl_device *hdev)
2946 {
2947 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2948 struct asic_fixed_properties *prop = &hdev->asic_prop;
2949 long max_power;
2950 u64 dram_size;
2951 int rc;
2952
2953 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2954 return 0;
2955
2956 /* No point in asking for this information again when not doing a hard reset, as the
2957 * device CPU hasn't been reset
2958 */
2959 if (hdev->reset_info.in_compute_reset)
2960 return 0;
2961
2962 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
2963 mmCPU_BOOT_ERR1);
2964 if (rc)
2965 return rc;
2966
2967 dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
2968 if (dram_size) {
2969 /* we can have either 5 or 6 HBMs. other values are invalid */
2970
2971 if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) &&
2972 (dram_size != (GAUDI2_HBM_NUM * SZ_16G))) {
2973 dev_err(hdev->dev,
2974 "F/W reported invalid DRAM size %llu. Trying to use default size %llu\n",
2975 dram_size, prop->dram_size);
2976 dram_size = prop->dram_size;
2977 }
2978
2979 prop->dram_size = dram_size;
2980 prop->dram_end_address = prop->dram_base_address + dram_size;
2981 }
2982
2983 if (!strlen(prop->cpucp_info.card_name))
2984 strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME,
2985 CARD_NAME_MAX_LEN);
2986
2987 /* Overwrite binning masks with the actual binning values from F/W */
2988 hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
2989 hdev->edma_binning = prop->cpucp_info.edma_binning_mask;
2990 hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
2991 hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));
2992
2993 dev_dbg(hdev->dev, "Read binning masks: tpc: 0x%llx, dram: 0x%llx, edma: 0x%x, dec: 0x%x\n",
2994 hdev->tpc_binning, hdev->dram_binning, hdev->edma_binning,
2995 hdev->decoder_binning);
2996
2997 /*
2998 * at this point the DRAM parameters need to be updated according to data obtained
2999 * from the FW
3000 */
3001 rc = hdev->asic_funcs->set_dram_properties(hdev);
3002 if (rc)
3003 return rc;
3004
3005 rc = hdev->asic_funcs->set_binning_masks(hdev);
3006 if (rc)
3007 return rc;
3008
3009 max_power = hl_fw_get_max_power(hdev);
3010 if (max_power < 0)
3011 return max_power;
3012
3013 prop->max_power_default = (u64) max_power;
3014
3015 return 0;
3016 }
3017
3018 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
3019 {
3020 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3021 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS];
3022 int rc;
3023
3024 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
3025 return 0;
3026
3027 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr);
3028 if (rc)
3029 return rc;
3030
3031 hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3];
3032
3033 return 0;
3034 }
3035
3036 static int gaudi2_early_init(struct hl_device *hdev)
3037 {
3038 struct asic_fixed_properties *prop = &hdev->asic_prop;
3039 struct pci_dev *pdev = hdev->pdev;
3040 resource_size_t pci_bar_size;
3041 int rc;
3042
3043 rc = gaudi2_set_fixed_properties(hdev);
3044 if (rc)
3045 return rc;
3046
3047 /* Check BAR sizes */
3048 pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);
3049
3050 if (pci_bar_size != CFG_BAR_SIZE) {
3051 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
3052 SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
3053 rc = -ENODEV;
3054 goto free_queue_props;
3055 }
3056
3057 pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);
3058 if (pci_bar_size != MSIX_BAR_SIZE) {
3059 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
3060 MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
3061 rc = -ENODEV;
3062 goto free_queue_props;
3063 }
3064
3065 prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
3066 hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);
3067
3068 /*
3069 * Only in the PLDM flow does the driver configure the iATU; otherwise the FW does it
3070 */
3071 if (hdev->pldm)
3072 hdev->asic_prop.iatu_done_by_fw = false;
3073 else
3074 hdev->asic_prop.iatu_done_by_fw = true;
3075
3076 rc = hl_pci_init(hdev);
3077 if (rc)
3078 goto free_queue_props;
3079
3080 /* Before continuing in the initialization, we need to read the preboot
3081 * version to determine whether we run with a security-enabled firmware
3082 */
3083 rc = hl_fw_read_preboot_status(hdev);
3084 if (rc) {
3085 if (hdev->reset_on_preboot_fail)
3086 /* we are already in a failure flow, so don't check if hw_fini fails. */
3087 hdev->asic_funcs->hw_fini(hdev, true, false);
3088 goto pci_fini;
3089 }
3090
3091 if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
3092 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
3093 rc = hdev->asic_funcs->hw_fini(hdev, true, false);
3094 if (rc) {
3095 dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
3096 goto pci_fini;
3097 }
3098 }
3099
3100 return 0;
3101
3102 pci_fini:
3103 hl_pci_fini(hdev);
3104 free_queue_props:
3105 kfree(hdev->asic_prop.hw_queues_props);
3106 return rc;
3107 }
3108
3109 static int gaudi2_early_fini(struct hl_device *hdev)
3110 {
3111 kfree(hdev->asic_prop.hw_queues_props);
3112 hl_pci_fini(hdev);
3113
3114 return 0;
3115 }
3116
3117 static bool gaudi2_is_arc_nic_owned(u64 arc_id)
3118 {
3119 switch (arc_id) {
3120 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
3121 return true;
3122 default:
3123 return false;
3124 }
3125 }
3126
3127 static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
3128 {
3129 switch (arc_id) {
3130 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
3131 return true;
3132 default:
3133 return false;
3134 }
3135 }
3136
3137 static void gaudi2_init_arcs(struct hl_device *hdev)
3138 {
3139 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3140 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3141 u64 arc_id;
3142 u32 i;
3143
3144 for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) {
3145 if (gaudi2_is_arc_enabled(hdev, i))
3146 continue;
3147
3148 gaudi2_set_arc_id_cap(hdev, i);
3149 }
3150
3151 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
3152 if (!gaudi2_is_queue_enabled(hdev, i))
3153 continue;
3154
3155 arc_id = gaudi2_queue_id_to_arc_id[i];
3156 if (gaudi2_is_arc_enabled(hdev, arc_id))
3157 continue;
3158
3159 if (gaudi2_is_arc_nic_owned(arc_id) &&
3160 !(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)))
3161 continue;
3162
3163 if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized &
3164 BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)))
3165 continue;
3166
3167 gaudi2_set_arc_id_cap(hdev, arc_id);
3168 }
3169
3170 /* Fetch ARC scratchpad address */
3171 hdev->asic_prop.engine_core_interrupt_reg_addr =
3172 CFG_BASE + le32_to_cpu(dyn_regs->eng_arc_irq_ctrl);
3173 }
3174
3175 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
3176 {
3177 u32 reg_base, reg_val;
3178 int rc;
3179
3180 switch (cpu_id) {
3181 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC3:
3182 /* Each ARC scheduler has 2 consecutive DCCM blocks */
3183 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3184 ARC_DCCM_BLOCK_SIZE * 2, true);
3185 if (rc)
3186 return rc;
3187 break;
3188 case CPU_ID_SCHED_ARC4:
3189 case CPU_ID_SCHED_ARC5:
3190 case CPU_ID_MME_QMAN_ARC0:
3191 case CPU_ID_MME_QMAN_ARC1:
3192 reg_base = gaudi2_arc_blocks_bases[cpu_id];
3193
3194 /* Scrub lower DCCM block */
3195 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3196 ARC_DCCM_BLOCK_SIZE, true);
3197 if (rc)
3198 return rc;
3199
3200 /* Switch to upper DCCM block */
3201 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1);
3202 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3203
3204 /* Scrub upper DCCM block */
3205 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3206 ARC_DCCM_BLOCK_SIZE, true);
3207 if (rc)
3208 return rc;
3209
3210 /* Switch to lower DCCM block */
3211 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0);
3212 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3213 break;
3214 default:
3215 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3216 ARC_DCCM_BLOCK_SIZE, true);
3217 if (rc)
3218 return rc;
3219 }
3220
3221 return 0;
3222 }
3223
3224 static int gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
3225 {
3226 u16 arc_id;
3227 int rc;
3228
3229 for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
3230 if (!gaudi2_is_arc_enabled(hdev, arc_id))
3231 continue;
3232
3233 rc = gaudi2_scrub_arc_dccm(hdev, arc_id);
3234 if (rc)
3235 return rc;
3236 }
3237
3238 return 0;
3239 }
3240
3241 static int gaudi2_late_init(struct hl_device *hdev)
3242 {
3243 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3244 int rc;
3245
3246 hdev->asic_prop.supports_advanced_cpucp_rc = true;
3247
3248 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
3249 gaudi2->virt_msix_db_dma_addr);
3250 if (rc) {
3251 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
3252 return rc;
3253 }
3254
3255 rc = gaudi2_fetch_psoc_frequency(hdev);
3256 if (rc) {
3257 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
3258 goto disable_pci_access;
3259 }
3260
3261 gaudi2_init_arcs(hdev);
3262
3263 rc = gaudi2_scrub_arcs_dccm(hdev);
3264 if (rc) {
3265 dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
3266 goto disable_pci_access;
3267 }
3268
3269 gaudi2_init_security(hdev);
3270
3271 return 0;
3272
3273 disable_pci_access:
3274 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
3275
3276 return rc;
3277 }
3278
3279 static void gaudi2_late_fini(struct hl_device *hdev)
3280 {
3281 hl_hwmon_release_resources(hdev);
3282 }
3283
3284 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
3285 {
3286 struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3287
3288 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3289 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3290 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3291 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3292 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3293 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3294 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3295 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3296 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3297 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3298 }
3299
3300 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev)
3301 {
3302 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3303 struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3304 u32 block_size, umr_start_idx, num_umr_blocks;
3305 int i;
3306
3307 for (i = 0 ; i < NUM_ARC_CPUS ; i++) {
3308 if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3)
3309 block_size = ARC_DCCM_BLOCK_SIZE * 2;
3310 else
3311 block_size = ARC_DCCM_BLOCK_SIZE;
3312
3313 blocks[i].address = gaudi2_arc_dccm_bases[i];
3314 blocks[i].size = block_size;
3315 }
3316
3317 blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE;
3318 blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE;
3319
3320 blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE;
3321 blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE;
3322
3323 blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE;
3324 blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE;
3325
3326 blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE;
3327 blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE;
3328
3329 blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE;
3330 blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE;
3331
3332 blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE;
3333 blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE;
3334
3335 blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE;
3336 blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE;
3337
3338 blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE;
3339 blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE;
3340
3341 umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS;
3342 num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS;
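/*
 * Each NIC engine exposes NUM_OF_USER_NIC_UMR_BLOCKS unsecure doorbell (UMR)
 * blocks; index i below is decomposed into (nic_id, umr_block_id) and the block
 * address is built from the NIC macro, QM and UMR offsets.
 */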
3343 for (i = 0 ; i < num_umr_blocks ; i++) {
3344 u8 nic_id, umr_block_id;
3345
3346 nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS;
3347 umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS;
3348
3349 blocks[umr_start_idx + i].address =
3350 mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE +
3351 (nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET +
3352 (nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET +
3353 umr_block_id * NIC_UMR_OFFSET;
3354 blocks[umr_start_idx + i].size = HL_BLOCK_SIZE;
3355 }
3356
3357 /* Expose decoder HW configuration block to user */
3358 gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX);
3359
3360 for (i = 1; i < NUM_OF_DCORES; ++i) {
3361 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE;
3362 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE;
3363
3364 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address =
3365 mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET;
3366
3367 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address =
3368 mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET;
3369 }
3370 }
3371
3372 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
3373 {
3374 dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
3375 void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {};
3376 int i, j, rc = 0;
3377
3378 /* The device ARC works with 32-bit addresses, and because there is a single HW register
3379 * that holds the extension bits (49..28), these bits must be identical across the entire
3380 * allocated range.
3381 */
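/*
 * In practice this means the range must not cross a 2^28-byte (256MB) aligned
 * boundary, since only bits 27:0 are allowed to differ. If an allocation does
 * cross such a boundary, it is kept aside and the allocation is retried (up to
 * GAUDI2_ALLOC_CPU_MEM_RETRY_CNT attempts); the unused attempts are freed at
 * the end.
 */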
3382
3383 for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
3384 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
3385 &dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
3386 if (!virt_addr_arr[i]) {
3387 rc = -ENOMEM;
3388 goto free_dma_mem_arr;
3389 }
3390
3391 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
3392 if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
3393 break;
3394 }
3395
3396 if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
3397 dev_err(hdev->dev,
3398 "MSB of ARC accessible DMA memory are not identical in all range\n");
3399 rc = -EFAULT;
3400 goto free_dma_mem_arr;
3401 }
3402
3403 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
3404 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
3405
3406 free_dma_mem_arr:
3407 for (j = 0 ; j < i ; j++)
3408 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
3409 dma_addr_arr[j]);
3410
3411 return rc;
3412 }
3413
3414 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
3415 {
3416 struct asic_fixed_properties *prop = &hdev->asic_prop;
3417 struct pci_mem_region *region;
3418
3419 /* CFG */
3420 region = &hdev->pci_mem_region[PCI_REGION_CFG];
3421 region->region_base = CFG_BASE;
3422 region->region_size = CFG_SIZE;
3423 region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
3424 region->bar_size = CFG_BAR_SIZE;
3425 region->bar_id = SRAM_CFG_BAR_ID;
3426 region->used = 1;
3427
3428 /* SRAM */
3429 region = &hdev->pci_mem_region[PCI_REGION_SRAM];
3430 region->region_base = SRAM_BASE_ADDR;
3431 region->region_size = SRAM_SIZE;
3432 region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
3433 region->bar_size = CFG_BAR_SIZE;
3434 region->bar_id = SRAM_CFG_BAR_ID;
3435 region->used = 1;
3436
3437 /* DRAM */
3438 region = &hdev->pci_mem_region[PCI_REGION_DRAM];
3439 region->region_base = DRAM_PHYS_BASE;
3440 region->region_size = hdev->asic_prop.dram_size;
3441 region->offset_in_bar = 0;
3442 region->bar_size = prop->dram_pci_bar_size;
3443 region->bar_id = DRAM_BAR_ID;
3444 region->used = 1;
3445 }
3446
3447 static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
3448 {
3449 struct asic_fixed_properties *prop = &hdev->asic_prop;
3450 int i, j, k;
3451
3452 /* Initialize TPC interrupt */
3453 HL_USR_INTR_STRUCT_INIT(hdev->tpc_interrupt, hdev, 0, HL_USR_INTERRUPT_TPC);
3454
3455 /* Initialize unexpected error interrupt */
3456 HL_USR_INTR_STRUCT_INIT(hdev->unexpected_error_interrupt, hdev, 0,
3457 HL_USR_INTERRUPT_UNEXPECTED);
3458
3459 /* Initialize common user CQ interrupt */
3460 HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
3461 HL_COMMON_USER_CQ_INTERRUPT_ID, HL_USR_INTERRUPT_CQ);
3462
3463 /* Initialize common decoder interrupt */
3464 HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
3465 HL_COMMON_DEC_INTERRUPT_ID, HL_USR_INTERRUPT_DECODER);
3466
3467 /* User interrupts structure holds both decoder and user interrupts from various engines.
3468 * We first initialize the decoder interrupts and then we add the user interrupts.
3469 * The only limitation is that the last decoder interrupt id must be smaller
3470 * than GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
3471 */
3472
3473 /* Initialize decoder interrupts; expose only the normal interrupts,
3474 * while the error interrupts are handled by the driver
3475 */
3476 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM;
3477 i += 2, j++)
3478 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i,
3479 HL_USR_INTERRUPT_DECODER);
3480
3481 for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++)
3482 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, HL_USR_INTERRUPT_CQ);
3483 }
3484
3485 static inline int gaudi2_get_non_zero_random_int(void)
3486 {
3487 int rand = get_random_u32();
3488
3489 return rand ? rand : 1;
3490 }
3491
3492 static void gaudi2_special_blocks_free(struct hl_device *hdev)
3493 {
3494 struct asic_fixed_properties *prop = &hdev->asic_prop;
3495 struct hl_skip_blocks_cfg *skip_special_blocks_cfg =
3496 &prop->skip_special_blocks_cfg;
3497
3498 kfree(prop->special_blocks);
3499 kfree(skip_special_blocks_cfg->block_types);
3500 kfree(skip_special_blocks_cfg->block_ranges);
3501 }
3502
3503 static void gaudi2_special_blocks_iterator_free(struct hl_device *hdev)
3504 {
3505 gaudi2_special_blocks_free(hdev);
3506 }
3507
3508 static bool gaudi2_special_block_skip(struct hl_device *hdev,
3509 struct hl_special_blocks_cfg *special_blocks_cfg,
3510 u32 blk_idx, u32 major, u32 minor, u32 sub_minor)
3511 {
3512 return false;
3513 }
3514
3515 static int gaudi2_special_blocks_config(struct hl_device *hdev)
3516 {
3517 struct asic_fixed_properties *prop = &hdev->asic_prop;
3518 int i, rc;
3519
3520 /* Configure Special blocks */
3521 prop->glbl_err_cause_num = GAUDI2_NUM_OF_GLBL_ERR_CAUSE;
3522 prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks);
3523 prop->special_blocks = kmalloc_array(prop->num_of_special_blocks,
3524 sizeof(*prop->special_blocks), GFP_KERNEL);
3525 if (!prop->special_blocks)
3526 return -ENOMEM;
3527
3528 for (i = 0 ; i < prop->num_of_special_blocks ; i++)
3529 memcpy(&prop->special_blocks[i], &gaudi2_special_blocks[i],
3530 sizeof(*prop->special_blocks));
3531
3532 /* Configure when to skip Special blocks */
3533 memset(&prop->skip_special_blocks_cfg, 0, sizeof(prop->skip_special_blocks_cfg));
3534 prop->skip_special_blocks_cfg.skip_block_hook = gaudi2_special_block_skip;
3535
3536 if (ARRAY_SIZE(gaudi2_iterator_skip_block_types)) {
3537 prop->skip_special_blocks_cfg.block_types =
3538 kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_types),
3539 sizeof(gaudi2_iterator_skip_block_types[0]), GFP_KERNEL);
3540 if (!prop->skip_special_blocks_cfg.block_types) {
3541 rc = -ENOMEM;
3542 goto free_special_blocks;
3543 }
3544
3545 memcpy(prop->skip_special_blocks_cfg.block_types, gaudi2_iterator_skip_block_types,
3546 sizeof(gaudi2_iterator_skip_block_types));
3547
3548 prop->skip_special_blocks_cfg.block_types_len =
3549 ARRAY_SIZE(gaudi2_iterator_skip_block_types);
3550 }
3551
3552 if (ARRAY_SIZE(gaudi2_iterator_skip_block_ranges)) {
3553 prop->skip_special_blocks_cfg.block_ranges =
3554 kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_ranges),
3555 sizeof(gaudi2_iterator_skip_block_ranges[0]), GFP_KERNEL);
3556 if (!prop->skip_special_blocks_cfg.block_ranges) {
3557 rc = -ENOMEM;
3558 goto free_skip_special_blocks_types;
3559 }
3560
3561 for (i = 0 ; i < ARRAY_SIZE(gaudi2_iterator_skip_block_ranges) ; i++)
3562 memcpy(&prop->skip_special_blocks_cfg.block_ranges[i],
3563 &gaudi2_iterator_skip_block_ranges[i],
3564 sizeof(struct range));
3565
3566 prop->skip_special_blocks_cfg.block_ranges_len =
3567 ARRAY_SIZE(gaudi2_iterator_skip_block_ranges);
3568 }
3569
3570 return 0;
3571
3572 free_skip_special_blocks_types:
3573 kfree(prop->skip_special_blocks_cfg.block_types);
3574 free_special_blocks:
3575 kfree(prop->special_blocks);
3576
3577 return rc;
3578 }
3579
3580 static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev)
3581 {
3582 return gaudi2_special_blocks_config(hdev);
3583 }
3584
3585 static void gaudi2_test_queues_msgs_free(struct hl_device *hdev)
3586 {
3587 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3588 struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3589 int i;
3590
3591 for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3592 /* bail-out if this is an allocation failure point */
3593 if (!msg_info[i].kern_addr)
3594 break;
3595
3596 hl_asic_dma_pool_free(hdev, msg_info[i].kern_addr, msg_info[i].dma_addr);
3597 msg_info[i].kern_addr = NULL;
3598 }
3599 }
3600
3601 static int gaudi2_test_queues_msgs_alloc(struct hl_device *hdev)
3602 {
3603 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3604 struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3605 int i, rc;
3606
3607 /* allocate a message-short buf for each Q we intend to test */
3608 for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3609 msg_info[i].kern_addr =
3610 (void *)hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_short),
3611 GFP_KERNEL, &msg_info[i].dma_addr);
3612 if (!msg_info[i].kern_addr) {
3613 dev_err(hdev->dev,
3614 "Failed to allocate dma memory for H/W queue %d testing\n", i);
3615 rc = -ENOMEM;
3616 goto err_exit;
3617 }
3618 }
3619
3620 return 0;
3621
3622 err_exit:
3623 gaudi2_test_queues_msgs_free(hdev);
3624 return rc;
3625 }
3626
3627 static int gaudi2_sw_init(struct hl_device *hdev)
3628 {
3629 struct asic_fixed_properties *prop = &hdev->asic_prop;
3630 struct gaudi2_device *gaudi2;
3631 int i, rc;
3632
3633 /* Allocate device structure */
3634 gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL);
3635 if (!gaudi2)
3636 return -ENOMEM;
3637
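/* Collect only the valid, non-message entries of the IRQ map table as H/W events */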
3638 for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) {
3639 if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid)
3640 continue;
3641
3642 if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) {
3643 dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n",
3644 GAUDI2_EVENT_SIZE);
3645 rc = -EINVAL;
3646 goto free_gaudi2_device;
3647 }
3648
3649 gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id;
3650 }
3651
3652 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++)
3653 gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int();
3654
3655 gaudi2->cpucp_info_get = gaudi2_cpucp_info_get;
3656
3657 hdev->asic_specific = gaudi2;
3658
3659 /* Create DMA pool for small allocations.
3660 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped
3661 * PI/CI registers allocated from this pool have this restriction
3662 */
3663 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev,
3664 GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0);
3665 if (!hdev->dma_pool) {
3666 dev_err(hdev->dev, "failed to create DMA pool\n");
3667 rc = -ENOMEM;
3668 goto free_gaudi2_device;
3669 }
3670
3671 rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev);
3672 if (rc)
3673 goto free_dma_pool;
3674
3675 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
3676 if (!hdev->cpu_accessible_dma_pool) {
3677 dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n");
3678 rc = -ENOMEM;
3679 goto free_cpu_dma_mem;
3680 }
3681
3682 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem,
3683 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
3684 if (rc) {
3685 dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n");
3686 rc = -EFAULT;
3687 goto free_cpu_accessible_dma_pool;
3688 }
3689
3690 gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
3691 &gaudi2->virt_msix_db_dma_addr);
3692 if (!gaudi2->virt_msix_db_cpu_addr) {
3693 dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
3694 rc = -ENOMEM;
3695 goto free_cpu_accessible_dma_pool;
3696 }
3697
3698 spin_lock_init(&gaudi2->hw_queues_lock);
3699
3700 gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE,
3701 &gaudi2->scratchpad_bus_address,
3702 GFP_KERNEL | __GFP_ZERO);
3703 if (!gaudi2->scratchpad_kernel_address) {
3704 rc = -ENOMEM;
3705 goto free_virt_msix_db_mem;
3706 }
3707
3708 gaudi2_user_mapped_blocks_init(hdev);
3709
3710 /* Initialize user interrupts */
3711 gaudi2_user_interrupt_setup(hdev);
3712
3713 hdev->supports_coresight = true;
3714 hdev->supports_sync_stream = true;
3715 hdev->supports_cb_mapping = true;
3716 hdev->supports_wait_for_multi_cs = false;
3717
3718 prop->supports_compute_reset = true;
3719
3720 /* Event queue sanity check added in FW version 1.11 */
3721 if (hl_is_fw_sw_ver_below(hdev, 1, 11))
3722 hdev->event_queue.check_eqe_index = false;
3723 else
3724 hdev->event_queue.check_eqe_index = true;
3725
3726 hdev->asic_funcs->set_pci_memory_regions(hdev);
3727
3728 rc = gaudi2_special_blocks_iterator_config(hdev);
3729 if (rc)
3730 goto free_scratchpad_mem;
3731
3732 rc = gaudi2_test_queues_msgs_alloc(hdev);
3733 if (rc)
3734 goto special_blocks_free;
3735
3736 return 0;
3737
3738 special_blocks_free:
3739 gaudi2_special_blocks_iterator_free(hdev);
3740 free_scratchpad_mem:
3741 hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3742 gaudi2->scratchpad_bus_address);
3743 free_virt_msix_db_mem:
3744 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3745 free_cpu_accessible_dma_pool:
3746 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3747 free_cpu_dma_mem:
3748 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3749 hdev->cpu_accessible_dma_address);
3750 free_dma_pool:
3751 dma_pool_destroy(hdev->dma_pool);
3752 free_gaudi2_device:
3753 kfree(gaudi2);
3754 return rc;
3755 }
3756
3757 static int gaudi2_sw_fini(struct hl_device *hdev)
3758 {
3759 struct asic_fixed_properties *prop = &hdev->asic_prop;
3760 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3761
3762 gaudi2_test_queues_msgs_free(hdev);
3763
3764 gaudi2_special_blocks_iterator_free(hdev);
3765
3766 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3767
3768 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3769
3770 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3771 hdev->cpu_accessible_dma_address);
3772
3773 hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3774 gaudi2->scratchpad_bus_address);
3775
3776 dma_pool_destroy(hdev->dma_pool);
3777
3778 kfree(gaudi2);
3779
3780 return 0;
3781 }
3782
3783 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
3784 {
3785 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP |
3786 QM_GLBL_CFG1_CQF_STOP |
3787 QM_GLBL_CFG1_CP_STOP);
3788
3789 /* stop also the ARC */
3790 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
3791 }
3792
3793 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
3794 {
3795 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH |
3796 QM_GLBL_CFG1_CQF_FLUSH |
3797 QM_GLBL_CFG1_CP_FLUSH);
3798 }
3799
3800 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
3801 {
3802 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
3803 }
3804
3805 /**
3806 * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
3807 *
3808 * @hdev: pointer to the habanalabs device structure
3809 * @queue_id: queue whose fence counters should be cleared
3810 * @skip_fence: if true, set the maximum fence value in all fence counters to
3811 * avoid getting stuck on any fence value. Otherwise set all fence
3812 * counters to 0 (a standard clear of the fence counters)
3813 */
3814 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
3815 bool skip_fence)
3816 {
3817 u32 size, reg_base;
3818 u32 addr, val;
3819
3820 reg_base = gaudi2_qm_blocks_bases[queue_id];
3821
3822 addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
3823 size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;
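/*
 * The size above is the register span from CP_FENCE0_CNT_0 up to (but not
 * including) CP_BARRIER_CFG, which presumably covers all the fence counter
 * registers of all the QMAN CPs, so the memset below clears them in one go.
 */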
3824
3825 /*
3826 * In case we want to make sure that a QM that is stuck on a fence will
3827 * be released, we should set the fence counter to a value higher than
3828 * the value the QM is waiting for. To satisfy a fence of any value,
3829 * we set the maximum fence value in all counters.
3830 */
3831 val = skip_fence ? U32_MAX : 0;
3832 gaudi2_memset_device_lbw(hdev, addr, size, val);
3833 }
3834
3835 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id)
3836 {
3837 u32 reg_base = gaudi2_qm_blocks_bases[queue_id];
3838
3839 gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true);
3840 gaudi2_flush_qman_common(hdev, reg_base);
3841 gaudi2_flush_qman_arc_common(hdev, reg_base);
3842 }
3843
3844 static void gaudi2_stop_dma_qmans(struct hl_device *hdev)
3845 {
3846 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3847 int dcore, inst;
3848
3849 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3850 goto stop_edma_qmans;
3851
3852 /* Stop CPs of PDMA QMANs */
3853 gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE);
3854 gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE);
3855
3856 stop_edma_qmans:
3857 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3858 return;
3859
3860 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3861 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3862 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3863 u32 qm_base;
3864
3865 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3866 continue;
3867
3868 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3869 inst * DCORE_EDMA_OFFSET;
3870
3871 /* Stop CPs of EDMA QMANs */
3872 gaudi2_stop_qman_common(hdev, qm_base);
3873 }
3874 }
3875 }
3876
3877 static void gaudi2_stop_mme_qmans(struct hl_device *hdev)
3878 {
3879 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3880 u32 offset, i;
3881
3882 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3883
3884 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
3885 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)))
3886 continue;
3887
3888 gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3889 }
3890 }
3891
3892 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev)
3893 {
3894 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3895 u32 reg_base;
3896 int i;
3897
3898 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3899 return;
3900
3901 for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3902 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3903 continue;
3904
3905 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3906 gaudi2_stop_qman_common(hdev, reg_base);
3907 }
3908 }
3909
3910 static void gaudi2_stop_rot_qmans(struct hl_device *hdev)
3911 {
3912 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3913 u32 reg_base;
3914 int i;
3915
3916 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3917 return;
3918
3919 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3920 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3921 continue;
3922
3923 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3924 gaudi2_stop_qman_common(hdev, reg_base);
3925 }
3926 }
3927
3928 static void gaudi2_stop_nic_qmans(struct hl_device *hdev)
3929 {
3930 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3931 u32 reg_base, queue_id;
3932 int i;
3933
3934 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3935 return;
3936
3937 queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3938
3939 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3940 if (!(hdev->nic_ports_mask & BIT(i)))
3941 continue;
3942
3943 reg_base = gaudi2_qm_blocks_bases[queue_id];
3944 gaudi2_stop_qman_common(hdev, reg_base);
3945 }
3946 }
3947
3948 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base)
3949 {
3950 u32 reg_val;
3951
3952 reg_val = FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1);
3953 WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val);
3954 }
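
/*
 * Editor's note (illustrative, not part of the original driver): the PDMA and
 * EDMA cores share the CFG_1 halt-bit layout, which is why a single helper
 * serves both. A minimal usage sketch, mirroring the EDMA loop below and
 * assuming dcore/inst were already validated against the capability mask:
 *
 *	u32 core_base = mmDCORE0_EDMA0_CORE_BASE +
 *			dcore * DCORE_OFFSET + inst * DCORE_EDMA_OFFSET;
 *	gaudi2_stall_dma_common(hdev, core_base);
 */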
3955
3956 static void gaudi2_dma_stall(struct hl_device *hdev)
3957 {
3958 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3959 int dcore, inst;
3960
3961 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3962 goto stall_edma;
3963
3964 gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE);
3965 gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE);
3966
3967 stall_edma:
3968 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3969 return;
3970
3971 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3972 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3973 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3974 u32 core_base;
3975
3976 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3977 continue;
3978
3979 core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET +
3980 inst * DCORE_EDMA_OFFSET;
3981
3982 			/* Stall the EDMA cores */
3983 gaudi2_stall_dma_common(hdev, core_base);
3984 }
3985 }
3986 }
3987
3988 static void gaudi2_mme_stall(struct hl_device *hdev)
3989 {
3990 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3991 u32 offset, i;
3992
3993 offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL;
3994
3995 for (i = 0 ; i < NUM_OF_DCORES ; i++)
3996 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3997 WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1);
3998 }
3999
4000 static void gaudi2_tpc_stall(struct hl_device *hdev)
4001 {
4002 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4003 u32 reg_base;
4004 int i;
4005
4006 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
4007 return;
4008
4009 for (i = 0 ; i < TPC_ID_SIZE ; i++) {
4010 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
4011 continue;
4012
4013 reg_base = gaudi2_tpc_cfg_blocks_bases[i];
4014 WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1);
4015 }
4016 }
4017
4018 static void gaudi2_rotator_stall(struct hl_device *hdev)
4019 {
4020 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4021 u32 reg_val;
4022 int i;
4023
4024 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
4025 return;
4026
4027 reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) |
4028 FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) |
4029 FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1);
4030
4031 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
4032 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
4033 continue;
4034
4035 WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val);
4036 }
4037 }
4038
4039 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base)
4040 {
4041 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0);
4042 }
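
/*
 * Editor's note (illustrative): writing 0 to QM_GLBL_CFG0 clears the QMAN
 * enable bits at once, so the QMAN stops fetching new work entirely. The
 * enable path in gaudi2_init_qman_common() is the mirror image, e.g.:
 *
 *	WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
 */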
4043
4044 static void gaudi2_disable_dma_qmans(struct hl_device *hdev)
4045 {
4046 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4047 int dcore, inst;
4048
4049 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
4050 goto stop_edma_qmans;
4051
4052 gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE);
4053 gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE);
4054
4055 stop_edma_qmans:
4056 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
4057 return;
4058
4059 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
4060 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
4061 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
4062 u32 qm_base;
4063
4064 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
4065 continue;
4066
4067 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
4068 inst * DCORE_EDMA_OFFSET;
4069
4070 /* Disable CPs of EDMA QMANs */
4071 gaudi2_disable_qman_common(hdev, qm_base);
4072 }
4073 }
4074 }
4075
4076 static void gaudi2_disable_mme_qmans(struct hl_device *hdev)
4077 {
4078 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4079 u32 offset, i;
4080
4081 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
4082
4083 for (i = 0 ; i < NUM_OF_DCORES ; i++)
4084 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
4085 gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
4086 }
4087
4088 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev)
4089 {
4090 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4091 u32 reg_base;
4092 int i;
4093
4094 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
4095 return;
4096
4097 for (i = 0 ; i < TPC_ID_SIZE ; i++) {
4098 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
4099 continue;
4100
4101 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
4102 gaudi2_disable_qman_common(hdev, reg_base);
4103 }
4104 }
4105
4106 static void gaudi2_disable_rot_qmans(struct hl_device *hdev)
4107 {
4108 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4109 u32 reg_base;
4110 int i;
4111
4112 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
4113 return;
4114
4115 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
4116 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
4117 continue;
4118
4119 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
4120 gaudi2_disable_qman_common(hdev, reg_base);
4121 }
4122 }
4123
4124 static void gaudi2_disable_nic_qmans(struct hl_device *hdev)
4125 {
4126 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4127 u32 reg_base, queue_id;
4128 int i;
4129
4130 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4131 return;
4132
4133 queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4134
4135 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4136 if (!(hdev->nic_ports_mask & BIT(i)))
4137 continue;
4138
4139 reg_base = gaudi2_qm_blocks_bases[queue_id];
4140 gaudi2_disable_qman_common(hdev, reg_base);
4141 }
4142 }
4143
4144 static void gaudi2_enable_timestamp(struct hl_device *hdev)
4145 {
4146 /* Disable the timestamp counter */
4147 WREG32(mmPSOC_TIMESTAMP_BASE, 0);
4148
4149 /* Zero the lower/upper parts of the 64-bit counter */
4150 WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0);
4151 WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0);
4152
4153 /* Enable the counter */
4154 WREG32(mmPSOC_TIMESTAMP_BASE, 1);
4155 }
4156
4157 static void gaudi2_disable_timestamp(struct hl_device *hdev)
4158 {
4159 /* Disable the timestamp counter */
4160 WREG32(mmPSOC_TIMESTAMP_BASE, 0);
4161 }
4162
4163 static const char *gaudi2_irq_name(u16 irq_number)
4164 {
4165 switch (irq_number) {
4166 case GAUDI2_IRQ_NUM_EVENT_QUEUE:
4167 return "gaudi2 cpu eq";
4168 case GAUDI2_IRQ_NUM_COMPLETION:
4169 return "gaudi2 completion";
4170 case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
4171 return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
4172 case GAUDI2_IRQ_NUM_TPC_ASSERT:
4173 return "gaudi2 tpc assert";
4174 case GAUDI2_IRQ_NUM_UNEXPECTED_ERROR:
4175 return "gaudi2 unexpected error";
4176 case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
4177 return "gaudi2 user completion";
4178 case GAUDI2_IRQ_NUM_EQ_ERROR:
4179 return "gaudi2 eq error";
4180 default:
4181 return "invalid";
4182 }
4183 }
4184
4185 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
4186 {
4187 int i, irq, relative_idx;
4188 struct hl_dec *dec;
4189
4190 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) {
4191 irq = pci_irq_vector(hdev->pdev, i);
4192 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4193
4194 dec = hdev->dec + relative_idx / 2;
4195
4196 /* We pass different structures depending on the irq handler. For the abnormal
4197 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4198 * user_interrupt entry
4199 */
4200 free_irq(irq, ((relative_idx % 2) ?
4201 (void *) dec :
4202 (void *) &hdev->user_interrupt[dec->core_id]));
4203 }
4204 }
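
/*
 * Editor's note (illustrative): decoder IRQ vectors come in normal/abnormal
 * pairs, so the parity of relative_idx selects the dev_id cookie: even vectors
 * (normal) were requested with the relevant user_interrupt entry, odd vectors
 * (abnormal) with the hl_dec structure. free_irq() must receive the same
 * cookie that was passed to request_irq(), which is why the ternary above
 * mirrors the request logic in gaudi2_dec_enable_msix() below.
 */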
4205
4206 static int gaudi2_dec_enable_msix(struct hl_device *hdev)
4207 {
4208 int rc, i, irq_init_cnt, irq, relative_idx;
4209 struct hl_dec *dec;
4210
4211 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
4212 i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM;
4213 i++, irq_init_cnt++) {
4214
4215 irq = pci_irq_vector(hdev->pdev, i);
4216 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4217
4218 /* We pass different structures depending on the irq handler. For the abnormal
4219 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4220 * user_interrupt entry
4221 *
4222 * TODO: change the dec abnrm to threaded irq
4223 */
4224
4225 dec = hdev->dec + relative_idx / 2;
4226 if (relative_idx % 2) {
4227 rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0,
4228 gaudi2_irq_name(i), (void *) dec);
4229 } else {
4230 rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
4231 (void *) &hdev->user_interrupt[dec->core_id]);
4232 }
4233
4234 if (rc) {
4235 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4236 goto free_dec_irqs;
4237 }
4238 }
4239
4240 return 0;
4241
4242 free_dec_irqs:
4243 gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt));
4244 return rc;
4245 }
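
/*
 * Editor's note (illustrative): irq_init_cnt counts how many decoder vectors
 * were successfully requested, so the error path releases only those. A sketch
 * of the unwind range, assuming the failure happened on the next vector:
 *
 *	first freed vector   = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM
 *	one past last freed  = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt
 */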
4246
4247 static int gaudi2_enable_msix(struct hl_device *hdev)
4248 {
4249 struct asic_fixed_properties *prop = &hdev->asic_prop;
4250 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4251 int rc, irq, i, j, user_irq_init_cnt;
4252 struct hl_cq *cq;
4253
4254 if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
4255 return 0;
4256
4257 rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
4258 PCI_IRQ_MSIX);
4259 if (rc < 0) {
4260 dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n",
4261 GAUDI2_MSIX_ENTRIES, rc);
4262 return rc;
4263 }
4264
4265 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4266 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4267 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
4268 if (rc) {
4269 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4270 goto free_irq_vectors;
4271 }
4272
4273 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4274 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE),
4275 &hdev->event_queue);
4276 if (rc) {
4277 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4278 goto free_completion_irq;
4279 }
4280
4281 rc = gaudi2_dec_enable_msix(hdev);
4282 if (rc) {
4283 dev_err(hdev->dev, "Failed to enable decoder IRQ");
4284 goto free_event_irq;
4285 }
4286
4287 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4288 rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4289 gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT),
4290 &hdev->tpc_interrupt);
4291 if (rc) {
4292 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4293 goto free_dec_irq;
4294 }
4295
4296 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4297 rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4298 gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
4299 &hdev->unexpected_error_interrupt);
4300 if (rc) {
4301 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4302 goto free_tpc_irq;
4303 }
4304
4305 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
4306 user_irq_init_cnt < prop->user_interrupt_count;
4307 i++, j++, user_irq_init_cnt++) {
4308
4309 irq = pci_irq_vector(hdev->pdev, i);
4310 rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
4311 &hdev->user_interrupt[j]);
4312 if (rc) {
4313 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4314 goto free_user_irq;
4315 }
4316 }
4317
4318 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
4319 rc = request_threaded_irq(irq, NULL, hl_irq_eq_error_interrupt_thread_handler,
4320 IRQF_ONESHOT, gaudi2_irq_name(GAUDI2_IRQ_NUM_EQ_ERROR),
4321 hdev);
4322 if (rc) {
4323 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4324 goto free_user_irq;
4325 }
4326
4327 gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
4328
4329 return 0;
4330
4331 free_user_irq:
4332 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count;
4333 i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
4334
4335 irq = pci_irq_vector(hdev->pdev, i);
4336 free_irq(irq, &hdev->user_interrupt[j]);
4337 }
4338 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4339 free_irq(irq, &hdev->unexpected_error_interrupt);
4340 free_tpc_irq:
4341 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4342 free_irq(irq, &hdev->tpc_interrupt);
4343 free_dec_irq:
4344 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_DEC_LAST + 1);
4345 free_event_irq:
4346 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4347 	free_irq(irq, &hdev->event_queue);
4348
4349 free_completion_irq:
4350 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4351 free_irq(irq, cq);
4352
4353 free_irq_vectors:
4354 pci_free_irq_vectors(hdev->pdev);
4355
4356 return rc;
4357 }
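
/*
 * Editor's note (illustrative): the error labels above unwind in the reverse
 * order of the setup steps (completion IRQ, event-queue IRQ, decoder IRQs,
 * TPC-assert IRQ, unexpected-error IRQ, user IRQs, EQ-error IRQ), so a failure
 * at any step releases only what was already requested before the MSI-X
 * vectors themselves are freed.
 */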
4358
4359 static void gaudi2_sync_irqs(struct hl_device *hdev)
4360 {
4361 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4362 int i, j;
4363 int irq;
4364
4365 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4366 return;
4367
4368 /* Wait for all pending IRQs to be finished */
4369 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION));
4370
4371 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) {
4372 irq = pci_irq_vector(hdev->pdev, i);
4373 synchronize_irq(irq);
4374 }
4375
4376 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT));
4377 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR));
4378
4379 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
4380 i++, j++) {
4381 irq = pci_irq_vector(hdev->pdev, i);
4382 synchronize_irq(irq);
4383 }
4384
4385 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
4386 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR));
4387 }
4388
4389 static void gaudi2_disable_msix(struct hl_device *hdev)
4390 {
4391 struct asic_fixed_properties *prop = &hdev->asic_prop;
4392 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4393 struct hl_cq *cq;
4394 int irq, i, j, k;
4395
4396 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4397 return;
4398
4399 gaudi2_sync_irqs(hdev);
4400
4401 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4402 free_irq(irq, &hdev->event_queue);
4403
4404 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
4405
4406 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4407 free_irq(irq, &hdev->tpc_interrupt);
4408
4409 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4410 free_irq(irq, &hdev->unexpected_error_interrupt);
4411
4412 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
4413 k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
4414
4415 irq = pci_irq_vector(hdev->pdev, i);
4416 free_irq(irq, &hdev->user_interrupt[j]);
4417 }
4418
4419 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4420 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4421 free_irq(irq, cq);
4422
4423 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
4424 free_irq(irq, hdev);
4425
4426 pci_free_irq_vectors(hdev->pdev);
4427
4428 gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
4429 }
4430
4431 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id)
4432 {
4433 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4434 u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4435 u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4436 int rc;
4437
4438 if (hdev->pldm)
4439 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4440 else
4441 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4442
4443 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4444 dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
4445 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4446 continue;
4447
4448 offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;
4449
4450 WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);
4451
4452 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4453
4454 		/* Wait until all traffic from the decoder stops
4455 		 * before applying core reset.
4456 */
4457 rc = hl_poll_timeout(
4458 hdev,
4459 mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4460 graceful,
4461 (graceful & graceful_pend_mask),
4462 100,
4463 timeout_usec);
4464 if (rc)
4465 dev_err(hdev->dev,
4466 "Failed to stop traffic from DCORE%d Decoder %d\n",
4467 dcore_id, dec_id);
4468 }
4469 }
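
/*
 * Editor's note (illustrative): hl_poll_timeout() repeatedly reads the given
 * register into 'graceful' (here with a 100us polling interval and
 * timeout_usec as the overall budget) until the condition holds, and returns
 * non-zero if the timeout expired first. A minimal sketch of the same
 * graceful-stop handshake for a single decoder, assuming 'offset' was
 * computed as above:
 *
 *	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
 *	       FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1));
 *	rc = hl_poll_timeout(hdev, mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
 *			     graceful, graceful & graceful_pend_mask,
 *			     100, timeout_usec);
 */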
4470
4471 static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
4472 {
4473 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4474 u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4475 u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4476 int rc;
4477
4478 if (hdev->pldm)
4479 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4480 else
4481 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4482
4483 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4484 dec_bit = PCIE_DEC_SHIFT + dec_id;
4485 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4486 continue;
4487
4488 offset = dec_id * PCIE_VDEC_OFFSET;
4489
4490 WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);
4491
4492 WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4493
4494 		/* Wait until all traffic from the decoder stops
4495 		 * before applying core reset.
4496 */
4497 rc = hl_poll_timeout(
4498 hdev,
4499 mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4500 graceful,
4501 (graceful & graceful_pend_mask),
4502 100,
4503 timeout_usec);
4504 if (rc)
4505 dev_err(hdev->dev,
4506 "Failed to stop traffic from PCIe Decoder %d\n",
4507 dec_id);
4508 }
4509 }
4510
4511 static void gaudi2_stop_dec(struct hl_device *hdev)
4512 {
4513 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4514 int dcore_id;
4515
4516 if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
4517 return;
4518
4519 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
4520 gaudi2_stop_dcore_dec(hdev, dcore_id);
4521
4522 gaudi2_stop_pcie_dec(hdev);
4523 }
4524
4525 static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
4526 {
4527 u32 reg_base, reg_val;
4528
4529 reg_base = gaudi2_arc_blocks_bases[cpu_id];
4530 if (run_mode == HL_ENGINE_CORE_RUN)
4531 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
4532 else
4533 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
4534
4535 WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
4536 }
4537
4538 static void gaudi2_halt_arcs(struct hl_device *hdev)
4539 {
4540 u16 arc_id;
4541
4542 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
4543 if (gaudi2_is_arc_enabled(hdev, arc_id))
4544 gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
4545 }
4546 }
4547
4548 static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
4549 {
4550 int rc;
4551 u32 reg_base, val, ack_mask, timeout_usec = 100000;
4552
4553 if (hdev->pldm)
4554 timeout_usec *= 100;
4555
4556 reg_base = gaudi2_arc_blocks_bases[cpu_id];
4557 if (run_mode == HL_ENGINE_CORE_RUN)
4558 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
4559 else
4560 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;
4561
4562 rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
4563 val, ((val & ack_mask) == ack_mask),
4564 1000, timeout_usec);
4565
4566 if (!rc) {
4567 		/* Clear the request register */
4568 val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
4569 WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
4570 }
4571
4572 return rc;
4573 }
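
/*
 * Editor's note (illustrative): changing an ARC run state is a two-register
 * handshake: the driver writes a RUN/HALT request (gaudi2_set_arc_running_mode)
 * and then polls the matching ACK bit here before clearing the request. A
 * minimal sketch of halting one scheduler ARC and waiting for the ack:
 *
 *	gaudi2_set_arc_running_mode(hdev, CPU_ID_SCHED_ARC0, HL_ENGINE_CORE_HALT);
 *	rc = gaudi2_verify_arc_running_mode(hdev, CPU_ID_SCHED_ARC0,
 *					    HL_ENGINE_CORE_HALT);
 */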
4574
4575 static void gaudi2_reset_arcs(struct hl_device *hdev)
4576 {
4577 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4578 u16 arc_id;
4579
4580 if (!gaudi2)
4581 return;
4582
4583 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
4584 if (gaudi2_is_arc_enabled(hdev, arc_id))
4585 gaudi2_clr_arc_id_cap(hdev, arc_id);
4586 }
4587
4588 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
4589 {
4590 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4591 u32 queue_id;
4592 int i;
4593
4594 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4595 return;
4596
4597 queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4598
4599 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4600 if (!(hdev->nic_ports_mask & BIT(i)))
4601 continue;
4602
4603 gaudi2_qman_manual_flush_common(hdev, queue_id);
4604 }
4605 }
4606
4607 static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
4608 u32 num_cores, u32 core_command)
4609 {
4610 int i, rc;
4611
4612 for (i = 0 ; i < num_cores ; i++) {
4613 if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
4614 gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
4615 }
4616
4617 for (i = 0 ; i < num_cores ; i++) {
4618 if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
4619 rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);
4620
4621 if (rc) {
4622 dev_err(hdev->dev, "failed to %s arc: %d\n",
4623 (core_command == HL_ENGINE_CORE_HALT) ?
4624 "HALT" : "RUN", core_ids[i]);
4625 return -1;
4626 }
4627 }
4628 }
4629
4630 return 0;
4631 }
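
/*
 * Editor's note (illustrative): the command is sent to every requested core
 * before any acknowledgment is polled, which lets the cores change state in
 * parallel rather than serializing one ack-wait per core.
 */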
4632
4633 static int gaudi2_set_tpc_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4634 {
4635 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4636 u32 reg_base, reg_addr, reg_val, tpc_id;
4637
4638 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
4639 return 0;
4640
4641 tpc_id = gaudi2_tpc_engine_id_to_tpc_id[engine_id];
4642 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + tpc_id)))
4643 return 0;
4644
4645 reg_base = gaudi2_tpc_cfg_blocks_bases[tpc_id];
4646 reg_addr = reg_base + TPC_CFG_STALL_OFFSET;
4647 reg_val = FIELD_PREP(DCORE0_TPC0_CFG_TPC_STALL_V_MASK,
4648 (engine_command == HL_ENGINE_STALL) ? 1 : 0);
4649 WREG32(reg_addr, reg_val);
4650
4651 if (engine_command == HL_ENGINE_RESUME) {
4652 reg_base = gaudi2_tpc_eml_cfg_blocks_bases[tpc_id];
4653 reg_addr = reg_base + TPC_EML_CFG_DBG_CNT_OFFSET;
4654 RMWREG32(reg_addr, 0x1, DCORE0_TPC0_EML_CFG_DBG_CNT_DBG_EXIT_MASK);
4655 }
4656
4657 return 0;
4658 }
4659
4660 static int gaudi2_set_mme_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4661 {
4662 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4663 u32 reg_base, reg_addr, reg_val, mme_id;
4664
4665 mme_id = gaudi2_mme_engine_id_to_mme_id[engine_id];
4666 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + mme_id)))
4667 return 0;
4668
4669 reg_base = gaudi2_mme_ctrl_lo_blocks_bases[mme_id];
4670 reg_addr = reg_base + MME_CTRL_LO_QM_STALL_OFFSET;
4671 reg_val = FIELD_PREP(DCORE0_MME_CTRL_LO_QM_STALL_V_MASK,
4672 (engine_command == HL_ENGINE_STALL) ? 1 : 0);
4673 WREG32(reg_addr, reg_val);
4674
4675 return 0;
4676 }
4677
4678 static int gaudi2_set_edma_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4679 {
4680 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4681 u32 reg_base, reg_addr, reg_val, edma_id;
4682
4683 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
4684 return 0;
4685
4686 edma_id = gaudi2_edma_engine_id_to_edma_id[engine_id];
4687 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + edma_id)))
4688 return 0;
4689
4690 reg_base = gaudi2_dma_core_blocks_bases[edma_id];
4691 reg_addr = reg_base + EDMA_CORE_CFG_STALL_OFFSET;
4692 reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK,
4693 (engine_command == HL_ENGINE_STALL) ? 1 : 0);
4694 WREG32(reg_addr, reg_val);
4695
4696 if (engine_command == HL_ENGINE_STALL) {
4697 reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK, 0x1) |
4698 FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_FLUSH_MASK, 0x1);
4699 WREG32(reg_addr, reg_val);
4700 }
4701
4702 return 0;
4703 }
4704
4705 static int gaudi2_set_engine_modes(struct hl_device *hdev,
4706 u32 *engine_ids, u32 num_engines, u32 engine_command)
4707 {
4708 int i, rc;
4709
4710 for (i = 0 ; i < num_engines ; ++i) {
4711 switch (engine_ids[i]) {
4712 case GAUDI2_DCORE0_ENGINE_ID_TPC_0 ... GAUDI2_DCORE0_ENGINE_ID_TPC_5:
4713 case GAUDI2_DCORE1_ENGINE_ID_TPC_0 ... GAUDI2_DCORE1_ENGINE_ID_TPC_5:
4714 case GAUDI2_DCORE2_ENGINE_ID_TPC_0 ... GAUDI2_DCORE2_ENGINE_ID_TPC_5:
4715 case GAUDI2_DCORE3_ENGINE_ID_TPC_0 ... GAUDI2_DCORE3_ENGINE_ID_TPC_5:
4716 rc = gaudi2_set_tpc_engine_mode(hdev, engine_ids[i], engine_command);
4717 if (rc)
4718 return rc;
4719
4720 break;
4721 case GAUDI2_DCORE0_ENGINE_ID_MME:
4722 case GAUDI2_DCORE1_ENGINE_ID_MME:
4723 case GAUDI2_DCORE2_ENGINE_ID_MME:
4724 case GAUDI2_DCORE3_ENGINE_ID_MME:
4725 rc = gaudi2_set_mme_engine_mode(hdev, engine_ids[i], engine_command);
4726 if (rc)
4727 return rc;
4728
4729 break;
4730 case GAUDI2_DCORE0_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE0_ENGINE_ID_EDMA_1:
4731 case GAUDI2_DCORE1_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE1_ENGINE_ID_EDMA_1:
4732 case GAUDI2_DCORE2_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE2_ENGINE_ID_EDMA_1:
4733 case GAUDI2_DCORE3_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE3_ENGINE_ID_EDMA_1:
4734 rc = gaudi2_set_edma_engine_mode(hdev, engine_ids[i], engine_command);
4735 if (rc)
4736 return rc;
4737
4738 break;
4739 default:
4740 dev_err(hdev->dev, "Invalid engine ID %u\n", engine_ids[i]);
4741 return -EINVAL;
4742 }
4743 }
4744
4745 return 0;
4746 }
4747
4748 static int gaudi2_set_engines(struct hl_device *hdev, u32 *engine_ids,
4749 u32 num_engines, u32 engine_command)
4750 {
4751 switch (engine_command) {
4752 case HL_ENGINE_CORE_HALT:
4753 case HL_ENGINE_CORE_RUN:
4754 return gaudi2_set_engine_cores(hdev, engine_ids, num_engines, engine_command);
4755
4756 case HL_ENGINE_STALL:
4757 case HL_ENGINE_RESUME:
4758 return gaudi2_set_engine_modes(hdev, engine_ids, num_engines, engine_command);
4759
4760 default:
4761 dev_err(hdev->dev, "failed to execute command id %u\n", engine_command);
4762 return -EINVAL;
4763 }
4764 }
4765
4766 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4767 {
4768 u32 wait_timeout_ms;
4769
4770 if (hdev->pldm)
4771 wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
4772 else
4773 wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;
4774
4775 if (fw_reset)
4776 goto skip_engines;
4777
4778 gaudi2_stop_dma_qmans(hdev);
4779 gaudi2_stop_mme_qmans(hdev);
4780 gaudi2_stop_tpc_qmans(hdev);
4781 gaudi2_stop_rot_qmans(hdev);
4782 gaudi2_stop_nic_qmans(hdev);
4783 msleep(wait_timeout_ms);
4784
4785 gaudi2_halt_arcs(hdev);
4786 gaudi2_dma_stall(hdev);
4787 gaudi2_mme_stall(hdev);
4788 gaudi2_tpc_stall(hdev);
4789 gaudi2_rotator_stall(hdev);
4790
4791 msleep(wait_timeout_ms);
4792
4793 gaudi2_stop_dec(hdev);
4794
4795 /*
4796 	 * In case of soft reset, do a manual flush of the QMANs (currently done
4797 	 * only for the NIC QMANs).
4798 */
4799 if (!hard_reset)
4800 gaudi2_nic_qmans_manual_flush(hdev);
4801
4802 gaudi2_disable_dma_qmans(hdev);
4803 gaudi2_disable_mme_qmans(hdev);
4804 gaudi2_disable_tpc_qmans(hdev);
4805 gaudi2_disable_rot_qmans(hdev);
4806 gaudi2_disable_nic_qmans(hdev);
4807 gaudi2_disable_timestamp(hdev);
4808
4809 skip_engines:
4810 if (hard_reset) {
4811 gaudi2_disable_msix(hdev);
4812 return;
4813 }
4814
4815 gaudi2_sync_irqs(hdev);
4816 }
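
/*
 * Editor's note (illustrative summary): the halt sequence above is ordered
 * from the front of the pipeline backwards: first the QMANs stop fetching new
 * work, then (after a settle delay) the ARCs and the compute/DMA engines are
 * stalled, the decoders are gracefully stopped, and finally the QMANs are
 * disabled. On a hard reset MSI-X is torn down as well; on a soft reset the
 * IRQs are only synchronized.
 */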
4817
4818 static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
4819 {
4820 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
4821
4822 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
4823 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
4824 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
4825 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
4826 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
4827 pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
4828 pre_fw_load->wait_for_preboot_extended_timeout =
4829 GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC;
4830 }
4831
4832 static void gaudi2_init_firmware_loader(struct hl_device *hdev)
4833 {
4834 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4835 struct dynamic_fw_load_mgr *dynamic_loader;
4836 struct cpu_dyn_regs *dyn_regs;
4837
4838 /* fill common fields */
4839 fw_loader->fw_comp_loaded = FW_TYPE_NONE;
4840 fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
4841 fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
4842 fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
4843 fw_loader->skip_bmc = false;
4844 fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
4845 fw_loader->dram_bar_id = DRAM_BAR_ID;
4846 fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;
4847
4848 	/* Here we set initial values for a few specific dynamic regs (before
4849 	 * reading the first descriptor from FW, these values have to be
4850 	 * hard-coded). In later stages of the protocol these values are
4851 	 * updated automatically by reading the FW descriptor, so the data there
4852 	 * is always up-to-date.
4853 */
4854 dynamic_loader = &hdev->fw_loader.dynamic_loader;
4855 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
4856 dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
4857 dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
4858 dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
4859 }
4860
4861 static int gaudi2_init_cpu(struct hl_device *hdev)
4862 {
4863 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4864 int rc;
4865
4866 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4867 return 0;
4868
4869 if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
4870 return 0;
4871
4872 rc = hl_fw_init_cpu(hdev);
4873 if (rc)
4874 return rc;
4875
4876 gaudi2->hw_cap_initialized |= HW_CAP_CPU;
4877
4878 return 0;
4879 }
4880
4881 static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4882 {
4883 struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
4884 struct asic_fixed_properties *prop = &hdev->asic_prop;
4885 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4886 struct cpu_dyn_regs *dyn_regs;
4887 struct hl_eq *eq;
4888 u32 status;
4889 int err;
4890
4891 if (!hdev->cpu_queues_enable)
4892 return 0;
4893
4894 if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
4895 return 0;
4896
4897 eq = &hdev->event_queue;
4898
4899 dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4900
4901 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4902 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4903
4904 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4905 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4906
4907 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
4908 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));
4909
4910 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4911 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4912 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4913
4914 /* Used for EQ CI */
4915 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4916
4917 WREG32(mmCPU_IF_PF_PQ_PI, 0);
4918
4919 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4920
4921 /* Let the ARC know we are ready as it is now handling those queues */
4922
4923 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
4924 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
4925
4926 err = hl_poll_timeout(
4927 hdev,
4928 mmCPU_IF_QUEUE_INIT,
4929 status,
4930 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4931 1000,
4932 cpu_timeout);
4933
4934 if (err) {
4935 dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n");
4936 return -EIO;
4937 }
4938
4939 /* update FW application security bits */
4940 if (prop->fw_cpu_boot_dev_sts0_valid)
4941 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4942
4943 if (prop->fw_cpu_boot_dev_sts1_valid)
4944 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4945
4946 gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q;
4947 return 0;
4948 }
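
/*
 * Editor's note (illustrative): the handshake with the device CPU is: program
 * the PQ/EQ/CQ base addresses and lengths, write PQ_INIT_STATUS_READY_FOR_CP,
 * kick the CPU through the GIC host-PI-update interrupt, then poll until the
 * CPU flips the status to PQ_INIT_STATUS_READY_FOR_HOST (or fail with -EIO on
 * timeout).
 */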
4949
4950 static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
4951 u32 queue_id_base)
4952 {
4953 struct hl_hw_queue *q;
4954 u32 pq_id, pq_offset;
4955
4956 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4957 q = &hdev->kernel_queues[queue_id_base + pq_id];
4958 pq_offset = pq_id * 4;
4959
4960 WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
4961 lower_32_bits(q->bus_address));
4962 WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
4963 upper_32_bits(q->bus_address));
4964 WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
4965 WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
4966 WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
4967 }
4968 }
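
/*
 * Editor's note (illustrative): each QMAN exposes NUM_OF_PQ_PER_QMAN physical
 * queues whose per-PQ registers are laid out at a 4-byte stride, hence
 * pq_offset = pq_id * 4. A sketch of addressing one register of PQ 'k',
 * assuming 'k' is a valid PQ index for this QMAN:
 *
 *	WREG32(reg_base + QM_PQ_PI_0_OFFSET + (k * 4), 0);
 */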
4969
4970 static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base)
4971 {
4972 u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi;
4973
4974 mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4975 mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4976 so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4977 so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4978
4979 for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) {
4980 cp_offset = cp_id * 4;
4981
4982 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo);
4983 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi);
4984 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo);
4985 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi);
4986 }
4987
4988 /* allow QMANs to accept work from ARC CQF */
4989 WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1));
4990 }
4991
4992 static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base,
4993 u32 queue_id_base)
4994 {
4995 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4996 u32 pq_id, pq_offset, so_base_lo, so_base_hi;
4997
4998 so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4999 so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
5000
5001 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
5002 pq_offset = pq_id * 4;
5003
5004 		/* The PQC HBW completion is not used, so point it at the scratchpad buffer */
5005 WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset,
5006 lower_32_bits(gaudi2->scratchpad_bus_address));
5007 WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset,
5008 upper_32_bits(gaudi2->scratchpad_bus_address));
5009 WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset,
5010 ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry)));
5011
5012 WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0);
5013 WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA);
5014 WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo);
5015 WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi);
5016 }
5017
5018 /* Enable QMAN H/W completion */
5019 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
5020 }
5021
5022 static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base)
5023 {
5024 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5025 u32 sp_reg_addr;
5026
5027 switch (queue_id_base) {
5028 case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3:
5029 fallthrough;
5030 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
5031 fallthrough;
5032 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
5033 fallthrough;
5034 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
5035 fallthrough;
5036 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
5037 sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
5038 break;
5039 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
5040 fallthrough;
5041 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
5042 fallthrough;
5043 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
5044 fallthrough;
5045 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
5046 sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
5047 break;
5048 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
5049 fallthrough;
5050 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
5051 fallthrough;
5052 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
5053 fallthrough;
5054 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
5055 sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
5056 break;
5057 case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3:
5058 sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl);
5059 break;
5060 case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3:
5061 sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
5062 break;
5063 default:
5064 dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base);
5065 return 0;
5066 }
5067
5068 return sp_reg_addr;
5069 }
5070
5071 static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
5072 u32 queue_id_base)
5073 {
5074 u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset;
5075 int map_table_entry;
5076
5077 WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot);
5078
5079 irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base);
5080 WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset));
5081 WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset));
5082
5083 map_table_entry = gaudi2_qman_async_event_id[queue_id_base];
5084 WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET,
5085 gaudi2_irq_map_table[map_table_entry].cpu_id);
5086
5087 WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK);
5088
5089 WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT);
5090 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0);
5091 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);
5092
5093 /* Enable the QMAN channel.
5094 * PDMA QMAN configuration is different, as we do not allow user to
5095 * access some of the CPs.
5096 * PDMA0: CP2/3 are reserved for the ARC usage.
5097 * PDMA1: CP1/2/3 are reserved for the ARC usage.
5098 */
5099 if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
5100 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
5101 else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
5102 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
5103 else
5104 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
5105 }
5106
5107 static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
5108 u32 queue_id_base)
5109 {
5110 u32 pq_id;
5111
5112 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++)
5113 hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;
5114
5115 gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
5116 gaudi2_init_qman_cp(hdev, reg_base);
5117 gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base);
5118 gaudi2_init_qman_common(hdev, reg_base, queue_id_base);
5119 }
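
/*
 * Editor's note (illustrative): the per-QMAN bring-up order matters here: the
 * PQs, CPs and PQC are fully configured first, and only
 * gaudi2_init_qman_common() writes QM_GLBL_CFG0 at the end, so the queue
 * starts fetching work only after everything else is in place.
 */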
5120
5121 static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base,
5122 u32 dma_core_id, bool is_secure)
5123 {
5124 u32 prot, irq_handler_offset;
5125 struct cpu_dyn_regs *dyn_regs;
5126 int map_table_entry;
5127
5128 prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT;
5129 if (is_secure)
5130 prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT;
5131
5132 WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot);
5133
5134 dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5135 irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
5136
5137 WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET,
5138 lower_32_bits(CFG_BASE + irq_handler_offset));
5139
5140 WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET,
5141 upper_32_bits(CFG_BASE + irq_handler_offset));
5142
5143 map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id];
5144 WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET,
5145 gaudi2_irq_map_table[map_table_entry].cpu_id);
5146
5147 /* Enable the DMA channel */
5148 WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT);
5149 }
5150
5151 static void gaudi2_init_kdma(struct hl_device *hdev)
5152 {
5153 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5154 u32 reg_base;
5155
5156 if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA)
5157 return;
5158
5159 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA];
5160
5161 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true);
5162
5163 gaudi2->hw_cap_initialized |= HW_CAP_KDMA;
5164 }
5165
5166 static void gaudi2_init_pdma(struct hl_device *hdev)
5167 {
5168 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5169 u32 reg_base;
5170
5171 if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK)
5172 return;
5173
5174 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0];
5175 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false);
5176
5177 reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
5178 gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);
5179
5180 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1];
5181 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false);
5182
5183 reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0];
5184 gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0);
5185
5186 gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK;
5187 }
5188
5189 static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq)
5190 {
5191 u32 reg_base, base_edma_core_id, base_edma_qman_id;
5192
5193 base_edma_core_id = DMA_CORE_ID_EDMA0 + seq;
5194 base_edma_qman_id = edma_stream_base[seq];
5195
5196 reg_base = gaudi2_dma_core_blocks_bases[base_edma_core_id];
5197 gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false);
5198
5199 reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id];
5200 gaudi2_init_qman(hdev, reg_base, base_edma_qman_id);
5201 }
5202
5203 static void gaudi2_init_edma(struct hl_device *hdev)
5204 {
5205 struct asic_fixed_properties *prop = &hdev->asic_prop;
5206 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5207 int dcore, inst;
5208
5209 if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK)
5210 return;
5211
5212 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
5213 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
5214 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
5215
5216 if (!(prop->edma_enabled_mask & BIT(seq)))
5217 continue;
5218
5219 gaudi2_init_edma_instance(hdev, seq);
5220
5221 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
5222 }
5223 }
5224 }
5225
5226 /*
5227 * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
5228 * @hdev: pointer to habanalabs device structure.
5229 * @sob_id: sync object ID.
5230 * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
5231 * @interrupt_id: interrupt ID.
5232 *
5233 * Some initiators cannot have HBW address in their completion address registers, and thus cannot
5234 * write directly to the HBW host memory of the virtual MSI-X doorbell.
5235 * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
5236 *
5237 * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
5238 * In addition to the HBW write, the other 2 messages are for preparing the monitor to next
5239 * completion, by decrementing the sync object value and re-arming the monitor.
5240 */
5241 static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
5242 u32 first_mon_id, u32 interrupt_id)
5243 {
5244 u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
5245 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5246 u64 addr;
5247 u8 mask;
5248
5249 /* Reset the SOB value */
5250 sob_offset = sob_id * sizeof(u32);
5251 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
5252
5253 /* Configure 3 monitors:
5254 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
5255 * 2. Decrement SOB value by 1.
5256 * 3. Re-arm the master monitor.
5257 */
5258
5259 first_mon_offset = first_mon_id * sizeof(u32);
5260
5261 /* 2nd monitor: Decrement SOB value by 1 */
5262 mon_offset = first_mon_offset + sizeof(u32);
5263
5264 addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
5265 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5266 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5267
5268 payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
5269 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
5270 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
5271 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5272
5273 /* 3rd monitor: Re-arm the master monitor */
5274 mon_offset = first_mon_offset + 2 * sizeof(u32);
5275
5276 addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
5277 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5278 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5279
5280 sob_group = sob_id / 8;
5281 mask = ~BIT(sob_id & 0x7);
5282 mode = 0; /* comparison mode is "greater than or equal to" */
5283 arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
5284 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
5285 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
5286 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);
5287
5288 payload = arm;
5289 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5290
5291 /* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
5292 mon_offset = first_mon_offset;
5293
5294 config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
5295 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);
5296
5297 addr = gaudi2->virt_msix_db_dma_addr;
5298 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5299 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5300
5301 payload = interrupt_id;
5302 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5303
5304 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
5305 }
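
/*
 * Editor's note (illustrative): the resulting chain per completion is: an
 * initiator LBW-writes the SOB, the armed master monitor fires and issues the
 * 3 configured messages (WR_NUM is programmed to 2, which per the comment
 * above encodes 3 writes): the interrupt ID to the virtual MSI-X doorbell in
 * host memory, a "-1" increment that rewinds the SOB, and a re-arm of the
 * master monitor itself, so the next SOB write repeats the cycle without
 * software intervention.
 */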
5306
5307 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
5308 {
5309 u32 decoder_id, sob_id, first_mon_id, interrupt_id;
5310 struct asic_fixed_properties *prop = &hdev->asic_prop;
5311
5312 /* Decoder normal/abnormal interrupts */
5313 for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
5314 if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
5315 continue;
5316
5317 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
5318 first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
5319 interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
5320 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
5321
5322 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
5323 first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
5324 interrupt_id += 1;
5325 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
5326 }
5327 }
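
/*
 * Editor's note (illustrative): per enabled decoder this reserves one SOB plus
 * 3 monitors for the normal interrupt and another SOB plus 3 monitors for the
 * abnormal one, and the interrupt IDs advance in normal/abnormal pairs, which
 * matches the even/odd vector split handled in gaudi2_dec_enable_msix().
 */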
5328
5329 static void gaudi2_init_sm(struct hl_device *hdev)
5330 {
5331 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5332 u64 cq_address;
5333 u32 reg_val;
5334 int i;
5335
5336 /* Enable HBW/LBW CQ for completion monitors */
5337 reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
5338 reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1);
5339
5340 for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++)
5341 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
5342
5343 /* Enable only HBW CQ for KDMA completion monitor */
5344 reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
5345 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
5346
5347 /* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */
5348 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr));
5349 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr));
5350 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);
5351
5352 for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
5353 cq_address =
5354 hdev->completion_queue[i].bus_address;
5355
5356 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i),
5357 lower_32_bits(cq_address));
5358 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i),
5359 upper_32_bits(cq_address));
5360 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i),
5361 ilog2(HL_CQ_SIZE_IN_BYTES));
5362 }
5363
5364 	/* Configure kernel ASID and MMU BP */
5365 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
5366 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);
5367
5368 /* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
5369 gaudi2_prepare_sm_for_virt_msix_db(hdev);
5370 }
5371
5372 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
5373 {
5374 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5375 u32 reg_val;
5376 int i;
5377
5378 reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0);
5379 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1);
5380 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1);
5381 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1);
5382 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1);
5383 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1);
5384
5385 WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val);
5386 WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF);
5387
5388 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) {
5389 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i);
5390 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]);
5391 }
5392 }
5393
5394 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
5395 bool config_qman_only)
5396 {
5397 u32 queue_id_base, reg_base;
5398
5399 switch (dcore_id) {
5400 case 0:
5401 queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
5402 break;
5403 case 1:
5404 queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
5405 break;
5406 case 2:
5407 queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
5408 break;
5409 case 3:
5410 queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
5411 break;
5412 default:
5413 dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
5414 return;
5415 }
5416
5417 if (!config_qman_only) {
5418 reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
5419 gaudi2_init_mme_acc(hdev, reg_base);
5420 }
5421
5422 reg_base = gaudi2_qm_blocks_bases[queue_id_base];
5423 gaudi2_init_qman(hdev, reg_base, queue_id_base);
5424 }
5425
5426 static void gaudi2_init_mme(struct hl_device *hdev)
5427 {
5428 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5429 int i;
5430
5431 if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK)
5432 return;
5433
5434 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
5435 gaudi2_init_dcore_mme(hdev, i, false);
5436
5437 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i);
5438 }
5439 }
5440
5441 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base)
5442 {
5443 /* Mask arithmetic and QM interrupts in TPC */
5444 WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE);
5445
5446 /* Set 16 cache lines */
5447 WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET,
5448 2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT);
5449 }
5450
5451 struct gaudi2_tpc_init_cfg_data {
5452 enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES];
5453 };
5454
5455 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
5456 u32 offset, struct iterate_module_ctx *ctx)
5457 {
5458 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5459 struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
5460 u32 queue_id_base;
5461 u8 seq;
5462
5463 queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN);
5464
5465 if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1))
5466 		/* The extra TPC of DCORE0 gets the last sequence number */
5467 seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE;
5468 else
5469 seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
5470
5471 gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset);
5472 gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base);
5473
5474 gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq);
5475 }
5476
5477 static void gaudi2_init_tpc(struct hl_device *hdev)
5478 {
5479 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5480 struct gaudi2_tpc_init_cfg_data init_cfg_data;
5481 struct iterate_module_ctx tpc_iter;
5482
5483 if (!hdev->asic_prop.tpc_enabled_mask)
5484 return;
5485
5486 if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK)
5487 return;
5488
5489 init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0;
5490 init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0;
5491 init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0;
5492 init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0;
5493 tpc_iter.fn = &gaudi2_init_tpc_config;
5494 tpc_iter.data = &init_cfg_data;
5495 gaudi2_iterate_tpcs(hdev, &tpc_iter);
5496 }
5497
5498 static void gaudi2_init_rotator(struct hl_device *hdev)
5499 {
5500 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5501 u32 i, reg_base, queue_id;
5502
5503 queue_id = GAUDI2_QUEUE_ID_ROT_0_0;
5504
5505 for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
5506 reg_base = gaudi2_qm_blocks_bases[queue_id];
5507 gaudi2_init_qman(hdev, reg_base, queue_id);
5508
5509 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i);
5510 }
5511 }
5512
5513 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
5514 {
5515 u32 sob_id;
5516
5517 /* VCMD normal interrupt */
5518 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
5519 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
5520 mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
5521 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
5522
5523 /* VCMD abnormal interrupt */
5524 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
5525 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
5526 mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
5527 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
5528 }
5529
5530 static void gaudi2_init_dec(struct hl_device *hdev)
5531 {
5532 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5533 u32 dcore_id, dec_id, dec_bit;
5534 u64 base_addr;
5535
5536 if (!hdev->asic_prop.decoder_enabled_mask)
5537 return;
5538
5539 if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK)
5540 return;
5541
5542 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5543 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
5544 dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
5545
5546 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
5547 continue;
5548
5549 base_addr = mmDCORE0_DEC0_CMD_BASE +
5550 BRDG_CTRL_BLOCK_OFFSET +
5551 dcore_id * DCORE_OFFSET +
5552 dec_id * DCORE_VDEC_OFFSET;
5553
5554 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
5555
5556 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
5557 }
5558
5559 for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) {
5560 dec_bit = PCIE_DEC_SHIFT + dec_id;
5561 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
5562 continue;
5563
5564 base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET +
5565 dec_id * DCORE_VDEC_OFFSET;
5566
5567 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
5568
5569 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
5570 }
5571 }
5572
5573 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
5574 u32 stlb_base, u32 asid, u64 phys_addr)
5575 {
5576 u32 status, timeout_usec;
5577 int rc;
5578
5579 if (hdev->pldm || !hdev->pdev)
5580 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5581 else
5582 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5583
5584 WREG32(stlb_base + STLB_ASID_OFFSET, asid);
5585 WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
5586 WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT);
5587 WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000);
5588
5589 rc = hl_poll_timeout(
5590 hdev,
5591 stlb_base + STLB_BUSY_OFFSET,
5592 status,
5593 !(status & 0x80000000),
5594 1000,
5595 timeout_usec);
5596
5597 if (rc) {
5598 dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid);
5599 return rc;
5600 }
5601
5602 return 0;
5603 }
5604
5605 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base,
5606 u32 start_offset, u32 inv_start_val,
5607 u32 flags)
5608 {
5609 /* clear PMMU mem line cache (only needed in mmu range invalidation) */
5610 if (flags & MMU_OP_CLEAR_MEMCACHE)
5611 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1);
5612
5613 if (flags & MMU_OP_SKIP_LOW_CACHE_INV)
5614 return;
5615
5616 WREG32(stlb_base + start_offset, inv_start_val);
5617 }
5618
5619 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base,
5620 struct gaudi2_cache_invld_params *inv_params)
5621 {
5622 u32 status, timeout_usec, start_offset;
5623 int rc;
5624
5625 timeout_usec = (hdev->pldm) ? GAUDI2_PLDM_MMU_TIMEOUT_USEC :
5626 GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5627
5628 /* poll PMMU mem line cache (only needed in mmu range invalidation) */
5629 if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
5630 rc = hl_poll_timeout(
5631 hdev,
5632 mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
5633 status,
5634 status & 0x1,
5635 1000,
5636 timeout_usec);
5637
5638 if (rc)
5639 return rc;
5640
5641 /* Need to manually reset the status to 0 */
5642 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
5643 }
5644
5645 /* Lower cache does not work with cache lines, hence we can skip its
5646 * invalidation upon map and invalidate only upon unmap
5647 */
5648 if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
5649 return 0;
5650
5651 start_offset = inv_params->range_invalidation ?
5652 STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;
5653
5654 rc = hl_poll_timeout(
5655 hdev,
5656 stlb_base + start_offset,
5657 status,
5658 !(status & 0x1),
5659 1000,
5660 timeout_usec);
5661
5662 return rc;
5663 }
5664
5665 bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
5666 {
5667 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5668 u32 hw_cap;
5669
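/* The HMMU capability bits are consecutive, NUM_OF_HMMU_PER_DCORE bits per dcore,
 * so the shift below selects the capability bit of HMMU <hmmu_id> in dcore <dcore_id>.
 */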
5670 hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);
5671
5672 if (gaudi2->hw_cap_initialized & hw_cap)
5673 return true;
5674
5675 return false;
5676 }
5677
5678 /* this function shall be called only for HMMUs for which the capability bit is set */
5679 static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
5680 {
5681 u32 offset;
5682
5683 offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5684 return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
5685 }
5686
5687 static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
5688 struct gaudi2_cache_invld_params *inv_params)
5689 {
5690 u32 start_offset;
5691
5692 if (inv_params->range_invalidation) {
5693 /* Set the address range.
5694 * Note: by design, the start address written to the register is
5695 * not included in the invalidation range.
5696 * That is why we program an address one lower than the first
5697 * address we actually want to be included in the invalidation.
5698 */
5699 u64 start = inv_params->start_va - 1;
5700
5701 start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET;
5702
5703 WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET,
5704 start >> MMU_RANGE_INV_VA_LSB_SHIFT);
5705
5706 WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET,
5707 start >> MMU_RANGE_INV_VA_MSB_SHIFT);
5708
5709 WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET,
5710 inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT);
5711
5712 WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET,
5713 inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT);
5714 } else {
5715 start_offset = STLB_INV_ALL_START_OFFSET;
5716 }
5717
5718 gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset,
5719 inv_params->inv_start_val, inv_params->flags);
5720 }
5721
5722 static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev,
5723 int dcore_id, int hmmu_id,
5724 struct gaudi2_cache_invld_params *inv_params)
5725 {
5726 u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
5727
5728 gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params);
5729 }
5730
5731 static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev,
5732 int dcore_id, int hmmu_id,
5733 struct gaudi2_cache_invld_params *inv_params)
5734 {
5735 u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
5736
5737 return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params);
5738 }
5739
5740 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev,
5741 struct gaudi2_cache_invld_params *inv_params)
5742 {
5743 int dcore_id, hmmu_id;
5744
5745 /* first send all invalidation commands */
5746 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5747 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5748 if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5749 continue;
5750
5751 gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params);
5752 }
5753 }
5754
5755 /* next, poll all invalidations status */
5756 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5757 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5758 int rc;
5759
5760 if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5761 continue;
5762
5763 rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id,
5764 inv_params);
5765 if (rc)
5766 return rc;
5767 }
5768 }
5769
5770 return 0;
5771 }
5772
5773 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
5774 {
5775 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5776 struct gaudi2_cache_invld_params invld_params;
5777 int rc = 0;
5778
5779 if (hdev->reset_info.hard_reset_pending)
5780 return rc;
5781
5782 invld_params.range_invalidation = false;
5783 invld_params.inv_start_val = 1;
5784
5785 if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5786 invld_params.flags = flags;
5787 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5788 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5789 &invld_params);
5790 } else if (flags & MMU_OP_PHYS_PACK) {
5791 invld_params.flags = 0;
5792 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5793 }
5794
5795 return rc;
5796 }
5797
5798 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
5799 u32 flags, u32 asid, u64 va, u64 size)
5800 {
5801 struct gaudi2_cache_invld_params invld_params = {0};
5802 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5803 u64 start_va, end_va;
5804 u32 inv_start_val;
5805 int rc = 0;
5806
5807 if (hdev->reset_info.hard_reset_pending)
5808 return 0;
5809
5810 inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT |
5811 1 << MMU_RANGE_INV_ASID_EN_SHIFT |
5812 asid << MMU_RANGE_INV_ASID_SHIFT);
5813 start_va = va;
5814 end_va = start_va + size;
5815
5816 if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5817 /* As range invalidation does not support zero address we will
5818 * do full invalidation in this case
5819 */
5820 if (start_va) {
5821 invld_params.range_invalidation = true;
5822 invld_params.start_va = start_va;
5823 invld_params.end_va = end_va;
5824 invld_params.inv_start_val = inv_start_val;
5825 invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE;
5826 } else {
5827 invld_params.range_invalidation = false;
5828 invld_params.inv_start_val = 1;
5829 invld_params.flags = flags;
5830 }
5831
5832
5833 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5834 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5835 &invld_params);
5836 if (rc)
5837 return rc;
5838
5839 } else if (flags & MMU_OP_PHYS_PACK) {
5840 invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va);
5841 invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va);
5842 invld_params.inv_start_val = inv_start_val;
5843 invld_params.flags = flags;
5844 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5845 }
5846
5847 return rc;
5848 }
5849
5850 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
5851 {
5852 struct asic_fixed_properties *prop = &hdev->asic_prop;
5853 u64 hop0_addr;
5854 u32 asid, max_asid = prop->max_asid;
5855 int rc;
5856
5857 /* it takes too much time to init all of the ASIDs on palladium */
5858 if (hdev->pldm)
5859 max_asid = min((u32) 8, max_asid);
5860
5861 for (asid = 0 ; asid < max_asid ; asid++) {
5862 hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
5863 rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
5864 if (rc) {
5865 dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
5866 return rc;
5867 }
5868 }
5869
5870 return 0;
5871 }
5872
5873 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base)
5874 {
5875 u32 status, timeout_usec;
5876 int rc;
5877
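/* Init sequence, as implemented below: trigger an invalidate-all, wait for the
 * STLB SRAM init to complete, program the hop0 address for every ASID, clear
 * MMU bypass, wait for the invalidate-all to finish and only then enable the MMU.
 */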
5878 if (hdev->pldm || !hdev->pdev)
5879 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5880 else
5881 timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5882
5883 WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1);
5884
5885 rc = hl_poll_timeout(
5886 hdev,
5887 stlb_base + STLB_SRAM_INIT_OFFSET,
5888 status,
5889 !status,
5890 1000,
5891 timeout_usec);
5892
5893 if (rc)
5894 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");
5895
5896 rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base);
5897 if (rc)
5898 return rc;
5899
5900 WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);
5901
5902 rc = hl_poll_timeout(
5903 hdev,
5904 stlb_base + STLB_INV_ALL_START_OFFSET,
5905 status,
5906 !status,
5907 1000,
5908 timeout_usec);
5909
5910 if (rc)
5911 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n");
5912
5913 WREG32(mmu_base + MMU_ENABLE_OFFSET, 1);
5914
5915 return rc;
5916 }
5917
5918 static int gaudi2_pci_mmu_init(struct hl_device *hdev)
5919 {
5920 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5921 u32 mmu_base, stlb_base;
5922 int rc;
5923
5924 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU)
5925 return 0;
5926
5927 mmu_base = mmPMMU_HBW_MMU_BASE;
5928 stlb_base = mmPMMU_HBW_STLB_BASE;
5929
5930 RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5931 (0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
5932 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
5933 (4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
5934 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) |
5935 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT),
5936 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5937 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5938 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5939 PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5940 PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5941
5942 WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0);
5943
5944 if (PAGE_SIZE == SZ_64K) {
5945 /* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
5946 RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
5947 FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
5948 FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
5949 FIELD_PREP(
5950 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK,
5951 1),
5952 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK |
5953 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK |
5954 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
5955 }
5956
5957 WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);
5958
5959 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5960 if (rc)
5961 return rc;
5962
5963 gaudi2->hw_cap_initialized |= HW_CAP_PMMU;
5964
5965 return 0;
5966 }
5967
5968 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
5969 int hmmu_id)
5970 {
5971 struct asic_fixed_properties *prop = &hdev->asic_prop;
5972 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5973 u32 offset, mmu_base, stlb_base, hw_cap;
5974 u8 dmmu_seq;
5975 int rc;
5976
5977 dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id;
5978 hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq;
5979
5980 /*
5981 * return if DMMU is already initialized or if it's not out of
5982 * isolation (due to cluster binning)
5983 */
5984 if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq)))
5985 return 0;
5986
5987 offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5988 mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset;
5989 stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset;
5990
5991 RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
5992 MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);
5993
5994 RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5995 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
5996 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
5997 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
5998 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) |
5999 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3),
6000 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
6001 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
6002 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
6003 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
6004 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
6005
6006 RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
6007 STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);
6008
6009 WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);
6010
6011 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
6012 if (rc)
6013 return rc;
6014
6015 gaudi2->hw_cap_initialized |= hw_cap;
6016
6017 return 0;
6018 }
6019
6020 static int gaudi2_hbm_mmu_init(struct hl_device *hdev)
6021 {
6022 int rc, dcore_id, hmmu_id;
6023
6024 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
6025 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) {
6026 rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id);
6027 if (rc)
6028 return rc;
6029 }
6030
6031 return 0;
6032 }
6033
6034 static int gaudi2_mmu_init(struct hl_device *hdev)
6035 {
6036 int rc;
6037
6038 rc = gaudi2_pci_mmu_init(hdev);
6039 if (rc)
6040 return rc;
6041
6042 rc = gaudi2_hbm_mmu_init(hdev);
6043 if (rc)
6044 return rc;
6045
6046 return 0;
6047 }
6048
6049 static int gaudi2_hw_init(struct hl_device *hdev)
6050 {
6051 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6052 int rc;
6053
6054 /* Let's mark in the H/W that we have reached this point. We check
6055 * this value in the reset_before_init function to understand whether
6056 * we need to reset the chip before doing H/W init. This register is
6057 * cleared by the H/W upon H/W reset
6058 */
6059 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
6060
6061 /* Perform read from the device to make sure device is up */
6062 RREG32(mmHW_STATE);
6063
6064 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
6065 * So we set it here and if anyone tries to move it later to
6066 * a different address, there will be an error
6067 */
6068 if (hdev->asic_prop.iatu_done_by_fw)
6069 gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE;
6070
6071 /*
6072 * Before pushing u-boot/linux to device, need to set the hbm bar to
6073 * base address of dram
6074 */
6075 if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
6076 dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n");
6077 return -EIO;
6078 }
6079
6080 rc = gaudi2_init_cpu(hdev);
6081 if (rc) {
6082 dev_err(hdev->dev, "failed to initialize CPU\n");
6083 return rc;
6084 }
6085
6086 gaudi2_init_scrambler_hbm(hdev);
6087 gaudi2_init_kdma(hdev);
6088
6089 rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC);
6090 if (rc) {
6091 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc);
6092 return rc;
6093 }
6094
6095 rc = gaudi2->cpucp_info_get(hdev);
6096 if (rc) {
6097 dev_err(hdev->dev, "Failed to get cpucp info\n");
6098 return rc;
6099 }
6100
6101 rc = gaudi2_mmu_init(hdev);
6102 if (rc)
6103 return rc;
6104
6105 gaudi2_init_pdma(hdev);
6106 gaudi2_init_edma(hdev);
6107 gaudi2_init_sm(hdev);
6108 gaudi2_init_tpc(hdev);
6109 gaudi2_init_mme(hdev);
6110 gaudi2_init_rotator(hdev);
6111 gaudi2_init_dec(hdev);
6112 gaudi2_enable_timestamp(hdev);
6113
6114 rc = gaudi2_coresight_init(hdev);
6115 if (rc)
6116 goto disable_queues;
6117
6118 rc = gaudi2_enable_msix(hdev);
6119 if (rc)
6120 goto disable_queues;
6121
6122 /* Perform read from the device to flush all configuration */
6123 RREG32(mmHW_STATE);
6124
6125 return 0;
6126
6127 disable_queues:
6128 gaudi2_disable_dma_qmans(hdev);
6129 gaudi2_disable_mme_qmans(hdev);
6130 gaudi2_disable_tpc_qmans(hdev);
6131 gaudi2_disable_rot_qmans(hdev);
6132 gaudi2_disable_nic_qmans(hdev);
6133
6134 gaudi2_disable_timestamp(hdev);
6135
6136 return rc;
6137 }
6138
6139 /**
6140 * gaudi2_send_hard_reset_cmd - common function to handle reset
6141 *
6142 * @hdev: pointer to the habanalabs device structure
6143 *
6144 * This function handles the various possible scenarios for reset.
6145 * It considers whether the reset is handled by the driver or by FW and which FW components are loaded
6146 */
6147 static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
6148 {
6149 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6150 bool heartbeat_reset, preboot_only, cpu_initialized = false;
6151 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6152 u32 cpu_boot_status;
6153
6154 preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
6155 heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);
6156
6157 /*
6158 * Handle the corner case where the failure occurred while loading the CPU
6159 * management app, but the driver did not detect any failure while loading the FW.
6160 * In that scenario the driver sends only HALT_MACHINE,
6161 * and no one responds to this request since the FW is already back in preboot
6162 * and cannot handle such a command.
6163 * In this case, the next time the management app loads it checks the events register,
6164 * which still holds the halt indication, and reboots the device.
6165 * The solution is to let preboot clear all relevant registers before the next boot,
6166 * once the driver sends COMMS_RST_DEV.
6167 */
6168 cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);
6169
6170 if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
6171 (cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
6172 cpu_initialized = true;
6173
6174 /*
6175 * When Linux/Bootfit is loaded, this write to the SP can be interpreted in 2 ways:
6176 * 1. FW reset: FW initiates the reset sequence
6177 * 2. driver reset: FW will start the HALT sequence (the preparations for the
6178 * reset but not the reset itself, as it is not implemented
6179 * on its side) and LKD will wait to let FW complete the
6180 * sequence before issuing the reset
6181 */
6182 if (!preboot_only && cpu_initialized) {
6183 WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
6184 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);
6185
6186 msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
6187 }
6188
6189 /*
6190 * When working with preboot (without Linux/Boot fit) we can
6191 * communicate only using the COMMS commands to issue halt/reset.
6192 *
6193 * For the case in which we are working with Linux/Bootfit this is a hail-mary
6194 * attempt to revive the card in the small chance that the f/w has
6195 * experienced a watchdog event, which caused it to return back to preboot.
6196 * In that case, triggering reset through GIC won't help. We need to
6197 * trigger the reset as if Linux wasn't loaded.
6198 *
6199 * We do it only if the reset cause was HB, because that would be the
6200 * indication of such an event.
6201 *
6202 * If the watchdog hasn't expired but we still got a HB, then this won't
6203 * do any damage.
6204 */
6205
6206 if (heartbeat_reset || preboot_only || !cpu_initialized) {
6207 if (hdev->asic_prop.hard_reset_done_by_fw)
6208 hl_fw_ask_hard_reset_without_linux(hdev);
6209 else
6210 hl_fw_ask_halt_machine_without_linux(hdev);
6211 }
6212 }
6213
6214 /**
6215 * gaudi2_execute_hard_reset - execute hard reset by driver/FW
6216 *
6217 * @hdev: pointer to the habanalabs device structure
6218 *
6219 * This function executes hard reset based on if driver/FW should do the reset
6220 */
6221 static void gaudi2_execute_hard_reset(struct hl_device *hdev)
6222 {
6223 if (hdev->asic_prop.hard_reset_done_by_fw) {
6224 gaudi2_send_hard_reset_cmd(hdev);
6225 return;
6226 }
6227
6228 /* Set device to handle FLR by H/W as we will put the device
6229 * CPU to halt mode
6230 */
6231 WREG32(mmPCIE_AUX_FLR_CTRL,
6232 (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
6233
6234 gaudi2_send_hard_reset_cmd(hdev);
6235
6236 WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
6237 }
6238
6239 static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
6240 {
6241 int i, rc = 0;
6242 u32 reg_val;
6243
6244 for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
6245 rc = hl_poll_timeout(
6246 hdev,
6247 mmCPU_RST_STATUS_TO_HOST,
6248 reg_val,
6249 reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
6250 1000,
6251 poll_timeout_us);
6252
6253 if (rc)
6254 dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
6255 reg_val);
6256 return rc;
6257 }
6258
6259 /**
6260 * gaudi2_execute_soft_reset - execute soft reset by driver/FW
6261 *
6262 * @hdev: pointer to the habanalabs device structure
6263 * @driver_performs_reset: true if driver should perform reset instead of f/w.
6264 * @poll_timeout_us: time to wait for response from f/w.
6265 *
6266 * This function executes soft reset based on if driver/FW should do the reset
6267 */
6268 static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset,
6269 u32 poll_timeout_us)
6270 {
6271 int rc;
6272
6273 if (!driver_performs_reset) {
6274 if (hl_is_fw_sw_ver_below(hdev, 1, 10)) {
6275 /* set SP to indicate reset request sent to FW */
6276 WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
6277
6278 WREG32(mmGIC_HOST_SOFT_RST_IRQ_POLL_REG,
6279 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
6280
6281 /* wait for f/w response */
6282 rc = gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us);
6283 } else {
6284 rc = hl_fw_send_soft_reset(hdev);
6285 }
6286 return rc;
6287 }
6288
6289 /* Block access to engines, QMANs and SM during reset; these
6290 * RRs will be reconfigured after soft reset.
6291 * PCIE_MSIX is left unsecured to allow NIC packet processing during the reset.
6292 */
6293 gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
6294 mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);
6295
6296 gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
6297 mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
6298 mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);
6299
6300 WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
6301 return 0;
6302 }
6303
6304 static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 poll_timeout_us)
6305 {
6306 int i, rc = 0;
6307 u32 reg_val;
6308
6309 /* We poll the BTM done indication multiple times after reset due to
6310 * HW erratum 'GAUDI2_0300'
6311 */
6312 for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
6313 rc = hl_poll_timeout(
6314 hdev,
6315 mmPSOC_GLOBAL_CONF_BTM_FSM,
6316 reg_val,
6317 reg_val == 0,
6318 1000,
6319 poll_timeout_us);
6320
6321 if (rc)
6322 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
6323 }
6324
6325 static int gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
6326 {
6327 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6328 u32 poll_timeout_us, reset_sleep_ms;
6329 bool driver_performs_reset = false;
6330 int rc;
6331
6332 if (hdev->pldm) {
6333 reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
6334 GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
6335 poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
6336 } else {
6337 reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
6338 poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
6339 }
6340
6341 if (fw_reset)
6342 goto skip_reset;
6343
6344 gaudi2_reset_arcs(hdev);
6345
6346 if (hard_reset) {
6347 driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
6348 gaudi2_execute_hard_reset(hdev);
6349 } else {
6350 /*
6351 * As we also have to support working with preboot only (which does not support
6352 * soft reset), we have to make sure that security is disabled before letting the
6353 * driver do the reset. The user shall control the BFE flags to avoid requesting a
6354 * soft reset on a secured device with preboot only.
6355 */
6356 driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
6357 !hdev->asic_prop.fw_security_enabled);
6358 rc = gaudi2_execute_soft_reset(hdev, driver_performs_reset, poll_timeout_us);
6359 if (rc)
6360 return rc;
6361 }
6362
6363 skip_reset:
6364 if (driver_performs_reset || hard_reset) {
6365 /*
6366 * Instead of waiting for BTM indication we should wait for preboot ready:
6367 * Consider the below scenario:
6368 * 1. FW update is being triggered
6369 * - setting the dirty bit
6370 * 2. hard reset will be triggered due to the dirty bit
6371 * 3. FW initiates the reset:
6372 * - dirty bit cleared
6373 * - BTM indication cleared
6374 * - preboot ready indication cleared
6375 * 4. during hard reset:
6376 * - BTM indication will be set
6377 * - BIST test performed and another reset triggered
6378 * 5. only after this reset the preboot will set the preboot ready
6379 *
6380 * When polling on the BTM indication alone we can lose sync with the FW while
6381 * trying to communicate with a FW that is in the middle of reset.
6382 * To overcome this we always wait for the preboot ready indication.
6383 */
6384
6385 /* without this sleep reset will not work */
6386 msleep(reset_sleep_ms);
6387
6388 if (hdev->fw_components & FW_TYPE_PREBOOT_CPU)
6389 hl_fw_wait_preboot_ready(hdev);
6390 else
6391 gaudi2_poll_btm_indication(hdev, poll_timeout_us);
6392 }
6393
6394 if (!gaudi2)
6395 return 0;
6396
6397 gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
6398 gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);
6399
6400 /*
6401 * Clear NIC capability mask in order for driver to re-configure
6402 * NIC QMANs. NIC ports will not be re-configured during soft
6403 * reset as we call gaudi2_nic_init only during hard reset
6404 */
6405 gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK);
6406
6407 if (hard_reset) {
6408 gaudi2->hw_cap_initialized &=
6409 ~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK |
6410 HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q |
6411 HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK |
6412 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA |
6413 HW_CAP_MME_MASK | HW_CAP_ROT_MASK);
6414
6415 memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat));
6416 } else {
6417 gaudi2->hw_cap_initialized &=
6418 ~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET |
6419 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
6420 HW_CAP_ROT_MASK);
6421 }
6422 return 0;
6423 }
6424
6425 static int gaudi2_suspend(struct hl_device *hdev)
6426 {
6427 int rc;
6428
6429 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
6430 if (rc)
6431 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
6432
6433 return rc;
6434 }
6435
6436 static int gaudi2_resume(struct hl_device *hdev)
6437 {
6438 return gaudi2_init_iatu(hdev);
6439 }
6440
6441 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
6442 void *cpu_addr, dma_addr_t dma_addr, size_t size)
6443 {
6444 int rc;
6445
6446 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
6447 VM_DONTCOPY | VM_NORESERVE);
6448
6449 #ifdef _HAS_DMA_MMAP_COHERENT
6450
6451 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
6452 if (rc)
6453 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
6454
6455 #else
6456
6457 rc = remap_pfn_range(vma, vma->vm_start,
6458 virt_to_phys(cpu_addr) >> PAGE_SHIFT,
6459 size, vma->vm_page_prot);
6460 if (rc)
6461 dev_err(hdev->dev, "remap_pfn_range error %d", rc);
6462
6463 #endif
6464
6465 return rc;
6466 }
6467
6468 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
6469 {
6470 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6471 u64 hw_cap_mask = 0;
6472 u64 hw_tpc_cap_bit = 0;
6473 u64 hw_nic_cap_bit = 0;
6474 u64 hw_test_cap_bit = 0;
6475
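/* Each QMAN exposes NUM_OF_PQ_PER_QMAN (4) PQs, hence the ">> 2" below maps a
 * H/W queue ID to its engine index within the relevant capability mask.
 */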
6476 switch (hw_queue_id) {
6477 case GAUDI2_QUEUE_ID_PDMA_0_0:
6478 case GAUDI2_QUEUE_ID_PDMA_0_1:
6479 case GAUDI2_QUEUE_ID_PDMA_1_0:
6480 hw_cap_mask = HW_CAP_PDMA_MASK;
6481 break;
6482 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
6483 hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
6484 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
6485 break;
6486 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
6487 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE +
6488 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2);
6489 break;
6490 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
6491 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE +
6492 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2);
6493 break;
6494 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
6495 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE +
6496 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2);
6497 break;
6498
6499 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
6500 hw_test_cap_bit = HW_CAP_MME_SHIFT;
6501 break;
6502
6503 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
6504 hw_test_cap_bit = HW_CAP_MME_SHIFT + 1;
6505 break;
6506
6507 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
6508 hw_test_cap_bit = HW_CAP_MME_SHIFT + 2;
6509 break;
6510
6511 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
6512 hw_test_cap_bit = HW_CAP_MME_SHIFT + 3;
6513 break;
6514
6515 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3:
6516 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT +
6517 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2);
6518
6519 /* special case where cap bit refers to the first queue id */
6520 if (!hw_tpc_cap_bit)
6521 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0));
6522 break;
6523
6524 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
6525 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE +
6526 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2);
6527 break;
6528
6529 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
6530 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) +
6531 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2);
6532 break;
6533
6534 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
6535 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) +
6536 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2);
6537 break;
6538
6539 case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
6540 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE);
6541 break;
6542
6543 case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3:
6544 hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2);
6545 break;
6546
6547 case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3:
6548 hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2);
6549
6550 /* special case where cap bit refers to the first queue id */
6551 if (!hw_nic_cap_bit)
6552 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0));
6553 break;
6554
6555 case GAUDI2_QUEUE_ID_CPU_PQ:
6556 return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q);
6557
6558 default:
6559 return false;
6560 }
6561
6562 if (hw_tpc_cap_bit)
6563 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit));
6564
6565 if (hw_nic_cap_bit)
6566 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit));
6567
6568 if (hw_test_cap_bit)
6569 hw_cap_mask = BIT_ULL(hw_test_cap_bit);
6570
6571 return !!(gaudi2->hw_cap_initialized & hw_cap_mask);
6572 }
6573
6574 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id)
6575 {
6576 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6577
6578 switch (arc_id) {
6579 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6580 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6581 return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id));
6582
6583 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6584 return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6585
6586 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6587 return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6588
6589 default:
6590 return false;
6591 }
6592 }
6593
6594 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id)
6595 {
6596 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6597
6598 switch (arc_id) {
6599 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6600 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6601 gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id));
6602 break;
6603
6604 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6605 gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6606 break;
6607
6608 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6609 gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6610 break;
6611
6612 default:
6613 return;
6614 }
6615 }
6616
6617 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id)
6618 {
6619 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6620
6621 switch (arc_id) {
6622 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6623 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6624 gaudi2->active_hw_arc |= BIT_ULL(arc_id);
6625 break;
6626
6627 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6628 gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
6629 break;
6630
6631 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6632 gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
6633 break;
6634
6635 default:
6636 return;
6637 }
6638 }
6639
6640 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
6641 {
6642 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6643 u32 pq_offset, reg_base, db_reg_offset, db_value;
6644
6645 if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) {
6646 /*
6647 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs.
6648 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ
6649 * number.
6650 */
6651 pq_offset = (hw_queue_id & 0x3) * 4;
6652 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6653 db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
6654 } else {
6655 db_reg_offset = mmCPU_IF_PF_PQ_PI;
6656 }
6657
6658 db_value = pi;
6659
6660 /* ring the doorbell */
6661 WREG32(db_reg_offset, db_value);
6662
6663 if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) {
6664 /* make sure device CPU will read latest data from host */
6665 mb();
6666 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
6667 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
6668 }
6669 }
6670
6671 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
6672 {
6673 __le64 *pbd = (__le64 *) bd;
6674
6675 /* The QMANs are on host memory so a simple copy suffices */
6676 pqe[0] = pbd[0];
6677 pqe[1] = pbd[1];
6678 }
6679
6680 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size,
6681 dma_addr_t *dma_handle, gfp_t flags)
6682 {
6683 return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
6684 }
6685
6686 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size,
6687 void *cpu_addr, dma_addr_t dma_handle)
6688 {
6689 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
6690 }
6691
6692 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
6693 u32 timeout, u64 *result)
6694 {
6695 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6696
6697 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) {
6698 if (result)
6699 *result = 0;
6700 return 0;
6701 }
6702
6703 if (!timeout)
6704 timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC;
6705
6706 return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result);
6707 }
6708
6709 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size,
6710 gfp_t mem_flags, dma_addr_t *dma_handle)
6711 {
6712 if (size > GAUDI2_DMA_POOL_BLK_SIZE)
6713 return NULL;
6714
6715 return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
6716 }
6717
6718 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
6719 {
6720 dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
6721 }
6722
6723 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
6724 dma_addr_t *dma_handle)
6725 {
6726 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
6727 }
6728
6729 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
6730 {
6731 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
6732 }
6733
6734 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
6735 {
6736 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
6737 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6738
6739 if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
6740 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
6741 return -EINVAL;
6742 }
6743
6744 /* Just check if CB address is valid */
6745
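/* A CB for an internal QMAN is accepted if it falls inside one of the following
 * ranges: user SRAM, user DRAM, the device-MMU VA range (when the DMMUs are
 * initialized), the PMMU/PMMU-huge VA ranges (when the PMMU is initialized),
 * or a valid host physical address when no IOMMU is mapped for the device.
 */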
6746 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6747 parser->user_cb_size,
6748 asic_prop->sram_user_base_address,
6749 asic_prop->sram_end_address))
6750 return 0;
6751
6752 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6753 parser->user_cb_size,
6754 asic_prop->dram_user_base_address,
6755 asic_prop->dram_end_address))
6756 return 0;
6757
6758 if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) &&
6759 hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6760 parser->user_cb_size,
6761 asic_prop->dmmu.start_addr,
6762 asic_prop->dmmu.end_addr))
6763 return 0;
6764
6765 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) {
6766 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6767 parser->user_cb_size,
6768 asic_prop->pmmu.start_addr,
6769 asic_prop->pmmu.end_addr) ||
6770 hl_mem_area_inside_range(
6771 (u64) (uintptr_t) parser->user_cb,
6772 parser->user_cb_size,
6773 asic_prop->pmmu_huge.start_addr,
6774 asic_prop->pmmu_huge.end_addr))
6775 return 0;
6776
6777 } else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) {
6778 if (!hdev->pdev)
6779 return 0;
6780
6781 if (!device_iommu_mapped(&hdev->pdev->dev))
6782 return 0;
6783 }
6784
6785 dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n",
6786 parser->user_cb, parser->user_cb_size);
6787
6788 return -EFAULT;
6789 }
6790
6791 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
6792 {
6793 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6794
6795 if (!parser->is_kernel_allocated_cb)
6796 return gaudi2_validate_cb_address(hdev, parser);
6797
6798 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
6799 dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n");
6800 return -EINVAL;
6801 }
6802
6803 return 0;
6804 }
6805
6806 static int gaudi2_send_heartbeat(struct hl_device *hdev)
6807 {
6808 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6809
6810 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6811 return 0;
6812
6813 return hl_fw_send_heartbeat(hdev);
6814 }
6815
6816 /* This is an internal helper function, used to update the KDMA mmu props.
6817 * Should be called with a proper kdma lock.
6818 */
6819 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
6820 bool mmu_bypass, u32 asid)
6821 {
6822 u32 rw_asid, rw_mmu_bp;
6823
6824 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6825 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6826
6827 rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) |
6828 (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT);
6829
6830 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid);
6831 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
6832 }
6833
6834 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
6835 u32 mon_payload, u32 sync_value)
6836 {
6837 u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
6838 u8 mask;
6839
6840 sob_offset = sob_id * 4;
6841 mon_offset = mon_id * 4;
6842
6843 /* Reset the SOB value */
6844 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
6845
6846 /* Configure this address with CQ_ID 0 because CQ_EN is set */
6847 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);
6848
6849 /* Configure this address with CS index because CQ_EN is set */
6850 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);
6851
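/* A monitor observes a sync group of 8 SOBs: sob_id / 8 selects the group, and
 * the mask has all bits set except the one for sob_id & 0x7, so only that SOB
 * within the group is taken into account.
 */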
6852 sync_group_id = sob_id / 8;
6853 mask = ~(1 << (sob_id & 0x7));
6854 mode = 1; /* comparison mode is "equal to" */
6855
6856 mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
6857 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
6858 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
6859 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
6860 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
6861 }
6862
6863 /* gaudi2_arm_cq_monitor above is an internal helper used by gaudi2_send_job_to_kdma only */
6864 static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
6865 u64 src_addr, u64 dst_addr,
6866 u32 size, bool is_memset)
6867 {
6868 u32 comp_val, commit_mask, *polling_addr, timeout, status = 0;
6869 struct hl_cq_entry *cq_base;
6870 struct hl_cq *cq;
6871 u64 comp_addr;
6872 int rc;
6873
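/* Flow: arm a monitor on a reserved SOB that feeds the reserved KDMA completion
 * CQ, program the KDMA src/dst/size and write-completion address, commit the
 * transfer (optionally as a memset), then poll the CQ entry until the completion
 * arrives or the timeout expires.
 */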
6874 gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
6875 GAUDI2_RESERVED_MON_KDMA_COMPLETION,
6876 GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);
6877
6878 comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6879 (GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));
6880
6881 comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
6882 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
6883
6884 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr));
6885 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr));
6886 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr));
6887 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr));
6888 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr));
6889 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr));
6890 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val);
6891 WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size);
6892
6893 commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
6894 FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
6895
6896 if (is_memset)
6897 commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1);
6898
6899 WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask);
6900
6901 /* Wait for completion */
6902 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION];
6903 cq_base = cq->kernel_address;
6904 polling_addr = (u32 *)&cq_base[cq->ci];
6905
6906 if (hdev->pldm)
6907 /* 20 seconds of timeout for each 1MB */
6908 timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20;
6909 else
6910 timeout = KDMA_TIMEOUT_USEC;
6911
6912 /* Polling */
6913 rc = hl_poll_timeout_memory(
6914 hdev,
6915 polling_addr,
6916 status,
6917 (status == 1),
6918 1000,
6919 timeout,
6920 true);
6921
6922 *polling_addr = 0;
6923
6924 if (rc) {
6925 dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n");
6926 WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT);
6927 return rc;
6928 }
6929
6930 cq->ci = hl_cq_inc_ptr(cq->ci);
6931
6932 return 0;
6933 }
6934
6935 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val)
6936 {
6937 u32 i;
6938
6939 for (i = 0 ; i < size ; i += sizeof(u32))
6940 WREG32(addr + i, val);
6941 }
6942
6943 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable)
6944 {
6945 u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6946
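/* In test mode the QMAN protection is relaxed (trusted test-mode settings) and
 * the PQC is disabled; the regular protection and the PQC enable bit are
 * restored once the test completes.
 */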
6947 if (enable) {
6948 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE);
6949 WREG32(reg_base + QM_PQC_CFG_OFFSET, 0);
6950 } else {
6951 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED);
6952 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
6953 }
6954 }
6955
6956 static inline u32 gaudi2_test_queue_hw_queue_id_to_sob_id(struct hl_device *hdev, u32 hw_queue_id)
6957 {
6958 return hdev->asic_prop.first_available_user_sob[0] +
6959 hw_queue_id - GAUDI2_QUEUE_ID_PDMA_0_0;
6960 }
6961
6962 static void gaudi2_test_queue_clear(struct hl_device *hdev, u32 hw_queue_id)
6963 {
6964 u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6965 u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6966
6967 /* Reset the SOB value */
6968 WREG32(sob_addr, 0);
6969 }
6970
6971 static int gaudi2_test_queue_send_msg_short(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val,
6972 struct gaudi2_queues_test_info *msg_info)
6973 {
6974 u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6975 u32 tmp, sob_base = 1;
6976 struct packet_msg_short *msg_short_pkt = msg_info->kern_addr;
6977 size_t pkt_size = sizeof(struct packet_msg_short);
6978 int rc;
6979
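/* Build a MSG_SHORT packet that writes sob_val to this queue's test SOB
 * (base field = sob_base, address = sob_offset), with the engine-barrier and
 * message-barrier bits set.
 */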
6980 tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
6981 (1 << GAUDI2_PKT_CTL_EB_SHIFT) |
6982 (1 << GAUDI2_PKT_CTL_MB_SHIFT) |
6983 (sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) |
6984 (sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT);
6985
6986 msg_short_pkt->value = cpu_to_le32(sob_val);
6987 msg_short_pkt->ctl = cpu_to_le32(tmp);
6988
6989 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, msg_info->dma_addr);
6990 if (rc)
6991 dev_err(hdev->dev,
6992 "Failed to send msg_short packet to H/W queue %d\n", hw_queue_id);
6993
6994 return rc;
6995 }
6996
6997 static int gaudi2_test_queue_wait_completion(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val)
6998 {
6999 u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
7000 u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
7001 u32 timeout_usec, tmp;
7002 int rc;
7003
7004 if (hdev->pldm)
7005 timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
7006 else
7007 timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
7008
7009 rc = hl_poll_timeout(
7010 hdev,
7011 sob_addr,
7012 tmp,
7013 (tmp == sob_val),
7014 1000,
7015 timeout_usec);
7016
7017 if (rc == -ETIMEDOUT) {
7018 dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
7019 hw_queue_id, tmp);
7020 rc = -EIO;
7021 }
7022
7023 return rc;
7024 }
7025
7026 static int gaudi2_test_cpu_queue(struct hl_device *hdev)
7027 {
7028 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7029
7030 /*
7031 * Check the capability here, as send_cpu_message() won't update the result
7032 * value if the capability is not set
7033 */
7034 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
7035 return 0;
7036
7037 return hl_fw_test_cpu_queue(hdev);
7038 }
7039
7040 static int gaudi2_test_queues(struct hl_device *hdev)
7041 {
7042 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7043 struct gaudi2_queues_test_info *msg_info;
7044 u32 sob_val = 0x5a5a;
7045 int i, rc;
7046
7047 /* send test message on all enabled Qs */
7048 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
7049 if (!gaudi2_is_queue_enabled(hdev, i))
7050 continue;
7051
7052 msg_info = &gaudi2->queues_test_info[i - GAUDI2_QUEUE_ID_PDMA_0_0];
7053 gaudi2_qman_set_test_mode(hdev, i, true);
7054 gaudi2_test_queue_clear(hdev, i);
7055 rc = gaudi2_test_queue_send_msg_short(hdev, i, sob_val, msg_info);
7056 if (rc)
7057 goto done;
7058 }
7059
7060 rc = gaudi2_test_cpu_queue(hdev);
7061 if (rc)
7062 goto done;
7063
7064 /* verify that all messages were processed */
7065 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
7066 if (!gaudi2_is_queue_enabled(hdev, i))
7067 continue;
7068
7069 rc = gaudi2_test_queue_wait_completion(hdev, i, sob_val);
7070 if (rc)
7071 /* chip is not usable, no need for cleanups, just bail-out with error */
7072 goto done;
7073
7074 gaudi2_test_queue_clear(hdev, i);
7075 gaudi2_qman_set_test_mode(hdev, i, false);
7076 }
7077
7078 done:
7079 return rc;
7080 }
7081
7082 static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
7083 {
7084 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7085 size_t irq_arr_size;
7086 int rc;
7087
7088 gaudi2_init_arcs(hdev);
7089
7090 rc = gaudi2_scrub_arcs_dccm(hdev);
7091 if (rc) {
7092 dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
7093 return rc;
7094 }
7095
7096 gaudi2_init_security(hdev);
7097
7098 /* Unmask all IRQs since some could have been received during the soft reset */
7099 irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]);
7100 return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
7101 }
7102
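/*
 * An EDMA engine is considered idle only when its QMAN is idle and the DMA
 * core is neither busy nor halted. Busy engines are marked in the caller's
 * mask and, when 'e' is provided, a status line is printed per engine.
 */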
7103 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7104 struct engines_data *e)
7105 {
7106 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
7107 struct asic_fixed_properties *prop = &hdev->asic_prop;
7108 unsigned long *mask = (unsigned long *) mask_arr;
7109 const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#-15x%#x\n";
7110 bool is_idle = true, is_eng_idle;
7111 int engine_idx, i, j;
7112 u64 offset;
7113
7114 if (e)
7115 hl_engine_data_sprintf(e,
7116 "\nCORE EDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0 DMA_CORE_STS1\n"
7117 "---- ---- ------- ------------ ------------- -------------\n");
7118
7119 for (i = 0; i < NUM_OF_DCORES; i++) {
7120 for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
7121 int seq = i * NUM_OF_EDMA_PER_DCORE + j;
7122
7123 if (!(prop->edma_enabled_mask & BIT(seq)))
7124 continue;
7125
7126 engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 +
7127 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7128 offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;
7129
7130 dma_core_sts0 = RREG32(mmDCORE0_EDMA0_CORE_STS0 + offset);
7131 dma_core_sts1 = RREG32(mmDCORE0_EDMA0_CORE_STS1 + offset);
7132
7133 qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
7134 qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
7135 qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);
7136
7137 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7138 IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
7139 is_idle &= is_eng_idle;
7140
7141 if (mask && !is_eng_idle)
7142 set_bit(engine_idx, mask);
7143
7144 if (e)
7145 hl_engine_data_sprintf(e, edma_fmt, i, j, is_eng_idle ? "Y" : "N",
7146 qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7147 }
7148 }
7149
7150 return is_idle;
7151 }
7152
7153 static bool gaudi2_get_pdma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7154 struct engines_data *e)
7155 {
7156 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
7157 unsigned long *mask = (unsigned long *) mask_arr;
7158 const char *pdma_fmt = "%-6d%-9s%#-14x%#-15x%#x\n";
7159 bool is_idle = true, is_eng_idle;
7160 int engine_idx, i;
7161 u64 offset;
7162
7163 if (e)
7164 hl_engine_data_sprintf(e,
7165 "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0 DMA_CORE_STS1\n"
7166 "---- ------- ------------ ------------- -------------\n");
7167
7168 for (i = 0 ; i < NUM_OF_PDMA ; i++) {
7169 engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
7170 offset = i * PDMA_OFFSET;
7171 dma_core_sts0 = RREG32(mmPDMA0_CORE_STS0 + offset);
7172 dma_core_sts1 = RREG32(mmPDMA0_CORE_STS1 + offset);
7173
7174 qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
7175 qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
7176 qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);
7177
7178 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7179 IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
7180 is_idle &= is_eng_idle;
7181
7182 if (mask && !is_eng_idle)
7183 set_bit(engine_idx, mask);
7184
7185 if (e)
7186 hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
7187 qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7188 }
7189
7190 return is_idle;
7191 }
7192
7193 static bool gaudi2_get_nic_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7194 struct engines_data *e)
7195 {
7196 unsigned long *mask = (unsigned long *) mask_arr;
7197 const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
7198 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7199 bool is_idle = true, is_eng_idle;
7200 int engine_idx, i;
7201 u64 offset = 0;
7202
7203 /* NIC, twelve macros in Full chip */
7204 if (e && hdev->nic_ports_mask)
7205 hl_engine_data_sprintf(e,
7206 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
7207 "--- ------- ------------ ----------\n");
7208
7209 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
7210 if (!(i & 1))
7211 offset = i / 2 * NIC_OFFSET;
7212 else
7213 offset += NIC_QM_OFFSET;
7214
7215 if (!(hdev->nic_ports_mask & BIT(i)))
7216 continue;
7217
7218 engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i;
7219
7221 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
7222 qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset);
7223 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
7224
7225 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7226 is_idle &= is_eng_idle;
7227
7228 if (mask && !is_eng_idle)
7229 set_bit(engine_idx, mask);
7230
7231 if (e)
7232 hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
7233 qm_glbl_sts0, qm_cgm_sts);
7234 }
7235
7236 return is_idle;
7237 }
7238
7239 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7240 struct engines_data *e)
7241 {
7242 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, mme_arch_sts;
7243 unsigned long *mask = (unsigned long *) mask_arr;
7244 const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
7245 bool is_idle = true, is_eng_idle;
7246 int engine_idx, i;
7247 u64 offset;
7248
7249 if (e)
7250 hl_engine_data_sprintf(e,
7251 "\nMME Stub is_idle QM_GLBL_STS0 MME_ARCH_STATUS\n"
7252 "--- ---- ------- ------------ ---------------\n");
7253 /* MME, one per Dcore */
7254 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7255 engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
7256 offset = i * DCORE_OFFSET;
7257
7258 qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset);
7259 qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset);
7260 qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset);
7261
7262 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7263 is_idle &= is_eng_idle;
7264
7265 mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset);
7266 is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
7267 is_idle &= is_eng_idle;
7268
7269 if (e)
7270 hl_engine_data_sprintf(e, mme_fmt, i, "N",
7271 is_eng_idle ? "Y" : "N",
7272 qm_glbl_sts0,
7273 mme_arch_sts);
7274
7275 if (mask && !is_eng_idle)
7276 set_bit(engine_idx, mask);
7277 }
7278
7279 return is_idle;
7280 }
7281
7282 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
7283 struct iterate_module_ctx *ctx)
7284 {
7285 struct gaudi2_tpc_idle_data *idle_data = ctx->data;
7286 u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7287 bool is_eng_idle;
7288 int engine_idx;
7289
7290 if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
7291 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7292 else
7293 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
7294 dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;
7295
7296 tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
7297 qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
7298 qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
7299 qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);
7300
7301 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7302 IS_TPC_IDLE(tpc_cfg_sts);
7303 *(idle_data->is_idle) &= is_eng_idle;
7304
7305 if (idle_data->mask && !is_eng_idle)
7306 set_bit(engine_idx, idle_data->mask);
7307
7308 if (idle_data->e)
7309 hl_engine_data_sprintf(idle_data->e,
7310 idle_data->tpc_fmt, dcore, inst,
7311 is_eng_idle ? "Y" : "N",
7312 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
7313 }
7314
7315 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7316 struct engines_data *e)
7317 {
7318 struct asic_fixed_properties *prop = &hdev->asic_prop;
7319 unsigned long *mask = (unsigned long *) mask_arr;
7320 bool is_idle = true;
7321
7322 struct gaudi2_tpc_idle_data tpc_idle_data = {
7323 .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
7324 .e = e,
7325 .mask = mask,
7326 .is_idle = &is_idle,
7327 };
7328 struct iterate_module_ctx tpc_iter = {
7329 .fn = &gaudi2_is_tpc_engine_idle,
7330 .data = &tpc_idle_data,
7331 };
7332
7333 if (e && prop->tpc_enabled_mask)
7334 hl_engine_data_sprintf(e,
7335 "\nCORE TPC is_idle QM_GLBL_STS0 QM_CGM_STS STATUS\n"
7336 "---- --- ------- ------------ ---------- ------\n");
7337
7338 gaudi2_iterate_tpcs(hdev, &tpc_iter);
7339
7340 return *tpc_idle_data.is_idle;
7341 }
7342
7343 static bool gaudi2_get_decoder_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7344 struct engines_data *e)
7345 {
7346 struct asic_fixed_properties *prop = &hdev->asic_prop;
7347 unsigned long *mask = (unsigned long *) mask_arr;
7348 const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
7349 const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
7350 bool is_idle = true, is_eng_idle;
7351 u32 dec_swreg15, dec_enabled_bit;
7352 int engine_idx, i, j;
7353 u64 offset;
7354
7355 /* Decoders, two per Dcore and two shared PCIe decoders */
7356 if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
7357 hl_engine_data_sprintf(e,
7358 "\nCORE DEC is_idle VSI_CMD_SWREG15\n"
7359 "---- --- ------- ---------------\n");
7360
7361 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7362 for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) {
7363 dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j);
7364 if (!(prop->decoder_enabled_mask & dec_enabled_bit))
7365 continue;
7366
7367 engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 +
7368 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7369 offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET;
7370
7371 dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset);
7372 is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7373 is_idle &= is_eng_idle;
7374
7375 if (mask && !is_eng_idle)
7376 set_bit(engine_idx, mask);
7377
7378 if (e)
7379 hl_engine_data_sprintf(e, dec_fmt, i, j,
7380 is_eng_idle ? "Y" : "N", dec_swreg15);
7381 }
7382 }
7383
7384 if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
7385 hl_engine_data_sprintf(e,
7386 "\nPCIe DEC is_idle VSI_CMD_SWREG15\n"
7387 "-------- ------- ---------------\n");
7388
7389 /* Check shared(PCIe) decoders */
7390 for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) {
7391 dec_enabled_bit = PCIE_DEC_SHIFT + i;
7392 if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit)))
7393 continue;
7394
7395 engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i;
7396 offset = i * DCORE_DEC_OFFSET;
7397 dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset);
7398 is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7399 is_idle &= is_eng_idle;
7400
7401 if (mask && !is_eng_idle)
7402 set_bit(engine_idx, mask);
7403
7404 if (e)
7405 hl_engine_data_sprintf(e, pcie_dec_fmt, i,
7406 is_eng_idle ? "Y" : "N", dec_swreg15);
7407 }
7408
7409 return is_idle;
7410 }
7411
7412 static bool gaudi2_get_rotator_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7413 struct engines_data *e)
7414 {
7415 const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-14x%#x\n";
7416 unsigned long *mask = (unsigned long *) mask_arr;
7417 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7418 bool is_idle = true, is_eng_idle;
7419 int engine_idx, i;
7420 u64 offset;
7421
7422 if (e)
7423 hl_engine_data_sprintf(e,
7424 "\nCORE ROT is_idle QM_GLBL_STS0 QM_GLBL_STS1 QM_CGM_STS\n"
7425 "---- --- ------- ------------ ------------ ----------\n");
7426
7427 for (i = 0 ; i < NUM_OF_ROT ; i++) {
7428 engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;
7429
7430 offset = i * ROT_OFFSET;
7431
7432 qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset);
7433 qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset);
7434 qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset);
7435
7436 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7437 is_idle &= is_eng_idle;
7438
7439 if (mask && !is_eng_idle)
7440 set_bit(engine_idx, mask);
7441
7442 if (e)
7443 hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
7444 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7445 }
7446
7447 return is_idle;
7448 }
7449
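/*
 * The device is idle only if every engine type (EDMA, PDMA, NIC, MME, TPC,
 * decoders and rotators) reports idle. Each helper also marks busy engines
 * in mask_arr and appends its status lines to 'e' when provided.
 */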
7450 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7451 struct engines_data *e)
7452 {
7453 bool is_idle = true;
7454
7455 is_idle &= gaudi2_get_edma_idle_status(hdev, mask_arr, mask_len, e);
7456 is_idle &= gaudi2_get_pdma_idle_status(hdev, mask_arr, mask_len, e);
7457 is_idle &= gaudi2_get_nic_idle_status(hdev, mask_arr, mask_len, e);
7458 is_idle &= gaudi2_get_mme_idle_status(hdev, mask_arr, mask_len, e);
7459 is_idle &= gaudi2_get_tpc_idle_status(hdev, mask_arr, mask_len, e);
7460 is_idle &= gaudi2_get_decoder_idle_status(hdev, mask_arr, mask_len, e);
7461 is_idle &= gaudi2_get_rotator_idle_status(hdev, mask_arr, mask_len, e);
7462
7463 return is_idle;
7464 }
7465
7466 static void gaudi2_hw_queues_lock(struct hl_device *hdev)
7467 __acquires(&gaudi2->hw_queues_lock)
7468 {
7469 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7470
7471 spin_lock(&gaudi2->hw_queues_lock);
7472 }
7473
7474 static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
7475 __releases(&gaudi2->hw_queues_lock)
7476 {
7477 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7478
7479 spin_unlock(&gaudi2->hw_queues_lock);
7480 }
7481
7482 static u32 gaudi2_get_pci_id(struct hl_device *hdev)
7483 {
7484 return hdev->pdev->device;
7485 }
7486
7487 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
7488 {
7489 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7490
7491 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
7492 return 0;
7493
7494 return hl_fw_get_eeprom_data(hdev, data, max_size);
7495 }
7496
7497 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val)
7498 {
7499 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
7500 }
7501
7502 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7503 {
7504 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7505
7506 if (aggregate) {
7507 *size = (u32) sizeof(gaudi2->events_stat_aggregate);
7508 return gaudi2->events_stat_aggregate;
7509 }
7510
7511 *size = (u32) sizeof(gaudi2->events_stat);
7512 return gaudi2->events_stat;
7513 }
7514
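/*
 * Program MMU bypass and RD/WR ASID for every AXUSER interface of a single
 * dcore decoder bridge (the decoder itself plus its MSIX ABNRM/L2C/NRM/VCD
 * paths).
 */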
7515 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id,
7516 int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7517 {
7518 u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) *
7519 dcore_vdec_id + DCORE_OFFSET * dcore_id;
7520
7521 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7522 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7523
7524 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7525 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7526
7527 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7528 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7529
7530 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7531 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7532
7533 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7534 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7535 }
7536
7537 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid)
7538 {
7539 u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7540 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7541 struct asic_fixed_properties *prop = &hdev->asic_prop;
7542 u32 dcore_offset = dcore_id * DCORE_OFFSET;
7543 u32 vdec_id, i, ports_offset, reg_val;
7544 u8 edma_seq_base;
7545
7546 /* EDMA */
7547 edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE;
7548 if (prop->edma_enabled_mask & BIT(edma_seq_base)) {
7549 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7550 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7551 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7552 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7553 }
7554
7555 if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) {
7556 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7557 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7558 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7559 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7560 }
7561
7562 /* Sync Mngr */
7563 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid);
7564 /*
7565 * Sync Mngrs on dcores 1 - 3 are exposed to the user, so the user ASID must
7566 * be used for any access type
7567 */
7568 if (dcore_id > 0) {
7569 reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) |
7570 (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT);
7571 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val);
7572 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0);
7573 }
7574
7575 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0);
7576 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid);
7577
7578 for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) {
7579 ports_offset = i * DCORE_MME_SBTE_OFFSET;
7580 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP +
7581 dcore_offset + ports_offset, 0);
7582 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID +
7583 dcore_offset + ports_offset, rw_asid);
7584 }
7585
7586 for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) {
7587 ports_offset = i * DCORE_MME_WB_OFFSET;
7588 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP +
7589 dcore_offset + ports_offset, 0);
7590 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID +
7591 dcore_offset + ports_offset, rw_asid);
7592 }
7593
7594 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7595 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7596
7597 /*
7598 * Decoders
7599 */
7600 for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) {
7601 if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id))
7602 gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0);
7603 }
7604 }
7605
7606 static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev,
7607 int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7608 {
7609 u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id;
7610
7611 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7612 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7613
7614 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7615 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7616
7617 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7618 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7619
7620 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7621 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7622
7623 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7624 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7625 }
7626
7627 static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id,
7628 u32 rw_asid, u32 rw_mmu_bp)
7629 {
7630 u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id;
7631
7632 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp);
7633 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid);
7634 }
7635
7636 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid)
7637 {
7638 u32 reg_base, reg_offset, reg_val = 0;
7639
7640 reg_base = gaudi2_arc_blocks_bases[cpu_id];
7641
7642 /* Enable MMU and configure asid for all relevant ARC regions */
7643 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0);
7644 reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid);
7645
7646 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL);
7647 WREG32(reg_base + reg_offset, reg_val);
7648
7649 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW);
7650 WREG32(reg_base + reg_offset, reg_val);
7651
7652 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA);
7653 WREG32(reg_base + reg_offset, reg_val);
7654
7655 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA);
7656 WREG32(reg_base + reg_offset, reg_val);
7657
7658 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA);
7659 WREG32(reg_base + reg_offset, reg_val);
7660
7661 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE);
7662 WREG32(reg_base + reg_offset, reg_val);
7663
7664 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL);
7665 WREG32(reg_base + reg_offset, reg_val);
7666
7667 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL);
7668 WREG32(reg_base + reg_offset, reg_val);
7669
7670 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL);
7671 WREG32(reg_base + reg_offset, reg_val);
7672
7673 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL);
7674 WREG32(reg_base + reg_offset, reg_val);
7675
7676 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL);
7677 WREG32(reg_base + reg_offset, reg_val);
7678 }
7679
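/*
 * When the boot CPU firmware is in use, ASID configuration of the engine
 * cores is delegated to CPUCP. Otherwise the scheduler ARCs and the ARCs of
 * all enabled queues are configured directly by the driver.
 */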
7680 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid)
7681 {
7682 int i;
7683
7684 if (hdev->fw_components & FW_TYPE_BOOT_CPU)
7685 return hl_fw_cpucp_engine_core_asid_set(hdev, asid);
7686
7687 for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7688 gaudi2_arc_mmu_prepare(hdev, i, asid);
7689
7690 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
7691 if (!gaudi2_is_queue_enabled(hdev, i))
7692 continue;
7693
7694 gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid);
7695 }
7696
7697 return 0;
7698 }
7699
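/*
 * Configure MMU bypass and ASID for the blocks that are shared between
 * dcores: PDMA, rotators, the shared (PCIe) decoders, the ARC farm DUP
 * engines and the engine ARCs.
 */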
7700 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
7701 {
7702 struct asic_fixed_properties *prop = &hdev->asic_prop;
7703 u32 rw_asid, offset;
7704 int rc, i;
7705
7706 rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
7707 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);
7708
7709 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7710 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7711 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7712 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7713
7714 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7715 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7716 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7717 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7718
7719 /* ROT */
7720 for (i = 0 ; i < NUM_OF_ROT ; i++) {
7721 offset = i * ROT_OFFSET;
7722 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid);
7723 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7724 RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK);
7725 RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK);
7726 RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK);
7727 }
7728
7729 /* Shared Decoders are the last bits in the decoders mask */
7730 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0))
7731 gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0);
7732
7733 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1))
7734 gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0);
7735
7736 /* arc farm arc dup eng */
7737 for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7738 gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0);
7739
7740 rc = gaudi2_arc_mmu_prepare_all(hdev, asid);
7741 if (rc)
7742 return rc;
7743
7744 return 0;
7745 }
7746
7747 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset,
7748 struct iterate_module_ctx *ctx)
7749 {
7750 struct gaudi2_tpc_mmu_data *mmu_data = ctx->data;
7751
7752 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
7753 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid);
7754 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7755 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid);
7756 }
7757
7758 /* zero the MMUBP and set the ASID */
7759 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid)
7760 {
7761 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7762 struct gaudi2_tpc_mmu_data tpc_mmu_data;
7763 struct iterate_module_ctx tpc_iter = {
7764 .fn = &gaudi2_tpc_mmu_prepare,
7765 .data = &tpc_mmu_data,
7766 };
7767 int rc, i;
7768
7769 if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) {
7770 dev_crit(hdev->dev, "asid %u is too big\n", asid);
7771 return -EINVAL;
7772 }
7773
7774 if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
7775 return 0;
7776
7777 rc = gaudi2_mmu_shared_prepare(hdev, asid);
7778 if (rc)
7779 return rc;
7780
7781 /* configure DCORE MMUs */
7782 tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7783 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7784 gaudi2_iterate_tpcs(hdev, &tpc_iter);
7785 for (i = 0 ; i < NUM_OF_DCORES ; i++)
7786 gaudi2_mmu_dcore_prepare(hdev, i, asid);
7787
7788 return 0;
7789 }
7790
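/*
 * Informational events (shutdown cause, power/thermal envelope changes, power
 * brake, NIC status and ARC heartbeat) are expected during normal operation
 * and must not be treated as errors.
 */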
7791 static inline bool is_info_event(u32 event)
7792 {
7793 switch (event) {
7794 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
7795 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
7796 case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY ... GAUDI2_EVENT_ARC_PWR_RD_MODE3:
7797
7798 /* return in case of NIC status event - these events are received periodically and not as
7799 * an indication of an error.
7800 */
7801 case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
7802 case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
7803 return true;
7804 default:
7805 return false;
7806 }
7807 }
7808
7809 static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
7810 bool ratelimited, const char *fmt, ...)
7811 {
7812 struct va_format vaf;
7813 va_list args;
7814
7815 va_start(args, fmt);
7816 vaf.fmt = fmt;
7817 vaf.va = &args;
7818
7819 if (ratelimited)
7820 dev_err_ratelimited(hdev->dev, "%s: %pV\n",
7821 gaudi2_irq_map_table[event_type].valid ?
7822 gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7823 else
7824 dev_err(hdev->dev, "%s: %pV\n",
7825 gaudi2_irq_map_table[event_type].valid ?
7826 gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7827
7828 va_end(args);
7829 }
7830
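/*
 * Report an ECC error from the event queue. Firmware 1.12 and newer also
 * provides the failing block id. Returns true when the error is reported as
 * critical.
 */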
7831 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7832 struct hl_eq_ecc_data *ecc_data)
7833 {
7834 u64 ecc_address = 0, ecc_syndrome = 0;
7835 u8 memory_wrapper_idx = 0;
7836 bool has_block_id = false;
7837 u16 block_id;
7838
7839 if (!hl_is_fw_sw_ver_below(hdev, 1, 12))
7840 has_block_id = true;
7841
7842 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7843 ecc_syndrome = le64_to_cpu(ecc_data->ecc_syndrom);
7844 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7845
7846 if (has_block_id) {
7847 block_id = le16_to_cpu(ecc_data->block_id);
7848 gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
7849 "ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. block id %#x. critical %u.",
7850 ecc_address, ecc_syndrome, memory_wrapper_idx, block_id,
7851 ecc_data->is_critical);
7852 } else {
7853 gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
7854 "ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. critical %u.",
7855 ecc_address, ecc_syndrome, memory_wrapper_idx, ecc_data->is_critical);
7856 }
7857
7858 return !!ecc_data->is_critical;
7859 }
7860
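/*
 * On a lower QM error, capture debug data: whether the active CQ is the ARC
 * CQ or the legacy CQ, its pointer and size, and the current CP instruction.
 * If undefined-opcode capturing is armed, record the data once and disarm it.
 */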
7861 static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u32 engine_id)
7862 {
7863 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
7864 u64 cq_ptr, cp_current_inst;
7865 u32 lo, hi, cq_size, cp_sts;
7866 bool is_arc_cq;
7867
7868 cp_sts = RREG32(qman_base + QM_CP_STS_4_OFFSET);
7869 is_arc_cq = FIELD_GET(PDMA0_QM_CP_STS_CUR_CQ_MASK, cp_sts); /* 0 - legacy CQ, 1 - ARC_CQ */
7870
7871 if (is_arc_cq) {
7872 lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_STS_OFFSET);
7873 hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_STS_OFFSET);
7874 cq_ptr = ((u64) hi) << 32 | lo;
7875 cq_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_STS_OFFSET);
7876 } else {
7877 lo = RREG32(qman_base + QM_CQ_PTR_LO_STS_4_OFFSET);
7878 hi = RREG32(qman_base + QM_CQ_PTR_HI_STS_4_OFFSET);
7879 cq_ptr = ((u64) hi) << 32 | lo;
7880 cq_size = RREG32(qman_base + QM_CQ_TSIZE_STS_4_OFFSET);
7881 }
7882
7883 lo = RREG32(qman_base + QM_CP_CURRENT_INST_LO_4_OFFSET);
7884 hi = RREG32(qman_base + QM_CP_CURRENT_INST_HI_4_OFFSET);
7885 cp_current_inst = ((u64) hi) << 32 | lo;
7886
7887 dev_info(hdev->dev,
7888 "LowerQM. %sCQ: {ptr %#llx, size %u}, CP: {instruction %#018llx}\n",
7889 is_arc_cq ? "ARC_" : "", cq_ptr, cq_size, cp_current_inst);
7890
7891 if (undef_opcode->write_enable) {
7892 memset(undef_opcode, 0, sizeof(*undef_opcode));
7893 undef_opcode->timestamp = ktime_get();
7894 undef_opcode->cq_addr = cq_ptr;
7895 undef_opcode->cq_size = cq_size;
7896 undef_opcode->engine_id = engine_id;
7897 undef_opcode->stream_id = QMAN_STREAMS;
7898 undef_opcode->write_enable = 0;
7899 }
7900 }
7901
7902 static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
7903 u64 qman_base, u32 qid_base, u64 *event_mask)
7904 {
7905 u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
7906 u64 glbl_sts_addr, arb_err_addr;
7907 char reg_desc[32];
7908
7909 glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
7910 arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);
7911
7912 /* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
7913 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7914 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7915
7916 if (!glbl_sts_val)
7917 continue;
7918
7919 if (i == QMAN_STREAMS) {
7920 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerQM");
7921 num_error_causes = GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE;
7922 } else {
7923 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7924 num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
7925 }
7926
7927 for (j = 0 ; j < num_error_causes ; j++)
7928 if (glbl_sts_val & BIT(j)) {
7929 gaudi2_print_event(hdev, event_type, true,
7930 "%s. err cause: %s", reg_desc,
7931 i == QMAN_STREAMS ?
7932 gaudi2_lower_qman_error_cause[j] :
7933 gaudi2_qman_error_cause[j]);
7934 error_count++;
7935 }
7936
7937 /* Check for undefined opcode error in lower QM */
7938 if ((i == QMAN_STREAMS) &&
7939 (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK)) {
7940 handle_lower_qman_data_on_err(hdev, qman_base,
7941 gaudi2_queue_id_to_engine_id[qid_base]);
7942 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
7943 }
7944 }
7945
7946 arb_err_val = RREG32(arb_err_addr);
7947
7948 if (!arb_err_val)
7949 goto out;
7950
7951 for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7952 if (arb_err_val & BIT(j)) {
7953 gaudi2_print_event(hdev, event_type, true,
7954 "ARB_ERR. err cause: %s",
7955 gaudi2_qman_arb_error_cause[j]);
7956 error_count++;
7957 }
7958 }
7959
7960 out:
7961 return error_count;
7962 }
7963
7964 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
7965 u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7966 enum gaudi2_engine_id id, u64 *event_mask)
7967 {
7968 u32 razwi_hi, razwi_lo, razwi_xy;
7969 u16 eng_id = id;
7970 u8 rd_wr_flag;
7971
7972 if (is_write) {
7973 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI);
7974 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO);
7975 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY);
7976 rd_wr_flag = HL_RAZWI_WRITE;
7977 } else {
7978 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
7979 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO);
7980 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY);
7981 rd_wr_flag = HL_RAZWI_READ;
7982 }
7983
7984 hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1,
7985 rd_wr_flag | HL_RAZWI_HBW, event_mask);
7986
7987 dev_err_ratelimited(hdev->dev,
7988 "%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
7989 name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
7990 }
7991
7992 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
7993 u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7994 enum gaudi2_engine_id id, u64 *event_mask)
7995 {
7996 u64 razwi_addr = CFG_BASE;
7997 u32 razwi_xy;
7998 u16 eng_id = id;
7999 u8 rd_wr_flag;
8000
8001 if (is_write) {
8002 razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
8003 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY);
8004 rd_wr_flag = HL_RAZWI_WRITE;
8005 } else {
8006 razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
8007 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY);
8008 rd_wr_flag = HL_RAZWI_READ;
8009 }
8010
8011 hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask);
8012 dev_err_ratelimited(hdev->dev,
8013 "%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%llX Initiator coordinates 0x%x\n",
8014 name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
8015 razwi_xy);
8016 }
8017
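/*
 * Translate a RAZWI initiator (module type + index) to its GAUDI2 engine id,
 * accounting for the special cases: the extra DCORE0 TPC6, the two shared
 * PCIe decoders and the single ARC farm id.
 */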
8018 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
8019 enum razwi_event_sources module, u8 module_idx)
8020 {
8021 switch (module) {
8022 case RAZWI_TPC:
8023 if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES))
8024 return GAUDI2_DCORE0_ENGINE_ID_TPC_6;
8025 return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
8026 (module_idx % NUM_OF_TPC_PER_DCORE) +
8027 (GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
8028
8029 case RAZWI_MME:
8030 return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) +
8031 (module_idx * ENGINE_ID_DCORE_OFFSET));
8032
8033 case RAZWI_EDMA:
8034 return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
8035 (module_idx % NUM_OF_EDMA_PER_DCORE));
8036
8037 case RAZWI_PDMA:
8038 return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx);
8039
8040 case RAZWI_NIC:
8041 return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx));
8042
8043 case RAZWI_DEC:
8044 if (module_idx == 8)
8045 return GAUDI2_PCIE_ENGINE_ID_DEC_0;
8046
8047 if (module_idx == 9)
8048 return GAUDI2_PCIE_ENGINE_ID_DEC_1;
8049
8050 return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
8051 (module_idx % NUM_OF_DEC_PER_DCORE) +
8052 (GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
8053
8054 case RAZWI_ROT:
8055 return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
8056
8057 case RAZWI_ARC_FARM:
8058 return GAUDI2_ENGINE_ID_ARC_FARM;
8059
8060 default:
8061 return GAUDI2_ENGINE_ID_SIZE;
8062 }
8063 }
8064
8065 /*
8066 * This function handles RR (Range Register) hit events
8067 * raised by initiators, not PSOC RAZWI.
8068 */
8069 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
8070 enum razwi_event_sources module, u8 module_idx,
8071 u8 module_sub_idx, u64 *event_mask)
8072 {
8073 bool via_sft = false;
8074 u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, eng_id, binned_idx;
8075 u64 hbw_rtr_mstr_if_base_addr, lbw_rtr_mstr_if_base_addr;
8076 u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
8077 u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
8078 char initiator_name[64];
8079
8080 switch (module) {
8081 case RAZWI_TPC:
8082 sprintf(initiator_name, "TPC_%u", module_idx);
8083 if (hdev->tpc_binning) {
8084 binned_idx = __ffs(hdev->tpc_binning);
8085 if (binned_idx == module_idx)
8086 module_idx = TPC_ID_DCORE0_TPC6;
8087 }
8088
8089 hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx];
8090
8091 if (hl_is_fw_sw_ver_below(hdev, 1, 9) &&
8092 !hdev->asic_prop.fw_security_enabled &&
8093 ((module_idx == 0) || (module_idx == 1)))
8094 lbw_rtr_id = DCORE0_RTR0;
8095 else
8096 lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx];
8097 break;
8098 case RAZWI_MME:
8099 sprintf(initiator_name, "MME_%u", module_idx);
8100 switch (module_sub_idx) {
8101 case MME_WAP0:
8102 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
8103 break;
8104 case MME_WAP1:
8105 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
8106 break;
8107 case MME_WRITE:
8108 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
8109 break;
8110 case MME_READ:
8111 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
8112 break;
8113 case MME_SBTE0:
8114 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
8115 break;
8116 case MME_SBTE1:
8117 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
8118 break;
8119 case MME_SBTE2:
8120 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
8121 break;
8122 case MME_SBTE3:
8123 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
8124 break;
8125 case MME_SBTE4:
8126 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
8127 break;
8128 default:
8129 return;
8130 }
8131 lbw_rtr_id = hbw_rtr_id;
8132 break;
8133 case RAZWI_EDMA:
8134 hbw_rtr_mstr_if_base_addr = gaudi2_edma_initiator_hbw_sft[module_idx];
8135 dcore_id = module_idx / NUM_OF_EDMA_PER_DCORE;
8136 /* SFT has a separate MSTR_IF for LBW; only there can we
8137 * read the LBW RAZWI related registers
8138 */
8139 lbw_rtr_mstr_if_base_addr = mmSFT0_LBW_RTR_IF_MSTR_IF_RR_SHRD_HBW_BASE +
8140 dcore_id * SFT_DCORE_OFFSET;
8141 via_sft = true;
8142 sprintf(initiator_name, "EDMA_%u", module_idx);
8143 break;
8144 case RAZWI_PDMA:
8145 hbw_rtr_id = gaudi2_pdma_initiator_hbw_rtr_id[module_idx];
8146 lbw_rtr_id = gaudi2_pdma_initiator_lbw_rtr_id[module_idx];
8147 sprintf(initiator_name, "PDMA_%u", module_idx);
8148 break;
8149 case RAZWI_NIC:
8150 hbw_rtr_id = gaudi2_nic_initiator_hbw_rtr_id[module_idx];
8151 lbw_rtr_id = gaudi2_nic_initiator_lbw_rtr_id[module_idx];
8152 sprintf(initiator_name, "NIC_%u", module_idx);
8153 break;
8154 case RAZWI_DEC:
8155 sprintf(initiator_name, "DEC_%u", module_idx);
8156 if (hdev->decoder_binning) {
8157 binned_idx = __ffs(hdev->decoder_binning);
8158 if (binned_idx == module_idx)
8159 module_idx = DEC_ID_PCIE_VDEC1;
8160 }
8161 hbw_rtr_id = gaudi2_dec_initiator_hbw_rtr_id[module_idx];
8162 lbw_rtr_id = gaudi2_dec_initiator_lbw_rtr_id[module_idx];
8163 break;
8164 case RAZWI_ROT:
8165 hbw_rtr_id = gaudi2_rot_initiator_hbw_rtr_id[module_idx];
8166 lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx];
8167 sprintf(initiator_name, "ROT_%u", module_idx);
8168 break;
8169 case RAZWI_ARC_FARM:
8170 lbw_rtr_id = DCORE1_RTR5;
8171 hbw_rtr_id = DCORE1_RTR7;
8172 sprintf(initiator_name, "ARC_FARM_%u", module_idx);
8173 break;
8174 default:
8175 return;
8176 }
8177
8178 /* Find router mstr_if register base */
8179 if (!via_sft) {
8180 dcore_id = hbw_rtr_id / NUM_OF_RTR_PER_DCORE;
8181 dcore_rtr_id = hbw_rtr_id % NUM_OF_RTR_PER_DCORE;
8182 hbw_rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
8183 dcore_id * DCORE_OFFSET +
8184 dcore_rtr_id * DCORE_RTR_OFFSET +
8185 RTR_MSTR_IF_OFFSET;
8186 lbw_rtr_mstr_if_base_addr = hbw_rtr_mstr_if_base_addr +
8187 (((s32)lbw_rtr_id - hbw_rtr_id) * DCORE_RTR_OFFSET);
8188 }
8189
8190 /* Find out event cause by reading "RAZWI_HAPPENED" registers */
8191 hbw_shrd_aw = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
8192 hbw_shrd_ar = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
8193 lbw_shrd_aw = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
8194 lbw_shrd_ar = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
8195
8196 eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx);
8197 if (hbw_shrd_aw) {
8198 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, true,
8199 initiator_name, eng_id, event_mask);
8200
8201 /* Clear event indication */
8202 WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
8203 }
8204
8205 if (hbw_shrd_ar) {
8206 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, false,
8207 initiator_name, eng_id, event_mask);
8208
8209 /* Clear event indication */
8210 WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
8211 }
8212
8213 if (lbw_shrd_aw) {
8214 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, true,
8215 initiator_name, eng_id, event_mask);
8216
8217 /* Clear event indication */
8218 WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
8219 }
8220
8221 if (lbw_shrd_ar) {
8222 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, false,
8223 initiator_name, eng_id, event_mask);
8224
8225 /* Clear event indication */
8226 WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
8227 }
8228 }
8229
8230 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
8231 {
8232 struct asic_fixed_properties *prop = &hdev->asic_prop;
8233 u8 mod_idx, sub_mod;
8234
8235 /* check all TPCs */
8236 for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
8237 if (prop->tpc_enabled_mask & BIT(mod_idx))
8238 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL);
8239 }
8240
8241 /* check all MMEs */
8242 for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8243 for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
8244 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
8245 sub_mod, NULL);
8246
8247 /* check all EDMAs */
8248 for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8249 if (prop->edma_enabled_mask & BIT(mod_idx))
8250 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL);
8251
8252 /* check all PDMAs */
8253 for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
8254 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL);
8255
8256 /* check all NICs */
8257 for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
8258 if (hdev->nic_ports_mask & BIT(mod_idx))
8259 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
8260 NULL);
8261
8262 /* check all DECs */
8263 for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
8264 if (prop->decoder_enabled_mask & BIT(mod_idx))
8265 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL);
8266
8267 /* check all ROTs */
8268 for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
8269 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL);
8270 }
8271
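/*
 * Scan the given razwi info table for entries whose AXUSER XY coordinates
 * match, return their engine ids and router control bases, and build a
 * printable "A or B" engine name string. Returns the number of matches.
 */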
8272 static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u32 array_size,
8273 u32 axuser_xy, u32 *base, u16 *eng_id,
8274 char *eng_name)
8275 {
8277 int i, num_of_eng = 0;
8278 u16 str_size = 0;
8279
8280 for (i = 0 ; i < array_size ; i++) {
8281 if (axuser_xy != razwi_info[i].axuser_xy)
8282 continue;
8283
8284 eng_id[num_of_eng] = razwi_info[i].eng_id;
8285 base[num_of_eng] = razwi_info[i].rtr_ctrl;
8286 if (!num_of_eng)
8287 str_size += scnprintf(eng_name + str_size,
8288 PSOC_RAZWI_ENG_STR_SIZE - str_size, "%s",
8289 razwi_info[i].eng_name);
8290 else
8291 str_size += scnprintf(eng_name + str_size,
8292 PSOC_RAZWI_ENG_STR_SIZE - str_size, " or %s",
8293 razwi_info[i].eng_name);
8294 num_of_eng++;
8295 }
8296
8297 return num_of_eng;
8298 }
8299
8300 static bool gaudi2_handle_psoc_razwi_happened(struct hl_device *hdev, u32 razwi_reg,
8301 u64 *event_mask)
8302 {
8303 u32 axuser_xy = RAZWI_GET_AXUSER_XY(razwi_reg), addr_hi = 0, addr_lo = 0;
8304 u32 base[PSOC_RAZWI_MAX_ENG_PER_RTR];
8305 u16 num_of_eng, eng_id[PSOC_RAZWI_MAX_ENG_PER_RTR];
8306 char eng_name_str[PSOC_RAZWI_ENG_STR_SIZE];
8307 bool razwi_happened = false;
8308 u64 addr;
8309 int i;
8310
8311 num_of_eng = gaudi2_psoc_razwi_get_engines(common_razwi_info, ARRAY_SIZE(common_razwi_info),
8312 axuser_xy, base, eng_id, eng_name_str);
8313
8314 /* If no match for XY coordinates, try to find it in MME razwi table */
8315 if (!num_of_eng) {
8316 axuser_xy = RAZWI_GET_AXUSER_LOW_XY(razwi_reg);
8317 num_of_eng = gaudi2_psoc_razwi_get_engines(mme_razwi_info,
8318 ARRAY_SIZE(mme_razwi_info),
8319 axuser_xy, base, eng_id,
8320 eng_name_str);
8321 }
8322
8323 for (i = 0 ; i < num_of_eng ; i++) {
8324 if (RREG32(base[i] + DEC_RAZWI_HBW_AW_SET)) {
8325 addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_HI);
8326 addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_LO);
8327 addr = ((u64)addr_hi << 32) + addr_lo;
8328 if (addr) {
8329 dev_err(hdev->dev,
8330 "PSOC HBW AW RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8331 eng_name_str, addr);
8332 hl_handle_razwi(hdev, addr, &eng_id[0],
8333 num_of_eng, HL_RAZWI_HBW | HL_RAZWI_WRITE, event_mask);
8334 razwi_happened = true;
8335 }
8336 }
8337
8338 if (RREG32(base[i] + DEC_RAZWI_HBW_AR_SET)) {
8339 addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_HI);
8340 addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_LO);
8341 addr = ((u64)addr_hi << 32) + addr_lo;
8342 if (addr) {
8343 dev_err(hdev->dev,
8344 "PSOC HBW AR RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8345 eng_name_str, addr);
8346 hl_handle_razwi(hdev, addr, &eng_id[0],
8347 num_of_eng, HL_RAZWI_HBW | HL_RAZWI_READ, event_mask);
8348 razwi_happened = true;
8349 }
8350 }
8351
8352 if (RREG32(base[i] + DEC_RAZWI_LBW_AW_SET)) {
8353 addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AW_ADDR);
8354 if (addr_lo) {
8355 dev_err(hdev->dev,
8356 "PSOC LBW AW RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8357 eng_name_str, addr_lo);
8358 hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8359 num_of_eng, HL_RAZWI_LBW | HL_RAZWI_WRITE, event_mask);
8360 razwi_happened = true;
8361 }
8362 }
8363
8364 if (RREG32(base[i] + DEC_RAZWI_LBW_AR_SET)) {
8365 addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AR_ADDR);
8366 if (addr_lo) {
8367 dev_err(hdev->dev,
8368 "PSOC LBW AR RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8369 eng_name_str, addr_lo);
8370 hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8371 num_of_eng, HL_RAZWI_LBW | HL_RAZWI_READ, event_mask);
8372 razwi_happened = true;
8373 }
8374 }
8375 /* In the common case the loop breaks after the first iteration, since there is
8376 * either a single engine id or several engines behind the same router, whose
8377 * registers already hold the RAZWI info. The exception is a PSOC RAZWI from EDMA,
8378 * where the axuser id can match 2 routers (the 2 interfaces of the SFT router);
8379 * then the first router may not hold the info and we iterate over the other one.
8380 */
8381 if (razwi_happened)
8382 break;
8383 }
8384
8385 return razwi_happened;
8386 }
8387
8388 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */
8389 static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask)
8390 {
8391 u32 razwi_mask_info, razwi_intr = 0, error_count = 0;
8392
8393 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) {
8394 razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
8395 if (!razwi_intr)
8396 return 0;
8397 }
8398
8399 razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);
8400
8401 dev_err_ratelimited(hdev->dev,
8402 "PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
8403 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
8404 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
8405 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
8406 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info),
8407 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));
8408
8409 if (gaudi2_handle_psoc_razwi_happened(hdev, razwi_mask_info, event_mask))
8410 error_count++;
8411 else
8412 dev_err_ratelimited(hdev->dev,
8413 "PSOC RAZWI interrupt: invalid razwi info (0x%x)\n",
8414 razwi_mask_info);
8415
8416 /* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
8417 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX))
8418 WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);
8419
8420 return error_count;
8421 }
8422
8423 static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type)
8424 {
8425 u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8426
8427 sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET);
8428
8429 for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) {
8430 if (sts_val & BIT(i)) {
8431 gaudi2_print_event(hdev, event_type, true,
8432 "err cause: %s", gaudi2_qm_sei_error_cause[i]);
8433 sts_clr_val |= BIT(i);
8434 error_count++;
8435 }
8436 }
8437
8438 WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val);
8439
8440 return error_count;
8441 }
8442
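/*
 * Handle a QM AXI error response event: resolve the QMAN base of the
 * offending engine, decode its SEI status bits and, when requested, also
 * check for a matching RAZWI indication and for global errors.
 */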
8443 static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
8444 bool extended_err_check, u64 *event_mask)
8445 {
8446 enum razwi_event_sources module;
8447 u32 error_count = 0;
8448 u64 qman_base;
8449 u8 index;
8450
8451 switch (event_type) {
8452 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP:
8453 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
8454 qman_base = mmDCORE0_TPC0_QM_BASE +
8455 (index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
8456 (index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
8457 module = RAZWI_TPC;
8458 break;
8459 case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
8460 qman_base = mmDCORE0_TPC6_QM_BASE;
8461 module = RAZWI_TPC;
8462 break;
8463 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
8464 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
8465 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
8466 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
8467 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
8468 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
8469 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
8470 qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
8471 module = RAZWI_MME;
8472 break;
8473 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
8474 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
8475 index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
8476 qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
8477 module = RAZWI_PDMA;
8478 break;
8479 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
8480 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
8481 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
8482 qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
8483 module = RAZWI_ROT;
8484 break;
8485 default:
8486 return 0;
8487 }
8488
8489 error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
8490
8491 /* There is a single event per NIC macro, so both of its QMAN blocks should be checked */
8492 if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE &&
8493 event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE)
8494 error_count += _gaudi2_handle_qm_sei_err(hdev,
8495 qman_base + NIC_QM_OFFSET, event_type);
8496
8497 if (extended_err_check) {
8498 /* check if RAZWI happened */
8499 gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, event_mask);
8500 hl_check_for_glbl_errors(hdev);
8501 }
8502
8503 return error_count;
8504 }
8505
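/*
 * Handle a QM error event: map the event type to the queue id base and QMAN
 * base of the offending engine, decode the per-stream and arbiter error
 * causes, and for the EDMA QMs (which have no AXI error response event) also
 * handle QM SEI and RAZWI here.
 */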
8506 static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8507 {
8508 u32 qid_base, error_count = 0;
8509 u64 qman_base;
8510 u8 index = 0;
8511
8512 switch (event_type) {
8513 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
8514 index = event_type - GAUDI2_EVENT_TPC0_QM;
8515 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS;
8516 qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8517 break;
8518 case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM:
8519 index = event_type - GAUDI2_EVENT_TPC6_QM;
8520 qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS;
8521 qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8522 break;
8523 case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM:
8524 index = event_type - GAUDI2_EVENT_TPC12_QM;
8525 qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS;
8526 qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8527 break;
8528 case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM:
8529 index = event_type - GAUDI2_EVENT_TPC18_QM;
8530 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS;
8531 qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8532 break;
8533 case GAUDI2_EVENT_TPC24_QM:
8534 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
8535 qman_base = mmDCORE0_TPC6_QM_BASE;
8536 break;
8537 case GAUDI2_EVENT_MME0_QM:
8538 qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
8539 qman_base = mmDCORE0_MME_QM_BASE;
8540 break;
8541 case GAUDI2_EVENT_MME1_QM:
8542 qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
8543 qman_base = mmDCORE1_MME_QM_BASE;
8544 break;
8545 case GAUDI2_EVENT_MME2_QM:
8546 qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
8547 qman_base = mmDCORE2_MME_QM_BASE;
8548 break;
8549 case GAUDI2_EVENT_MME3_QM:
8550 qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
8551 qman_base = mmDCORE3_MME_QM_BASE;
8552 break;
8553 case GAUDI2_EVENT_HDMA0_QM:
8554 index = 0;
8555 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;
8556 qman_base = mmDCORE0_EDMA0_QM_BASE;
8557 break;
8558 case GAUDI2_EVENT_HDMA1_QM:
8559 index = 1;
8560 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0;
8561 qman_base = mmDCORE0_EDMA1_QM_BASE;
8562 break;
8563 case GAUDI2_EVENT_HDMA2_QM:
8564 index = 2;
8565 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0;
8566 qman_base = mmDCORE1_EDMA0_QM_BASE;
8567 break;
8568 case GAUDI2_EVENT_HDMA3_QM:
8569 index = 3;
8570 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0;
8571 qman_base = mmDCORE1_EDMA1_QM_BASE;
8572 break;
8573 case GAUDI2_EVENT_HDMA4_QM:
8574 index = 4;
8575 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0;
8576 qman_base = mmDCORE2_EDMA0_QM_BASE;
8577 break;
8578 case GAUDI2_EVENT_HDMA5_QM:
8579 index = 5;
8580 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0;
8581 qman_base = mmDCORE2_EDMA1_QM_BASE;
8582 break;
8583 case GAUDI2_EVENT_HDMA6_QM:
8584 index = 6;
8585 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0;
8586 qman_base = mmDCORE3_EDMA0_QM_BASE;
8587 break;
8588 case GAUDI2_EVENT_HDMA7_QM:
8589 index = 7;
8590 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0;
8591 qman_base = mmDCORE3_EDMA1_QM_BASE;
8592 break;
8593 case GAUDI2_EVENT_PDMA0_QM:
8594 qid_base = GAUDI2_QUEUE_ID_PDMA_0_0;
8595 qman_base = mmPDMA0_QM_BASE;
8596 break;
8597 case GAUDI2_EVENT_PDMA1_QM:
8598 qid_base = GAUDI2_QUEUE_ID_PDMA_1_0;
8599 qman_base = mmPDMA1_QM_BASE;
8600 break;
8601 case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
8602 qid_base = GAUDI2_QUEUE_ID_ROT_0_0;
8603 qman_base = mmROT0_QM_BASE;
8604 break;
8605 case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
8606 qid_base = GAUDI2_QUEUE_ID_ROT_1_0;
8607 qman_base = mmROT1_QM_BASE;
8608 break;
8609 default:
8610 return 0;
8611 }
8612
8613 error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base,
8614 qid_base, event_mask);
8615
8616 /* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
8617 if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) {
8618 error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
8619 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, index, 0, event_mask);
8620 }
8621
8622 hl_check_for_glbl_errors(hdev);
8623
8624 return error_count;
8625 }
8626
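/*
 * Iterate over all ARC-farm ARCs, decode the asserted bits in each ARC SEI
 * interrupt status register, print the matching cause strings and clear only
 * the bits that were handled via the corresponding CLR register.
 */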
8627 static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8628 {
8629 u32 i, sts_val, sts_clr_val, error_count = 0, arc_farm;
8630
8631 for (arc_farm = 0 ; arc_farm < NUM_OF_ARC_FARMS_ARC ; arc_farm++) {
8632 sts_clr_val = 0;
8633 sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS +
8634 (arc_farm * ARC_FARM_OFFSET));
8635
8636 for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
8637 if (sts_val & BIT(i)) {
8638 gaudi2_print_event(hdev, event_type, true,
8639 "ARC FARM ARC %u err cause: %s",
8640 arc_farm, gaudi2_arc_sei_error_cause[i]);
8641 sts_clr_val |= BIT(i);
8642 error_count++;
8643 }
8644 }
8645 WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR + (arc_farm * ARC_FARM_OFFSET),
8646 sts_clr_val);
8647 }
8648
8649 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ARC_FARM, 0, 0, event_mask);
8650 hl_check_for_glbl_errors(hdev);
8651
8652 return error_count;
8653 }
8654
8655 static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type)
8656 {
8657 u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8658
8659 sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS);
8660
8661 for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) {
8662 if (sts_val & BIT(i)) {
8663 gaudi2_print_event(hdev, event_type, true,
8664 "err cause: %s", gaudi2_cpu_sei_error_cause[i]);
8665 sts_clr_val |= BIT(i);
8666 error_count++;
8667 }
8668 }
8669
8670 hl_check_for_glbl_errors(hdev);
8671
8672 WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val);
8673
8674 return error_count;
8675 }
8676
8677 static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type,
8678 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8679 u64 *event_mask)
8680 {
8681 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8682 u32 error_count = 0;
8683 int i;
8684
8685 for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++)
8686 if (intr_cause_data & BIT(i)) {
8687 gaudi2_print_event(hdev, event_type, true,
8688 "err cause: %s", guadi2_rot_error_cause[i]);
8689 error_count++;
8690 }
8691
8692 /* check if RAZWI happened */
8693 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, event_mask);
8694 hl_check_for_glbl_errors(hdev);
8695
8696 return error_count;
8697 }
8698
8699 static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, u16 event_type,
8700 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8701 u64 *event_mask)
8702 {
8703 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8704 u32 error_count = 0;
8705 int i;
8706
8707 for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++)
8708 if (intr_cause_data & BIT(i)) {
8709 gaudi2_print_event(hdev, event_type, true,
8710 "interrupt cause: %s", gaudi2_tpc_interrupts_cause[i]);
8711 error_count++;
8712 }
8713
8714 /* check if RAZWI happened */
8715 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, event_mask);
8716 hl_check_for_glbl_errors(hdev);
8717
8718 return error_count;
8719 }
8720
8721 static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type,
8722 u64 *event_mask)
8723 {
8724 u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0;
8725 int i;
8726
8727 if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES)
8728 /* DCORE DEC */
8729 sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR +
8730 DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) +
8731 DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE);
8732 else
8733 /* PCIE DEC */
8734 sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET *
8735 (dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES);
8736
8737 sts_val = RREG32(sts_addr);
8738
8739 for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) {
8740 if (sts_val & BIT(i)) {
8741 gaudi2_print_event(hdev, event_type, true,
8742 "err cause: %s", gaudi2_dec_error_cause[i]);
8743 sts_clr_val |= BIT(i);
8744 error_count++;
8745 }
8746 }
8747
8748 /* check if RAZWI happened */
8749 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, event_mask);
8750 hl_check_for_glbl_errors(hdev);
8751
8752 /* Write 1 to clear errors */
8753 WREG32(sts_addr, sts_clr_val);
8754
8755 return error_count;
8756 }
8757
8758 static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8759 u64 *event_mask)
8760 {
8761 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8762 int i;
8763
8764 sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
8765 sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;
8766
8767 sts_val = RREG32(sts_addr);
8768
8769 for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
8770 if (sts_val & BIT(i)) {
8771 gaudi2_print_event(hdev, event_type, true,
8772 "err cause: %s", guadi2_mme_error_cause[i]);
8773 sts_clr_val |= BIT(i);
8774 error_count++;
8775 }
8776 }
8777
8778 /* check if RAZWI happened */
8779 for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
8780 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, event_mask);
8781
8782 hl_check_for_glbl_errors(hdev);
8783
8784 WREG32(sts_clr_addr, sts_clr_val);
8785
8786 return error_count;
8787 }
8788
8789 static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type)
8790 {
8791 /*
8792  * We have a single error cause here, but the reporting mechanism is
8793  * buggy, so there is no good reason to fetch the cause. Just check
8794  * for glbl_errors and exit.
8795  */
8796 hl_check_for_glbl_errors(hdev);
8797
8798 return GAUDI2_NA_EVENT_CAUSE;
8799 }
8800
8801 static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8802 u64 *event_mask)
8803 {
8804 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8805 int i;
8806
8807 sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
8808 sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;
8809
8810 sts_val = RREG32(sts_addr);
8811
8812 for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
8813 if (sts_val & BIT(i)) {
8814 gaudi2_print_event(hdev, event_type, true,
8815 "err cause: %s", guadi2_mme_wap_error_cause[i]);
8816 sts_clr_val |= BIT(i);
8817 error_count++;
8818 }
8819 }
8820
8821 /* check if RAZWI happened on WAP0/1 */
8822 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, event_mask);
8823 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, event_mask);
8824 hl_check_for_glbl_errors(hdev);
8825
8826 WREG32(sts_clr_addr, sts_clr_val);
8827
8828 return error_count;
8829 }
8830
8831 static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type,
8832 u64 intr_cause_data)
8833 {
8834 u32 error_count = 0;
8835 int i;
8836
8837 /* If an AXI read or write error is received, an error is reported and
8838  * an interrupt message is sent. Due to an HW erratum, when reading the cause
8839  * register of the KDMA engine, the reported error is always HBW, even if
8840  * the actual error was caused by an LBW KDMA transaction.
8841  */
8842 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8843 if (intr_cause_data & BIT(i)) {
8844 gaudi2_print_event(hdev, event_type, true,
8845 "err cause: %s", gaudi2_kdma_core_interrupts_cause[i]);
8846 error_count++;
8847 }
8848
8849 hl_check_for_glbl_errors(hdev);
8850
8851 return error_count;
8852 }
8853
8854 static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, u64 intr_cause)
8855 {
8856 u32 error_count = 0;
8857 int i;
8858
8859 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8860 if (intr_cause & BIT(i)) {
8861 gaudi2_print_event(hdev, event_type, true,
8862 "err cause: %s", gaudi2_dma_core_interrupts_cause[i]);
8863 error_count++;
8864 }
8865
8866 hl_check_for_glbl_errors(hdev);
8867
8868 return error_count;
8869 }
8870
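/*
 * Check the four RAZWI_HAPPENED indications of the PCIE shared master
 * interface (HBW/LBW x write/read). For each asserted indication, print the
 * captured RAZWI info and clear the indication with a write of 1.
 */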
8871 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
8872 {
8873 u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
8874
8875 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
8876 if (RREG32(razwi_happened_addr)) {
8877 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8878 GAUDI2_ENGINE_ID_PCIE, event_mask);
8879 WREG32(razwi_happened_addr, 0x1);
8880 }
8881
8882 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
8883 if (RREG32(razwi_happened_addr)) {
8884 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8885 GAUDI2_ENGINE_ID_PCIE, event_mask);
8886 WREG32(razwi_happened_addr, 0x1);
8887 }
8888
8889 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
8890 if (RREG32(razwi_happened_addr)) {
8891 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8892 GAUDI2_ENGINE_ID_PCIE, event_mask);
8893 WREG32(razwi_happened_addr, 0x1);
8894 }
8895
8896 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
8897 if (RREG32(razwi_happened_addr)) {
8898 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8899 GAUDI2_ENGINE_ID_PCIE, event_mask);
8900 WREG32(razwi_happened_addr, 0x1);
8901 }
8902 }
8903
8904 static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type,
8905 u64 intr_cause_data, u64 *event_mask)
8906 {
8907 u32 error_count = 0;
8908 int i;
8909
8910 gaudi2_print_event(hdev, event_type, true,
8911 "intr_cause_data: %#llx", intr_cause_data);
8912
8913 for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
8914 if (!(intr_cause_data & BIT_ULL(i)))
8915 continue;
8916
8917 gaudi2_print_event(hdev, event_type, true,
8918 "err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]);
8919 error_count++;
8920
8921 /*
8922 * Always check for LBW and HBW additional info as the indication itself is
8923 * sometimes missing
8924 */
8925 }
8926
8927 hl_check_for_glbl_errors(hdev);
8928 gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
8929
8930 return error_count;
8931 }
8932
8933 static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type,
8934 u64 intr_cause_data)
8935
8936 {
8937 u32 error_count = 0;
8938 int i;
8939
8940 for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) {
8941 if (intr_cause_data & BIT_ULL(i)) {
8942 gaudi2_print_event(hdev, event_type, true,
8943 "err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]);
8944 error_count++;
8945 }
8946 }
8947
8948 return error_count;
8949 }
8950
8951 static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data)
8952 {
8953 u32 error_count = 0;
8954 int i;
8955
8956 for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) {
8957 if (intr_cause_data & BIT_ULL(i)) {
8958 gaudi2_print_event(hdev, event_type, true,
8959 "err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]);
8960 error_count++;
8961 }
8962 }
8963
8964 return error_count;
8965 }
8966
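/*
 * Report a captured MMU page fault: the upper 32 bits of the faulting VA come
 * from the PAGE_ERROR_CAPTURE register and the lower 32 bits from
 * PAGE_ERROR_CAPTURE_VA. HMMU addresses are descrambled before being printed
 * as a VA range, and the valid bit is cleared once the fault is reported.
 */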
8967 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu,
8968 u64 *event_mask)
8969 {
8970 u32 valid, val;
8971 u64 addr;
8972
8973 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8974
8975 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK))
8976 return;
8977
8978 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE));
8979 addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK;
8980 addr <<= 32;
8981 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));
8982
8983 if (is_pmmu) {
8984 dev_err_ratelimited(hdev->dev, "PMMU page fault on va 0x%llx\n", addr);
8985 } else {
8986
8987 addr = gaudi2_mmu_descramble_addr(hdev, addr);
8988 addr &= HW_UNSCRAMBLED_BITS_MASK;
8989 dev_err_ratelimited(hdev->dev, "HMMU page fault on va range 0x%llx - 0x%llx\n",
8990 addr, addr + ~HW_UNSCRAMBLED_BITS_MASK);
8991 }
8992
8993 hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask);
8994
8995 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
8996 }
8997
8998 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
8999 {
9000 u32 valid, val;
9001 u64 addr;
9002
9003 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
9004
9005 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK))
9006 return;
9007
9008 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE));
9009 addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK;
9010 addr <<= 32;
9011 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));
9012
9013 if (!is_pmmu)
9014 addr = gaudi2_mmu_descramble_addr(hdev, addr);
9015
9016 dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
9017 is_pmmu ? "PMMU" : "HMMU", addr);
9018 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
9019 }
9020
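/*
 * Decode the MMU SPI/SEI cause register: cause bit 0 is treated as a page
 * fault and bit 1 as an access error. The handled cause bits are cleared and
 * the matching interrupt-clear bits (where defined) are written back.
 */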
9021 static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type,
9022 u64 mmu_base, bool is_pmmu, u64 *event_mask)
9023 {
9024 u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0;
9025 int i;
9026
9027 spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET);
9028
9029 for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
9030 if (spi_sei_cause & BIT(i)) {
9031 gaudi2_print_event(hdev, event_type, true,
9032 "err cause: %s", gaudi2_mmu_spi_sei[i].cause);
9033
9034 if (i == 0)
9035 gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask);
9036 else if (i == 1)
9037 gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
9038
9039 if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
9040 interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);
9041
9042 error_count++;
9043 }
9044 }
9045
9046 /* Clear cause */
9047 WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause);
9048
9049 /* Clear interrupt */
9050 WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);
9051
9052 return error_count;
9053 }
9054
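/*
 * A sync-manager error can be either an SEI, decoded from SM_SEI_CAUSE
 * together with its LOG field, or an unexpected CQ interrupt, for which the
 * offending queue index is extracted from CQ_INTR. Both indications are
 * cleared after being reported.
 */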
9055 static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index)
9056 {
9057 u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log,
9058 cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0;
9059 int i;
9060
9061 sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
9062 cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index;
9063
9064 sei_cause_val = RREG32(sei_cause_addr);
9065 sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val);
9066 cq_intr_val = RREG32(cq_intr_addr);
9067
9068 /* SEI interrupt */
9069 if (sei_cause_cause) {
9070 /* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */
9071 sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK,
9072 sei_cause_val);
9073
9074 for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) {
9075 if (!(sei_cause_cause & BIT(i)))
9076 continue;
9077
9078 gaudi2_print_event(hdev, event_type, true,
9079 "err cause: %s. %s: 0x%X",
9080 gaudi2_sm_sei_cause[i].cause_name,
9081 gaudi2_sm_sei_cause[i].log_name,
9082 sei_cause_log);
9083 error_count++;
9084 break;
9085 }
9086
9087 /* Clear SM_SEI_CAUSE */
9088 WREG32(sei_cause_addr, 0);
9089 }
9090
9091 /* CQ interrupt */
9092 if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) {
9093 cq_intr_queue_index =
9094 FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK,
9095 cq_intr_val);
9096
9097 dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n",
9098 sm_index, cq_intr_queue_index);
9099 error_count++;
9100
9101 /* Clear CQ_INTR */
9102 WREG32(cq_intr_addr, 0);
9103 }
9104
9105 hl_check_for_glbl_errors(hdev);
9106
9107 return error_count;
9108 }
9109
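/*
 * Translate an HMMU event to the MMIO base of the HMMU instance that raised
 * it. The dcore/index pairs are spelled out per event because the HMMU event
 * numbering does not follow a simple linear order across dcores. Non-HMMU
 * events return ULONG_MAX, which the caller treats as "no MMU base".
 */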
9110 static u64 get_hmmu_base(u16 event_type)
9111 {
9112 u8 dcore, index_in_dcore;
9113
9114 switch (event_type) {
9115 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP:
9116 case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU0_SECURITY_ERROR:
9117 dcore = 0;
9118 index_in_dcore = 0;
9119 break;
9120 case GAUDI2_EVENT_HMMU_1_AXI_ERR_RSP:
9121 case GAUDI2_EVENT_HMMU1_SPI_BASE ... GAUDI2_EVENT_HMMU1_SECURITY_ERROR:
9122 dcore = 1;
9123 index_in_dcore = 0;
9124 break;
9125 case GAUDI2_EVENT_HMMU_2_AXI_ERR_RSP:
9126 case GAUDI2_EVENT_HMMU2_SPI_BASE ... GAUDI2_EVENT_HMMU2_SECURITY_ERROR:
9127 dcore = 0;
9128 index_in_dcore = 1;
9129 break;
9130 case GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
9131 case GAUDI2_EVENT_HMMU3_SPI_BASE ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
9132 dcore = 1;
9133 index_in_dcore = 1;
9134 break;
9135 case GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
9136 case GAUDI2_EVENT_HMMU4_SPI_BASE ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
9137 dcore = 3;
9138 index_in_dcore = 2;
9139 break;
9140 case GAUDI2_EVENT_HMMU_5_AXI_ERR_RSP:
9141 case GAUDI2_EVENT_HMMU5_SPI_BASE ... GAUDI2_EVENT_HMMU5_SECURITY_ERROR:
9142 dcore = 2;
9143 index_in_dcore = 2;
9144 break;
9145 case GAUDI2_EVENT_HMMU_6_AXI_ERR_RSP:
9146 case GAUDI2_EVENT_HMMU6_SPI_BASE ... GAUDI2_EVENT_HMMU6_SECURITY_ERROR:
9147 dcore = 3;
9148 index_in_dcore = 3;
9149 break;
9150 case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP:
9151 case GAUDI2_EVENT_HMMU7_SPI_BASE ... GAUDI2_EVENT_HMMU7_SECURITY_ERROR:
9152 dcore = 2;
9153 index_in_dcore = 3;
9154 break;
9155 case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP:
9156 case GAUDI2_EVENT_HMMU8_SPI_BASE ... GAUDI2_EVENT_HMMU8_SECURITY_ERROR:
9157 dcore = 0;
9158 index_in_dcore = 2;
9159 break;
9160 case GAUDI2_EVENT_HMMU_9_AXI_ERR_RSP:
9161 case GAUDI2_EVENT_HMMU9_SPI_BASE ... GAUDI2_EVENT_HMMU9_SECURITY_ERROR:
9162 dcore = 1;
9163 index_in_dcore = 2;
9164 break;
9165 case GAUDI2_EVENT_HMMU_10_AXI_ERR_RSP:
9166 case GAUDI2_EVENT_HMMU10_SPI_BASE ... GAUDI2_EVENT_HMMU10_SECURITY_ERROR:
9167 dcore = 0;
9168 index_in_dcore = 3;
9169 break;
9170 case GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
9171 case GAUDI2_EVENT_HMMU11_SPI_BASE ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
9172 dcore = 1;
9173 index_in_dcore = 3;
9174 break;
9175 case GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9176 case GAUDI2_EVENT_HMMU12_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9177 dcore = 3;
9178 index_in_dcore = 0;
9179 break;
9180 case GAUDI2_EVENT_HMMU_13_AXI_ERR_RSP:
9181 case GAUDI2_EVENT_HMMU13_SPI_BASE ... GAUDI2_EVENT_HMMU13_SECURITY_ERROR:
9182 dcore = 2;
9183 index_in_dcore = 0;
9184 break;
9185 case GAUDI2_EVENT_HMMU_14_AXI_ERR_RSP:
9186 case GAUDI2_EVENT_HMMU14_SPI_BASE ... GAUDI2_EVENT_HMMU14_SECURITY_ERROR:
9187 dcore = 3;
9188 index_in_dcore = 1;
9189 break;
9190 case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP:
9191 case GAUDI2_EVENT_HMMU15_SPI_BASE ... GAUDI2_EVENT_HMMU15_SECURITY_ERROR:
9192 dcore = 2;
9193 index_in_dcore = 1;
9194 break;
9195 default:
9196 return ULONG_MAX;
9197 }
9198
9199 return mmDCORE0_HMMU0_MMU_BASE + dcore * DCORE_OFFSET + index_in_dcore * DCORE_HMMU_OFFSET;
9200 }
9201
9202 static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
9203 {
9204 bool is_pmmu = false;
9205 u32 error_count = 0;
9206 u64 mmu_base;
9207
9208 switch (event_type) {
9209 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9210 case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9211 mmu_base = get_hmmu_base(event_type);
9212 break;
9213
9214 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9215 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9216 is_pmmu = true;
9217 mmu_base = mmPMMU_HBW_MMU_BASE;
9218 break;
9219 default:
9220 return 0;
9221 }
9222
9223 if (mmu_base == ULONG_MAX)
9224 return 0;
9225
9226 error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base,
9227 is_pmmu, event_mask);
9228 hl_check_for_glbl_errors(hdev);
9229
9230 return error_count;
9231 }
9232
9233
9234 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */
9235 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
9236 struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
9237 {
9238 u32 addr, beat, beat_shift;
9239 bool rc = false;
9240
9241 dev_err_ratelimited(hdev->dev,
9242 "READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
9243 FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt),
9244 FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt),
9245 FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt));
9246
9247 addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val);
9248 dev_err_ratelimited(hdev->dev,
9249 "READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n",
9250 FIELD_GET(HBM_RD_ADDR_SID_MASK, addr),
9251 FIELD_GET(HBM_RD_ADDR_BG_MASK, addr),
9252 FIELD_GET(HBM_RD_ADDR_BA_MASK, addr),
9253 FIELD_GET(HBM_RD_ADDR_COL_MASK, addr),
9254 FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr));
9255
9256 /* For each beat (RDQS edge), look for possible errors and print relevant info */
9257 for (beat = 0 ; beat < 4 ; beat++) {
9258 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9259 (HBM_RD_ERR_SERR_BEAT0_MASK << beat))
9260 dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n",
9261 beat,
9262 le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9263 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
9264
9265 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9266 (HBM_RD_ERR_DERR_BEAT0_MASK << beat)) {
9267 dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n",
9268 beat,
9269 le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9270 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
9271 rc |= true;
9272 }
9273
9274 beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
9275 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9276 (HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
9277 dev_err_ratelimited(hdev->dev,
9278 "Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
9279 beat,
9280 le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9281 (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9282 (HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
9283 (HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
9284 rc |= true;
9285 }
9286
9287 dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
9288 dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
9289 le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
9290 dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
9291 le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
9292 }
9293
9294 return rc;
9295 }
9296
9297 static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
9298 struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
9299 {
9300 struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
9301 u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;
9302
9303 dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);
9304
9305 dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
9306 derr & 0x3, derr & 0xc);
9307
9308 /* JIRA H6-3286 - the following prints may not be valid */
9309 dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
9310 for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
9311 curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
9312 dev_err_ratelimited(hdev->dev,
9313 "\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
9314 i,
9315 FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
9316 FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
9317 FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
9318 FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
9319 }
9320 }
9321
9322 static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
9323 struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
9324 {
9325 __le32 *col_cmd = ca_par_err_data->dbg_col;
9326 __le16 *row_cmd = ca_par_err_data->dbg_row;
9327 u32 i;
9328
9329 dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);
9330
9331 dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
9332 for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
9333 dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
9334 le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
9335 le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
9336 }
9337
9338 /* Returns true if hard reset is needed or false otherwise */
9339 static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
9340 struct hl_eq_hbm_sei_data *sei_data)
9341 {
9342 bool require_hard_reset = false;
9343 u32 hbm_id, mc_id, cause_idx;
9344
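	/*
	 * The SEI events appear to be grouped four per HBM (two MCs, each with
	 * a severe and a non-severe event), so the offset from the first event
	 * divided by 4 gives the HBM index and (offset / 2) % 2 gives the MC.
	 */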
9345 hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
9346 mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;
9347
9348 cause_idx = sei_data->hdr.sei_cause;
9349 if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
9350 gaudi2_print_event(hdev, event_type, true,
9351 	"Invalid HBM SEI event cause (%d) provided by FW",
9352 	cause_idx);
9353 return true;
9354 }
9355
9356 gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical,
9357 "System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s",
9358 sei_data->hdr.is_critical ? "Critical" : "Non-critical",
9359 hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
9360 hbm_mc_sei_cause[cause_idx]);
9361
9362 /* Print error-specific info */
9363 switch (cause_idx) {
9364 case HBM_SEI_CATTRIP:
9365 require_hard_reset = true;
9366 break;
9367
9368 case HBM_SEI_CMD_PARITY_EVEN:
9369 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info,
9370 le32_to_cpu(sei_data->hdr.cnt));
9371 require_hard_reset = true;
9372 break;
9373
9374 case HBM_SEI_CMD_PARITY_ODD:
9375 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info,
9376 le32_to_cpu(sei_data->hdr.cnt));
9377 require_hard_reset = true;
9378 break;
9379
9380 case HBM_SEI_WRITE_DATA_PARITY_ERR:
9381 gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info,
9382 le32_to_cpu(sei_data->hdr.cnt));
9383 require_hard_reset = true;
9384 break;
9385
9386 case HBM_SEI_READ_ERR:
9387 /* Unlike other SEI events, read error requires further processing of the
9388 * raw data in order to determine the root cause.
9389 */
9390 require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev,
9391 &sei_data->read_err_info,
9392 le32_to_cpu(sei_data->hdr.cnt));
9393 break;
9394
9395 default:
9396 break;
9397 }
9398
9399 require_hard_reset |= !!sei_data->hdr.is_critical;
9400
9401 return require_hard_reset;
9402 }
9403
9404 static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type,
9405 u64 intr_cause_data)
9406 {
9407 if (intr_cause_data) {
9408 gaudi2_print_event(hdev, event_type, true,
9409 "temperature error cause: %#llx", intr_cause_data);
9410 return 1;
9411 }
9412
9413 return 0;
9414 }
9415
9416 static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data)
9417 {
9418 u32 i, error_count = 0;
9419
9420 for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++)
9421 if (intr_cause_data & hbm_mc_spi[i].mask) {
9422 dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n",
9423 hbm_mc_spi[i].cause);
9424 error_count++;
9425 }
9426
9427 return error_count;
9428 }
9429
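/*
 * Track clock-throttling start/end events under the clk_throttling lock:
 * update the current and aggregated reason masks and the per-type start/end
 * timestamps. Thermal events also raise the user-engine-error notifier bit.
 */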
9430 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
9431 {
9432 ktime_t zero_time = ktime_set(0, 0);
9433
9434 mutex_lock(&hdev->clk_throttling.lock);
9435
9436 switch (event_type) {
9437 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9438 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
9439 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
9440 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
9441 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
9442 dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
9443 break;
9444
9445 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9446 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
9447 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
9448 dev_dbg_ratelimited(hdev->dev, "Power envelope is safe, back to optimal clock\n");
9449 break;
9450
9451 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9452 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
9453 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
9454 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
9455 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
9456 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9457 dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
9458 break;
9459
9460 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9461 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
9462 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
9463 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9464 dev_info_ratelimited(hdev->dev, "Thermal envelope is safe, back to optimal clock\n");
9465 break;
9466
9467 default:
9468 dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
9469 break;
9470 }
9471
9472 mutex_unlock(&hdev->clk_throttling.lock);
9473 }
9474
9475 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type,
9476 struct cpucp_pkt_sync_err *sync_err)
9477 {
9478 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
9479
9480 gaudi2_print_event(hdev, event_type, false,
9481 "FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
9482 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci),
9483 q->pi, atomic_read(&q->ci));
9484 }
9485
9486 static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
9487 {
9488 u32 p2p_intr, msix_gw_intr, error_count = 0;
9489
9490 p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR);
9491 msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR);
9492
9493 if (p2p_intr) {
9494 gaudi2_print_event(hdev, event_type, true,
9495 "pcie p2p transaction terminated due to security, req_id(0x%x)",
9496 RREG32(mmPCIE_WRAP_P2P_REQ_ID));
9497
9498 WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
9499 error_count++;
9500 }
9501
9502 if (msix_gw_intr) {
9503 gaudi2_print_event(hdev, event_type, true,
9504 "pcie msi-x gen denied due to vector num check failure, vec(0x%X)",
9505 RREG32(mmPCIE_WRAP_MSIX_GW_VEC));
9506
9507 WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
9508 error_count++;
9509 }
9510
9511 return error_count;
9512 }
9513
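/*
 * Decode the PCIE AXI drain indication: cause bit 0 reports LBW drain
 * completion along with read/write error flags, while bit 1 reports HBW drain
 * completion together with the captured read/write addresses.
 */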
9514 static int gaudi2_handle_pcie_drain(struct hl_device *hdev,
9515 struct hl_eq_pcie_drain_ind_data *drain_data)
9516 {
9517 u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0;
9518
9519 cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
9520 lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw);
9521 lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw);
9522 hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw);
9523 hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw);
9524
9525 if (cause & BIT_ULL(0)) {
9526 dev_err_ratelimited(hdev->dev,
9527 "PCIE AXI drain LBW completed, read_err %u, write_err %u\n",
9528 !!lbw_rd, !!lbw_wr);
9529 error_count++;
9530 }
9531
9532 if (cause & BIT_ULL(1)) {
9533 dev_err_ratelimited(hdev->dev,
9534 "PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n",
9535 hbw_rd, hbw_wr);
9536 error_count++;
9537 }
9538
9539 return error_count;
9540 }
9541
9542 static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data)
9543 {
9544 u32 error_count = 0;
9545 int i;
9546
9547 for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) {
9548 if (intr_cause_data & BIT_ULL(i)) {
9549 dev_err_ratelimited(hdev->dev, "PSOC %s completed\n",
9550 gaudi2_psoc_axi_drain_interrupts_cause[i]);
9551 error_count++;
9552 }
9553 }
9554
9555 hl_check_for_glbl_errors(hdev);
9556
9557 return error_count;
9558 }
9559
9560 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type,
9561 struct cpucp_pkt_sync_err *sync_err)
9562 {
9563 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
9564
9565 gaudi2_print_event(hdev, event_type, false,
9566 "FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
9567 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
9568 }
9569
9570 static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
9571 struct hl_eq_engine_arc_intr_data *data)
9572 {
9573 struct hl_engine_arc_dccm_queue_full_irq *q;
9574 u32 intr_type, engine_id;
9575 u64 payload;
9576
9577 intr_type = le32_to_cpu(data->intr_type);
9578 engine_id = le32_to_cpu(data->engine_id);
9579 payload = le64_to_cpu(data->payload);
9580
9581 switch (intr_type) {
9582 case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ:
9583 q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;
9584
9585 gaudi2_print_event(hdev, event_type, true,
9586 "ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u",
9587 engine_id, intr_type, q->queue_index);
9588 return 1;
9589 default:
9590 gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type");
9591 return 0;
9592 }
9593 }
9594
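/*
 * Map an event type to the GAUDI2_ENGINE_ID of the engine that generated it,
 * so the engine can be reported to the user. Events with no engine mapping
 * return U16_MAX.
 */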
9595 static u16 event_id_to_engine_id(struct hl_device *hdev, u16 event_type)
9596 {
9597 enum gaudi2_block_types type = GAUDI2_BLOCK_TYPE_MAX;
9598 u16 index;
9599
9600 switch (event_type) {
9601 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
9602 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
9603 type = GAUDI2_BLOCK_TYPE_TPC;
9604 break;
9605 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC24_QM:
9606 index = event_type - GAUDI2_EVENT_TPC0_QM;
9607 type = GAUDI2_BLOCK_TYPE_TPC;
9608 break;
9609 case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
9610 case GAUDI2_EVENT_MME0_SPI_BASE ... GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
9611 case GAUDI2_EVENT_MME0_QM:
9612 index = 0;
9613 type = GAUDI2_BLOCK_TYPE_MME;
9614 break;
9615 case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
9616 case GAUDI2_EVENT_MME1_SPI_BASE ... GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
9617 case GAUDI2_EVENT_MME1_QM:
9618 index = 1;
9619 type = GAUDI2_BLOCK_TYPE_MME;
9620 break;
9621 case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
9622 case GAUDI2_EVENT_MME2_SPI_BASE ... GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
9623 case GAUDI2_EVENT_MME2_QM:
9624 index = 2;
9625 type = GAUDI2_BLOCK_TYPE_MME;
9626 break;
9627 case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
9628 case GAUDI2_EVENT_MME3_SPI_BASE ... GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
9629 case GAUDI2_EVENT_MME3_QM:
9630 index = 3;
9631 type = GAUDI2_BLOCK_TYPE_MME;
9632 break;
9633 case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
9634 case GAUDI2_EVENT_KDMA_BM_SPMU:
9635 case GAUDI2_EVENT_KDMA0_CORE:
9636 return GAUDI2_ENGINE_ID_KDMA;
9637 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
9638 case GAUDI2_EVENT_PDMA0_CORE:
9639 case GAUDI2_EVENT_PDMA0_BM_SPMU:
9640 case GAUDI2_EVENT_PDMA0_QM:
9641 return GAUDI2_ENGINE_ID_PDMA_0;
9642 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
9643 case GAUDI2_EVENT_PDMA1_CORE:
9644 case GAUDI2_EVENT_PDMA1_BM_SPMU:
9645 case GAUDI2_EVENT_PDMA1_QM:
9646 return GAUDI2_ENGINE_ID_PDMA_1;
9647 case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
9648 index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
9649 type = GAUDI2_BLOCK_TYPE_DEC;
9650 break;
9651 case GAUDI2_EVENT_DEC0_SPI ... GAUDI2_EVENT_DEC9_BMON_SPMU:
9652 index = (event_type - GAUDI2_EVENT_DEC0_SPI) >> 1;
9653 type = GAUDI2_BLOCK_TYPE_DEC;
9654 break;
9655 case GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE:
9656 index = event_type - GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE;
9657 return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2);
9658 case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
9659 index = event_type - GAUDI2_EVENT_NIC0_QM0;
9660 return GAUDI2_ENGINE_ID_NIC0_0 + index;
9661 case GAUDI2_EVENT_NIC0_BMON_SPMU ... GAUDI2_EVENT_NIC11_SW_ERROR:
9662 index = event_type - GAUDI2_EVENT_NIC0_BMON_SPMU;
9663 return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2);
9664 case GAUDI2_EVENT_TPC0_BMON_SPMU ... GAUDI2_EVENT_TPC24_KERNEL_ERR:
9665 index = (event_type - GAUDI2_EVENT_TPC0_BMON_SPMU) >> 1;
9666 type = GAUDI2_BLOCK_TYPE_TPC;
9667 break;
9668 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
9669 case GAUDI2_EVENT_ROTATOR0_BMON_SPMU:
9670 case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
9671 return GAUDI2_ENGINE_ID_ROT_0;
9672 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
9673 case GAUDI2_EVENT_ROTATOR1_BMON_SPMU:
9674 case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
9675 return GAUDI2_ENGINE_ID_ROT_1;
9676 case GAUDI2_EVENT_HDMA0_BM_SPMU:
9677 case GAUDI2_EVENT_HDMA0_QM:
9678 case GAUDI2_EVENT_HDMA0_CORE:
9679 return GAUDI2_DCORE0_ENGINE_ID_EDMA_0;
9680 case GAUDI2_EVENT_HDMA1_BM_SPMU:
9681 case GAUDI2_EVENT_HDMA1_QM:
9682 case GAUDI2_EVENT_HDMA1_CORE:
9683 return GAUDI2_DCORE0_ENGINE_ID_EDMA_1;
9684 case GAUDI2_EVENT_HDMA2_BM_SPMU:
9685 case GAUDI2_EVENT_HDMA2_QM:
9686 case GAUDI2_EVENT_HDMA2_CORE:
9687 return GAUDI2_DCORE1_ENGINE_ID_EDMA_0;
9688 case GAUDI2_EVENT_HDMA3_BM_SPMU:
9689 case GAUDI2_EVENT_HDMA3_QM:
9690 case GAUDI2_EVENT_HDMA3_CORE:
9691 return GAUDI2_DCORE1_ENGINE_ID_EDMA_1;
9692 case GAUDI2_EVENT_HDMA4_BM_SPMU:
9693 case GAUDI2_EVENT_HDMA4_QM:
9694 case GAUDI2_EVENT_HDMA4_CORE:
9695 return GAUDI2_DCORE2_ENGINE_ID_EDMA_0;
9696 case GAUDI2_EVENT_HDMA5_BM_SPMU:
9697 case GAUDI2_EVENT_HDMA5_QM:
9698 case GAUDI2_EVENT_HDMA5_CORE:
9699 return GAUDI2_DCORE2_ENGINE_ID_EDMA_1;
9700 case GAUDI2_EVENT_HDMA6_BM_SPMU:
9701 case GAUDI2_EVENT_HDMA6_QM:
9702 case GAUDI2_EVENT_HDMA6_CORE:
9703 return GAUDI2_DCORE3_ENGINE_ID_EDMA_0;
9704 case GAUDI2_EVENT_HDMA7_BM_SPMU:
9705 case GAUDI2_EVENT_HDMA7_QM:
9706 case GAUDI2_EVENT_HDMA7_CORE:
9707 return GAUDI2_DCORE3_ENGINE_ID_EDMA_1;
9708 default:
9709 break;
9710 }
9711
9712 switch (type) {
9713 case GAUDI2_BLOCK_TYPE_TPC:
9714 switch (index) {
9715 case TPC_ID_DCORE0_TPC0 ... TPC_ID_DCORE0_TPC5:
9716 return GAUDI2_DCORE0_ENGINE_ID_TPC_0 + index;
9717 case TPC_ID_DCORE1_TPC0 ... TPC_ID_DCORE1_TPC5:
9718 return GAUDI2_DCORE1_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE1_TPC0;
9719 case TPC_ID_DCORE2_TPC0 ... TPC_ID_DCORE2_TPC5:
9720 return GAUDI2_DCORE2_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE2_TPC0;
9721 case TPC_ID_DCORE3_TPC0 ... TPC_ID_DCORE3_TPC5:
9722 return GAUDI2_DCORE3_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE3_TPC0;
9723 default:
9724 break;
9725 }
9726 break;
9727 case GAUDI2_BLOCK_TYPE_MME:
9728 switch (index) {
9729 case MME_ID_DCORE0: return GAUDI2_DCORE0_ENGINE_ID_MME;
9730 case MME_ID_DCORE1: return GAUDI2_DCORE1_ENGINE_ID_MME;
9731 case MME_ID_DCORE2: return GAUDI2_DCORE2_ENGINE_ID_MME;
9732 case MME_ID_DCORE3: return GAUDI2_DCORE3_ENGINE_ID_MME;
9733 default:
9734 break;
9735 }
9736 break;
9737 case GAUDI2_BLOCK_TYPE_DEC:
9738 switch (index) {
9739 case DEC_ID_DCORE0_DEC0: return GAUDI2_DCORE0_ENGINE_ID_DEC_0;
9740 case DEC_ID_DCORE0_DEC1: return GAUDI2_DCORE0_ENGINE_ID_DEC_1;
9741 case DEC_ID_DCORE1_DEC0: return GAUDI2_DCORE1_ENGINE_ID_DEC_0;
9742 case DEC_ID_DCORE1_DEC1: return GAUDI2_DCORE1_ENGINE_ID_DEC_1;
9743 case DEC_ID_DCORE2_DEC0: return GAUDI2_DCORE2_ENGINE_ID_DEC_0;
9744 case DEC_ID_DCORE2_DEC1: return GAUDI2_DCORE2_ENGINE_ID_DEC_1;
9745 case DEC_ID_DCORE3_DEC0: return GAUDI2_DCORE3_ENGINE_ID_DEC_0;
9746 case DEC_ID_DCORE3_DEC1: return GAUDI2_DCORE3_ENGINE_ID_DEC_1;
9747 case DEC_ID_PCIE_VDEC0: return GAUDI2_PCIE_ENGINE_ID_DEC_0;
9748 case DEC_ID_PCIE_VDEC1: return GAUDI2_PCIE_ENGINE_ID_DEC_1;
9749 default:
9750 break;
9751 }
9752 break;
9753 default:
9754 break;
9755 }
9756
9757 return U16_MAX;
9758 }
9759
9760 static void hl_eq_heartbeat_event_handle(struct hl_device *hdev)
9761 {
9762 hdev->eq_heartbeat_received = true;
9763 }
9764
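/*
 * Main event-queue dispatcher: validate the event type, update the event
 * statistics and call the matching handler to print and acknowledge the error
 * causes. The accumulated error count, notifier event mask and reset flags
 * are used further down to decide whether a device reset is required.
 */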
9765 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
9766 {
9767 struct gaudi2_device *gaudi2 = hdev->asic_specific;
9768 bool reset_required = false, is_critical = false;
9769 u32 index, ctl, reset_flags = 0, error_count = 0;
9770 u64 event_mask = 0;
9771 u16 event_type;
9772
9773 ctl = le32_to_cpu(eq_entry->hdr.ctl);
9774 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);
9775
9776 if (event_type >= GAUDI2_EVENT_SIZE) {
9777 dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
9778 event_type, GAUDI2_EVENT_SIZE - 1);
9779 return;
9780 }
9781
9782 gaudi2->events_stat[event_type]++;
9783 gaudi2->events_stat_aggregate[event_type]++;
9784
9785 switch (event_type) {
9786 case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR:
9787 fallthrough;
9788 case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
9789 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9790 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9791 reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
9792 is_critical = eq_entry->ecc_data.is_critical;
9793 error_count++;
9794 break;
9795
9796 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
9797 fallthrough;
9798 case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM:
9799 fallthrough;
9800 case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
9801 error_count = gaudi2_handle_qman_err(hdev, event_type, &event_mask);
9802 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9803 break;
9804
9805 case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
9806 error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type, &event_mask);
9807 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9808 break;
9809
9810 case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
9811 error_count = gaudi2_handle_cpu_sei_err(hdev, event_type);
9812 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9813 event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR;
9814 break;
9815
9816 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
9817 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
9818 error_count = gaudi2_handle_qm_sei_err(hdev, event_type, true, &event_mask);
9819 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9820 break;
9821
9822 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
9823 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
9824 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
9825 error_count = gaudi2_handle_rot_err(hdev, index, event_type,
9826 &eq_entry->razwi_with_intr_cause, &event_mask);
9827 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9828 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9829 break;
9830
9831 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
9832 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
9833 error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9834 &eq_entry->razwi_with_intr_cause, &event_mask);
9835 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9836 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9837 break;
9838
9839 case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
9840 index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
9841 error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9842 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9843 break;
9844
9845 case GAUDI2_EVENT_TPC0_KERNEL_ERR:
9846 case GAUDI2_EVENT_TPC1_KERNEL_ERR:
9847 case GAUDI2_EVENT_TPC2_KERNEL_ERR:
9848 case GAUDI2_EVENT_TPC3_KERNEL_ERR:
9849 case GAUDI2_EVENT_TPC4_KERNEL_ERR:
9850 case GAUDI2_EVENT_TPC5_KERNEL_ERR:
9851 case GAUDI2_EVENT_TPC6_KERNEL_ERR:
9852 case GAUDI2_EVENT_TPC7_KERNEL_ERR:
9853 case GAUDI2_EVENT_TPC8_KERNEL_ERR:
9854 case GAUDI2_EVENT_TPC9_KERNEL_ERR:
9855 case GAUDI2_EVENT_TPC10_KERNEL_ERR:
9856 case GAUDI2_EVENT_TPC11_KERNEL_ERR:
9857 case GAUDI2_EVENT_TPC12_KERNEL_ERR:
9858 case GAUDI2_EVENT_TPC13_KERNEL_ERR:
9859 case GAUDI2_EVENT_TPC14_KERNEL_ERR:
9860 case GAUDI2_EVENT_TPC15_KERNEL_ERR:
9861 case GAUDI2_EVENT_TPC16_KERNEL_ERR:
9862 case GAUDI2_EVENT_TPC17_KERNEL_ERR:
9863 case GAUDI2_EVENT_TPC18_KERNEL_ERR:
9864 case GAUDI2_EVENT_TPC19_KERNEL_ERR:
9865 case GAUDI2_EVENT_TPC20_KERNEL_ERR:
9866 case GAUDI2_EVENT_TPC21_KERNEL_ERR:
9867 case GAUDI2_EVENT_TPC22_KERNEL_ERR:
9868 case GAUDI2_EVENT_TPC23_KERNEL_ERR:
9869 case GAUDI2_EVENT_TPC24_KERNEL_ERR:
9870 index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
9871 (GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
9872 error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9873 &eq_entry->razwi_with_intr_cause, &event_mask);
9874 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9875 break;
9876
9877 case GAUDI2_EVENT_DEC0_SPI:
9878 case GAUDI2_EVENT_DEC1_SPI:
9879 case GAUDI2_EVENT_DEC2_SPI:
9880 case GAUDI2_EVENT_DEC3_SPI:
9881 case GAUDI2_EVENT_DEC4_SPI:
9882 case GAUDI2_EVENT_DEC5_SPI:
9883 case GAUDI2_EVENT_DEC6_SPI:
9884 case GAUDI2_EVENT_DEC7_SPI:
9885 case GAUDI2_EVENT_DEC8_SPI:
9886 case GAUDI2_EVENT_DEC9_SPI:
9887 index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
9888 (GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
9889 error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9890 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9891 break;
9892
9893 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
9894 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
9895 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
9896 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
9897 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
9898 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
9899 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
9900 error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9901 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9902 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9903 break;
9904
9905 case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
9906 case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
9907 case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
9908 case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
9909 index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
9910 (GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
9911 GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
9912 error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9913 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9914 break;
9915
9916 case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
9917 case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
9918 case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
9919 case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
9920 index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
9921 (GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
9922 GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
9923 error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type, &event_mask);
9924 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9925 break;
9926
9927 case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
9928 case GAUDI2_EVENT_KDMA0_CORE:
9929 error_count = gaudi2_handle_kdma_core_event(hdev, event_type,
9930 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9931 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9932 break;
9933
9934 case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_HDMA5_CORE:
9935 error_count = gaudi2_handle_dma_core_event(hdev, event_type,
9936 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9937 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9938 break;
9939
9940 case GAUDI2_EVENT_PDMA0_CORE ... GAUDI2_EVENT_PDMA1_CORE:
9941 error_count = gaudi2_handle_dma_core_event(hdev, event_type,
9942 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9943 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9944 break;
9945
9946 case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
9947 error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type,
9948 le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask);
9949 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9950 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9951 break;
9952
9953 case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9954 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9955 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9956 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9957 error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask);
9958 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9959 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9960 break;
9961
9962 case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
9963 error_count = gaudi2_handle_hif_fatal(hdev, event_type,
9964 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9965 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9966 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9967 break;
9968
9969 case GAUDI2_EVENT_PMMU_FATAL_0:
9970 error_count = gaudi2_handle_pif_fatal(hdev, event_type,
9971 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9972 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9973 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9974 break;
9975
9976 case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
9977 error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask);
9978 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9979 break;
9980
9981 case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
9982 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9983 if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
9984 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9985 reset_required = true;
9986 }
9987 error_count++;
9988 break;
9989
9990 case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
9991 error_count = gaudi2_handle_hbm_cattrip(hdev, event_type,
9992 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9993 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9994 break;
9995
9996 case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
9997 error_count = gaudi2_handle_hbm_mc_spi(hdev,
9998 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9999 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10000 break;
10001
10002 case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
10003 error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
10004 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10005 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10006 if (hl_is_fw_sw_ver_equal_or_greater(hdev, 1, 13))
10007 is_critical = true;
10008 break;
10009
10010 case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
10011 error_count = gaudi2_handle_psoc_drain(hdev,
10012 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
10013 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10014 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10015 break;
10016
10017 case GAUDI2_EVENT_CPU_AXI_ECC:
10018 error_count = GAUDI2_NA_EVENT_CAUSE;
10019 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10020 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10021 break;
10022 case GAUDI2_EVENT_CPU_L2_RAM_ECC:
10023 error_count = GAUDI2_NA_EVENT_CAUSE;
10024 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10025 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10026 break;
10027 case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
10028 case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
10029 case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
10030 case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
10031 error_count = gaudi2_handle_mme_sbte_err(hdev, event_type);
10032 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10033 break;
10034 case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
10035 error_count = GAUDI2_NA_EVENT_CAUSE;
10036 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10037 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10038 break;
10039 case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
10040 error_count = GAUDI2_NA_EVENT_CAUSE;
10041 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10042 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10043 break;
10044 case GAUDI2_EVENT_PSOC_PRSTN_FALL:
10045 error_count = GAUDI2_NA_EVENT_CAUSE;
10046 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10047 break;
10048 case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
10049 error_count = GAUDI2_NA_EVENT_CAUSE;
10050 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10051 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10052 break;
10053 case GAUDI2_EVENT_PCIE_FATAL_ERR:
10054 error_count = GAUDI2_NA_EVENT_CAUSE;
10055 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10056 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10057 break;
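	/*
	 * Note: the BMON/SPMU cases below are bus-monitor / profiling interrupts.
	 * As handled here they carry no error cause data (GAUDI2_NA_EVENT_CAUSE),
	 * set no reset flags, and only notify user space via
	 * HL_NOTIFIER_EVENT_USER_ENGINE_ERR.
	 */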
10058 case GAUDI2_EVENT_TPC0_BMON_SPMU:
10059 case GAUDI2_EVENT_TPC1_BMON_SPMU:
10060 case GAUDI2_EVENT_TPC2_BMON_SPMU:
10061 case GAUDI2_EVENT_TPC3_BMON_SPMU:
10062 case GAUDI2_EVENT_TPC4_BMON_SPMU:
10063 case GAUDI2_EVENT_TPC5_BMON_SPMU:
10064 case GAUDI2_EVENT_TPC6_BMON_SPMU:
10065 case GAUDI2_EVENT_TPC7_BMON_SPMU:
10066 case GAUDI2_EVENT_TPC8_BMON_SPMU:
10067 case GAUDI2_EVENT_TPC9_BMON_SPMU:
10068 case GAUDI2_EVENT_TPC10_BMON_SPMU:
10069 case GAUDI2_EVENT_TPC11_BMON_SPMU:
10070 case GAUDI2_EVENT_TPC12_BMON_SPMU:
10071 case GAUDI2_EVENT_TPC13_BMON_SPMU:
10072 case GAUDI2_EVENT_TPC14_BMON_SPMU:
10073 case GAUDI2_EVENT_TPC15_BMON_SPMU:
10074 case GAUDI2_EVENT_TPC16_BMON_SPMU:
10075 case GAUDI2_EVENT_TPC17_BMON_SPMU:
10076 case GAUDI2_EVENT_TPC18_BMON_SPMU:
10077 case GAUDI2_EVENT_TPC19_BMON_SPMU:
10078 case GAUDI2_EVENT_TPC20_BMON_SPMU:
10079 case GAUDI2_EVENT_TPC21_BMON_SPMU:
10080 case GAUDI2_EVENT_TPC22_BMON_SPMU:
10081 case GAUDI2_EVENT_TPC23_BMON_SPMU:
10082 case GAUDI2_EVENT_TPC24_BMON_SPMU:
10083 case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU:
10084 case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU:
10085 case GAUDI2_EVENT_MME0_WAP_BMON_SPMU:
10086 case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU:
10087 case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU:
10088 case GAUDI2_EVENT_MME1_WAP_BMON_SPMU:
10089 case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU:
10090 case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU:
10091 case GAUDI2_EVENT_MME2_WAP_BMON_SPMU:
10092 case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU:
10093 case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU:
10094 case GAUDI2_EVENT_MME3_WAP_BMON_SPMU:
10095 case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU:
10096 fallthrough;
10097 case GAUDI2_EVENT_DEC0_BMON_SPMU:
10098 case GAUDI2_EVENT_DEC1_BMON_SPMU:
10099 case GAUDI2_EVENT_DEC2_BMON_SPMU:
10100 case GAUDI2_EVENT_DEC3_BMON_SPMU:
10101 case GAUDI2_EVENT_DEC4_BMON_SPMU:
10102 case GAUDI2_EVENT_DEC5_BMON_SPMU:
10103 case GAUDI2_EVENT_DEC6_BMON_SPMU:
10104 case GAUDI2_EVENT_DEC7_BMON_SPMU:
10105 case GAUDI2_EVENT_DEC8_BMON_SPMU:
10106 case GAUDI2_EVENT_DEC9_BMON_SPMU:
10107 case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
10108 error_count = GAUDI2_NA_EVENT_CAUSE;
10109 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10110 break;
10111
10112 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
10113 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
10114 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
10115 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
10116 gaudi2_print_clk_change_info(hdev, event_type, &event_mask);
10117 error_count = GAUDI2_NA_EVENT_CAUSE;
10118 break;
10119
10120 case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
10121 gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err);
10122 error_count = GAUDI2_NA_EVENT_CAUSE;
10123 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10124 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10125 break;
10126
10127 case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
10128 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10129 error_count = GAUDI2_NA_EVENT_CAUSE;
10130 		/* Do nothing - FW will handle it */
10131 break;
10132
10133 case GAUDI2_EVENT_PCIE_P2P_MSIX:
10134 error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type);
10135 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10136 break;
10137
10138 case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
10139 index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
10140 error_count = gaudi2_handle_sm_err(hdev, event_type, index);
10141 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10142 break;
10143
10144 case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
10145 error_count = GAUDI2_NA_EVENT_CAUSE;
10146 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10147 break;
10148
10149 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
10150 dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
10151 le64_to_cpu(eq_entry->data[0]));
10152 error_count = GAUDI2_NA_EVENT_CAUSE;
10153 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10154 break;
10155 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
10156 dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
10157 le64_to_cpu(eq_entry->data[0]));
10158 error_count = GAUDI2_NA_EVENT_CAUSE;
10159 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10160 break;
10161
10162 case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
10163 gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err);
10164 error_count = GAUDI2_NA_EVENT_CAUSE;
10165 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10166 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10167 break;
10168
10169 case GAUDI2_EVENT_ARC_DCCM_FULL:
10170 error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data);
10171 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10172 break;
10173
10174 case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
10175 case GAUDI2_EVENT_CPU_DEV_RESET_REQ:
10176 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10177 error_count = GAUDI2_NA_EVENT_CAUSE;
10178 is_critical = true;
10179 break;
10180
10181 case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY:
10182 case GAUDI2_EVENT_ARC_PWR_BRK_EXT:
10183 case GAUDI2_EVENT_ARC_PWR_RD_MODE0:
10184 case GAUDI2_EVENT_ARC_PWR_RD_MODE1:
10185 case GAUDI2_EVENT_ARC_PWR_RD_MODE2:
10186 case GAUDI2_EVENT_ARC_PWR_RD_MODE3:
10187 error_count = GAUDI2_NA_EVENT_CAUSE;
10188 dev_info_ratelimited(hdev->dev, "%s event received\n",
10189 gaudi2_irq_map_table[event_type].name);
10190 break;
10191
10192 case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
10193 hl_eq_heartbeat_event_handle(hdev);
10194 error_count = GAUDI2_NA_EVENT_CAUSE;
10195 break;
10196 default:
10197 if (gaudi2_irq_map_table[event_type].valid) {
10198 dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
10199 event_type);
10200 error_count = GAUDI2_NA_EVENT_CAUSE;
10201 }
10202 }
10203
10204 if (event_mask & HL_NOTIFIER_EVENT_USER_ENGINE_ERR)
10205 hl_capture_engine_err(hdev, event_id_to_engine_id(hdev, event_type), error_count);
10206
10207 /* Make sure to dump an error in case no error cause was printed so far.
10208 * Note that although we have counted the errors, we use this number as
10209 * a boolean.
10210 */
10211 if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type))
10212 gaudi2_print_event(hdev, event_type, true, "%d", event_type);
10213 else if (error_count == 0)
10214 gaudi2_print_event(hdev, event_type, true,
10215 "No error cause for H/W event %u", event_type);
10216
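	/*
	 * Note: a reset is considered below when either the event's static reset
	 * policy in gaudi2_irq_map_table requests one, or a runtime condition set
	 * above (e.g. a severe HBM SEI) forced reset_required; in both cases the
	 * reset may be escalated to a hard reset before being scheduled.
	 */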
10217 if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) ||
10218 reset_required) {
10219 if (reset_required ||
10220 (gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD))
10221 reset_flags |= HL_DRV_RESET_HARD;
10222
10223 if (hdev->hard_reset_on_fw_events ||
10224 (hdev->asic_prop.fw_security_enabled && is_critical))
10225 goto reset_device;
10226 }
10227
10228 /* Send unmask irq only for interrupts not classified as MSG */
10229 if (!gaudi2_irq_map_table[event_type].msg)
10230 hl_fw_unmask_irq(hdev, event_type);
10231
10232 if (event_mask)
10233 hl_notifier_event_send_all(hdev, event_mask);
10234
10235 return;
10236
10237 reset_device:
10238 if (hdev->asic_prop.fw_security_enabled && is_critical) {
10239 reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
10240 event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
10241 } else {
10242 reset_flags |= HL_DRV_RESET_DELAY;
10243 }
10244 /* escalate general hw errors to critical/fatal error */
10245 if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
10246 hl_handle_critical_hw_err(hdev, event_type, &event_mask);
10247
10248 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
10249 hl_device_cond_reset(hdev, reset_flags, event_mask);
10250 }
10251
10252 static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
10253 struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr,
10254 u32 hw_queue_id, u32 size, u64 addr, u32 val)
10255 {
10256 u32 ctl, pkt_size;
10257 int rc = 0;
10258
10259 ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
10260 ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
10261 ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
10262 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);
10263
10264 lin_dma_pkt->ctl = cpu_to_le32(ctl);
10265 lin_dma_pkt->src_addr = cpu_to_le64(val);
10266 lin_dma_pkt->dst_addr = cpu_to_le64(addr);
10267 lin_dma_pkt->tsize = cpu_to_le32(size);
10268
10269 pkt_size = sizeof(struct packet_lin_dma);
10270
10271 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
10272 if (rc)
10273 dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
10274 hw_queue_id);
10275
10276 return rc;
10277 }
10278
10279 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
10280 {
10281 u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
10282 GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
10283 GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
10284 GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
10285 u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
10286 old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
10287 u64 comp_addr, cur_addr = addr, end_addr = addr + size;
10288 struct asic_fixed_properties *prop = &hdev->asic_prop;
10289 void *lin_dma_pkts_arr;
10290 dma_addr_t pkt_dma_addr;
10291 int rc = 0, dma_num = 0;
10292
10293 if (prop->edma_enabled_mask == 0) {
10294 		dev_info(hdev->dev, "none of the EDMA engines is enabled - skip DRAM scrubbing\n");
10295 return -EIO;
10296 }
10297
10298 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
10299 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
10300 comp_addr = CFG_BASE + sob_addr;
10301 comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
10302 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
10303 mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
10304 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);
10305
10306 /* Calculate how many lin dma pkts we'll need */
10307 num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
10308 pkt_size = sizeof(struct packet_lin_dma);
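	/*
	 * Illustrative example: each LIN_DMA packet covers at most SZ_2G, so a
	 * 5GB scrub range rounds up to 6GB and needs 3 packets (the last chunk
	 * is clamped to the remaining size in the loop below).
	 */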
10309
10310 lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
10311 &pkt_dma_addr, GFP_KERNEL);
10312 if (!lin_dma_pkts_arr)
10313 return -ENOMEM;
10314
10315 	/*
10316 	 * Set MMU bypass for the scrubbing - all EDMAs are configured the same, so save
10317 	 * only the first one's value to restore later.
10318 	 * Also set the SOB address on all EDMA cores for completion, and set the QM as
10319 	 * trusted to allow it to access a physical address with MMU bypass.
10320 	 */
10321 old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
10322 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10323 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10324 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
10325 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10326
10327 if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10328 continue;
10329
10330 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
10331 edma_offset, mmubp);
10332 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
10333 lower_32_bits(comp_addr));
10334 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
10335 upper_32_bits(comp_addr));
10336 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
10337 comp_val);
10338 gaudi2_qman_set_test_mode(hdev,
10339 edma_queues_id[dcore] + 4 * edma_idx, true);
10340 }
10341 }
10342
10343 WREG32(sob_addr, 0);
10344
10345 while (cur_addr < end_addr) {
10346 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10347 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10348 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10349
10350 if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10351 continue;
10352
10353 chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);
10354
10355 rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
10356 (struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
10357 pkt_dma_addr + dma_num * pkt_size,
10358 edma_queues_id[dcore] + edma_idx * 4,
10359 chunk_size, cur_addr, val);
10360 if (rc)
10361 goto end;
10362
10363 dma_num++;
10364 cur_addr += chunk_size;
10365 if (cur_addr == end_addr)
10366 break;
10367 }
10368 }
10369 }
10370
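	/*
	 * Note: every EDMA write-completion increments the SOB by one (comp_val
	 * above is an atomic INC of 1), so the SOB reaching dma_num indicates
	 * that all submitted LIN_DMA packets have completed.
	 */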
10371 rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
10372 if (rc) {
10373 dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
10374 goto end;
10375 }
10376 end:
10377 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10378 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10379 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
10380 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10381
10382 if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10383 continue;
10384
10385 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
10386 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0);
10387 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0);
10388 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0);
10389 gaudi2_qman_set_test_mode(hdev,
10390 edma_queues_id[dcore] + 4 * edma_idx, false);
10391 }
10392 }
10393
10394 WREG32(sob_addr, 0);
10395 hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr);
10396
10397 return rc;
10398 }
10399
10400 static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val)
10401 {
10402 int rc;
10403 struct asic_fixed_properties *prop = &hdev->asic_prop;
10404 u64 size = prop->dram_end_address - prop->dram_user_base_address;
10405
10406 rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val);
10407
10408 if (rc)
10409 dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n",
10410 prop->dram_user_base_address, size);
10411 return rc;
10412 }
10413
10414 static int gaudi2_scrub_device_mem(struct hl_device *hdev)
10415 {
10416 int rc;
10417 struct asic_fixed_properties *prop = &hdev->asic_prop;
10418 u64 val = hdev->memory_scrub_val;
10419 u64 addr, size;
10420
10421 if (!hdev->memory_scrub)
10422 return 0;
10423
10424 /* scrub SRAM */
10425 addr = prop->sram_user_base_address;
10426 size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET);
10427 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n",
10428 addr, addr + size, val);
10429 rc = gaudi2_memset_device_memory(hdev, addr, size, val);
10430 if (rc) {
10431 dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc);
10432 return rc;
10433 }
10434
10435 /* scrub DRAM */
10436 rc = gaudi2_scrub_device_dram(hdev, val);
10437 if (rc) {
10438 dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc);
10439 return rc;
10440 }
10441 return 0;
10442 }
10443
10444 static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
10445 {
10446 u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr,
10447 cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr;
10448 u32 val, size, offset;
10449 int dcore_id;
10450
10451 offset = hdev->asic_prop.first_available_cq[0] * 4;
10452 cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset;
10453 cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset;
10454 cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset;
10455 cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset;
10456 cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset;
10457 cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset;
10458 size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 -
10459 (mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset);
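	/*
	 * Note (reading of the register map): the computed size is the byte span
	 * from the first available user CQ register to the end of that register
	 * array, so each memset below clears only the user-owned part of the
	 * dcore0 CQ configuration.
	 */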
10460
10461 /* memset dcore0 CQ registers */
10462 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
10463 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
10464 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
10465 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
10466 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
10467 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
10468
10469 cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
10470 cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
10471 cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET;
10472 cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET;
10473 cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET;
10474 cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET;
10475 size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0;
10476
10477 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10478 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
10479 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
10480 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
10481 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
10482 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
10483 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
10484
10485 cq_lbw_l_addr += DCORE_OFFSET;
10486 cq_lbw_h_addr += DCORE_OFFSET;
10487 cq_lbw_data_addr += DCORE_OFFSET;
10488 cq_base_l_addr += DCORE_OFFSET;
10489 cq_base_h_addr += DCORE_OFFSET;
10490 cq_size_addr += DCORE_OFFSET;
10491 }
10492
10493 offset = hdev->asic_prop.first_available_user_mon[0] * 4;
10494 addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset;
10495 val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
10496 size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);
10497
10498 /* memset dcore0 monitors */
10499 gaudi2_memset_device_lbw(hdev, addr, size, val);
10500
10501 addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
10502 gaudi2_memset_device_lbw(hdev, addr, size, 0);
10503
10504 mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
10505 mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
10506 size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;
10507
10508 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10509 gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
10510 gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
10511 mon_sts_addr += DCORE_OFFSET;
10512 mon_cfg_addr += DCORE_OFFSET;
10513 }
10514
10515 offset = hdev->asic_prop.first_available_user_sob[0] * 4;
10516 addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
10517 val = 0;
10518 size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
10519 (mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
10520
10521 /* memset dcore0 sobs */
10522 gaudi2_memset_device_lbw(hdev, addr, size, val);
10523
10524 addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
10525 size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;
10526
10527 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10528 gaudi2_memset_device_lbw(hdev, addr, size, val);
10529 addr += DCORE_OFFSET;
10530 }
10531
10532 /* Flush all WREG to prevent race */
10533 val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
10534 }
10535
10536 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
10537 {
10538 u32 reg_base, hw_queue_id;
10539
10540 for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0;
10541 hw_queue_id += NUM_OF_PQ_PER_QMAN) {
10542 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
10543 continue;
10544
10545 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
10546
10547 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
10548 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
10549 }
10550
10551 /* Flush all WREG to prevent race */
10552 RREG32(mmPDMA0_QM_ARB_CFG_0);
10553 }
10554
10555 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
10556 {
10557 u32 reg_base, hw_queue_id;
10558
10559 for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3;
10560 hw_queue_id += NUM_OF_PQ_PER_QMAN) {
10561 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
10562 continue;
10563
10564 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
10565
10566 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
10567 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
10568 }
10569
10570 /* Flush all WREG to prevent race */
10571 RREG32(mmPDMA0_QM_ARB_CFG_0);
10572 }
10573
10574 static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
10575 {
10576 return 0;
10577 }
10578
10579 static void gaudi2_restore_phase_topology(struct hl_device *hdev)
10580 {
10581 }
10582
10583 static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
10584 struct dup_block_ctx *cfg_ctx)
10585 {
10586 u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
10587 u8 seq;
10588 int i;
10589
10590 for (i = 0 ; i < cfg_ctx->instances ; i++) {
10591 seq = block_idx * cfg_ctx->instances + i;
10592
10593 /* skip disabled instance */
10594 if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
10595 continue;
10596
10597 cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off,
10598 cfg_ctx->data);
10599 }
10600 }
10601
10602 static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
10603 u64 mask)
10604 {
10605 int i;
10606
10607 cfg_ctx->enabled_mask = mask;
10608
10609 for (i = 0 ; i < cfg_ctx->blocks ; i++)
10610 gaudi2_init_block_instances(hdev, i, cfg_ctx);
10611 }
10612
10613 void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
10614 {
10615 gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
10616 }
10617
10618 static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
10619 {
10620 void *host_mem_virtual_addr;
10621 dma_addr_t host_mem_dma_addr;
10622 u64 reserved_va_base;
10623 u32 pos, size_left, size_to_dma;
10624 struct hl_ctx *ctx;
10625 int rc = 0;
10626
10627 /* Fetch the ctx */
10628 ctx = hl_get_compute_ctx(hdev);
10629 if (!ctx) {
10630 dev_err(hdev->dev, "No ctx available\n");
10631 return -EINVAL;
10632 }
10633
10634 /* Allocate buffers for read and for poll */
10635 host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
10636 GFP_KERNEL | __GFP_ZERO);
10637 if (host_mem_virtual_addr == NULL) {
10638 dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
10639 rc = -ENOMEM;
10640 goto put_ctx;
10641 }
10642
10643 /* Reserve VM region on asic side */
10644 reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
10645 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
10646 if (!reserved_va_base) {
10647 dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
10648 rc = -ENOMEM;
10649 goto free_data_buffer;
10650 }
10651
10652 /* Create mapping on asic side */
10653 mutex_lock(&hdev->mmu_lock);
10654
10655 rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
10656 if (rc) {
10657 dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
10658 goto unreserve_va;
10659 }
10660
10661 rc = hl_mmu_invalidate_cache_range(hdev, false,
10662 MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
10663 ctx->asid, reserved_va_base, SZ_2M);
10664 if (rc) {
10665 hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
10666 goto unreserve_va;
10667 }
10668
10669 mutex_unlock(&hdev->mmu_lock);
10670
10671 /* Enable MMU on KDMA */
10672 gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);
10673
10674 pos = 0;
10675 size_left = size;
10676 size_to_dma = SZ_2M;
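	/*
	 * Illustrative example: the KDMA copy below runs in SZ_2M chunks, so a
	 * 5MB read issues transfers of 2MB, 2MB and 1MB through the same
	 * reserved VA window.
	 */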
10677
10678 while (size_left > 0) {
10679 if (size_left < SZ_2M)
10680 size_to_dma = size_left;
10681
10682 rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
10683 if (rc)
10684 break;
10685
10686 memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);
10687
10688 if (size_left <= SZ_2M)
10689 break;
10690
10691 pos += SZ_2M;
10692 addr += SZ_2M;
10693 size_left -= SZ_2M;
10694 }
10695
10696 gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
10697
10698 mutex_lock(&hdev->mmu_lock);
10699
10700 rc = hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
10701 if (rc)
10702 goto unreserve_va;
10703
10704 rc = hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
10705 ctx->asid, reserved_va_base, SZ_2M);
10706
10707 unreserve_va:
10708 mutex_unlock(&hdev->mmu_lock);
10709 hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
10710 free_data_buffer:
10711 hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
10712 put_ctx:
10713 hl_ctx_put(ctx);
10714
10715 return rc;
10716 }
10717
10718 static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
10719 {
10720 struct gaudi2_device *gaudi2 = hdev->asic_specific;
10721 int min_alloc_order, rc;
10722
10723 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
10724 return 0;
10725
10726 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
10727 HOST_SPACE_INTERNAL_CB_SZ,
10728 &hdev->internal_cb_pool_dma_addr,
10729 GFP_KERNEL | __GFP_ZERO);
10730
10731 if (!hdev->internal_cb_pool_virt_addr)
10732 return -ENOMEM;
10733
10734 min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
10735 gaudi2_get_wait_cb_size(hdev)));
10736
10737 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
10738 if (!hdev->internal_cb_pool) {
10739 dev_err(hdev->dev, "Failed to create internal CB pool\n");
10740 rc = -ENOMEM;
10741 goto free_internal_cb_pool;
10742 }
10743
10744 rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
10745 HOST_SPACE_INTERNAL_CB_SZ, -1);
10746 if (rc) {
10747 dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
10748 rc = -EFAULT;
10749 goto destroy_internal_cb_pool;
10750 }
10751
10752 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
10753 HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
10754
10755 if (!hdev->internal_cb_va_base) {
10756 rc = -ENOMEM;
10757 goto destroy_internal_cb_pool;
10758 }
10759
10760 mutex_lock(&hdev->mmu_lock);
10761
10762 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
10763 HOST_SPACE_INTERNAL_CB_SZ);
10764 if (rc)
10765 goto unreserve_internal_cb_pool;
10766
10767 rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
10768 if (rc)
10769 goto unmap_internal_cb_pool;
10770
10771 mutex_unlock(&hdev->mmu_lock);
10772
10773 return 0;
10774
10775 unmap_internal_cb_pool:
10776 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10777 unreserve_internal_cb_pool:
10778 mutex_unlock(&hdev->mmu_lock);
10779 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10780 destroy_internal_cb_pool:
10781 gen_pool_destroy(hdev->internal_cb_pool);
10782 free_internal_cb_pool:
10783 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
10784 hdev->internal_cb_pool_dma_addr);
10785
10786 return rc;
10787 }
10788
10789 static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
10790 {
10791 struct gaudi2_device *gaudi2 = hdev->asic_specific;
10792
10793 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
10794 return;
10795
10796 mutex_lock(&hdev->mmu_lock);
10797 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10798 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10799 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
10800 mutex_unlock(&hdev->mmu_lock);
10801
10802 gen_pool_destroy(hdev->internal_cb_pool);
10803
10804 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
10805 hdev->internal_cb_pool_dma_addr);
10806 }
10807
10808 static void gaudi2_restore_user_registers(struct hl_device *hdev)
10809 {
10810 gaudi2_restore_user_sm_registers(hdev);
10811 gaudi2_restore_user_qm_registers(hdev);
10812 }
10813
10814 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
10815 {
10816 struct hl_device *hdev = ctx->hdev;
10817 struct asic_fixed_properties *prop = &hdev->asic_prop;
10818 struct gaudi2_device *gaudi2 = hdev->asic_specific;
10819 int rc;
10820
10821 rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
10822 gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
10823 if (rc)
10824 dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
10825 RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10826
10827 return rc;
10828 }
10829
10830 static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
10831 {
10832 struct hl_device *hdev = ctx->hdev;
10833 struct asic_fixed_properties *prop = &hdev->asic_prop;
10834 int rc;
10835
10836 rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
10837 prop->pmmu.page_size, true);
10838 if (rc)
10839 dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
10840 RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10841 }
10842
10843 static int gaudi2_ctx_init(struct hl_ctx *ctx)
10844 {
10845 int rc;
10846
10847 if (ctx->asid == HL_KERNEL_ASID_ID)
10848 return 0;
10849
10850 rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
10851 if (rc)
10852 return rc;
10853
10854 	/* No need to clear user registers if the device has just
10855 	 * performed a reset; in that case we restore only the NIC QM registers
10856 	 */
10857 if (ctx->hdev->reset_upon_device_release)
10858 gaudi2_restore_nic_qm_registers(ctx->hdev);
10859 else
10860 gaudi2_restore_user_registers(ctx->hdev);
10861
10862 rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
10863 if (rc)
10864 return rc;
10865
10866 rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
10867 if (rc)
10868 gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10869
10870 return rc;
10871 }
10872
10873 static void gaudi2_ctx_fini(struct hl_ctx *ctx)
10874 {
10875 if (ctx->asid == HL_KERNEL_ASID_ID)
10876 return;
10877
10878 gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10879
10880 gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
10881 }
10882
10883 static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
10884 {
10885 struct hl_device *hdev = cs->ctx->hdev;
10886 int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
10887 u32 mon_payload, sob_id, mon_id;
10888
10889 if (!cs_needs_completion(cs))
10890 return 0;
10891
10892 	/*
10893 	 * The first 64 SOB/MON are reserved for the driver's QMAN auto-completion
10894 	 * mechanism. Each SOB/MON pair is used for a pending CS with the same
10895 	 * cyclic index. The SOB value is increased as each of the CS jobs
10896 	 * completes. When the SOB reaches the number of CS jobs, the monitor
10897 	 * generates an MSI-X interrupt.
10898 	 */
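	/*
	 * Illustrative example (values are hypothetical): with max_pending_cs of
	 * 64, a CS with sequence 70 maps to index 6, so SOB 6 / MON 6 are armed
	 * and the monitor fires once the SOB reaches cs->jobs_cnt.
	 */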
10899
10900 sob_id = mon_id = index;
10901 mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
10902 (1 << CQ_ENTRY_READY_SHIFT) | index;
10903
10904 gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
10905 cs->jobs_cnt);
10906
10907 return 0;
10908 }
10909
10910 static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
10911 {
10912 return HL_INVALID_QUEUE;
10913 }
10914
10915 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
10916 {
10917 struct hl_cb *cb = data;
10918 struct packet_msg_short *pkt;
10919 u32 value, ctl, pkt_size = sizeof(*pkt);
10920
10921 pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
10922 memset(pkt, 0, pkt_size);
10923
10924 /* Inc by 1, Mode ADD */
10925 value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
10926 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
10927
10928 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
10929 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
10930 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10931 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
10932 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10933
10934 pkt->value = cpu_to_le32(value);
10935 pkt->ctl = cpu_to_le32(ctl);
10936
10937 return size + pkt_size;
10938 }
10939
10940 static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
10941 {
10942 u32 ctl, pkt_size = sizeof(*pkt);
10943
10944 memset(pkt, 0, pkt_size);
10945
10946 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
10947 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
10948 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10949 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10950 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);
10951
10952 pkt->value = cpu_to_le32(value);
10953 pkt->ctl = cpu_to_le32(ctl);
10954
10955 return pkt_size;
10956 }
10957
10958 static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
10959 u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
10960 {
10961 u32 ctl, value, pkt_size = sizeof(*pkt);
10962 u8 mask;
10963
10964 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
10965 dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
10966 return 0;
10967 }
10968
10969 memset(pkt, 0, pkt_size);
10970
10971 value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
10972 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
10973 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL */
10974 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);
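	/*
	 * Note: the monitor arms on a sync group of 8 SOBs, so e.g. sob_base 70
	 * selects sync group 8 (70 / 8), while hl_gen_sob_mask() above built the
	 * per-SOB mask within that group.
	 */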
10975
10976 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
10977 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
10978 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10979 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10980 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10981
10982 pkt->value = cpu_to_le32(value);
10983 pkt->ctl = cpu_to_le32(ctl);
10984
10985 return pkt_size;
10986 }
10987
10988 static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
10989 {
10990 u32 ctl, cfg, pkt_size = sizeof(*pkt);
10991
10992 memset(pkt, 0, pkt_size);
10993
10994 cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
10995 cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
10996 cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);
10997
10998 ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
10999 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
11000 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
11001
11002 pkt->cfg = cpu_to_le32(cfg);
11003 pkt->ctl = cpu_to_le32(ctl);
11004
11005 return pkt_size;
11006 }
11007
11008 static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
11009 {
11010 struct hl_cb *cb = prop->data;
11011 void *buf = (void *) (uintptr_t) (cb->kernel_address);
11012
11013 u64 monitor_base, fence_addr = 0;
11014 u32 stream_index, size = prop->size;
11015 u16 msg_addr_offset;
11016
11017 stream_index = prop->q_idx % 4;
11018 fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
11019 QM_FENCE2_OFFSET + stream_index * 4;
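	/*
	 * Note: each QMAN stream has its own FENCE2 counter, so the fence address
	 * above points at the FENCE2 register of stream (q_idx % 4) inside the
	 * queue's QMAN block.
	 */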
11020
11021 /*
11022 * monitor_base should be the content of the base0 address registers,
11023 * so it will be added to the msg short offsets
11024 */
11025 monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
11026
11027 /* First monitor config packet: low address of the sync */
11028 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
11029 monitor_base;
11030
11031 size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);
11032
11033 /* Second monitor config packet: high address of the sync */
11034 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
11035 monitor_base;
11036
11037 size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);
11038
11039 /*
11040 * Third monitor config packet: the payload, i.e. what to write when the
11041 * sync triggers
11042 */
11043 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
11044 monitor_base;
11045
11046 size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);
11047
11048 /* Fourth monitor config packet: bind the monitor to a sync object */
11049 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;
11050
11051 size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
11052 prop->sob_val, msg_addr_offset);
11053
11054 /* Fence packet */
11055 size += gaudi2_add_fence_pkt(buf + size);
11056
11057 return size;
11058 }
11059
11060 static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
11061 {
11062 struct hl_hw_sob *hw_sob = data;
11063
11064 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);
11065
11066 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);
11067
11068 kref_init(&hw_sob->kref);
11069 }
11070
11071 static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
11072 {
11073 }
11074
11075 static u64 gaudi2_get_device_time(struct hl_device *hdev)
11076 {
11077 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
11078
11079 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
11080 }
11081
11082 static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
11083 {
11084 return 0;
11085 }
11086
11087 static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
11088 struct hl_cs *cs, u32 wait_queue_id,
11089 u32 collective_engine_id, u32 encaps_signal_offset)
11090 {
11091 return -EINVAL;
11092 }
11093
11094 /*
11095  * gaudi2_mmu_scramble_addr - converts a DRAM (non power of 2) page-size aligned
11096  *                            address to a DMMU page-size (64MB) address before
11097  *                            mapping it in the MMU.
11098  * The operation is performed on both the virtual and physical addresses.
11099  * For a device with 6 HBMs the scramble is:
11100  * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
11101 *
11102 * Example:
11103  * =============================================================================
11104  *  Allocated DRAM     Reserved VA        Scrambled VA for MMU mapping    Scrambled PA
11105  *  phys address                                                          in MMU last
11106  *                                                                        HOP
11107  * =============================================================================
11108  *  PA1 0x3000000   VA1 0x9C000000   SVA1 = (VA1/48M)*64M  0xD0000000   <- PA1/48M  0x1
11109  *  PA2 0x9000000   VA2 0x9F000000   SVA2 = (VA2/48M)*64M  0xD4000000   <- PA2/48M  0x3
11110  * =============================================================================
11111 */
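/*
 * Illustrative worked example (assuming 6 functional HBMs, i.e. a 48MB scramble
 * unit): raw address 0x10000000 gives div = 5 and mod = 0x1000000, so the
 * scrambled address is 5 * 64MB + 16MB = 0x15000000. gaudi2_mmu_descramble_addr()
 * below applies the inverse: 0x15000000 -> div = 5, mod = 16MB -> 5 * 48MB + 16MB
 * = 0x10000000.
 */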
11112 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
11113 {
11114 struct asic_fixed_properties *prop = &hdev->asic_prop;
11115 u32 divisor, mod_va;
11116 u64 div_va;
11117
11118 /* accept any address in the DRAM address space */
11119 if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
11120 VA_HBM_SPACE_END)) {
11121
11122 divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
11123 div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
11124 return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
11125 (div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
11126 (mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
11127 }
11128
11129 return raw_addr;
11130 }
11131
11132 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
11133 {
11134 struct asic_fixed_properties *prop = &hdev->asic_prop;
11135 u32 divisor, mod_va;
11136 u64 div_va;
11137
11138 /* accept any address in the DRAM address space */
11139 if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
11140 VA_HBM_SPACE_END)) {
11141
11142 divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
11143 div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
11144 PAGE_SIZE_64MB, &mod_va);
11145
11146 return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
11147 (div_va * divisor + mod_va));
11148 }
11149
11150 return scrambled_addr;
11151 }
11152
11153 static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
11154 {
11155 u32 base = 0, dcore_id, dec_id;
11156
11157 if (core_id >= NUMBER_OF_DEC) {
11158 dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
11159 goto out;
11160 }
11161
11162 if (core_id < 8) {
11163 dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
11164 dec_id = core_id % NUM_OF_DEC_PER_DCORE;
11165
11166 base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
11167 dec_id * DCORE_VDEC_OFFSET;
11168 } else {
11169 /* PCIe Shared Decoder */
11170 base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
11171 }
11172 out:
11173 return base;
11174 }
11175
11176 static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
11177 u32 *block_size, u32 *block_id)
11178 {
11179 struct gaudi2_device *gaudi2 = hdev->asic_specific;
11180 int i;
11181
11182 for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
11183 if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
11184 *block_id = i;
11185 if (block_size)
11186 *block_size = gaudi2->mapped_blocks[i].size;
11187 return 0;
11188 }
11189 }
11190
11191 dev_err(hdev->dev, "Invalid block address %#llx", block_addr);
11192
11193 return -EINVAL;
11194 }
11195
11196 static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
11197 u32 block_id, u32 block_size)
11198 {
11199 struct gaudi2_device *gaudi2 = hdev->asic_specific;
11200 u64 offset_in_bar;
11201 u64 address;
11202 int rc;
11203
11204 if (block_id >= NUM_USER_MAPPED_BLOCKS) {
11205 dev_err(hdev->dev, "Invalid block id %u", block_id);
11206 return -EINVAL;
11207 }
11208
11209 /* we allow mapping only an entire block */
11210 if (block_size != gaudi2->mapped_blocks[block_id].size) {
11211 dev_err(hdev->dev, "Invalid block size %u", block_size);
11212 return -EINVAL;
11213 }
11214
11215 offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;
11216
11217 address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;
11218
11219 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
11220 VM_DONTCOPY | VM_NORESERVE);
11221
11222 rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
11223 block_size, vma->vm_page_prot);
11224 if (rc)
11225 dev_err(hdev->dev, "remap_pfn_range error %d", rc);
11226
11227 return rc;
11228 }
11229
11230 static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
11231 {
11232 struct gaudi2_device *gaudi2 = hdev->asic_specific;
11233
11234 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
11235 u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);
11236
11237 if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
11238 WREG32(irq_handler_offset,
11239 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
11240 }
11241
11242 static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
11243 {
11244 switch (mmu_id) {
11245 case HW_CAP_DCORE0_DMMU0:
11246 *mmu_base = mmDCORE0_HMMU0_MMU_BASE;
11247 break;
11248 case HW_CAP_DCORE0_DMMU1:
11249 *mmu_base = mmDCORE0_HMMU1_MMU_BASE;
11250 break;
11251 case HW_CAP_DCORE0_DMMU2:
11252 *mmu_base = mmDCORE0_HMMU2_MMU_BASE;
11253 break;
11254 case HW_CAP_DCORE0_DMMU3:
11255 *mmu_base = mmDCORE0_HMMU3_MMU_BASE;
11256 break;
11257 case HW_CAP_DCORE1_DMMU0:
11258 *mmu_base = mmDCORE1_HMMU0_MMU_BASE;
11259 break;
11260 case HW_CAP_DCORE1_DMMU1:
11261 *mmu_base = mmDCORE1_HMMU1_MMU_BASE;
11262 break;
11263 case HW_CAP_DCORE1_DMMU2:
11264 *mmu_base = mmDCORE1_HMMU2_MMU_BASE;
11265 break;
11266 case HW_CAP_DCORE1_DMMU3:
11267 *mmu_base = mmDCORE1_HMMU3_MMU_BASE;
11268 break;
11269 case HW_CAP_DCORE2_DMMU0:
11270 *mmu_base = mmDCORE2_HMMU0_MMU_BASE;
11271 break;
11272 case HW_CAP_DCORE2_DMMU1:
11273 *mmu_base = mmDCORE2_HMMU1_MMU_BASE;
11274 break;
11275 case HW_CAP_DCORE2_DMMU2:
11276 *mmu_base = mmDCORE2_HMMU2_MMU_BASE;
11277 break;
11278 case HW_CAP_DCORE2_DMMU3:
11279 *mmu_base = mmDCORE2_HMMU3_MMU_BASE;
11280 break;
11281 case HW_CAP_DCORE3_DMMU0:
11282 *mmu_base = mmDCORE3_HMMU0_MMU_BASE;
11283 break;
11284 case HW_CAP_DCORE3_DMMU1:
11285 *mmu_base = mmDCORE3_HMMU1_MMU_BASE;
11286 break;
11287 case HW_CAP_DCORE3_DMMU2:
11288 *mmu_base = mmDCORE3_HMMU2_MMU_BASE;
11289 break;
11290 case HW_CAP_DCORE3_DMMU3:
11291 *mmu_base = mmDCORE3_HMMU3_MMU_BASE;
11292 break;
11293 case HW_CAP_PMMU:
11294 *mmu_base = mmPMMU_HBW_MMU_BASE;
11295 break;
11296 default:
11297 return -EINVAL;
11298 }
11299
11300 return 0;
11301 }
11302
11303 static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
11304 {
11305 bool is_pmmu = (mmu_id == HW_CAP_PMMU);
11306 struct gaudi2_device *gaudi2 = hdev->asic_specific;
11307 u32 mmu_base;
11308
11309 if (!(gaudi2->hw_cap_initialized & mmu_id))
11310 return;
11311
11312 if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
11313 return;
11314
11315 gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
11316 gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
11317 }
11318
11319 static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
11320 {
11321 u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;
11322
11323 /* check all HMMUs */
11324 for (i = 0 ; i < num_of_hmmus ; i++) {
11325 mmu_id = HW_CAP_DCORE0_DMMU0 << i;
11326
11327 if (mmu_cap_mask & mmu_id)
11328 gaudi2_ack_mmu_error(hdev, mmu_id);
11329 }
11330
11331 /* check PMMU */
11332 if (mmu_cap_mask & HW_CAP_PMMU)
11333 gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);
11334
11335 return 0;
11336 }
11337
11338 static void gaudi2_get_msi_info(__le32 *table)
11339 {
11340 table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
11341 table[CPUCP_EVENT_QUEUE_ERR_MSI_TYPE] = cpu_to_le32(GAUDI2_IRQ_NUM_EQ_ERROR);
11342 }
11343
11344 static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
11345 {
11346 switch (pll_idx) {
11347 case HL_GAUDI2_CPU_PLL: return CPU_PLL;
11348 case HL_GAUDI2_PCI_PLL: return PCI_PLL;
11349 case HL_GAUDI2_NIC_PLL: return NIC_PLL;
11350 case HL_GAUDI2_DMA_PLL: return DMA_PLL;
11351 case HL_GAUDI2_MESH_PLL: return MESH_PLL;
11352 case HL_GAUDI2_MME_PLL: return MME_PLL;
11353 case HL_GAUDI2_TPC_PLL: return TPC_PLL;
11354 case HL_GAUDI2_IF_PLL: return IF_PLL;
11355 case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
11356 case HL_GAUDI2_HBM_PLL: return HBM_PLL;
11357 case HL_GAUDI2_VID_PLL: return VID_PLL;
11358 case HL_GAUDI2_MSS_PLL: return MSS_PLL;
11359 default: return -EINVAL;
11360 }
11361 }
11362
11363 static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
11364 {
11365 /* Not implemented */
11366 return 0;
11367 }
11368
11369 static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
11370 {
11371 /* Not implemented */
11372 return 0;
11373 }
11374
11375 static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
11376 struct hl_device *hdev, struct hl_mon_state_dump *mon)
11377 {
11378 /* Not implemented */
11379 return 0;
11380 }
11381
11382
11383 static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
11384 u64 status_base_offset, enum hl_sync_engine_type engine_type,
11385 u32 engine_id, char **buf, size_t *size, size_t *offset)
11386 {
11387 /* Not implemented */
11388 return 0;
11389 }
11390
11391
11392 static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
11393 .monitor_valid = gaudi2_monitor_valid,
11394 .print_single_monitor = gaudi2_print_single_monitor,
11395 .gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
11396 .print_fences_single_engine = gaudi2_print_fences_single_engine,
11397 };
11398
11399 static void gaudi2_state_dump_init(struct hl_device *hdev)
11400 {
11401 /* Not implemented */
11402 hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
11403 hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
11404 }
11405
11406 static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
11407 {
11408 return 0;
11409 }
11410
11411 static u32 *gaudi2_get_stream_master_qid_arr(void)
11412 {
11413 return NULL;
11414 }
11415
11416 static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
11417 struct attribute_group *dev_vrm_attr_grp)
11418 {
11419 hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
11420 hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
11421 }
11422
11423 static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
11424 u32 page_size, u32 *real_page_size, bool is_dram_addr)
11425 {
11426 struct asic_fixed_properties *prop = &hdev->asic_prop;
11427
11428 	/* for host pages the page size must be a multiple of the MMU page size */
11429 if (!is_dram_addr) {
11430 if (page_size % mmu_prop->page_size)
11431 goto page_size_err;
11432
11433 *real_page_size = mmu_prop->page_size;
11434 return 0;
11435 }
11436
11437 if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
11438 goto page_size_err;
11439
11440 /*
11441 * MMU page size is different from DRAM page size (more precisely, DMMU page is greater
11442 * than DRAM page size).
11443 	 * For this reason, work with the DRAM page size and let the MMU scrambling routine handle
11444 	 * this mismatch when calculating the address to place in the MMU page table
11445 	 * (in that case also make sure that the dram_page_size is not greater than the
11446 	 * MMU page size).
11447 */
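	/*
	 * Illustrative example (sizes are hypothetical): with a dram_page_size of
	 * 48MB and a 64MB DMMU page size, a 96MB mapping request passes both
	 * checks above and is mapped by the caller in 48MB granularity.
	 */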
11448 *real_page_size = prop->dram_page_size;
11449
11450 return 0;
11451
11452 page_size_err:
11453 dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
11454 page_size, mmu_prop->page_size >> 10);
11455 return -EFAULT;
11456 }
11457
11458 static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
11459 {
11460 return -EOPNOTSUPP;
11461 }
11462
11463 int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
11464 {
11465 struct gaudi2_device *gaudi2 = hdev->asic_specific;
11466
11467 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
11468 return 0;
11469
11470 return hl_fw_send_device_activity(hdev, open);
11471 }
11472
11473 static const struct hl_asic_funcs gaudi2_funcs = {
11474 .early_init = gaudi2_early_init,
11475 .early_fini = gaudi2_early_fini,
11476 .late_init = gaudi2_late_init,
11477 .late_fini = gaudi2_late_fini,
11478 .sw_init = gaudi2_sw_init,
11479 .sw_fini = gaudi2_sw_fini,
11480 .hw_init = gaudi2_hw_init,
11481 .hw_fini = gaudi2_hw_fini,
11482 .halt_engines = gaudi2_halt_engines,
11483 .suspend = gaudi2_suspend,
11484 .resume = gaudi2_resume,
11485 .mmap = gaudi2_mmap,
11486 .ring_doorbell = gaudi2_ring_doorbell,
11487 .pqe_write = gaudi2_pqe_write,
11488 .asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
11489 .asic_dma_free_coherent = gaudi2_dma_free_coherent,
11490 .scrub_device_mem = gaudi2_scrub_device_mem,
11491 .scrub_device_dram = gaudi2_scrub_device_dram,
11492 .get_int_queue_base = NULL,
11493 .test_queues = gaudi2_test_queues,
11494 .asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
11495 .asic_dma_pool_free = gaudi2_dma_pool_free,
11496 .cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
11497 .cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
11498 .dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
11499 .cs_parser = gaudi2_cs_parser,
11500 .dma_map_sgtable = hl_asic_dma_map_sgtable,
11501 .add_end_of_cb_packets = NULL,
11502 .update_eq_ci = gaudi2_update_eq_ci,
11503 .context_switch = gaudi2_context_switch,
11504 .restore_phase_topology = gaudi2_restore_phase_topology,
11505 .debugfs_read_dma = gaudi2_debugfs_read_dma,
11506 .add_device_attr = gaudi2_add_device_attr,
11507 .handle_eqe = gaudi2_handle_eqe,
11508 .get_events_stat = gaudi2_get_events_stat,
11509 .read_pte = NULL,
11510 .write_pte = NULL,
11511 .mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
11512 .mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
11513 .mmu_prefetch_cache_range = NULL,
11514 .send_heartbeat = gaudi2_send_heartbeat,
11515 .debug_coresight = gaudi2_debug_coresight,
11516 .is_device_idle = gaudi2_is_device_idle,
11517 .compute_reset_late_init = gaudi2_compute_reset_late_init,
11518 .hw_queues_lock = gaudi2_hw_queues_lock,
11519 .hw_queues_unlock = gaudi2_hw_queues_unlock,
11520 .get_pci_id = gaudi2_get_pci_id,
11521 .get_eeprom_data = gaudi2_get_eeprom_data,
11522 .get_monitor_dump = gaudi2_get_monitor_dump,
11523 .send_cpu_message = gaudi2_send_cpu_message,
11524 .pci_bars_map = gaudi2_pci_bars_map,
11525 .init_iatu = gaudi2_init_iatu,
11526 .rreg = hl_rreg,
11527 .wreg = hl_wreg,
11528 .halt_coresight = gaudi2_halt_coresight,
11529 .ctx_init = gaudi2_ctx_init,
11530 .ctx_fini = gaudi2_ctx_fini,
11531 .pre_schedule_cs = gaudi2_pre_schedule_cs,
11532 .get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
11533 .load_firmware_to_device = NULL,
11534 .load_boot_fit_to_device = NULL,
11535 .get_signal_cb_size = gaudi2_get_signal_cb_size,
11536 .get_wait_cb_size = gaudi2_get_wait_cb_size,
11537 .gen_signal_cb = gaudi2_gen_signal_cb,
11538 .gen_wait_cb = gaudi2_gen_wait_cb,
11539 .reset_sob = gaudi2_reset_sob,
11540 .reset_sob_group = gaudi2_reset_sob_group,
11541 .get_device_time = gaudi2_get_device_time,
11542 .pb_print_security_errors = gaudi2_pb_print_security_errors,
11543 .collective_wait_init_cs = gaudi2_collective_wait_init_cs,
11544 .collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
11545 .get_dec_base_addr = gaudi2_get_dec_base_addr,
11546 .scramble_addr = gaudi2_mmu_scramble_addr,
11547 .descramble_addr = gaudi2_mmu_descramble_addr,
11548 .ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
11549 .get_hw_block_id = gaudi2_get_hw_block_id,
11550 .hw_block_mmap = gaudi2_block_mmap,
11551 .enable_events_from_fw = gaudi2_enable_events_from_fw,
11552 .ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
11553 .get_msi_info = gaudi2_get_msi_info,
11554 .map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
11555 .init_firmware_preload_params = gaudi2_init_firmware_preload_params,
11556 .init_firmware_loader = gaudi2_init_firmware_loader,
11557 .init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
11558 .state_dump_init = gaudi2_state_dump_init,
11559 .get_sob_addr = &gaudi2_get_sob_addr,
11560 .set_pci_memory_regions = gaudi2_set_pci_memory_regions,
11561 .get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
11562 .check_if_razwi_happened = gaudi2_check_if_razwi_happened,
11563 .mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
11564 .access_dev_mem = hl_access_dev_mem,
11565 .set_dram_bar_base = gaudi2_set_hbm_bar_base,
11566 .set_engine_cores = gaudi2_set_engine_cores,
11567 .set_engines = gaudi2_set_engines,
11568 .send_device_activity = gaudi2_send_device_activity,
11569 .set_dram_properties = gaudi2_set_dram_properties,
11570 .set_binning_masks = gaudi2_set_binning_masks,
11571 };
11572
11573 void gaudi2_set_asic_funcs(struct hl_device *hdev)
11574 {
11575 hdev->asic_funcs = &gaudi2_funcs;
11576 }
11577