1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright 2016-2022 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24 * Gaudi security scheme:
25 *
26 * 1. Host is protected by:
27 * - Range registers
28 * - MMU
29 *
30 * 2. DDR is protected by:
31 * - Range registers (protect the first 512MB)
32 *
33 * 3. Configuration is protected by:
34 * - Range registers
35 * - Protection bits
36 *
37 * MMU is always enabled.
38 *
39 * QMAN DMA channels 0,1 (PCI DMA):
40 * - DMA is not secured.
41 * - PQ and CQ are secured.
42 * - CP is secured: The driver needs to parse the CB, but WREG should be
43 * allowed because of TDMA (tensor DMA). Hence, WREG is never
44 * secured.
45 *
46 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47 * channel 0 to be secured, execute the DMA and change it back to non-secured.
48 * Currently, the driver doesn't use the DMA while there are compute jobs
49 * running.
50 *
51 * The current use cases for the driver to use the DMA are:
52 * - Clear SRAM on context switch (happens on context switch when device is
53 * idle)
54 * - MMU page tables area clear (happens on init)
55 *
56 * QMAN DMA 2-7, TPC, MME, NIC:
57 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58 * CQ, CP and the engine are not secured
59 *
60 */
61
62 #define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
65
66 MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
67 MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
68 MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);
69
70 #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
71
72 #define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
73 #define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
74 #define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
75 #define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
76
77 #define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
78 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
79 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
80 #define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
81 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
82 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
83 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000 /* 4s */
84 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
85 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */
86
87 #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
88
89 #define GAUDI_MAX_STRING_LEN 20
90
91 #define GAUDI_CB_POOL_CB_CNT 512
92 #define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
93
94 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
95
96 #define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
97
98 #define GAUDI_NUM_OF_QM_ERR_CAUSE 16
99
100 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
101
102 #define GAUDI_ARB_WDT_TIMEOUT 0xEE6b27FF /* 8 seconds */
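/*
 * The timeout above is presumably expressed in QMAN clock cycles rather than
 * time units: 0xEE6B27FF is roughly 4 * 10^9, which at a clock in the 500 MHz
 * range matches the quoted 8 seconds (the exact clock rate is an assumption).
 */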
103
104 #define HBM_SCRUBBING_TIMEOUT_US 1000000 /* 1s */
105
106 #define BIN_REG_STRING_SIZE sizeof("0b10101010101010101010101010101010")
107
108 #define MONITOR_SOB_STRING_SIZE 256
109
110 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
111 GAUDI_QUEUE_ID_DMA_0_0,
112 GAUDI_QUEUE_ID_DMA_0_1,
113 GAUDI_QUEUE_ID_DMA_0_2,
114 GAUDI_QUEUE_ID_DMA_0_3,
115 GAUDI_QUEUE_ID_DMA_1_0,
116 GAUDI_QUEUE_ID_DMA_1_1,
117 GAUDI_QUEUE_ID_DMA_1_2,
118 GAUDI_QUEUE_ID_DMA_1_3
119 };
120
121 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
122 [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
123 [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
124 [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
125 [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
126 [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
127 [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
128 [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
129 [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
130 };
131
132 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
133 [0] = GAUDI_QUEUE_ID_DMA_0_0,
134 [1] = GAUDI_QUEUE_ID_DMA_0_1,
135 [2] = GAUDI_QUEUE_ID_DMA_0_2,
136 [3] = GAUDI_QUEUE_ID_DMA_0_3,
137 [4] = GAUDI_QUEUE_ID_DMA_1_0,
138 [5] = GAUDI_QUEUE_ID_DMA_1_1,
139 [6] = GAUDI_QUEUE_ID_DMA_1_2,
140 [7] = GAUDI_QUEUE_ID_DMA_1_3,
141 };
142
143 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
144 [PACKET_WREG_32] = sizeof(struct packet_wreg32),
145 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
146 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
147 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
148 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
149 [PACKET_REPEAT] = sizeof(struct packet_repeat),
150 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
151 [PACKET_FENCE] = sizeof(struct packet_fence),
152 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
153 [PACKET_NOP] = sizeof(struct packet_nop),
154 [PACKET_STOP] = sizeof(struct packet_stop),
155 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
156 [PACKET_WAIT] = sizeof(struct packet_wait),
157 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
158 };
159
160 static inline bool validate_packet_id(enum packet_id id)
161 {
162 switch (id) {
163 case PACKET_WREG_32:
164 case PACKET_WREG_BULK:
165 case PACKET_MSG_LONG:
166 case PACKET_MSG_SHORT:
167 case PACKET_CP_DMA:
168 case PACKET_REPEAT:
169 case PACKET_MSG_PROT:
170 case PACKET_FENCE:
171 case PACKET_LIN_DMA:
172 case PACKET_NOP:
173 case PACKET_STOP:
174 case PACKET_ARB_POINT:
175 case PACKET_WAIT:
176 case PACKET_LOAD_AND_EXE:
177 return true;
178 default:
179 return false;
180 }
181 }
182
183 static const char * const
184 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
185 "tpc_address_exceed_slm",
186 "tpc_div_by_0",
187 "tpc_spu_mac_overflow",
188 "tpc_spu_addsub_overflow",
189 "tpc_spu_abs_overflow",
190 "tpc_spu_fp_dst_nan_inf",
191 "tpc_spu_fp_dst_denorm",
192 "tpc_vpu_mac_overflow",
193 "tpc_vpu_addsub_overflow",
194 "tpc_vpu_abs_overflow",
195 "tpc_vpu_fp_dst_nan_inf",
196 "tpc_vpu_fp_dst_denorm",
197 "tpc_assertions",
198 "tpc_illegal_instruction",
199 "tpc_pc_wrap_around",
200 "tpc_qm_sw_err",
201 "tpc_hbw_rresp_err",
202 "tpc_hbw_bresp_err",
203 "tpc_lbw_rresp_err",
204 "tpc_lbw_bresp_err"
205 };
206
207 static const char * const
208 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
209 "PQ AXI HBW error",
210 "CQ AXI HBW error",
211 "CP AXI HBW error",
212 "CP error due to undefined OPCODE",
213 "CP encountered STOP OPCODE",
214 "CP AXI LBW error",
215 "CP WRREG32 or WRBULK returned error",
216 "N/A",
217 "FENCE 0 inc over max value and clipped",
218 "FENCE 1 inc over max value and clipped",
219 "FENCE 2 inc over max value and clipped",
220 "FENCE 3 inc over max value and clipped",
221 "FENCE 0 dec under min value and clipped",
222 "FENCE 1 dec under min value and clipped",
223 "FENCE 2 dec under min value and clipped",
224 "FENCE 3 dec under min value and clipped"
225 };
226
227 static const char * const
228 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
229 "Choice push while full error",
230 "Choice Q watchdog error",
231 "MSG AXI LBW returned with error"
232 };
233
234 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
235 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
236 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
237 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
238 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
239 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
240 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
241 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
242 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
243 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
244 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
245 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
246 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
252 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
253 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
254 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
255 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
296 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
302 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
304 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
305 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
306 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
307 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
308 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
309 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
310 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
311 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
312 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
313 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
314 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
315 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
316 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
317 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
318 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
319 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
320 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
321 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
322 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
323 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
324 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
325 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
326 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
327 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
328 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
329 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
330 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
331 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
332 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
333 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
334 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
335 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
336 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
337 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
338 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
339 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
340 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
341 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
342 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
343 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
344 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
345 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
346 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
347 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
348 };
349
350 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
351 { .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
352 { .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
353 { .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
354 { .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
355 { .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
356 { .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" },
357 { .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
358 { .id = 7, .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
359 { .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
360 { .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
361 { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
362 { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
363 { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
364 { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
365 { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
366 { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
367 { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
368 { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
369 { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
370 { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
371 { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
372 { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
373 { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
374 { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
375 { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
376 { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
377 { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
378 };
379
380 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
381 { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
382 { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
383 { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
384 { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
385 { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
386 { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
387 { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
388 { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
389 { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
390 { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
391 { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
392 };
393
394 static s64 gaudi_state_dump_specs_props[] = {
395 [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
396 [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
397 [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
398 [SP_MON_OBJ_WR_ADDR_LOW] =
399 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
400 [SP_MON_OBJ_WR_ADDR_HIGH] =
401 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
402 [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
403 [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
404 [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
405 [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
406 [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
407 [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
408 [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
409 [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
410 [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
411 [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
412 [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
413 [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
414 [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
415 [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
416 [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
417 [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
418 [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
419 [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
420 [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
421 [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
422 [SP_FENCE0_CNT_OFFSET] =
423 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
424 [SP_FENCE0_RDATA_OFFSET] =
425 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
426 [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
427 [SP_NUM_CORES] = 1,
428 };
429
430 static const int gaudi_queue_id_to_engine_id[] = {
431 [GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
432 [GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
433 [GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
434 [GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
435 [GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
436 [GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
437 [GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
438 [GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
439 [GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
440 [GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
441 [GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
442 [GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
443 [GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
444 [GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
445 [GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
446 [GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
447 [GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
448 [GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
449 [GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
450 [GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
451 [GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
452 [GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
453 [GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
454 [GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
455 [GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
456 [GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
457 [GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
458 [GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
459 [GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
460 };
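/*
 * GAUDI_QUEUE_ID_CPU_PQ is mapped to GAUDI_ENGINE_ID_SIZE above, presumably as
 * an "out of range / no engine" sentinel, since the CPU queue has no compute
 * engine behind it.
 */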
461
462 /* The order here is opposite to the order of the indexing in the h/w.
463 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
464 */
465 static const char * const gaudi_sync_manager_names[] = {
466 "SYNC_MGR_E_N",
467 "SYNC_MGR_W_N",
468 "SYNC_MGR_E_S",
469 "SYNC_MGR_W_S",
470 NULL
471 };
472
473 struct ecc_info_extract_params {
474 u64 block_address;
475 u32 num_memories;
476 bool derr;
477 };
478
479 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
480 u64 phys_addr);
481 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
482 struct hl_cs_job *job);
483 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
484 u32 size, u64 val);
485 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
486 u32 num_regs, u32 val);
487 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
488 u32 tpc_id);
489 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
490 static int gaudi_cpucp_info_get(struct hl_device *hdev);
491 static void gaudi_disable_clock_gating(struct hl_device *hdev);
492 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
493 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
494 u32 size, bool eb);
495 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
496 struct hl_gen_wait_properties *prop);
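
/*
 * A rough summary of the mapping below: external (PCI DMA) queues act as
 * collective masters, the reduction engines (DMA5 and TPC7) and all NIC
 * queues act as collective slaves, and any other queue does not take part in
 * collective operations.
 */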
497 static inline enum hl_collective_mode
498 get_collective_mode(struct hl_device *hdev, u32 queue_id)
499 {
500 if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
501 return HL_COLLECTIVE_MASTER;
502
503 if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
504 queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
505 return HL_COLLECTIVE_SLAVE;
506
507 if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
508 queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
509 return HL_COLLECTIVE_SLAVE;
510
511 if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
512 queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
513 return HL_COLLECTIVE_SLAVE;
514
515 return HL_COLLECTIVE_NOT_SUPPORTED;
516 }
517
518 static inline void set_default_power_values(struct hl_device *hdev)
519 {
520 struct asic_fixed_properties *prop = &hdev->asic_prop;
521
522 if (hdev->card_type == cpucp_card_type_pmc) {
523 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
524
525 if (prop->fw_security_enabled)
526 prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
527 else
528 prop->dc_power_default = DC_POWER_DEFAULT_PMC;
529 } else {
530 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
531 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
532 }
533 }
534
535 static int gaudi_set_fixed_properties(struct hl_device *hdev)
536 {
537 struct asic_fixed_properties *prop = &hdev->asic_prop;
538 u32 num_sync_stream_queues = 0;
539 int i;
540
541 prop->max_queues = GAUDI_QUEUE_ID_SIZE;
542 prop->hw_queues_props = kcalloc(prop->max_queues,
543 sizeof(struct hw_queue_properties),
544 GFP_KERNEL);
545
546 if (!prop->hw_queues_props)
547 return -ENOMEM;
548
549 for (i = 0 ; i < prop->max_queues ; i++) {
550 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
551 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
552 prop->hw_queues_props[i].driver_only = 0;
553 prop->hw_queues_props[i].supports_sync_stream = 1;
554 prop->hw_queues_props[i].cb_alloc_flags =
555 CB_ALLOC_KERNEL;
556 num_sync_stream_queues++;
557 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
558 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
559 prop->hw_queues_props[i].driver_only = 1;
560 prop->hw_queues_props[i].supports_sync_stream = 0;
561 prop->hw_queues_props[i].cb_alloc_flags =
562 CB_ALLOC_KERNEL;
563 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
564 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
565 prop->hw_queues_props[i].driver_only = 0;
566 prop->hw_queues_props[i].supports_sync_stream = 0;
567 prop->hw_queues_props[i].cb_alloc_flags =
568 CB_ALLOC_USER;
569
570 }
571 prop->hw_queues_props[i].collective_mode =
572 get_collective_mode(hdev, i);
573 }
574
575 prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
576 prop->cfg_base_address = CFG_BASE;
577 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
578 prop->host_base_address = HOST_PHYS_BASE;
579 prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
580 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
581 prop->completion_mode = HL_COMPLETION_MODE_JOB;
582 prop->collective_first_sob = 0;
583 prop->collective_first_mon = 0;
584
585 /* 2 SOBs per internal queue stream are reserved for collective */
586 prop->sync_stream_first_sob =
587 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
588 * QMAN_STREAMS * HL_RSVD_SOBS;
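/*
 * For illustration only - the numbers here are assumptions, not values taken
 * from this file: with NUMBER_OF_SOBS_IN_GRP = 11, HL_MAX_SOBS_PER_MONITOR = 8,
 * QMAN_STREAMS = 4 and HL_RSVD_SOBS = 2, the expression above reserves
 * ALIGN(11, 8) * 4 * 2 = 16 * 4 * 2 = 128 SOBs, i.e. the first SOB available
 * for sync-stream use would be index 128.
 */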
589
590 /* 1 monitor per internal queue stream is reserved for collective
591 * 2 monitors per external queue stream are reserved for collective
592 */
593 prop->sync_stream_first_mon =
594 (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
595 (NUMBER_OF_EXT_HW_QUEUES * 2);
596
597 prop->dram_base_address = DRAM_PHYS_BASE;
598 prop->dram_size = GAUDI_HBM_SIZE_32GB;
599 prop->dram_end_address = prop->dram_base_address + prop->dram_size;
600 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
601
602 prop->sram_base_address = SRAM_BASE_ADDR;
603 prop->sram_size = SRAM_SIZE;
604 prop->sram_end_address = prop->sram_base_address + prop->sram_size;
605 prop->sram_user_base_address =
606 prop->sram_base_address + SRAM_USER_BASE_OFFSET;
607
608 prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
609 prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
610
611 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
612 if (hdev->pldm)
613 prop->mmu_pgt_size = 0x800000; /* 8MB */
614 else
615 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
616 prop->mmu_pte_size = HL_PTE_SIZE;
617 prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
618 prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
619 prop->dram_page_size = PAGE_SIZE_2MB;
620 prop->device_mem_alloc_default_page_size = prop->dram_page_size;
621 prop->dram_supports_virtual_memory = false;
622
623 prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
624 prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
625 prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
626 prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
627 prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
628 prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
629 prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
630 prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
631 prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
632 prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
633 prop->pmmu.start_addr = VA_HOST_SPACE_START;
634 prop->pmmu.end_addr =
635 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
636 prop->pmmu.page_size = PAGE_SIZE_4KB;
637 prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
638 prop->pmmu.last_mask = LAST_MASK;
639 /* TODO: will be duplicated until implementing per-MMU props */
640 prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
641 prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
642
643 /* PMMU and HPMMU are the same except for the page size */
644 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
645 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
646
647 /* shifts and masks are the same in PMMU and DMMU */
648 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
649 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
650 prop->dmmu.end_addr = VA_HOST_SPACE_END;
651 prop->dmmu.page_size = PAGE_SIZE_2MB;
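/*
 * To summarize the setup above: the host VA range is split in two halves, the
 * lower half handled by the PMMU (4KB pages, or 2MB pages via pmmu_huge) and
 * the upper half described by the DMMU with 2MB pages; hop shifts and masks
 * are shared by all three.
 */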
652
653 prop->cfg_size = CFG_SIZE;
654 prop->max_asid = MAX_ASID;
655 prop->num_of_events = GAUDI_EVENT_SIZE;
656 prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
657 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
658
659 set_default_power_values(hdev);
660
661 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
662 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
663
664 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
665 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
666
667 strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
668 CARD_NAME_MAX_LEN);
669
670 prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
671
672 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
673 prop->sync_stream_first_sob +
674 (num_sync_stream_queues * HL_RSVD_SOBS);
675 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
676 prop->sync_stream_first_mon +
677 (num_sync_stream_queues * HL_RSVD_MONS);
678
679 prop->first_available_user_interrupt = USHRT_MAX;
680 prop->tpc_interrupt_id = USHRT_MAX;
681
682 /* single msi */
683 prop->eq_interrupt_id = 0;
684
685 for (i = 0 ; i < HL_MAX_DCORES ; i++)
686 prop->first_available_cq[i] = USHRT_MAX;
687
688 prop->fw_cpu_boot_dev_sts0_valid = false;
689 prop->fw_cpu_boot_dev_sts1_valid = false;
690 prop->hard_reset_done_by_fw = false;
691 prop->gic_interrupts_enable = true;
692
693 prop->server_type = HL_SERVER_TYPE_UNKNOWN;
694
695 prop->clk_pll_index = HL_GAUDI_MME_PLL;
696 prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
697
698 prop->use_get_power_for_reset_history = true;
699
700 prop->configurable_stop_on_err = true;
701
702 prop->set_max_power_on_device_init = true;
703
704 prop->dma_mask = 48;
705
706 prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;
707
708 return 0;
709 }
710
711 static int gaudi_pci_bars_map(struct hl_device *hdev)
712 {
713 static const char * const name[] = {"SRAM", "CFG", "HBM"};
714 bool is_wc[3] = {false, false, true};
715 int rc;
716
717 rc = hl_pci_bars_map(hdev, name, is_wc);
718 if (rc)
719 return rc;
720
721 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
722 (CFG_BASE - SPI_FLASH_BASE_ADDR);
723
724 return 0;
725 }
726
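/*
 * Re-target the HBM BAR (inbound region 2 / BAR 4) to a new device address.
 * Returns the previously programmed address so the caller can restore it
 * later, or U64_MAX if the region could not be set (or when the firmware owns
 * the iATU configuration).
 */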
727 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
728 {
729 struct gaudi_device *gaudi = hdev->asic_specific;
730 struct hl_inbound_pci_region pci_region;
731 u64 old_addr = addr;
732 int rc;
733
734 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
735 return old_addr;
736
737 if (hdev->asic_prop.iatu_done_by_fw)
738 return U64_MAX;
739
740 /* Inbound Region 2 - Bar 4 - Point to HBM */
741 pci_region.mode = PCI_BAR_MATCH_MODE;
742 pci_region.bar = HBM_BAR_ID;
743 pci_region.addr = addr;
744 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
745 if (rc)
746 return U64_MAX;
747
748 if (gaudi) {
749 old_addr = gaudi->hbm_bar_cur_addr;
750 gaudi->hbm_bar_cur_addr = addr;
751 }
752
753 return old_addr;
754 }
755
756 static int gaudi_init_iatu(struct hl_device *hdev)
757 {
758 struct hl_inbound_pci_region inbound_region;
759 struct hl_outbound_pci_region outbound_region;
760 int rc;
761
762 if (hdev->asic_prop.iatu_done_by_fw)
763 return 0;
764
765 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
766 inbound_region.mode = PCI_BAR_MATCH_MODE;
767 inbound_region.bar = SRAM_BAR_ID;
768 inbound_region.addr = SRAM_BASE_ADDR;
769 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
770 if (rc)
771 goto done;
772
773 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
774 inbound_region.mode = PCI_BAR_MATCH_MODE;
775 inbound_region.bar = CFG_BAR_ID;
776 inbound_region.addr = SPI_FLASH_BASE_ADDR;
777 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
778 if (rc)
779 goto done;
780
781 /* Inbound Region 2 - Bar 4 - Point to HBM */
782 inbound_region.mode = PCI_BAR_MATCH_MODE;
783 inbound_region.bar = HBM_BAR_ID;
784 inbound_region.addr = DRAM_PHYS_BASE;
785 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
786 if (rc)
787 goto done;
788
789 /* Outbound Region 0 - Point to Host */
790 outbound_region.addr = HOST_PHYS_BASE;
791 outbound_region.size = HOST_PHYS_SIZE;
792 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
793
794 done:
795 return rc;
796 }
797
798 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
799 {
800 return RREG32(mmHW_STATE);
801 }
802
803 static int gaudi_early_init(struct hl_device *hdev)
804 {
805 struct asic_fixed_properties *prop = &hdev->asic_prop;
806 struct pci_dev *pdev = hdev->pdev;
807 resource_size_t pci_bar_size;
808 u32 fw_boot_status;
809 int rc;
810
811 rc = gaudi_set_fixed_properties(hdev);
812 if (rc) {
813 dev_err(hdev->dev, "Failed setting fixed properties\n");
814 return rc;
815 }
816
817 /* Check BAR sizes */
818 pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
819
820 if (pci_bar_size != SRAM_BAR_SIZE) {
821 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
822 SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
823 rc = -ENODEV;
824 goto free_queue_props;
825 }
826
827 pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
828
829 if (pci_bar_size != CFG_BAR_SIZE) {
830 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
831 CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
832 rc = -ENODEV;
833 goto free_queue_props;
834 }
835
836 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
837 hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
838
839 /* If FW security is enabled at this point it means no access to ELBI */
840 if (hdev->asic_prop.fw_security_enabled) {
841 hdev->asic_prop.iatu_done_by_fw = true;
842
843 /*
844 * The GIC-security-bit can ONLY be set by CPUCP, so at this stage
845 * the decision can only be taken based on PCI ID security.
846 */
847 hdev->asic_prop.gic_interrupts_enable = false;
848 goto pci_init;
849 }
850
851 rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
852 &fw_boot_status);
853 if (rc)
854 goto free_queue_props;
855
856 /* Check whether FW is configuring iATU */
857 if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
858 (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
859 hdev->asic_prop.iatu_done_by_fw = true;
860
861 pci_init:
862 rc = hl_pci_init(hdev);
863 if (rc)
864 goto free_queue_props;
865
866 /* Before continuing with the initialization, we need to read the preboot
867 * version to determine whether we run with a security-enabled firmware
868 */
869 rc = hl_fw_read_preboot_status(hdev);
870 if (rc) {
871 if (hdev->reset_on_preboot_fail)
872 /* we are already in the failure flow, so don't check if hw_fini fails. */
873 hdev->asic_funcs->hw_fini(hdev, true, false);
874 goto pci_fini;
875 }
876
877 if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
878 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
879 rc = hdev->asic_funcs->hw_fini(hdev, true, false);
880 if (rc) {
881 dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
882 goto pci_fini;
883 }
884 }
885
886 return 0;
887
888 pci_fini:
889 hl_pci_fini(hdev);
890 free_queue_props:
891 kfree(hdev->asic_prop.hw_queues_props);
892 return rc;
893 }
894
895 static int gaudi_early_fini(struct hl_device *hdev)
896 {
897 kfree(hdev->asic_prop.hw_queues_props);
898 hl_pci_fini(hdev);
899
900 return 0;
901 }
902
903 /**
904 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
905 *
906 * @hdev: pointer to hl_device structure
907 *
908 */
909 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
910 {
911 u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
912 struct asic_fixed_properties *prop = &hdev->asic_prop;
913 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
914 int rc;
915
916 if ((hdev->fw_components & FW_TYPE_LINUX) &&
917 (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
918 struct gaudi_device *gaudi = hdev->asic_specific;
919
920 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
921 return 0;
922
923 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
924
925 if (rc)
926 return rc;
927
928 freq = pll_freq_arr[2];
929 } else {
930 /* Backward compatibility */
931 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
932 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
933 nr = RREG32(mmPSOC_CPU_PLL_NR);
934 nf = RREG32(mmPSOC_CPU_PLL_NF);
935 od = RREG32(mmPSOC_CPU_PLL_OD);
936
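/*
 * The computation below appears to be the standard integer-PLL relation, with
 * the registers presumably holding divider-minus-one values:
 * pll_clk = PLL_REF_CLK * (NF + 1) / ((NR + 1) * (OD + 1)),
 * optionally divided further by (div_fctr + 1) when a divided output is
 * selected.
 */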
937 if (div_sel == DIV_SEL_REF_CLK ||
938 div_sel == DIV_SEL_DIVIDED_REF) {
939 if (div_sel == DIV_SEL_REF_CLK)
940 freq = PLL_REF_CLK;
941 else
942 freq = PLL_REF_CLK / (div_fctr + 1);
943 } else if (div_sel == DIV_SEL_PLL_CLK ||
944 div_sel == DIV_SEL_DIVIDED_PLL) {
945 pll_clk = PLL_REF_CLK * (nf + 1) /
946 ((nr + 1) * (od + 1));
947 if (div_sel == DIV_SEL_PLL_CLK)
948 freq = pll_clk;
949 else
950 freq = pll_clk / (div_fctr + 1);
951 } else {
952 dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
953 freq = 0;
954 }
955 }
956
957 prop->psoc_timestamp_frequency = freq;
958 prop->psoc_pci_pll_nr = nr;
959 prop->psoc_pci_pll_nf = nf;
960 prop->psoc_pci_pll_od = od;
961 prop->psoc_pci_pll_div_factor = div_fctr;
962
963 return 0;
964 }
965
966 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
967 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
968 {
969 struct asic_fixed_properties *prop = &hdev->asic_prop;
970 struct packet_lin_dma *init_tpc_mem_pkt;
971 struct hl_cs_job *job;
972 struct hl_cb *cb;
973 u64 dst_addr;
974 u32 cb_size, ctl;
975 u8 tpc_id;
976 int rc;
977
978 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
979 if (!cb)
980 return -EFAULT;
981
982 init_tpc_mem_pkt = cb->kernel_address;
983 cb_size = sizeof(*init_tpc_mem_pkt);
984 memset(init_tpc_mem_pkt, 0, cb_size);
985
986 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
987
988 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
989 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
990 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
991 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
992
993 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
994
995 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
996
997 /* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
998 dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
999 round_up(prop->sram_user_base_address, SZ_8K));
1000 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
1001
1002 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
1003 if (!job) {
1004 dev_err(hdev->dev, "Failed to allocate a new job\n");
1005 rc = -ENOMEM;
1006 goto release_cb;
1007 }
1008
1009 job->id = 0;
1010 job->user_cb = cb;
1011 atomic_inc(&job->user_cb->cs_cnt);
1012 job->user_cb_size = cb_size;
1013 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1014 job->patched_cb = job->user_cb;
1015 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1016
1017 hl_debugfs_add_job(hdev, job);
1018
1019 rc = gaudi_send_job_on_qman0(hdev, job);
1020
1021 if (rc)
1022 goto free_job;
1023
1024 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1025 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1026 if (rc)
1027 break;
1028 }
1029
1030 free_job:
1031 hl_userptr_delete_list(hdev, &job->userptr_list);
1032 hl_debugfs_remove_job(hdev, job);
1033 kfree(job);
1034 atomic_dec(&cb->cs_cnt);
1035
1036 release_cb:
1037 hl_cb_put(cb);
1038 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1039
1040 return rc;
1041 }
1042
1043 /*
1044 * gaudi_init_tpc_mem() - Initialize TPC memories.
1045 * @hdev: Pointer to hl_device structure.
1046 *
1047 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1048 *
1049 * Return: 0 for success, negative value for error.
1050 */
1051 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1052 {
1053 const struct firmware *fw;
1054 size_t fw_size;
1055 void *cpu_addr;
1056 dma_addr_t dma_handle;
1057 int rc, count = 5;
1058
1059 again:
1060 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1061 if (rc == -EINTR && count-- > 0) {
1062 msleep(50);
1063 goto again;
1064 }
1065
1066 if (rc) {
1067 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1068 GAUDI_TPC_FW_FILE);
1069 goto out;
1070 }
1071
1072 fw_size = fw->size;
1073 cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1074 if (!cpu_addr) {
1075 dev_err(hdev->dev,
1076 "Failed to allocate %zu of dma memory for TPC kernel\n",
1077 fw_size);
1078 rc = -ENOMEM;
1079 goto out;
1080 }
1081
1082 memcpy(cpu_addr, fw->data, fw_size);
1083
1084 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1085
1086 hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1087
1088 out:
1089 release_firmware(fw);
1090 return rc;
1091 }
1092
1093 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1094 {
1095 struct gaudi_device *gaudi = hdev->asic_specific;
1096 struct gaudi_collective_properties *prop = &gaudi->collective_props;
1097 struct hl_hw_queue *q;
1098 u32 i, sob_id, sob_group_id, queue_id;
1099
1100 /* Iterate through SOB groups and assign a SOB for each slave queue */
1101 sob_group_id =
1102 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1103 sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1104
1105 queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1106 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1107 q = &hdev->kernel_queues[queue_id + (4 * i)];
1108 q->sync_stream_prop.collective_sob_id = sob_id + i;
1109 }
1110
1111 /* Both DMA5 and TPC7 use the same resources since only a single
1112 * engine needs to participate in the reduction process
1113 */
1114 queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1115 q = &hdev->kernel_queues[queue_id];
1116 q->sync_stream_prop.collective_sob_id =
1117 sob_id + NIC_NUMBER_OF_ENGINES;
1118
1119 queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1120 q = &hdev->kernel_queues[queue_id];
1121 q->sync_stream_prop.collective_sob_id =
1122 sob_id + NIC_NUMBER_OF_ENGINES;
1123 }
1124
1125 static void gaudi_sob_group_hw_reset(struct kref *ref)
1126 {
1127 struct gaudi_hw_sob_group *hw_sob_group =
1128 container_of(ref, struct gaudi_hw_sob_group, kref);
1129 struct hl_device *hdev = hw_sob_group->hdev;
1130 int i;
1131
1132 for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1133 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1134 (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1135
1136 kref_init(&hw_sob_group->kref);
1137 }
1138
1139 static void gaudi_sob_group_reset_error(struct kref *ref)
1140 {
1141 struct gaudi_hw_sob_group *hw_sob_group =
1142 container_of(ref, struct gaudi_hw_sob_group, kref);
1143 struct hl_device *hdev = hw_sob_group->hdev;
1144
1145 dev_crit(hdev->dev,
1146 "SOB release shouldn't be called here, base_sob_id: %d\n",
1147 hw_sob_group->base_sob_id);
1148 }
1149
1150 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1151 {
1152 struct gaudi_collective_properties *prop;
1153 int i;
1154
1155 prop = &gaudi->collective_props;
1156
1157 memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1158
1159 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1160 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1161 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1162 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1163 /* Set the collective engine bit (intentionally outside the loop: i == NIC_NUMBER_OF_ENGINES here) */
1164 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1165 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1166 }
1167
1168 static int gaudi_collective_init(struct hl_device *hdev)
1169 {
1170 u32 i, sob_id, reserved_sobs_per_group;
1171 struct gaudi_collective_properties *prop;
1172 struct gaudi_device *gaudi;
1173
1174 gaudi = hdev->asic_specific;
1175 prop = &gaudi->collective_props;
1176 sob_id = hdev->asic_prop.collective_first_sob;
1177
1178 /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1179 reserved_sobs_per_group =
1180 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1181
1182 /* Init SOB groups */
1183 for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1184 prop->hw_sob_group[i].hdev = hdev;
1185 prop->hw_sob_group[i].base_sob_id = sob_id;
1186 sob_id += reserved_sobs_per_group;
1187 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1188 }
1189
1190 for (i = 0 ; i < QMAN_STREAMS; i++) {
1191 prop->next_sob_group_val[i] = 1;
1192 prop->curr_sob_group_idx[i] = 0;
1193 gaudi_collective_map_sobs(hdev, i);
1194 }
1195
1196 gaudi_collective_mstr_sob_mask_set(gaudi);
1197
1198 return 0;
1199 }
1200
1201 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1202 {
1203 struct gaudi_device *gaudi = hdev->asic_specific;
1204 struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1205
1206 kref_put(&cprop->hw_sob_group[sob_group].kref,
1207 gaudi_sob_group_hw_reset);
1208 }
1209
1210 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1211 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1212 {
1213 u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1214 struct gaudi_collective_properties *cprop;
1215 struct hl_gen_wait_properties wait_prop;
1216 struct hl_sync_stream_properties *prop;
1217 struct gaudi_device *gaudi;
1218
1219 gaudi = hdev->asic_specific;
1220 cprop = &gaudi->collective_props;
1221 queue_id = job->hw_queue_id;
1222 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1223
1224 master_sob_base =
1225 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1226 master_monitor = prop->collective_mstr_mon_id[0];
1227
1228 cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1229
1230 dev_dbg(hdev->dev,
1231 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1232 master_sob_base, cprop->mstr_sob_mask[0],
1233 cprop->next_sob_group_val[stream],
1234 master_monitor, queue_id);
1235
1236 wait_prop.data = (void *) job->patched_cb;
1237 wait_prop.sob_base = master_sob_base;
1238 wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1239 wait_prop.sob_val = cprop->next_sob_group_val[stream];
1240 wait_prop.mon_id = master_monitor;
1241 wait_prop.q_idx = queue_id;
1242 wait_prop.size = cb_size;
1243 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1244
1245 master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1246 master_monitor = prop->collective_mstr_mon_id[1];
1247
1248 dev_dbg(hdev->dev,
1249 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1250 master_sob_base, cprop->mstr_sob_mask[1],
1251 cprop->next_sob_group_val[stream],
1252 master_monitor, queue_id);
1253
1254 wait_prop.sob_base = master_sob_base;
1255 wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1256 wait_prop.mon_id = master_monitor;
1257 wait_prop.size = cb_size;
1258 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1259 }
1260
1261 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1262 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1263 {
1264 struct hl_gen_wait_properties wait_prop;
1265 struct hl_sync_stream_properties *prop;
1266 u32 queue_id, cb_size = 0;
1267
1268 queue_id = job->hw_queue_id;
1269 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1270
1271 if (job->cs->encaps_signals) {
1272 /* use the encaps signal handle stored earlier in the flow
1273 * and set the SOB information from the encaps
1274 * signals handle
1275 */
1276 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1277 cs_cmpl);
1278
1279 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
1280 job->cs->sequence,
1281 cs_cmpl->hw_sob->sob_id,
1282 cs_cmpl->sob_val);
1283 }
1284
1285 /* Add to wait CBs using slave monitor */
1286 wait_prop.data = (void *) job->user_cb;
1287 wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1288 wait_prop.sob_mask = 0x1;
1289 wait_prop.sob_val = cs_cmpl->sob_val;
1290 wait_prop.mon_id = prop->collective_slave_mon_id;
1291 wait_prop.q_idx = queue_id;
1292 wait_prop.size = cb_size;
1293
1294 dev_dbg(hdev->dev,
1295 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1296 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1297 prop->collective_slave_mon_id, queue_id);
1298
1299 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1300
1301 dev_dbg(hdev->dev,
1302 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1303 prop->collective_sob_id, queue_id);
1304
1305 cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1306 prop->collective_sob_id, cb_size, false);
1307 }
1308
1309 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1310 {
1311 struct hl_cs_compl *signal_cs_cmpl =
1312 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1313 struct hl_cs_compl *cs_cmpl =
1314 container_of(cs->fence, struct hl_cs_compl, base_fence);
1315 struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1316 struct gaudi_collective_properties *cprop;
1317 u32 stream, queue_id, sob_group_offset;
1318 struct gaudi_device *gaudi;
1319 struct hl_device *hdev;
1320 struct hl_cs_job *job;
1321 struct hl_ctx *ctx;
1322
1323 ctx = cs->ctx;
1324 hdev = ctx->hdev;
1325 gaudi = hdev->asic_specific;
1326 cprop = &gaudi->collective_props;
1327
1328 if (cs->encaps_signals) {
1329 cs_cmpl->hw_sob = handle->hw_sob;
1330 /* at this checkpoint we only need the hw_sob pointer
1331 * for the completion check before starting to go over the
1332 * master/slave jobs. The sob_value will be taken later on
1333 * in gaudi_collective_slave_init_job, depending on each
1334 * job's wait offset value.
1335 */
1336 cs_cmpl->sob_val = 0;
1337 } else {
1338 /* copy the SOB id and value of the signal CS */
1339 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1340 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1341 }
1342
1343 /* check again if the signal cs already completed.
1344 * if yes then don't send any wait cs since the hw_sob
1345 * could be in reset already. if signal is not completed
1346 * then get refcount to hw_sob to prevent resetting the sob
1347 * while wait cs is not submitted.
1348 * note that this check is protected by two locks,
1349 * hw queue lock and completion object lock,
1350 * and the same completion object lock also protects
1351 * the hw_sob reset handler function.
1352 * The hw_queue lock prevents the hw_sob refcount value,
1353 * which is changed by the signal/wait flows, from going out of sync.
1354 */
1355 spin_lock(&signal_cs_cmpl->lock);
1356
1357 if (completion_done(&cs->signal_fence->completion)) {
1358 spin_unlock(&signal_cs_cmpl->lock);
1359 return -EINVAL;
1360 }
1361 /* Increment kref since all slave queues are now waiting on it */
1362 kref_get(&cs_cmpl->hw_sob->kref);
1363
1364 spin_unlock(&signal_cs_cmpl->lock);
1365
1366 /* Calculate the stream from collective master queue (1st job) */
1367 job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1368 stream = job->hw_queue_id % 4;
1369 sob_group_offset =
1370 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1371
1372 list_for_each_entry(job, &cs->job_list, cs_node) {
1373 queue_id = job->hw_queue_id;
1374
1375 if (hdev->kernel_queues[queue_id].collective_mode ==
1376 HL_COLLECTIVE_MASTER)
1377 gaudi_collective_master_init_job(hdev, job, stream,
1378 sob_group_offset);
1379 else
1380 gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1381 }
1382
1383 cs_cmpl->sob_group = sob_group_offset;
1384
1385 /* Handle sob group kref and wraparound */
1386 kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1387 cprop->next_sob_group_val[stream]++;
1388
1389 if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1390 /*
1391 * Decrement as we reached the max value.
1392 * The release function won't be called here as we've
1393 * just incremented the refcount.
1394 */
1395 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1396 gaudi_sob_group_reset_error);
1397 cprop->next_sob_group_val[stream] = 1;
1398 /* only two SOB groups are currently in use */
1399 cprop->curr_sob_group_idx[stream] =
1400 (cprop->curr_sob_group_idx[stream] + 1) &
1401 (HL_RSVD_SOBS - 1);
1402
1403 gaudi_collective_map_sobs(hdev, stream);
1404
1405 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1406 cprop->curr_sob_group_idx[stream], stream);
1407 }
1408
1409 mb();
1410 hl_fence_put(cs->signal_fence);
1411 cs->signal_fence = NULL;
1412
1413 return 0;
1414 }
1415
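/*
 * Extra bytes needed when patching a user CB: room for the two trailing
 * MSG_PROT packets (completion + MSI), padded up to the next device cache
 * line first whenever the packets would otherwise cross the cache-line
 * boundary at the end of the user CB.
 */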
1416 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1417 {
1418 u32 cacheline_end, additional_commands;
1419
1420 cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1421 additional_commands = sizeof(struct packet_msg_prot) * 2;
1422
1423 if (user_cb_size + additional_commands > cacheline_end)
1424 return cacheline_end - user_cb_size + additional_commands;
1425 else
1426 return additional_commands;
1427 }
1428
1429 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1430 struct hl_ctx *ctx, struct hl_cs *cs,
1431 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1432 u32 encaps_signal_offset)
1433 {
1434 struct hw_queue_properties *hw_queue_prop;
1435 struct hl_cs_counters_atomic *cntr;
1436 struct hl_cs_job *job;
1437 struct hl_cb *cb;
1438 u32 cb_size;
1439 bool patched_cb;
1440
1441 cntr = &hdev->aggregated_cs_counters;
1442
1443 if (mode == HL_COLLECTIVE_MASTER) {
1444 /* CB size of collective master queue contains
1445 * 4 msg short packets for monitor 1 configuration
1446 * 1 fence packet
1447 * 4 msg short packets for monitor 2 configuration
1448 * 1 fence packet
1449 * 2 msg prot packets for completion and MSI
1450 */
1451 cb_size = sizeof(struct packet_msg_short) * 8 +
1452 sizeof(struct packet_fence) * 2 +
1453 sizeof(struct packet_msg_prot) * 2;
1454 patched_cb = true;
1455 } else {
1456 /* CB size of collective slave queues contains
1457 * 4 msg short packets for monitor configuration
1458 * 1 fence packet
1459 * 1 additional msg short packet for sob signal
1460 */
1461 cb_size = sizeof(struct packet_msg_short) * 5 +
1462 sizeof(struct packet_fence);
1463 patched_cb = false;
1464 }
1465
1466 hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1467 job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1468 if (!job) {
1469 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1470 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1471 dev_err(hdev->dev, "Failed to allocate a new job\n");
1472 return -ENOMEM;
1473 }
1474
1475 /* Allocate internal mapped CB for non patched CBs */
1476 cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
1477 if (!cb) {
1478 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1479 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1480 kfree(job);
1481 return -EFAULT;
1482 }
1483
1484 job->id = 0;
1485 job->cs = cs;
1486 job->user_cb = cb;
1487 atomic_inc(&job->user_cb->cs_cnt);
1488 job->user_cb_size = cb_size;
1489 job->hw_queue_id = queue_id;
1490
1491 /* since it's guaranteed to have only one chunk in the collective wait
1492 * cs, we can use this chunk to set the encapsulated signal offset
1493 * in the jobs.
1494 */
1495 if (cs->encaps_signals)
1496 job->encaps_sig_wait_offset = encaps_signal_offset;
1497
1498 /*
1499 * No need for parsing, the user CB is the patched CB.
1500 * We call hl_cb_destroy() for two reasons: we don't need
1501 * the CB in the CB idr anymore, and to decrement its refcount as
1502 * it was incremented inside hl_cb_kernel_create().
1503 */
1504 if (patched_cb)
1505 job->patched_cb = job->user_cb;
1506 else
1507 job->patched_cb = NULL;
1508
1509 job->job_cb_size = job->user_cb_size;
1510 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1511
1512 /* increment the refcount since for external queues we get a completion */
1513 if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1514 cs_get(cs);
1515
1516 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1517
1518 list_add_tail(&job->cs_node, &cs->job_list);
1519
1520 hl_debugfs_add_job(hdev, job);
1521
1522 return 0;
1523 }
1524
1525 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1526 struct hl_ctx *ctx, struct hl_cs *cs,
1527 u32 wait_queue_id, u32 collective_engine_id,
1528 u32 encaps_signal_offset)
1529 {
1530 struct gaudi_device *gaudi = hdev->asic_specific;
1531 struct hw_queue_properties *hw_queue_prop;
1532 u32 queue_id, collective_queue, num_jobs;
1533 u32 stream, nic_queue, nic_idx = 0;
1534 bool skip;
1535 int i, rc = 0;
1536
1537 /* Verify wait queue id is configured as master */
1538 hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1539 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1540 dev_err(hdev->dev,
1541 "Queue %d is not configured as collective master\n",
1542 wait_queue_id);
1543 return -EINVAL;
1544 }
1545
1546 /* Verify engine id is supported */
1547 if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1548 collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1549 dev_err(hdev->dev,
1550 "Collective wait does not support engine %u\n",
1551 collective_engine_id);
1552 return -EINVAL;
1553 }
1554
1555 stream = wait_queue_id % 4;
1556
1557 if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1558 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1559 else
1560 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1561
1562 num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1563 nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1564
1565 	/* The first job goes to the collective master queue, which will wait
1566 	 * for the collective slave queues to finish execution.
1567 	 * The synchronization is done using two monitors:
1568 	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1569 	 * reduction engine (DMA5/TPC7).
1570 	 *
1571 	 * The rest of the jobs go to the collective slave queues, which will
1572 	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1573 */
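	/* Note: disabled NIC ports are skipped inside the loop below, so fewer
	 * than num_jobs jobs may actually be created.
	 */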
1574 for (i = 0 ; i < num_jobs ; i++) {
1575 if (i == 0) {
1576 queue_id = wait_queue_id;
1577 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1578 HL_COLLECTIVE_MASTER, queue_id,
1579 wait_queue_id, encaps_signal_offset);
1580 } else {
1581 if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1582 if (gaudi->hw_cap_initialized &
1583 BIT(HW_CAP_NIC_SHIFT + nic_idx))
1584 skip = false;
1585 else
1586 skip = true;
1587
1588 queue_id = nic_queue;
1589 nic_queue += 4;
1590 nic_idx++;
1591
1592 if (skip)
1593 continue;
1594 } else {
1595 queue_id = collective_queue;
1596 }
1597
1598 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1599 HL_COLLECTIVE_SLAVE, queue_id,
1600 wait_queue_id, encaps_signal_offset);
1601 }
1602
1603 if (rc)
1604 return rc;
1605 }
1606
1607 return rc;
1608 }
1609
1610 static int gaudi_late_init(struct hl_device *hdev)
1611 {
1612 struct gaudi_device *gaudi = hdev->asic_specific;
1613 int rc;
1614
1615 rc = gaudi->cpucp_info_get(hdev);
1616 if (rc) {
1617 dev_err(hdev->dev, "Failed to get cpucp info\n");
1618 return rc;
1619 }
1620
1621 if ((hdev->card_type == cpucp_card_type_pci) &&
1622 (hdev->nic_ports_mask & 0x3)) {
1623 dev_info(hdev->dev,
1624 "PCI card detected, only 8 ports are enabled\n");
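		/* Bits 0-1 of the ports mask are the two NIC ports that are
		 * apparently unused on PCI cards, so NIC0 QM0/QM1 are stopped
		 * and their capability bits are cleared below.
		 */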
1625 hdev->nic_ports_mask &= ~0x3;
1626
1627 /* Stop and disable unused NIC QMANs */
1628 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1629 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1630 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1631
1632 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1633 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1634 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1635
1636 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1637 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1638
1639 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1640 }
1641
1642 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1643 if (rc) {
1644 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1645 return rc;
1646 }
1647
1648 /* Scrub both SRAM and DRAM */
1649 rc = hdev->asic_funcs->scrub_device_mem(hdev);
1650 if (rc)
1651 goto disable_pci_access;
1652
1653 rc = gaudi_fetch_psoc_frequency(hdev);
1654 if (rc) {
1655 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1656 goto disable_pci_access;
1657 }
1658
1659 rc = gaudi_mmu_clear_pgt_range(hdev);
1660 if (rc) {
1661 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1662 goto disable_pci_access;
1663 }
1664
1665 rc = gaudi_init_tpc_mem(hdev);
1666 if (rc) {
1667 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1668 goto disable_pci_access;
1669 }
1670
1671 rc = gaudi_collective_init(hdev);
1672 if (rc) {
1673 dev_err(hdev->dev, "Failed to init collective\n");
1674 goto disable_pci_access;
1675 }
1676
1677 /* We only support a single ASID for the user, so for the sake of optimization, just
1678 * initialize the ASID one time during device initialization with the fixed value of 1
1679 */
1680 gaudi_mmu_prepare(hdev, 1);
1681
1682 hl_fw_set_pll_profile(hdev);
1683
1684 return 0;
1685
1686 disable_pci_access:
1687 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1688
1689 return rc;
1690 }
1691
1692 static void gaudi_late_fini(struct hl_device *hdev)
1693 {
1694 hl_hwmon_release_resources(hdev);
1695 }
1696
1697 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1698 {
1699 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1700 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1701 int i, j, rc = 0;
1702
1703 /*
1704 	 * The device CPU works with 40-bit addresses, and bit 39 must be set
1705 	 * to '1' when accessing the host.
1706 	 * Bits 49:39 of the full host address are saved for a later
1707 	 * configuration of the HW to perform the extension to 50 bits.
1708 	 * Because there is a single HW register that holds the extension bits,
1709 	 * these bits must be identical across the entire allocated range.
1710 */
1711
1712 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1713 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1714 &dma_addr_arr[i],
1715 GFP_KERNEL | __GFP_ZERO);
1716 if (!virt_addr_arr[i]) {
1717 rc = -ENOMEM;
1718 goto free_dma_mem_arr;
1719 }
1720
1721 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1722 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1723 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1724 break;
1725 }
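	/* Allocations whose MSBs differ across the range are kept until after
	 * the loop (presumably so the next attempt returns a different range);
	 * all of them are freed at the free_dma_mem_arr label below.
	 */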
1726
1727 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1728 dev_err(hdev->dev,
1729 "MSB of CPU accessible DMA memory are not identical in all range\n");
1730 rc = -EFAULT;
1731 goto free_dma_mem_arr;
1732 }
1733
1734 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1735 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1736 hdev->cpu_pci_msb_addr =
1737 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1738
1739 if (!hdev->asic_prop.fw_security_enabled)
1740 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1741
1742 free_dma_mem_arr:
1743 for (j = 0 ; j < i ; j++)
1744 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1745 dma_addr_arr[j]);
1746
1747 return rc;
1748 }
1749
1750 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1751 {
1752 struct gaudi_device *gaudi = hdev->asic_specific;
1753 struct gaudi_internal_qman_info *q;
1754 u32 i;
1755
1756 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1757 q = &gaudi->internal_qmans[i];
1758 if (!q->pq_kernel_addr)
1759 continue;
1760 hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1761 }
1762 }
1763
1764 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1765 {
1766 struct gaudi_device *gaudi = hdev->asic_specific;
1767 struct gaudi_internal_qman_info *q;
1768 int rc, i;
1769
1770 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1771 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1772 continue;
1773
1774 q = &gaudi->internal_qmans[i];
1775
1776 switch (i) {
1777 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1778 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1779 break;
1780 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1781 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1782 break;
1783 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1784 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1785 break;
1786 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1787 q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1788 break;
1789 default:
1790 dev_err(hdev->dev, "Bad internal queue index %d", i);
1791 rc = -EINVAL;
1792 goto free_internal_qmans_pq_mem;
1793 }
1794
1795 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1796 GFP_KERNEL | __GFP_ZERO);
1797 if (!q->pq_kernel_addr) {
1798 rc = -ENOMEM;
1799 goto free_internal_qmans_pq_mem;
1800 }
1801 }
1802
1803 return 0;
1804
1805 free_internal_qmans_pq_mem:
1806 gaudi_free_internal_qmans_pq_mem(hdev);
1807 return rc;
1808 }
1809
1810 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1811 {
1812 struct asic_fixed_properties *prop = &hdev->asic_prop;
1813 struct pci_mem_region *region;
1814
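	/*
	 * CFG and SP SRAM are both exposed through the CFG BAR (their offsets
	 * are relative to SPI_FLASH_BASE_ADDR), while SRAM and DRAM each map
	 * through their own dedicated BAR.
	 */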
1815 /* CFG */
1816 region = &hdev->pci_mem_region[PCI_REGION_CFG];
1817 region->region_base = CFG_BASE;
1818 region->region_size = CFG_SIZE;
1819 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1820 region->bar_size = CFG_BAR_SIZE;
1821 region->bar_id = CFG_BAR_ID;
1822 region->used = 1;
1823
1824 /* SRAM */
1825 region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1826 region->region_base = SRAM_BASE_ADDR;
1827 region->region_size = SRAM_SIZE;
1828 region->offset_in_bar = 0;
1829 region->bar_size = SRAM_BAR_SIZE;
1830 region->bar_id = SRAM_BAR_ID;
1831 region->used = 1;
1832
1833 /* DRAM */
1834 region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1835 region->region_base = DRAM_PHYS_BASE;
1836 region->region_size = hdev->asic_prop.dram_size;
1837 region->offset_in_bar = 0;
1838 region->bar_size = prop->dram_pci_bar_size;
1839 region->bar_id = HBM_BAR_ID;
1840 region->used = 1;
1841
1842 /* SP SRAM */
1843 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1844 region->region_base = PSOC_SCRATCHPAD_ADDR;
1845 region->region_size = PSOC_SCRATCHPAD_SIZE;
1846 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1847 region->bar_size = CFG_BAR_SIZE;
1848 region->bar_id = CFG_BAR_ID;
1849 region->used = 1;
1850 }
1851
1852 static int gaudi_sw_init(struct hl_device *hdev)
1853 {
1854 struct gaudi_device *gaudi;
1855 u32 i, event_id = 0;
1856 int rc;
1857
1858 /* Allocate device structure */
1859 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1860 if (!gaudi)
1861 return -ENOMEM;
1862
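	/* Build a dense events[] array from the valid entries of
	 * gaudi_irq_map_table, keeping each entry's fc_id.
	 */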
1863 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1864 if (gaudi_irq_map_table[i].valid) {
1865 if (event_id == GAUDI_EVENT_SIZE) {
1866 dev_err(hdev->dev,
1867 "Event array exceeds the limit of %u events\n",
1868 GAUDI_EVENT_SIZE);
1869 rc = -EINVAL;
1870 goto free_gaudi_device;
1871 }
1872
1873 gaudi->events[event_id++] =
1874 gaudi_irq_map_table[i].fc_id;
1875 }
1876 }
1877
1878 gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1879
1880 hdev->asic_specific = gaudi;
1881
1882 /* Create DMA pool for small allocations */
1883 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1884 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1885 if (!hdev->dma_pool) {
1886 dev_err(hdev->dev, "failed to create DMA pool\n");
1887 rc = -ENOMEM;
1888 goto free_gaudi_device;
1889 }
1890
1891 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1892 if (rc)
1893 goto free_dma_pool;
1894
1895 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1896 if (!hdev->cpu_accessible_dma_pool) {
1897 dev_err(hdev->dev,
1898 "Failed to create CPU accessible DMA pool\n");
1899 rc = -ENOMEM;
1900 goto free_cpu_dma_mem;
1901 }
1902
1903 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1904 (uintptr_t) hdev->cpu_accessible_dma_mem,
1905 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1906 if (rc) {
1907 dev_err(hdev->dev,
1908 "Failed to add memory to CPU accessible DMA pool\n");
1909 rc = -EFAULT;
1910 goto free_cpu_accessible_dma_pool;
1911 }
1912
1913 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1914 if (rc)
1915 goto free_cpu_accessible_dma_pool;
1916
1917 spin_lock_init(&gaudi->hw_queues_lock);
1918
1919 hdev->supports_sync_stream = true;
1920 hdev->supports_coresight = true;
1921 hdev->supports_staged_submission = true;
1922 hdev->supports_wait_for_multi_cs = true;
1923
1924 hdev->asic_funcs->set_pci_memory_regions(hdev);
1925 hdev->stream_master_qid_arr =
1926 hdev->asic_funcs->get_stream_master_qid_arr();
1927 hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1928
1929 return 0;
1930
1931 free_cpu_accessible_dma_pool:
1932 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1933 free_cpu_dma_mem:
1934 if (!hdev->asic_prop.fw_security_enabled)
1935 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1936 hdev->cpu_pci_msb_addr);
1937 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1938 hdev->cpu_accessible_dma_address);
1939 free_dma_pool:
1940 dma_pool_destroy(hdev->dma_pool);
1941 free_gaudi_device:
1942 kfree(gaudi);
1943 return rc;
1944 }
1945
1946 static int gaudi_sw_fini(struct hl_device *hdev)
1947 {
1948 struct gaudi_device *gaudi = hdev->asic_specific;
1949
1950 gaudi_free_internal_qmans_pq_mem(hdev);
1951
1952 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1953
1954 if (!hdev->asic_prop.fw_security_enabled)
1955 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1956 hdev->cpu_pci_msb_addr);
1957
1958 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1959 hdev->cpu_accessible_dma_address);
1960
1961 dma_pool_destroy(hdev->dma_pool);
1962
1963 kfree(gaudi);
1964
1965 return 0;
1966 }
1967
1968 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1969 {
1970 struct hl_device *hdev = arg;
1971 int i;
1972
1973 if (hdev->disabled)
1974 return IRQ_HANDLED;
1975
1976 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1977 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1978
1979 hl_irq_handler_eq(irq, &hdev->event_queue);
1980
1981 return IRQ_HANDLED;
1982 }
1983
1984 /*
1985 * For backward compatibility, new MSI interrupts should be set after the
1986 * existing CPU and NIC interrupts.
1987 */
1988 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1989 bool cpu_eq)
1990 {
1991 int msi_vec;
1992
1993 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1994 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1995 GAUDI_EVENT_QUEUE_MSI_IDX);
1996
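	/*
	 * Vectors below GAUDI_EVENT_QUEUE_MSI_IDX and the CPU EQ map directly
	 * to their index; any other (newer) interrupt is placed after the CPU
	 * EQ and the NIC interrupts, hence the NIC_NUMBER_OF_ENGINES + 1 skip.
	 */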
1997 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1998 (nr + NIC_NUMBER_OF_ENGINES + 1);
1999
2000 return pci_irq_vector(hdev->pdev, msi_vec);
2001 }
2002
2003 static int gaudi_enable_msi_single(struct hl_device *hdev)
2004 {
2005 int rc, irq;
2006
2007 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2008
2009 irq = gaudi_pci_irq_vector(hdev, 0, false);
2010 rc = request_irq(irq, gaudi_irq_handler_single, 0,
2011 "gaudi single msi", hdev);
2012 if (rc)
2013 dev_err(hdev->dev,
2014 "Failed to request single MSI IRQ\n");
2015
2016 return rc;
2017 }
2018
2019 static int gaudi_enable_msi(struct hl_device *hdev)
2020 {
2021 struct gaudi_device *gaudi = hdev->asic_specific;
2022 int rc;
2023
2024 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2025 return 0;
2026
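	/* Only a single MSI vector is requested here; all CQs and the EQ are
	 * demultiplexed in gaudi_irq_handler_single().
	 */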
2027 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2028 if (rc < 0) {
2029 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2030 return rc;
2031 }
2032
2033 rc = gaudi_enable_msi_single(hdev);
2034 if (rc)
2035 goto free_pci_irq_vectors;
2036
2037 gaudi->hw_cap_initialized |= HW_CAP_MSI;
2038
2039 return 0;
2040
2041 free_pci_irq_vectors:
2042 pci_free_irq_vectors(hdev->pdev);
2043 return rc;
2044 }
2045
2046 static void gaudi_sync_irqs(struct hl_device *hdev)
2047 {
2048 struct gaudi_device *gaudi = hdev->asic_specific;
2049
2050 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2051 return;
2052
2053 /* Wait for all pending IRQs to be finished */
2054 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2055 }
2056
2057 static void gaudi_disable_msi(struct hl_device *hdev)
2058 {
2059 struct gaudi_device *gaudi = hdev->asic_specific;
2060
2061 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2062 return;
2063
2064 gaudi_sync_irqs(hdev);
2065 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2066 pci_free_irq_vectors(hdev->pdev);
2067
2068 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2069 }
2070
2071 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2072 {
2073 struct gaudi_device *gaudi = hdev->asic_specific;
2074
2075 if (hdev->asic_prop.fw_security_enabled)
2076 return;
2077
2078 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2079 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2080 return;
2081
2082 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2083 return;
2084
2085 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2086 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2087 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2088 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2089 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2090 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2091 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2092 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2093 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2094 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2095 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2096 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2097 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2098 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2099 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2100 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2101
2102 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2103 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2104 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2105 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2106 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2107 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2108 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2109 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2110 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2111 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2112 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2113 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2114 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2115 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2116 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2117 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2118
2119 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2120 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2121 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2122 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2123 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2124 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2125 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2126 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2127 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2128 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2129 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2130 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2131 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2132 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2133 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2134 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2135
2136 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2137 }
2138
2139 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2140 {
2141 struct gaudi_device *gaudi = hdev->asic_specific;
2142
2143 if (hdev->asic_prop.fw_security_enabled)
2144 return;
2145
2146 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2147 CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2148 return;
2149
2150 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2151 return;
2152
2153 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2154 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2155 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2156 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2157 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2158 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2159 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2160 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2161 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2162 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2163 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2164 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2165 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2166 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2167 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2168 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2169
2170 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2171 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2172 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2173 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2174 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2175 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2176 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2177 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2178 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2179 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2180 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2181 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2182 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2183 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2184 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2185 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2186
2187 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2188 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2189 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2190 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2191 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2192 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2193 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2194 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2195 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2196 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2197 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2198 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2199 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2200 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2201 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2202 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2203
2204 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2205 }
2206
2207 static void gaudi_init_e2e(struct hl_device *hdev)
2208 {
2209 if (hdev->asic_prop.fw_security_enabled)
2210 return;
2211
2212 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2213 CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2214 return;
2215
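	/*
	 * First program the per-router E2E credit sizes (HBM/PCI, write/read)
	 * for all SIF/NIF routers and DMA_IF down channels, then enable the
	 * HBM and PCI E2E credits on each of them at the end of the function.
	 */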
2216 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2217 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2218 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2219 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2220
2221 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2222 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2223 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2224 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2225
2226 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2227 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2228 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2229 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2230
2231 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2232 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2233 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2234 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2235
2236 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2237 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2238 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2239 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2240
2241 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2242 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2243 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2244 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2245
2246 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2247 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2248 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2249 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2250
2251 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2252 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2253 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2254 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2255
2256 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2257 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2258 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2259 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2260
2261 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2262 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2263 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2264 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2265
2266 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2267 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2268 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2269 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2270
2271 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2272 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2273 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2274 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2275
2276 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2277 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2278 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2279 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2280
2281 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2282 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2283 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2284 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2285
2286 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2287 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2288 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2289 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2290
2291 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2292 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2293 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2294 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2295
2296 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2297 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2298 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2299 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2300
2301 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2302 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2303 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2304 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2305
2306 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2307 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2308 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2309 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2310
2311 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2312 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2313 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2314 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2315
2316 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2317 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2318 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2319 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2320
2321 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2322 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2323 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2324 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2325
2326 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2327 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2328 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2329 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2330
2331 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2332 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2333 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2334 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2335
2336 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2337 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2338 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2339 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2340
2341 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2342 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2343 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2344 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2345
2346 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2347 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2348 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2349 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2350
2351 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2352 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2353 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2354 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2355
2356 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2357 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2358 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2359 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2360
2361 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2362 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2363 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2364 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2365
2366 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2367 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2368 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2369 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2370
2371 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2372 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2373 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2374 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2375
2376 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2377 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2378 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2379 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2380
2381 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2382 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2383 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2384 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2385
2386 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2387 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2388 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2389 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2390
2391 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2392 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2393 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2394 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2395
2396 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2397 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2398 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2399 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2400
2401 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2402 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2403 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2404 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2405
2406 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2407 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2408 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2409 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2410
2411 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2412 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2413 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2414 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2415
2416 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2417 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2418 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2419 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2420
2421 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2422 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2423 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2424 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2425
2426 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2427 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2428 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2429 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2430
2431 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2432 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2433 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2434 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2435
2436 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2437 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2438 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2439 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2440
2441 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2442 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2443 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2444 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2445
2446 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2447 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2448 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2449 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2450
2451 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2452 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2453 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2454 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2455 }
2456
2457 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2458 {
2459 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2460
2461 if (hdev->asic_prop.fw_security_enabled)
2462 return;
2463
2464 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2465 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2466 return;
2467
2468 hbm0_wr = 0x33333333;
2469 hbm0_rd = 0x77777777;
2470 hbm1_wr = 0x55555555;
2471 hbm1_rd = 0xDDDDDDDD;
2472
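	/* The same write/read credit values are applied to all four DMA_IF
	 * instances (E_N/E_S/W_N/W_S), and read/write credit counting is then
	 * enabled through the two HBM_CRED_EN registers of each instance.
	 */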
2473 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2474 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2475 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2476 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2477
2478 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2479 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2480 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2481 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2482
2483 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2484 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2485 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2486 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2487
2488 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2489 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2490 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2491 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2492
2493 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2494 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2495 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2496 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2497 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2498 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2499 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2500 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2501 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2502 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2503 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2504 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2505
2506 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2507 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2508 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2509 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2510 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2511 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2512 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2513 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2514 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2515 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2516 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2517 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2518 }
2519
2520 static void gaudi_init_golden_registers(struct hl_device *hdev)
2521 {
2522 u32 tpc_offset;
2523 int tpc_id, i;
2524
2525 gaudi_init_e2e(hdev);
2526 gaudi_init_hbm_cred(hdev);
2527
2528 for (tpc_id = 0, tpc_offset = 0;
2529 tpc_id < TPC_NUMBER_OF_ENGINES;
2530 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2531 /* Mask all arithmetic interrupts from TPC */
2532 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2533 /* Set 16 cache lines */
2534 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2535 ICACHE_FETCH_LINE_NUM, 2);
2536 }
2537
2538 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2539 for (i = 0 ; i < 128 ; i += 8)
2540 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2541
2542 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2543 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2544 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2545 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2546 }
2547
2548 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2549 int qman_id, dma_addr_t qman_pq_addr)
2550 {
2551 struct cpu_dyn_regs *dyn_regs =
2552 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2553 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2554 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2555 u32 q_off, dma_qm_offset;
2556 u32 dma_qm_err_cfg, irq_handler_offset;
2557
2558 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2559
2560 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2561 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2562 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2563 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2564 so_base_en_lo = lower_32_bits(CFG_BASE +
2565 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2566 so_base_en_hi = upper_32_bits(CFG_BASE +
2567 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2568 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2569 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2570 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2571 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2572 so_base_ws_lo = lower_32_bits(CFG_BASE +
2573 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2574 so_base_ws_hi = upper_32_bits(CFG_BASE +
2575 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2576
2577 q_off = dma_qm_offset + qman_id * 4;
2578
2579 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2580 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2581
2582 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2583 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2584 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2585
2586 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2587 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2588 QMAN_LDMA_SRC_OFFSET);
2589 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2590 QMAN_LDMA_DST_OFFSET);
2591
2592 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2593 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2594 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2595 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2596 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2597 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2598 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2599 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2600
2601 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2602
2603 /* The following configuration is needed only once per QMAN */
2604 if (qman_id == 0) {
2605 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2606 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2607 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2608
2609 /* Configure RAZWI IRQ */
2610 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2611 if (hdev->stop_on_err)
2612 dma_qm_err_cfg |=
2613 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2614
2615 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2616
2617 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2618 lower_32_bits(CFG_BASE + irq_handler_offset));
2619 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2620 upper_32_bits(CFG_BASE + irq_handler_offset));
2621
2622 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2623 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2624 dma_id);
2625
2626 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2627 QM_ARB_ERR_MSG_EN_MASK);
2628
2629 /* Set timeout to maximum */
2630 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2631
2632 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2633 QMAN_EXTERNAL_MAKE_TRUSTED);
2634
2635 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2636 }
2637 }
2638
2639 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2640 {
2641 struct cpu_dyn_regs *dyn_regs =
2642 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2643 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2644 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2645 u32 irq_handler_offset;
2646
2647 /* Set to maximum possible according to physical size */
2648 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2649 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2650
2651 /* WA for H/W bug H3-2116 */
2652 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2653
2654 	/* The STOP_ON bit implies no completion of the operation in case of RAZWI */
2655 if (hdev->stop_on_err)
2656 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2657
2658 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2659
2660 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2661 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2662 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2663
2664 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2665 lower_32_bits(CFG_BASE + irq_handler_offset));
2666 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2667 upper_32_bits(CFG_BASE + irq_handler_offset));
2668
2669 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2670 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2671 WREG32(mmDMA0_CORE_PROT + dma_offset,
2672 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2673 /* If the channel is secured, it should be in MMU bypass mode */
2674 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2675 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2676 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2677 }
2678
2679 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2680 u32 enable_mask)
2681 {
2682 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2683
2684 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2685 }
2686
2687 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2688 {
2689 struct gaudi_device *gaudi = hdev->asic_specific;
2690 struct hl_hw_queue *q;
2691 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2692
2693 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2694 return;
2695
2696 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2697 dma_id = gaudi_dma_assignment[i];
2698 /*
2699 		 * For queues after the CPU queue, we need to add 1 to get the
2700 		 * correct queue index. In addition, we need to account for the
2701 		 * CPU EQ and the NIC IRQs in order to get the correct MSI vector.
2702 */
2703 if (dma_id > 1) {
2704 cpu_skip = 1;
2705 nic_skip = NIC_NUMBER_OF_ENGINES;
2706 } else {
2707 cpu_skip = 0;
2708 nic_skip = 0;
2709 }
2710
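		/*
		 * q_idx follows the GAUDI_QUEUE_ID_* layout: four streams per
		 * DMA channel, with cpu_skip accounting for the CPU queue slot
		 * and nic_skip pushing the MSI vector past the NIC IRQs.
		 */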
2711 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2712 q_idx = 4 * dma_id + j + cpu_skip;
2713 q = &hdev->kernel_queues[q_idx];
2714 q->cq_id = cq_id++;
2715 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2716 gaudi_init_pci_dma_qman(hdev, dma_id, j,
2717 q->bus_address);
2718 }
2719
2720 gaudi_init_dma_core(hdev, dma_id);
2721
2722 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2723 }
2724
2725 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2726 }
2727
2728 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2729 int qman_id, u64 qman_base_addr)
2730 {
2731 struct cpu_dyn_regs *dyn_regs =
2732 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2733 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2734 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2735 u32 dma_qm_err_cfg, irq_handler_offset;
2736 u32 q_off, dma_qm_offset;
2737
2738 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2739
2740 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2741 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2742 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2743 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2744 so_base_en_lo = lower_32_bits(CFG_BASE +
2745 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2746 so_base_en_hi = upper_32_bits(CFG_BASE +
2747 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2748 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2749 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2750 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2751 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2752 so_base_ws_lo = lower_32_bits(CFG_BASE +
2753 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2754 so_base_ws_hi = upper_32_bits(CFG_BASE +
2755 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2756
2757 q_off = dma_qm_offset + qman_id * 4;
2758
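	/* Streams 0-3 use the upper CPs and are given a PQ; qman_id 4
	 * configures the lower CP together with the per-QMAN error and
	 * arbitration registers.
	 */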
2759 if (qman_id < 4) {
2760 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2761 lower_32_bits(qman_base_addr));
2762 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2763 upper_32_bits(qman_base_addr));
2764
2765 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2766 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2767 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2768
2769 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2770 QMAN_CPDMA_SIZE_OFFSET);
2771 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2772 QMAN_CPDMA_SRC_OFFSET);
2773 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2774 QMAN_CPDMA_DST_OFFSET);
2775 } else {
2776 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2777 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2778 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2779
2780 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2781 QMAN_LDMA_SIZE_OFFSET);
2782 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2783 QMAN_LDMA_SRC_OFFSET);
2784 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2785 QMAN_LDMA_DST_OFFSET);
2786
2787 /* Configure RAZWI IRQ */
2788 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2789 if (hdev->stop_on_err)
2790 dma_qm_err_cfg |=
2791 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2792
2793 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2794
2795 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2796 lower_32_bits(CFG_BASE + irq_handler_offset));
2797 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2798 upper_32_bits(CFG_BASE + irq_handler_offset));
2799
2800 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2801 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2802 dma_id);
2803
2804 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2805 QM_ARB_ERR_MSG_EN_MASK);
2806
2807 /* Set timeout to maximum */
2808 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2809
2810 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2811 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2812 QMAN_INTERNAL_MAKE_TRUSTED);
2813 }
2814
2815 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2816 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2817 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2818 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2819
2820 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2821 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2822 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2823 mtr_base_ws_lo);
2824 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2825 mtr_base_ws_hi);
2826 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2827 so_base_ws_lo);
2828 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2829 so_base_ws_hi);
2830 }
2831 }
2832
2833 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2834 {
2835 struct gaudi_device *gaudi = hdev->asic_specific;
2836 struct gaudi_internal_qman_info *q;
2837 u64 qman_base_addr;
2838 int i, j, dma_id, internal_q_index;
2839
2840 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2841 return;
2842
2843 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2844 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2845
2846 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2847 /*
2848 			 * Add 1 for the CPU queue in order to get the correct
2849 			 * queue index, as all internal queues are placed after it
2850 */
2851 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2852
2853 q = &gaudi->internal_qmans[internal_q_index];
2854 qman_base_addr = (u64) q->pq_dma_addr;
2855 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2856 qman_base_addr);
2857 }
2858
2859 /* Initializing lower CP for HBM DMA QMAN */
2860 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2861
2862 gaudi_init_dma_core(hdev, dma_id);
2863
2864 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2865 }
2866
2867 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2868 }
2869
2870 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2871 int qman_id, u64 qman_base_addr)
2872 {
2873 struct cpu_dyn_regs *dyn_regs =
2874 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2875 u32 mtr_base_lo, mtr_base_hi;
2876 u32 so_base_lo, so_base_hi;
2877 u32 irq_handler_offset;
2878 u32 q_off, mme_id;
2879 u32 mme_qm_err_cfg;
2880
2881 mtr_base_lo = lower_32_bits(CFG_BASE +
2882 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2883 mtr_base_hi = upper_32_bits(CFG_BASE +
2884 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2885 so_base_lo = lower_32_bits(CFG_BASE +
2886 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2887 so_base_hi = upper_32_bits(CFG_BASE +
2888 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2889
2890 q_off = mme_offset + qman_id * 4;
2891
2892 if (qman_id < 4) {
2893 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2894 lower_32_bits(qman_base_addr));
2895 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2896 upper_32_bits(qman_base_addr));
2897
2898 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2899 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2900 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2901
2902 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2903 QMAN_CPDMA_SIZE_OFFSET);
2904 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2905 QMAN_CPDMA_SRC_OFFSET);
2906 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2907 QMAN_CPDMA_DST_OFFSET);
2908 } else {
2909 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2910 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2911 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2912
2913 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2914 QMAN_LDMA_SIZE_OFFSET);
2915 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2916 QMAN_LDMA_SRC_OFFSET);
2917 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2918 QMAN_LDMA_DST_OFFSET);
2919
2920 /* Configure RAZWI IRQ */
2921 mme_id = mme_offset /
2922 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2923
2924 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2925 if (hdev->stop_on_err)
2926 mme_qm_err_cfg |=
2927 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2928
2929 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2930
2931 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2932 lower_32_bits(CFG_BASE + irq_handler_offset));
2933 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2934 upper_32_bits(CFG_BASE + irq_handler_offset));
2935
2936 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2937 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2938 mme_id);
2939
2940 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2941 QM_ARB_ERR_MSG_EN_MASK);
2942
2943 /* Set timeout to maximum */
2944 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
2945
2946 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2947 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2948 QMAN_INTERNAL_MAKE_TRUSTED);
2949 }
2950
2951 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2952 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2953 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2954 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2955 }
2956
2957 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2958 {
2959 struct gaudi_device *gaudi = hdev->asic_specific;
2960 struct gaudi_internal_qman_info *q;
2961 u64 qman_base_addr;
2962 u32 mme_offset;
2963 int i, internal_q_index;
2964
2965 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2966 return;
2967
2968 /*
2969 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2970 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2971 */
2972
2973 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2974
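	/*
	 * The loop first programs the four MME2 QM streams (i = 0-3), then
	 * mme_offset is reset to 0 so iterations 4-7 program the MME0 QM
	 * streams, matching the queue-ID mapping described above.
	 */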
2975 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2976 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2977 q = &gaudi->internal_qmans[internal_q_index];
2978 qman_base_addr = (u64) q->pq_dma_addr;
2979 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2980 qman_base_addr);
2981 if (i == 3)
2982 mme_offset = 0;
2983 }
2984
2985 /* Initializing lower CP for MME QMANs */
2986 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2987 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2988 gaudi_init_mme_qman(hdev, 0, 4, 0);
2989
2990 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2991 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2992
2993 gaudi->hw_cap_initialized |= HW_CAP_MME;
2994 }
2995
2996 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2997 int qman_id, u64 qman_base_addr)
2998 {
2999 struct cpu_dyn_regs *dyn_regs =
3000 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3001 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3002 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3003 u32 tpc_qm_err_cfg, irq_handler_offset;
3004 u32 q_off, tpc_id;
3005
3006 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3007 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3008 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3009 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3010 so_base_en_lo = lower_32_bits(CFG_BASE +
3011 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3012 so_base_en_hi = upper_32_bits(CFG_BASE +
3013 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3014 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3015 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3016 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3017 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3018 so_base_ws_lo = lower_32_bits(CFG_BASE +
3019 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3020 so_base_ws_hi = upper_32_bits(CFG_BASE +
3021 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3022
3023 q_off = tpc_offset + qman_id * 4;
3024
3025 tpc_id = tpc_offset /
3026 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3027
3028 if (qman_id < 4) {
3029 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3030 lower_32_bits(qman_base_addr));
3031 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3032 upper_32_bits(qman_base_addr));
3033
3034 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3035 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3036 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3037
3038 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3039 QMAN_CPDMA_SIZE_OFFSET);
3040 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3041 QMAN_CPDMA_SRC_OFFSET);
3042 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3043 QMAN_CPDMA_DST_OFFSET);
3044 } else {
3045 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3046 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3047 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3048
3049 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3050 QMAN_LDMA_SIZE_OFFSET);
3051 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3052 QMAN_LDMA_SRC_OFFSET);
3053 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3054 QMAN_LDMA_DST_OFFSET);
3055
3056 /* Configure RAZWI IRQ */
3057 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3058 if (hdev->stop_on_err)
3059 tpc_qm_err_cfg |=
3060 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3061
3062 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3063
3064 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3065 lower_32_bits(CFG_BASE + irq_handler_offset));
3066 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3067 upper_32_bits(CFG_BASE + irq_handler_offset));
3068
3069 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3070 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3071 tpc_id);
3072
3073 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3074 QM_ARB_ERR_MSG_EN_MASK);
3075
3076 /* Set timeout to maximum */
3077 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3078
3079 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3080 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3081 QMAN_INTERNAL_MAKE_TRUSTED);
3082 }
3083
3084 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3085 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3086 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3087 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3088
3089 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3090 if (tpc_id == 6) {
3091 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3092 mtr_base_ws_lo);
3093 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3094 mtr_base_ws_hi);
3095 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3096 so_base_ws_lo);
3097 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3098 so_base_ws_hi);
3099 }
3100 }
3101
3102 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3103 {
3104 struct gaudi_device *gaudi = hdev->asic_specific;
3105 struct gaudi_internal_qman_info *q;
3106 u64 qman_base_addr;
3107 u32 so_base_hi, tpc_offset = 0;
3108 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3109 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3110 int i, tpc_id, internal_q_index;
3111
3112 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3113 return;
3114
3115 so_base_hi = upper_32_bits(CFG_BASE +
3116 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3117
3118 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3119 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3120 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3121 tpc_id * QMAN_STREAMS + i;
3122 q = &gaudi->internal_qmans[internal_q_index];
3123 qman_base_addr = (u64) q->pq_dma_addr;
3124 gaudi_init_tpc_qman(hdev, tpc_offset, i,
3125 qman_base_addr);
3126
3127 if (i == 3) {
3128 /* Initializing lower CP for TPC QMAN */
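				/* The lower CP has no PQ of its own, so pass 0 as the base address */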
3129 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3130
3131 /* Enable the QMAN and TPC channel */
3132 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3133 QMAN_TPC_ENABLE);
3134 }
3135 }
3136
3137 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3138 so_base_hi);
3139
3140 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3141
3142 gaudi->hw_cap_initialized |=
3143 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3144 }
3145 }
3146
3147 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3148 int qman_id, u64 qman_base_addr, int nic_id)
3149 {
3150 struct cpu_dyn_regs *dyn_regs =
3151 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3152 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3153 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3154 u32 nic_qm_err_cfg, irq_handler_offset;
3155 u32 q_off;
3156
3157 mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3158 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3159 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3160 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3161 so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3162 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3163 so_base_en_hi = upper_32_bits(CFG_BASE +
3164 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3165 mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3166 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3167 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3168 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3169 so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3170 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3171 so_base_ws_hi = upper_32_bits(CFG_BASE +
3172 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3173
3174 q_off = nic_offset + qman_id * 4;
3175
3176 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3177 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3178
3179 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3180 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3181 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3182
3183 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3184 QMAN_LDMA_SIZE_OFFSET);
3185 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3186 QMAN_LDMA_SRC_OFFSET);
3187 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3188 QMAN_LDMA_DST_OFFSET);
3189
3190 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3191 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3192 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3193 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3194
3195 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3196 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3197 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3198 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3199 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3200
3201 if (qman_id == 0) {
3202 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3203 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3204 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3205
3206 /* Configure RAZWI IRQ */
3207 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3208 if (hdev->stop_on_err)
3209 nic_qm_err_cfg |=
3210 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3211
3212 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3213
3214 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3215 lower_32_bits(CFG_BASE + irq_handler_offset));
3216 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3217 upper_32_bits(CFG_BASE + irq_handler_offset));
3218
3219 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3220 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3221 nic_id);
3222
3223 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3224 QM_ARB_ERR_MSG_EN_MASK);
3225
3226 /* Set timeout to maximum */
3227 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3228
3229 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3230 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3231 QMAN_INTERNAL_MAKE_TRUSTED);
3232 }
3233 }
3234
3235 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3236 {
3237 struct gaudi_device *gaudi = hdev->asic_specific;
3238 struct gaudi_internal_qman_info *q;
3239 u64 qman_base_addr;
3240 u32 nic_offset = 0;
3241 u32 nic_delta_between_qmans =
3242 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3243 u32 nic_delta_between_nics =
3244 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3245 int i, nic_id, internal_q_index;
3246
3247 if (!hdev->nic_ports_mask)
3248 return;
3249
3250 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3251 return;
3252
3253 dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3254
3255 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3256 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3257 nic_offset += nic_delta_between_qmans;
3258 if (nic_id & 1) {
3259 nic_offset -= (nic_delta_between_qmans * 2);
3260 nic_offset += nic_delta_between_nics;
3261 }
3262 continue;
3263 }
3264
3265 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3266 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3267 nic_id * QMAN_STREAMS + i;
3268 q = &gaudi->internal_qmans[internal_q_index];
3269 qman_base_addr = (u64) q->pq_dma_addr;
3270 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3271 qman_base_addr, nic_id);
3272 }
3273
3274 /* Enable the QMAN */
3275 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3276
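		/*
		 * Each NIC macro hosts two QMANs. After an odd-numbered port,
		 * rewind the two QMAN strides that were added for this macro
		 * and advance by the NIC-to-NIC stride instead.
		 */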
3277 nic_offset += nic_delta_between_qmans;
3278 if (nic_id & 1) {
3279 nic_offset -= (nic_delta_between_qmans * 2);
3280 nic_offset += nic_delta_between_nics;
3281 }
3282
3283 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3284 }
3285 }
3286
3287 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3288 {
3289 struct gaudi_device *gaudi = hdev->asic_specific;
3290
3291 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3292 return;
3293
3294 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3295 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3296 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3297 }
3298
3299 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3300 {
3301 struct gaudi_device *gaudi = hdev->asic_specific;
3302
3303 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3304 return;
3305
3306 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3307 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3308 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3309 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3310 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3311 }
3312
3313 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3314 {
3315 struct gaudi_device *gaudi = hdev->asic_specific;
3316
3317 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3318 return;
3319
3320 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3321 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3322 }
3323
3324 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3325 {
3326 struct gaudi_device *gaudi = hdev->asic_specific;
3327 u32 tpc_offset = 0;
3328 int tpc_id;
3329
3330 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3331 return;
3332
3333 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3334 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3335 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3336 }
3337 }
3338
3339 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3340 {
3341 struct gaudi_device *gaudi = hdev->asic_specific;
3342 u32 nic_mask, nic_offset = 0;
3343 u32 nic_delta_between_qmans =
3344 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3345 u32 nic_delta_between_nics =
3346 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3347 int nic_id;
3348
3349 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3350 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3351
3352 if (gaudi->hw_cap_initialized & nic_mask)
3353 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3354
3355 nic_offset += nic_delta_between_qmans;
3356 if (nic_id & 1) {
3357 nic_offset -= (nic_delta_between_qmans * 2);
3358 nic_offset += nic_delta_between_nics;
3359 }
3360 }
3361 }
3362
3363 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3364 {
3365 struct gaudi_device *gaudi = hdev->asic_specific;
3366
3367 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3368 return;
3369
3370 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3371 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3372 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3373 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3374 }
3375
3376 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3377 {
3378 struct gaudi_device *gaudi = hdev->asic_specific;
3379
3380 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3381 return;
3382
3383 /* Stop CPs of HBM DMA QMANs */
3384
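	/* 0x1F stops all five CPs: the four upper (stream) CPs plus the lower CP */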
3385 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3386 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3387 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3388 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3389 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3390 }
3391
3392 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3393 {
3394 struct gaudi_device *gaudi = hdev->asic_specific;
3395
3396 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3397 return;
3398
3399 /* Stop CPs of MME QMANs */
3400 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3401 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3402 }
3403
3404 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3405 {
3406 struct gaudi_device *gaudi = hdev->asic_specific;
3407
3408 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3409 return;
3410
3411 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3412 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3416 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3417 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3418 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3419 }
3420
3421 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3422 {
3423 struct gaudi_device *gaudi = hdev->asic_specific;
3424
3425 /* Stop upper CPs of QMANs */
3426
3427 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3428 WREG32(mmNIC0_QM0_GLBL_CFG1,
3429 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3430 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3431 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3432
3433 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3434 WREG32(mmNIC0_QM1_GLBL_CFG1,
3435 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3436 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3437 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3438
3439 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3440 WREG32(mmNIC1_QM0_GLBL_CFG1,
3441 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3442 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3443 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3444
3445 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3446 WREG32(mmNIC1_QM1_GLBL_CFG1,
3447 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3448 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3449 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3450
3451 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3452 WREG32(mmNIC2_QM0_GLBL_CFG1,
3453 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3454 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3455 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3456
3457 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3458 WREG32(mmNIC2_QM1_GLBL_CFG1,
3459 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3460 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3461 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3462
3463 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3464 WREG32(mmNIC3_QM0_GLBL_CFG1,
3465 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3466 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3467 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3468
3469 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3470 WREG32(mmNIC3_QM1_GLBL_CFG1,
3471 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3472 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3473 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3474
3475 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3476 WREG32(mmNIC4_QM0_GLBL_CFG1,
3477 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3478 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3479 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3480
3481 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3482 WREG32(mmNIC4_QM1_GLBL_CFG1,
3483 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3484 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3485 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3486 }
3487
3488 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3489 {
3490 struct gaudi_device *gaudi = hdev->asic_specific;
3491
3492 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3493 return;
3494
3495 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3496 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3497 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3498 }
3499
3500 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3501 {
3502 struct gaudi_device *gaudi = hdev->asic_specific;
3503
3504 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3505 return;
3506
3507 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3508 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3509 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3510 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3511 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3512 }
3513
3514 static void gaudi_mme_stall(struct hl_device *hdev)
3515 {
3516 struct gaudi_device *gaudi = hdev->asic_specific;
3517
3518 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3519 return;
3520
3521 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3522 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3523 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3524 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3525 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3526 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3527 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3528 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3529 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3530 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3531 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3532 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3533 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3534 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3535 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3536 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3537 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3538 }
3539
3540 static void gaudi_tpc_stall(struct hl_device *hdev)
3541 {
3542 struct gaudi_device *gaudi = hdev->asic_specific;
3543
3544 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3545 return;
3546
3547 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3552 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3553 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3554 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3555 }
3556
3557 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3558 {
3559 u32 qman_offset;
3560 int i;
3561
3562 if (hdev->asic_prop.fw_security_enabled)
3563 return;
3564
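	/*
	 * Writing 0 to the CGM_CFG/CGM_CFG1 pair disables the clock gating
	 * machine of each QMAN. When firmware security is enabled these
	 * registers are left to the firmware, hence the early return above.
	 */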
3565 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3566 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3567 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3568
3569 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3570 }
3571
3572 WREG32(mmMME0_QM_CGM_CFG, 0);
3573 WREG32(mmMME0_QM_CGM_CFG1, 0);
3574 WREG32(mmMME2_QM_CGM_CFG, 0);
3575 WREG32(mmMME2_QM_CGM_CFG1, 0);
3576
3577 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3578 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3579 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3580
3581 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3582 }
3583 }
3584
3585 static void gaudi_enable_timestamp(struct hl_device *hdev)
3586 {
3587 /* Disable the timestamp counter */
3588 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3589
3590 /* Zero the lower/upper parts of the 64-bit counter */
3591 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3592 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3593
3594 /* Enable the counter */
3595 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3596 }
3597
3598 static void gaudi_disable_timestamp(struct hl_device *hdev)
3599 {
3600 /* Disable the timestamp counter */
3601 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3602 }
3603
3604 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3605 {
3606 u32 wait_timeout_ms;
3607
3608 if (hdev->pldm)
3609 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3610 else
3611 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3612
3613 if (fw_reset)
3614 goto skip_engines;
3615
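	/*
	 * Halt order: first stop the QMANs so no new work is fetched, wait,
	 * then stall the engines themselves, wait again, and finally disable
	 * the QMANs and the timestamp counter.
	 */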
3616 gaudi_stop_nic_qmans(hdev);
3617 gaudi_stop_mme_qmans(hdev);
3618 gaudi_stop_tpc_qmans(hdev);
3619 gaudi_stop_hbm_dma_qmans(hdev);
3620 gaudi_stop_pci_dma_qmans(hdev);
3621
3622 msleep(wait_timeout_ms);
3623
3624 gaudi_pci_dma_stall(hdev);
3625 gaudi_hbm_dma_stall(hdev);
3626 gaudi_tpc_stall(hdev);
3627 gaudi_mme_stall(hdev);
3628
3629 msleep(wait_timeout_ms);
3630
3631 gaudi_disable_nic_qmans(hdev);
3632 gaudi_disable_mme_qmans(hdev);
3633 gaudi_disable_tpc_qmans(hdev);
3634 gaudi_disable_hbm_dma_qmans(hdev);
3635 gaudi_disable_pci_dma_qmans(hdev);
3636
3637 gaudi_disable_timestamp(hdev);
3638
3639 skip_engines:
3640 gaudi_disable_msi(hdev);
3641 }
3642
3643 static int gaudi_mmu_init(struct hl_device *hdev)
3644 {
3645 struct asic_fixed_properties *prop = &hdev->asic_prop;
3646 struct gaudi_device *gaudi = hdev->asic_specific;
3647 u64 hop0_addr;
3648 int rc, i;
3649
3650 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3651 return 0;
3652
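	/*
	 * Each ASID gets its own hop-0 table, laid out consecutively in the
	 * MMU page-table area.
	 */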
3653 for (i = 0 ; i < prop->max_asid ; i++) {
3654 hop0_addr = prop->mmu_pgt_addr +
3655 (i * prop->mmu_hop_table_size);
3656
3657 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3658 if (rc) {
3659 dev_err(hdev->dev,
3660 "failed to set hop0 addr for asid %d\n", i);
3661 return rc;
3662 }
3663 }
3664
3665 /* init MMU cache manage page */
3666 WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3667 WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3668
3669 /* mem cache invalidation */
3670 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3671
3672 rc = hl_mmu_invalidate_cache(hdev, true, 0);
3673 if (rc)
3674 return rc;
3675
3676 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3677 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3678
3679 WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3680
3681 /*
3682 * The H/W expects the first PI after init to be 1. After wraparound
3683 * we'll write 0.
3684 */
3685 gaudi->mmu_cache_inv_pi = 1;
3686
3687 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3688
3689 return 0;
3690 }
3691
3692 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3693 {
3694 void __iomem *dst;
3695
3696 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3697
3698 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3699 }
3700
3701 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3702 {
3703 void __iomem *dst;
3704
3705 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3706
3707 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3708 }
3709
3710 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3711 {
3712 struct dynamic_fw_load_mgr *dynamic_loader;
3713 struct cpu_dyn_regs *dyn_regs;
3714
3715 dynamic_loader = &hdev->fw_loader.dynamic_loader;
3716
3717 /*
3718 	 * Here we update initial values for a few specific dynamic regs (before
3719 	 * reading the first descriptor from FW, those values have to be
3720 	 * hard-coded). In later stages of the protocol these values will be
3721 	 * updated automatically by reading the FW descriptor, so the data there
3722 	 * will always be up-to-date.
3723 */
3724 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3725 dyn_regs->kmd_msg_to_cpu =
3726 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3727 dyn_regs->cpu_cmd_status_to_host =
3728 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3729
3730 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3731 }
3732
3733 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3734 {
3735 struct static_fw_load_mgr *static_loader;
3736
3737 static_loader = &hdev->fw_loader.static_loader;
3738
3739 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3740 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3741 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3742 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3743 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3744 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3745 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3746 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3747 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3748 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3749 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3750 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3751 static_loader->cpu_reset_wait_msec = hdev->pldm ?
3752 GAUDI_PLDM_RESET_WAIT_MSEC :
3753 GAUDI_CPU_RESET_WAIT_MSEC;
3754 }
3755
3756 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3757 {
3758 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3759
3760 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3761 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3762 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3763 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3764 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3765 pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3766 }
3767
3768 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3769 {
3770 struct asic_fixed_properties *prop = &hdev->asic_prop;
3771 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3772
3773 /* fill common fields */
3774 fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3775 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3776 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3777 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3778 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3779 fw_loader->skip_bmc = !hdev->bmc_enable;
3780 fw_loader->sram_bar_id = SRAM_BAR_ID;
3781 fw_loader->dram_bar_id = HBM_BAR_ID;
3782
3783 if (prop->dynamic_fw_load)
3784 gaudi_init_dynamic_firmware_loader(hdev);
3785 else
3786 gaudi_init_static_firmware_loader(hdev);
3787 }
3788
3789 static int gaudi_init_cpu(struct hl_device *hdev)
3790 {
3791 struct gaudi_device *gaudi = hdev->asic_specific;
3792 int rc;
3793
3794 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3795 return 0;
3796
3797 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3798 return 0;
3799
3800 /*
3801 * The device CPU works with 40 bits addresses.
3802 * This register sets the extension to 50 bits.
3803 */
3804 if (!hdev->asic_prop.fw_security_enabled)
3805 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3806
3807 rc = hl_fw_init_cpu(hdev);
3808
3809 if (rc)
3810 return rc;
3811
3812 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3813
3814 return 0;
3815 }
3816
3817 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3818 {
3819 struct cpu_dyn_regs *dyn_regs =
3820 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3821 struct asic_fixed_properties *prop = &hdev->asic_prop;
3822 struct gaudi_device *gaudi = hdev->asic_specific;
3823 u32 status, irq_handler_offset;
3824 struct hl_eq *eq;
3825 struct hl_hw_queue *cpu_pq =
3826 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3827 int err;
3828
3829 if (!hdev->cpu_queues_enable)
3830 return 0;
3831
3832 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3833 return 0;
3834
3835 eq = &hdev->event_queue;
3836
3837 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3838 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3839
3840 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3841 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3842
3843 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3844 lower_32_bits(hdev->cpu_accessible_dma_address));
3845 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3846 upper_32_bits(hdev->cpu_accessible_dma_address));
3847
3848 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3849 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3850 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3851
3852 /* Used for EQ CI */
3853 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3854
3855 WREG32(mmCPU_IF_PF_PQ_PI, 0);
3856
3857 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3858
3859 irq_handler_offset = prop->gic_interrupts_enable ?
3860 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3861 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3862
3863 WREG32(irq_handler_offset,
3864 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3865
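	/*
	 * The addresses and sizes written above are consumed by the device CPU
	 * once it is signaled through QUEUE_INIT and the PI-update interrupt;
	 * poll QUEUE_INIT until the CPU acknowledges with READY_FOR_HOST.
	 */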
3866 err = hl_poll_timeout(
3867 hdev,
3868 mmCPU_IF_QUEUE_INIT,
3869 status,
3870 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3871 1000,
3872 cpu_timeout);
3873
3874 if (err) {
3875 dev_err(hdev->dev,
3876 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3877 return -EIO;
3878 }
3879
3880 /* update FW application security bits */
3881 if (prop->fw_cpu_boot_dev_sts0_valid)
3882 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3883 if (prop->fw_cpu_boot_dev_sts1_valid)
3884 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3885
3886 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3887 return 0;
3888 }
3889
3890 static void gaudi_pre_hw_init(struct hl_device *hdev)
3891 {
3892 /* Perform read from the device to make sure device is up */
3893 RREG32(mmHW_STATE);
3894
3895 if (!hdev->asic_prop.fw_security_enabled) {
3896 /* Set the access through PCI bars (Linux driver only) as
3897 * secured
3898 */
3899 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3900 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3901 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3902
3903 /* Perform read to flush the waiting writes to ensure
3904 * configuration was set in the device
3905 */
3906 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3907 }
3908
3909 /*
3910 * Let's mark in the H/W that we have reached this point. We check
3911 * this value in the reset_before_init function to understand whether
3912 * we need to reset the chip before doing H/W init. This register is
3913 * cleared by the H/W upon H/W reset
3914 */
3915 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3916 }
3917
3918 static int gaudi_hw_init(struct hl_device *hdev)
3919 {
3920 struct gaudi_device *gaudi = hdev->asic_specific;
3921 int rc;
3922
3923 gaudi_pre_hw_init(hdev);
3924
3925 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3926 * So we set it here and if anyone tries to move it later to
3927 * a different address, there will be an error
3928 */
3929 if (hdev->asic_prop.iatu_done_by_fw)
3930 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3931
3932 /*
3933 * Before pushing u-boot/linux to device, need to set the hbm bar to
3934 * base address of dram
3935 */
3936 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
3937 dev_err(hdev->dev,
3938 "failed to map HBM bar to DRAM base address\n");
3939 return -EIO;
3940 }
3941
3942 rc = gaudi_init_cpu(hdev);
3943 if (rc) {
3944 dev_err(hdev->dev, "failed to initialize CPU\n");
3945 return rc;
3946 }
3947
3948 /* In case the clock gating was enabled in preboot we need to disable
3949 * it here before touching the MME/TPC registers.
3950 */
3951 gaudi_disable_clock_gating(hdev);
3952
3953 /* SRAM scrambler must be initialized after CPU is running from HBM */
3954 gaudi_init_scrambler_sram(hdev);
3955
3956 /* This is here just in case we are working without CPU */
3957 gaudi_init_scrambler_hbm(hdev);
3958
3959 gaudi_init_golden_registers(hdev);
3960
3961 rc = gaudi_mmu_init(hdev);
3962 if (rc)
3963 return rc;
3964
3965 gaudi_init_security(hdev);
3966
3967 gaudi_init_pci_dma_qmans(hdev);
3968
3969 gaudi_init_hbm_dma_qmans(hdev);
3970
3971 gaudi_init_mme_qmans(hdev);
3972
3973 gaudi_init_tpc_qmans(hdev);
3974
3975 gaudi_init_nic_qmans(hdev);
3976
3977 gaudi_enable_timestamp(hdev);
3978
3979 /* MSI must be enabled before CPU queues and NIC are initialized */
3980 rc = gaudi_enable_msi(hdev);
3981 if (rc)
3982 goto disable_queues;
3983
3984 /* must be called after MSI was enabled */
3985 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3986 if (rc) {
3987 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3988 rc);
3989 goto disable_msi;
3990 }
3991
3992 /* Perform read from the device to flush all configuration */
3993 RREG32(mmHW_STATE);
3994
3995 return 0;
3996
3997 disable_msi:
3998 gaudi_disable_msi(hdev);
3999 disable_queues:
4000 gaudi_disable_mme_qmans(hdev);
4001 gaudi_disable_pci_dma_qmans(hdev);
4002
4003 return rc;
4004 }
4005
4006 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4007 {
4008 struct cpu_dyn_regs *dyn_regs =
4009 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4010 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4011 struct gaudi_device *gaudi = hdev->asic_specific;
4012 bool driver_performs_reset;
4013
4014 if (!hard_reset) {
4015 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4016 return 0;
4017 }
4018
4019 if (hdev->pldm) {
4020 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4021 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4022 } else {
4023 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4024 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4025 }
4026
4027 if (fw_reset) {
4028 dev_dbg(hdev->dev,
4029 "Firmware performs HARD reset, going to wait %dms\n",
4030 reset_timeout_ms);
4031
4032 goto skip_reset;
4033 }
4034
4035 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4036 !hdev->asic_prop.hard_reset_done_by_fw);
4037
4038 /* Set device to handle FLR by H/W as we will put the device CPU to
4039 * halt mode
4040 */
4041 if (driver_performs_reset)
4042 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4043 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4044
4045 /* If linux is loaded in the device CPU we need to communicate with it
4046 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4047 * registers in case of old F/Ws
4048 */
4049 if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4050 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4051 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4052 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4053
4054 WREG32(irq_handler_offset,
4055 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4056
4057 /* This is a hail-mary attempt to revive the card in the small chance that the
4058 * f/w has experienced a watchdog event, which caused it to return back to preboot.
4059 * In that case, triggering reset through GIC won't help. We need to trigger the
4060 * reset as if Linux wasn't loaded.
4061 *
4062 * We do it only if the reset cause was HB, because that would be the indication
4063 * of such an event.
4064 *
4065 * In case watchdog hasn't expired but we still got HB, then this won't do any
4066 * damage.
4067 */
4068 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4069 if (hdev->asic_prop.hard_reset_done_by_fw)
4070 hl_fw_ask_hard_reset_without_linux(hdev);
4071 else
4072 hl_fw_ask_halt_machine_without_linux(hdev);
4073 }
4074 } else {
4075 if (hdev->asic_prop.hard_reset_done_by_fw)
4076 hl_fw_ask_hard_reset_without_linux(hdev);
4077 else
4078 hl_fw_ask_halt_machine_without_linux(hdev);
4079 }
4080
4081 if (driver_performs_reset) {
4082
4083 /* Configure the reset registers. Must be done as early as
4084 * possible in case we fail during H/W initialization
4085 */
4086 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4087 (CFG_RST_H_DMA_MASK |
4088 CFG_RST_H_MME_MASK |
4089 CFG_RST_H_SM_MASK |
4090 CFG_RST_H_TPC_7_MASK));
4091
4092 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4093
4094 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4095 (CFG_RST_H_HBM_MASK |
4096 CFG_RST_H_TPC_7_MASK |
4097 CFG_RST_H_NIC_MASK |
4098 CFG_RST_H_SM_MASK |
4099 CFG_RST_H_DMA_MASK |
4100 CFG_RST_H_MME_MASK |
4101 CFG_RST_H_CPU_MASK |
4102 CFG_RST_H_MMU_MASK));
4103
4104 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4105 (CFG_RST_L_IF_MASK |
4106 CFG_RST_L_PSOC_MASK |
4107 CFG_RST_L_TPC_MASK));
4108
4109 msleep(cpu_timeout_ms);
4110
4111 /* Tell ASIC not to re-initialize PCIe */
4112 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4113
4114 /* Restart BTL/BLR upon hard-reset */
4115 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4116
4117 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4118 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4119
4120 dev_dbg(hdev->dev,
4121 "Issued HARD reset command, going to wait %dms\n",
4122 reset_timeout_ms);
4123 } else {
4124 dev_dbg(hdev->dev,
4125 "Firmware performs HARD reset, going to wait %dms\n",
4126 reset_timeout_ms);
4127 }
4128
4129 skip_reset:
4130 /*
4131 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4132 * itself is in reset. Need to wait until the reset is deasserted
4133 */
4134 msleep(reset_timeout_ms);
4135
4136 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4137 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
4138 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
4139 return -ETIMEDOUT;
4140 }
4141
4142 if (gaudi) {
4143 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4144 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4145 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4146 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4147 HW_CAP_HBM_SCRAMBLER);
4148
4149 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4150
4151 hdev->device_cpu_is_halted = false;
4152 }
4153 return 0;
4154 }
4155
4156 static int gaudi_suspend(struct hl_device *hdev)
4157 {
4158 int rc;
4159
4160 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4161 if (rc)
4162 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4163
4164 return rc;
4165 }
4166
4167 static int gaudi_resume(struct hl_device *hdev)
4168 {
4169 return gaudi_init_iatu(hdev);
4170 }
4171
4172 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4173 void *cpu_addr, dma_addr_t dma_addr, size_t size)
4174 {
4175 int rc;
4176
4177 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4178 VM_DONTCOPY | VM_NORESERVE);
4179
4180 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4181 (dma_addr - HOST_PHYS_BASE), size);
4182 if (rc)
4183 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4184
4185 return rc;
4186 }
4187
4188 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4189 {
4190 struct cpu_dyn_regs *dyn_regs =
4191 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4192 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4193 struct gaudi_device *gaudi = hdev->asic_specific;
4194 bool invalid_queue = false;
4195 int dma_id;
4196
4197 switch (hw_queue_id) {
4198 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4199 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4200 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4201 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4202 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4203 break;
4204
4205 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4206 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4207 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4208 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4209 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4210 break;
4211
4212 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4213 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4214 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4215 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4216 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4217 break;
4218
4219 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4220 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4221 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4222 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4223 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4224 break;
4225
4226 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4227 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4228 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4229 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4230 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4231 break;
4232
4233 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4234 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4235 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4236 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4237 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4238 break;
4239
4240 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4241 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4242 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4243 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4244 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4245 break;
4246
4247 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4248 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4249 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4250 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4251 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4252 break;
4253
4254 case GAUDI_QUEUE_ID_CPU_PQ:
4255 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4256 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4257 else
4258 invalid_queue = true;
4259 break;
4260
4261 case GAUDI_QUEUE_ID_MME_0_0:
4262 db_reg_offset = mmMME2_QM_PQ_PI_0;
4263 break;
4264
4265 case GAUDI_QUEUE_ID_MME_0_1:
4266 db_reg_offset = mmMME2_QM_PQ_PI_1;
4267 break;
4268
4269 case GAUDI_QUEUE_ID_MME_0_2:
4270 db_reg_offset = mmMME2_QM_PQ_PI_2;
4271 break;
4272
4273 case GAUDI_QUEUE_ID_MME_0_3:
4274 db_reg_offset = mmMME2_QM_PQ_PI_3;
4275 break;
4276
4277 case GAUDI_QUEUE_ID_MME_1_0:
4278 db_reg_offset = mmMME0_QM_PQ_PI_0;
4279 break;
4280
4281 case GAUDI_QUEUE_ID_MME_1_1:
4282 db_reg_offset = mmMME0_QM_PQ_PI_1;
4283 break;
4284
4285 case GAUDI_QUEUE_ID_MME_1_2:
4286 db_reg_offset = mmMME0_QM_PQ_PI_2;
4287 break;
4288
4289 case GAUDI_QUEUE_ID_MME_1_3:
4290 db_reg_offset = mmMME0_QM_PQ_PI_3;
4291 break;
4292
4293 case GAUDI_QUEUE_ID_TPC_0_0:
4294 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4295 break;
4296
4297 case GAUDI_QUEUE_ID_TPC_0_1:
4298 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4299 break;
4300
4301 case GAUDI_QUEUE_ID_TPC_0_2:
4302 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4303 break;
4304
4305 case GAUDI_QUEUE_ID_TPC_0_3:
4306 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4307 break;
4308
4309 case GAUDI_QUEUE_ID_TPC_1_0:
4310 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4311 break;
4312
4313 case GAUDI_QUEUE_ID_TPC_1_1:
4314 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4315 break;
4316
4317 case GAUDI_QUEUE_ID_TPC_1_2:
4318 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4319 break;
4320
4321 case GAUDI_QUEUE_ID_TPC_1_3:
4322 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4323 break;
4324
4325 case GAUDI_QUEUE_ID_TPC_2_0:
4326 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4327 break;
4328
4329 case GAUDI_QUEUE_ID_TPC_2_1:
4330 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4331 break;
4332
4333 case GAUDI_QUEUE_ID_TPC_2_2:
4334 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4335 break;
4336
4337 case GAUDI_QUEUE_ID_TPC_2_3:
4338 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4339 break;
4340
4341 case GAUDI_QUEUE_ID_TPC_3_0:
4342 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4343 break;
4344
4345 case GAUDI_QUEUE_ID_TPC_3_1:
4346 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4347 break;
4348
4349 case GAUDI_QUEUE_ID_TPC_3_2:
4350 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4351 break;
4352
4353 case GAUDI_QUEUE_ID_TPC_3_3:
4354 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4355 break;
4356
4357 case GAUDI_QUEUE_ID_TPC_4_0:
4358 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4359 break;
4360
4361 case GAUDI_QUEUE_ID_TPC_4_1:
4362 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4363 break;
4364
4365 case GAUDI_QUEUE_ID_TPC_4_2:
4366 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4367 break;
4368
4369 case GAUDI_QUEUE_ID_TPC_4_3:
4370 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4371 break;
4372
4373 case GAUDI_QUEUE_ID_TPC_5_0:
4374 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4375 break;
4376
4377 case GAUDI_QUEUE_ID_TPC_5_1:
4378 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4379 break;
4380
4381 case GAUDI_QUEUE_ID_TPC_5_2:
4382 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4383 break;
4384
4385 case GAUDI_QUEUE_ID_TPC_5_3:
4386 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4387 break;
4388
4389 case GAUDI_QUEUE_ID_TPC_6_0:
4390 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4391 break;
4392
4393 case GAUDI_QUEUE_ID_TPC_6_1:
4394 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4395 break;
4396
4397 case GAUDI_QUEUE_ID_TPC_6_2:
4398 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4399 break;
4400
4401 case GAUDI_QUEUE_ID_TPC_6_3:
4402 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4403 break;
4404
4405 case GAUDI_QUEUE_ID_TPC_7_0:
4406 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4407 break;
4408
4409 case GAUDI_QUEUE_ID_TPC_7_1:
4410 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4411 break;
4412
4413 case GAUDI_QUEUE_ID_TPC_7_2:
4414 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4415 break;
4416
4417 case GAUDI_QUEUE_ID_TPC_7_3:
4418 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4419 break;
4420
4421 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4422 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4423 invalid_queue = true;
4424
4425 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4426 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4427 break;
4428
4429 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4430 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4431 invalid_queue = true;
4432
4433 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4434 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4435 break;
4436
4437 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4438 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4439 invalid_queue = true;
4440
4441 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4442 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4443 break;
4444
4445 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4446 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4447 invalid_queue = true;
4448
4449 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4450 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4451 break;
4452
4453 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4454 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4455 invalid_queue = true;
4456
4457 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4458 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4459 break;
4460
4461 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4462 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4463 invalid_queue = true;
4464
4465 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4466 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4467 break;
4468
4469 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4470 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4471 invalid_queue = true;
4472
4473 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4474 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4475 break;
4476
4477 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4478 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4479 invalid_queue = true;
4480
4481 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4482 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4483 break;
4484
4485 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4486 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4487 invalid_queue = true;
4488
4489 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4490 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4491 break;
4492
4493 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4494 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4495 invalid_queue = true;
4496
4497 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4498 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4499 break;
4500
4501 default:
4502 invalid_queue = true;
4503 }
4504
4505 if (invalid_queue) {
4506 /* Should never get here */
4507 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4508 hw_queue_id);
4509 return;
4510 }
4511
4512 db_value = pi;
4513
4514 /* ring the doorbell */
4515 WREG32(db_reg_offset, db_value);
4516
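	/*
	 * For the CPU queue, updating the PI register is not enough; the device
	 * CPU must also be interrupted so it notices the new PI.
	 */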
4517 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4518 /* make sure device CPU will read latest data from host */
4519 mb();
4520
4521 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4522 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4523 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4524
4525 WREG32(irq_handler_offset,
4526 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4527 }
4528 }
4529
4530 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4531 struct hl_bd *bd)
4532 {
4533 __le64 *pbd = (__le64 *) bd;
4534
4535 	/* The QMANs are on host memory so a simple copy suffices */
4536 pqe[0] = pbd[0];
4537 pqe[1] = pbd[1];
4538 }
4539
4540 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4541 dma_addr_t *dma_handle, gfp_t flags)
4542 {
4543 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4544 dma_handle, flags);
4545
4546 /* Shift to the device's base physical address of host memory */
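	/*
	 * The device accesses host memory through a window based at
	 * HOST_PHYS_BASE, so the handle handed to the device is offset here and
	 * the offset is removed again in gaudi_dma_free_coherent() below.
	 */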
4547 if (kernel_addr)
4548 *dma_handle += HOST_PHYS_BASE;
4549
4550 return kernel_addr;
4551 }
4552
4553 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4554 void *cpu_addr, dma_addr_t dma_handle)
4555 {
4556 /* Cancel the device's base physical address of host memory */
4557 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4558
4559 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4560 }
4561
4562 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4563 {
4564 struct asic_fixed_properties *prop = &hdev->asic_prop;
4565 u64 cur_addr = prop->dram_user_base_address;
4566 u32 chunk_size, busy;
4567 int rc, dma_id;
4568
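	/*
	 * Scrub DRAM in chunks of up to 2GB, dispatching one chunk per DMA
	 * channel as a MEM_SET transfer, then poll every channel for idle
	 * before issuing the next batch.
	 */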
4569 while (cur_addr < prop->dram_end_address) {
4570 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4571 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4572
4573 chunk_size =
4574 min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4575
4576 dev_dbg(hdev->dev,
4577 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4578 cur_addr, cur_addr + chunk_size);
4579
4580 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4581 lower_32_bits(val));
4582 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4583 upper_32_bits(val));
4584 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4585 lower_32_bits(cur_addr));
4586 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4587 upper_32_bits(cur_addr));
4588 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4589 chunk_size);
4590 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4591 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4592 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4593
4594 cur_addr += chunk_size;
4595
4596 if (cur_addr == prop->dram_end_address)
4597 break;
4598 }
4599
4600 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4601 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4602
4603 rc = hl_poll_timeout(
4604 hdev,
4605 mmDMA0_CORE_STS0 + dma_offset,
4606 busy,
4607 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4608 1000,
4609 HBM_SCRUBBING_TIMEOUT_US);
4610
4611 if (rc) {
4612 dev_err(hdev->dev,
4613 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4614 dma_id);
4615 return -EIO;
4616 }
4617 }
4618 }
4619
4620 return 0;
4621 }
4622
4623 static int gaudi_scrub_device_mem(struct hl_device *hdev)
4624 {
4625 struct asic_fixed_properties *prop = &hdev->asic_prop;
4626 u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
4627 u64 addr, size, val = hdev->memory_scrub_val;
4628 ktime_t timeout;
4629 int rc = 0;
4630
4631 if (!hdev->memory_scrub)
4632 return 0;
4633
4634 timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4635 while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4636 if (ktime_compare(ktime_get(), timeout) > 0) {
4637 dev_err(hdev->dev, "waiting for idle timeout\n");
4638 return -ETIMEDOUT;
4639 }
4640 usleep_range((1000 >> 2) + 1, 1000);
4641 }
4642
4643 /* Scrub SRAM */
4644 addr = prop->sram_user_base_address;
4645 size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4646
4647 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4648 addr, addr + size, val);
4649 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4650 if (rc) {
4651 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4652 return rc;
4653 }
4654
4655 /* Scrub HBM using all DMA channels in parallel */
4656 rc = gaudi_scrub_device_dram(hdev, val);
4657 if (rc) {
4658 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4659 return rc;
4660 }
4661
4662 return 0;
4663 }
4664
4665 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4666 u32 queue_id, dma_addr_t *dma_handle,
4667 u16 *queue_len)
4668 {
4669 struct gaudi_device *gaudi = hdev->asic_specific;
4670 struct gaudi_internal_qman_info *q;
4671
4672 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4673 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4674 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4675 return NULL;
4676 }
4677
4678 q = &gaudi->internal_qmans[queue_id];
4679 *dma_handle = q->pq_dma_addr;
4680 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4681
4682 return q->pq_kernel_addr;
4683 }
4684
4685 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4686 u16 len, u32 timeout, u64 *result)
4687 {
4688 struct gaudi_device *gaudi = hdev->asic_specific;
4689
4690 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4691 if (result)
4692 *result = 0;
4693 return 0;
4694 }
4695
4696 if (!timeout)
4697 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4698
4699 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4700 timeout, result);
4701 }
4702
4703 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4704 {
4705 struct packet_msg_prot *fence_pkt;
4706 dma_addr_t pkt_dma_addr;
4707 u32 fence_val, tmp, timeout_usec;
4708 dma_addr_t fence_dma_addr;
4709 u32 *fence_ptr;
4710 int rc;
4711
4712 if (hdev->pldm)
4713 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4714 else
4715 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4716
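	/*
	 * The queue is tested by sending a MSG_PROT packet that writes a known
	 * fence value to a scratch DWORD in host memory, then polling that
	 * DWORD until the value shows up or the timeout expires.
	 */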
4717 fence_val = GAUDI_QMAN0_FENCE_VAL;
4718
4719 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4720 if (!fence_ptr) {
4721 dev_err(hdev->dev,
4722 "Failed to allocate memory for H/W queue %d testing\n",
4723 hw_queue_id);
4724 return -ENOMEM;
4725 }
4726
4727 *fence_ptr = 0;
4728
4729 fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4730 &pkt_dma_addr);
4731 if (!fence_pkt) {
4732 dev_err(hdev->dev,
4733 "Failed to allocate packet for H/W queue %d testing\n",
4734 hw_queue_id);
4735 rc = -ENOMEM;
4736 goto free_fence_ptr;
4737 }
4738
4739 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4740 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4741 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4742
4743 fence_pkt->ctl = cpu_to_le32(tmp);
4744 fence_pkt->value = cpu_to_le32(fence_val);
4745 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4746
4747 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4748 sizeof(struct packet_msg_prot),
4749 pkt_dma_addr);
4750 if (rc) {
4751 dev_err(hdev->dev,
4752 "Failed to send fence packet to H/W queue %d\n",
4753 hw_queue_id);
4754 goto free_pkt;
4755 }
4756
4757 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4758 1000, timeout_usec, true);
4759
4760 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4761
4762 if (rc == -ETIMEDOUT) {
4763 dev_err(hdev->dev,
4764 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4765 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4766 rc = -EIO;
4767 }
4768
4769 free_pkt:
4770 hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4771 free_fence_ptr:
4772 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4773 return rc;
4774 }
4775
4776 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4777 {
4778 struct gaudi_device *gaudi = hdev->asic_specific;
4779
4780 /*
4781 * check capability here as send_cpu_message() won't update the result
4782 * value if no capability
4783 */
4784 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4785 return 0;
4786
4787 return hl_fw_test_cpu_queue(hdev);
4788 }
4789
4790 static int gaudi_test_queues(struct hl_device *hdev)
4791 {
4792 int i, rc, ret_val = 0;
4793
4794 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4795 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4796 rc = gaudi_test_queue(hdev, i);
4797 if (rc)
4798 ret_val = -EINVAL;
4799 }
4800 }
4801
4802 rc = gaudi_test_cpu_queue(hdev);
4803 if (rc)
4804 ret_val = -EINVAL;
4805
4806 return ret_val;
4807 }
4808
4809 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4810 gfp_t mem_flags, dma_addr_t *dma_handle)
4811 {
4812 void *kernel_addr;
4813
4814 if (size > GAUDI_DMA_POOL_BLK_SIZE)
4815 return NULL;
4816
4817 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4818
4819 /* Shift to the device's base physical address of host memory */
4820 if (kernel_addr)
4821 *dma_handle += HOST_PHYS_BASE;
4822
4823 return kernel_addr;
4824 }
4825
4826 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4827 dma_addr_t dma_addr)
4828 {
4829 /* Cancel the device's base physical address of host memory */
4830 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4831
4832 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4833 }
4834
4835 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4836 size_t size, dma_addr_t *dma_handle)
4837 {
4838 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4839 }
4840
4841 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4842 size_t size, void *vaddr)
4843 {
4844 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4845 }
4846
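/*
 * Compute how many bytes of LIN_DMA packets are needed to cover the given SG
 * table: DMA-contiguous entries are merged as long as the combined length
 * does not exceed DMA_MAX_TRANSFER_SIZE, and each resulting descriptor costs
 * one packet_lin_dma.
 */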
4847 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4848 {
4849 struct scatterlist *sg, *sg_next_iter;
4850 u32 count, dma_desc_cnt;
4851 u64 len, len_next;
4852 dma_addr_t addr, addr_next;
4853
4854 dma_desc_cnt = 0;
4855
4856 for_each_sgtable_dma_sg(sgt, sg, count) {
4857 len = sg_dma_len(sg);
4858 addr = sg_dma_address(sg);
4859
4860 if (len == 0)
4861 break;
4862
4863 while ((count + 1) < sgt->nents) {
4864 sg_next_iter = sg_next(sg);
4865 len_next = sg_dma_len(sg_next_iter);
4866 addr_next = sg_dma_address(sg_next_iter);
4867
4868 if (len_next == 0)
4869 break;
4870
4871 if ((addr + len == addr_next) &&
4872 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4873 len += len_next;
4874 count++;
4875 sg = sg_next_iter;
4876 } else {
4877 break;
4878 }
4879 }
4880
4881 dma_desc_cnt++;
4882 }
4883
4884 return dma_desc_cnt * sizeof(struct packet_lin_dma);
4885 }
4886
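/*
 * Pin the user host memory referenced by a LIN_DMA packet (unless it is
 * already pinned for this job), DMA-map it, and add the size of the
 * descriptor list it will require to the patched CB size.
 */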
4887 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4888 struct hl_cs_parser *parser,
4889 struct packet_lin_dma *user_dma_pkt,
4890 u64 addr, enum dma_data_direction dir)
4891 {
4892 struct hl_userptr *userptr;
4893 int rc;
4894
4895 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4896 parser->job_userptr_list, &userptr))
4897 goto already_pinned;
4898
4899 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4900 if (!userptr)
4901 return -ENOMEM;
4902
4903 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4904 userptr);
4905 if (rc)
4906 goto free_userptr;
4907
4908 list_add_tail(&userptr->job_node, parser->job_userptr_list);
4909
4910 rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
4911 if (rc) {
4912 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4913 goto unpin_memory;
4914 }
4915
4916 userptr->dma_mapped = true;
4917 userptr->dir = dir;
4918
4919 already_pinned:
4920 parser->patched_cb_size +=
4921 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4922
4923 return 0;
4924
4925 unpin_memory:
4926 list_del(&userptr->job_node);
4927 hl_unpin_host_memory(hdev, userptr);
4928 free_userptr:
4929 kfree(userptr);
4930 return rc;
4931 }
4932
4933 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4934 struct hl_cs_parser *parser,
4935 struct packet_lin_dma *user_dma_pkt,
4936 bool src_in_host)
4937 {
4938 enum dma_data_direction dir;
4939 bool skip_host_mem_pin = false, user_memset;
4940 u64 addr;
4941 int rc = 0;
4942
4943 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4944 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4945 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4946
4947 if (src_in_host) {
4948 if (user_memset)
4949 skip_host_mem_pin = true;
4950
4951 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4952 dir = DMA_TO_DEVICE;
4953 addr = le64_to_cpu(user_dma_pkt->src_addr);
4954 } else {
4955 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4956 dir = DMA_FROM_DEVICE;
4957 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4958 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4959 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4960 }
4961
4962 if (skip_host_mem_pin)
4963 parser->patched_cb_size += sizeof(*user_dma_pkt);
4964 else
4965 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4966 addr, dir);
4967
4968 return rc;
4969 }
4970
4971 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4972 struct hl_cs_parser *parser,
4973 struct packet_lin_dma *user_dma_pkt)
4974 {
4975 bool src_in_host = false;
4976 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4977 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4978 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4979
4980 dev_dbg(hdev->dev, "DMA packet details:\n");
4981 dev_dbg(hdev->dev, "source == 0x%llx\n",
4982 le64_to_cpu(user_dma_pkt->src_addr));
4983 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4984 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4985
4986 /*
4987 * Special handling for DMA with size 0. Bypass all validations
4988 * because no transactions will be done except for WR_COMP, which
4989 * is not a security issue
4990 */
4991 if (!le32_to_cpu(user_dma_pkt->tsize)) {
4992 parser->patched_cb_size += sizeof(*user_dma_pkt);
4993 return 0;
4994 }
4995
4996 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4997 src_in_host = true;
4998
4999 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5000 src_in_host);
5001 }
5002
5003 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5004 struct hl_cs_parser *parser,
5005 struct packet_load_and_exe *user_pkt)
5006 {
5007 u32 cfg;
5008
5009 cfg = le32_to_cpu(user_pkt->cfg);
5010
5011 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5012 dev_err(hdev->dev,
5013 "User not allowed to use Load and Execute\n");
5014 return -EPERM;
5015 }
5016
5017 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5018
5019 return 0;
5020 }
5021
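/*
 * Walk the user CB packet by packet: reject privileged packets (MSG_PROT,
 * CP_DMA, STOP, WREG_BULK), validate LOAD_AND_EXE and LIN_DMA packets, and
 * accumulate the size the patched CB will need, including the extra space
 * for the end-of-CB packets when a completion is requested.
 */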
5022 static int gaudi_validate_cb(struct hl_device *hdev,
5023 struct hl_cs_parser *parser, bool is_mmu)
5024 {
5025 u32 cb_parsed_length = 0;
5026 int rc = 0;
5027
5028 parser->patched_cb_size = 0;
5029
5030 	/* user_cb_size is more than 0 so the loop will always be executed */
5031 while (cb_parsed_length < parser->user_cb_size) {
5032 enum packet_id pkt_id;
5033 u16 pkt_size;
5034 struct gaudi_packet *user_pkt;
5035
5036 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5037
5038 pkt_id = (enum packet_id) (
5039 (le64_to_cpu(user_pkt->header) &
5040 PACKET_HEADER_PACKET_ID_MASK) >>
5041 PACKET_HEADER_PACKET_ID_SHIFT);
5042
5043 if (!validate_packet_id(pkt_id)) {
5044 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5045 rc = -EINVAL;
5046 break;
5047 }
5048
5049 pkt_size = gaudi_packet_sizes[pkt_id];
5050 cb_parsed_length += pkt_size;
5051 if (cb_parsed_length > parser->user_cb_size) {
5052 dev_err(hdev->dev,
5053 "packet 0x%x is out of CB boundary\n", pkt_id);
5054 rc = -EINVAL;
5055 break;
5056 }
5057
5058 switch (pkt_id) {
5059 case PACKET_MSG_PROT:
5060 dev_err(hdev->dev,
5061 "User not allowed to use MSG_PROT\n");
5062 rc = -EPERM;
5063 break;
5064
5065 case PACKET_CP_DMA:
5066 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5067 rc = -EPERM;
5068 break;
5069
5070 case PACKET_STOP:
5071 dev_err(hdev->dev, "User not allowed to use STOP\n");
5072 rc = -EPERM;
5073 break;
5074
5075 case PACKET_WREG_BULK:
5076 dev_err(hdev->dev,
5077 "User not allowed to use WREG_BULK\n");
5078 rc = -EPERM;
5079 break;
5080
5081 case PACKET_LOAD_AND_EXE:
5082 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5083 (struct packet_load_and_exe *) user_pkt);
5084 break;
5085
5086 case PACKET_LIN_DMA:
5087 parser->contains_dma_pkt = true;
5088 if (is_mmu)
5089 parser->patched_cb_size += pkt_size;
5090 else
5091 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5092 (struct packet_lin_dma *) user_pkt);
5093 break;
5094
5095 case PACKET_WREG_32:
5096 case PACKET_MSG_LONG:
5097 case PACKET_MSG_SHORT:
5098 case PACKET_REPEAT:
5099 case PACKET_FENCE:
5100 case PACKET_NOP:
5101 case PACKET_ARB_POINT:
5102 parser->patched_cb_size += pkt_size;
5103 break;
5104
5105 default:
5106 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5107 pkt_id);
5108 rc = -EINVAL;
5109 break;
5110 }
5111
5112 if (rc)
5113 break;
5114 }
5115
5116 	/*
5117 	 * The new CB should have space at the end for:
5118 	 * 1. Optional NOP padding for cacheline alignment
5119 	 * 2. A MSG_PROT packet that will act as the completion packet
5120 	 * 3. A MSG_PROT packet that will generate the MSI interrupt
5121 	 */
5122 if (parser->completion)
5123 parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5124 parser->patched_cb_size);
5125
5126 return rc;
5127 }
5128
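/*
 * Expand a single user LIN_DMA packet into one LIN_DMA packet per (merged)
 * SG entry of the pinned host memory. All but the first generated packet
 * have the engine-barrier bit cleared, and only the last one restores the
 * user's WR_COMP setting.
 */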
5129 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5130 struct hl_cs_parser *parser,
5131 struct packet_lin_dma *user_dma_pkt,
5132 struct packet_lin_dma *new_dma_pkt,
5133 u32 *new_dma_pkt_size)
5134 {
5135 struct hl_userptr *userptr;
5136 struct scatterlist *sg, *sg_next_iter;
5137 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5138 u64 len, len_next;
5139 dma_addr_t dma_addr, dma_addr_next;
5140 u64 device_memory_addr, addr;
5141 enum dma_data_direction dir;
5142 struct sg_table *sgt;
5143 bool src_in_host = false;
5144 bool skip_host_mem_pin = false;
5145 bool user_memset;
5146
5147 ctl = le32_to_cpu(user_dma_pkt->ctl);
5148
5149 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5150 src_in_host = true;
5151
5152 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5153 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5154
5155 if (src_in_host) {
5156 addr = le64_to_cpu(user_dma_pkt->src_addr);
5157 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5158 dir = DMA_TO_DEVICE;
5159 if (user_memset)
5160 skip_host_mem_pin = true;
5161 } else {
5162 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5163 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5164 dir = DMA_FROM_DEVICE;
5165 }
5166
5167 if ((!skip_host_mem_pin) &&
5168 (!hl_userptr_is_pinned(hdev, addr,
5169 le32_to_cpu(user_dma_pkt->tsize),
5170 parser->job_userptr_list, &userptr))) {
5171 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5172 			addr, le32_to_cpu(user_dma_pkt->tsize));
5173 return -EFAULT;
5174 }
5175
5176 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5177 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5178 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5179 return 0;
5180 }
5181
5182 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5183
5184 sgt = userptr->sgt;
5185 dma_desc_cnt = 0;
5186
5187 for_each_sgtable_dma_sg(sgt, sg, count) {
5188 len = sg_dma_len(sg);
5189 dma_addr = sg_dma_address(sg);
5190
5191 if (len == 0)
5192 break;
5193
5194 while ((count + 1) < sgt->nents) {
5195 sg_next_iter = sg_next(sg);
5196 len_next = sg_dma_len(sg_next_iter);
5197 dma_addr_next = sg_dma_address(sg_next_iter);
5198
5199 if (len_next == 0)
5200 break;
5201
5202 if ((dma_addr + len == dma_addr_next) &&
5203 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5204 len += len_next;
5205 count++;
5206 sg = sg_next_iter;
5207 } else {
5208 break;
5209 }
5210 }
5211
5212 ctl = le32_to_cpu(user_dma_pkt->ctl);
5213 if (likely(dma_desc_cnt))
5214 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5215 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5216 new_dma_pkt->ctl = cpu_to_le32(ctl);
5217 new_dma_pkt->tsize = cpu_to_le32(len);
5218
5219 if (dir == DMA_TO_DEVICE) {
5220 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5221 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5222 } else {
5223 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5224 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5225 }
5226
5227 if (!user_memset)
5228 device_memory_addr += len;
5229 dma_desc_cnt++;
5230 new_dma_pkt++;
5231 }
5232
5233 if (!dma_desc_cnt) {
5234 dev_err(hdev->dev,
5235 			"No SG entries found when patching DMA packet\n");
5236 return -EFAULT;
5237 }
5238
5239 /* Fix the last dma packet - wrcomp must be as user set it */
5240 new_dma_pkt--;
5241 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5242
5243 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5244
5245 return 0;
5246 }
5247
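/*
 * Copy the user CB into the kernel-allocated patched CB, replacing each
 * LIN_DMA packet with the descriptor list produced by
 * gaudi_patch_dma_packet() and rejecting privileged packets.
 */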
5248 static int gaudi_patch_cb(struct hl_device *hdev,
5249 struct hl_cs_parser *parser)
5250 {
5251 u32 cb_parsed_length = 0;
5252 u32 cb_patched_cur_length = 0;
5253 int rc = 0;
5254
5255 	/* user_cb_size is more than 0 so the loop will always be executed */
5256 while (cb_parsed_length < parser->user_cb_size) {
5257 enum packet_id pkt_id;
5258 u16 pkt_size;
5259 u32 new_pkt_size = 0;
5260 struct gaudi_packet *user_pkt, *kernel_pkt;
5261
5262 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5263 kernel_pkt = parser->patched_cb->kernel_address +
5264 cb_patched_cur_length;
5265
5266 pkt_id = (enum packet_id) (
5267 (le64_to_cpu(user_pkt->header) &
5268 PACKET_HEADER_PACKET_ID_MASK) >>
5269 PACKET_HEADER_PACKET_ID_SHIFT);
5270
5271 if (!validate_packet_id(pkt_id)) {
5272 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5273 rc = -EINVAL;
5274 break;
5275 }
5276
5277 pkt_size = gaudi_packet_sizes[pkt_id];
5278 cb_parsed_length += pkt_size;
5279 if (cb_parsed_length > parser->user_cb_size) {
5280 dev_err(hdev->dev,
5281 "packet 0x%x is out of CB boundary\n", pkt_id);
5282 rc = -EINVAL;
5283 break;
5284 }
5285
5286 switch (pkt_id) {
5287 case PACKET_LIN_DMA:
5288 rc = gaudi_patch_dma_packet(hdev, parser,
5289 (struct packet_lin_dma *) user_pkt,
5290 (struct packet_lin_dma *) kernel_pkt,
5291 &new_pkt_size);
5292 cb_patched_cur_length += new_pkt_size;
5293 break;
5294
5295 case PACKET_MSG_PROT:
5296 dev_err(hdev->dev,
5297 "User not allowed to use MSG_PROT\n");
5298 rc = -EPERM;
5299 break;
5300
5301 case PACKET_CP_DMA:
5302 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5303 rc = -EPERM;
5304 break;
5305
5306 case PACKET_STOP:
5307 dev_err(hdev->dev, "User not allowed to use STOP\n");
5308 rc = -EPERM;
5309 break;
5310
5311 case PACKET_WREG_32:
5312 case PACKET_WREG_BULK:
5313 case PACKET_MSG_LONG:
5314 case PACKET_MSG_SHORT:
5315 case PACKET_REPEAT:
5316 case PACKET_FENCE:
5317 case PACKET_NOP:
5318 case PACKET_ARB_POINT:
5319 case PACKET_LOAD_AND_EXE:
5320 memcpy(kernel_pkt, user_pkt, pkt_size);
5321 cb_patched_cur_length += pkt_size;
5322 break;
5323
5324 default:
5325 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5326 pkt_id);
5327 rc = -EINVAL;
5328 break;
5329 }
5330
5331 if (rc)
5332 break;
5333 }
5334
5335 return rc;
5336 }
5337
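/*
 * MMU parsing path: allocate a patched CB large enough for the user CB plus
 * the end-of-CB packets, copy the user CB into it and validate the copy in
 * place. DMA addresses are left as-is because the MMU translates them.
 */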
5338 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5339 struct hl_cs_parser *parser)
5340 {
5341 u64 handle;
5342 u32 patched_cb_size;
5343 struct hl_cb *user_cb;
5344 int rc;
5345
5346 	/*
5347 	 * The new CB should have space at the end for:
5348 	 * 1. Optional NOP padding for cacheline alignment
5349 	 * 2. A MSG_PROT packet that will act as the completion packet
5350 	 * 3. A MSG_PROT packet that will generate the MSI interrupt
5351 	 */
5352 if (parser->completion)
5353 parser->patched_cb_size = parser->user_cb_size +
5354 gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5355 else
5356 parser->patched_cb_size = parser->user_cb_size;
5357
5358 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5359 parser->patched_cb_size, false, false,
5360 &handle);
5361
5362 if (rc) {
5363 dev_err(hdev->dev,
5364 "Failed to allocate patched CB for DMA CS %d\n",
5365 rc);
5366 return rc;
5367 }
5368
5369 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5370 /* hl_cb_get should never fail */
5371 if (!parser->patched_cb) {
5372 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5373 rc = -EFAULT;
5374 goto out;
5375 }
5376
5377 /*
5378 * We are protected from overflow because the check
5379 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5380 * in the common code. That check is done only if is_kernel_allocated_cb is true.
5381 *
5382 * There is no option to reach here without going through that check because:
5383 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5384 * an external queue.
5385 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5386 */
5387 memcpy(parser->patched_cb->kernel_address,
5388 parser->user_cb->kernel_address,
5389 parser->user_cb_size);
5390
5391 patched_cb_size = parser->patched_cb_size;
5392
5393 /* Validate patched CB instead of user CB */
5394 user_cb = parser->user_cb;
5395 parser->user_cb = parser->patched_cb;
5396 rc = gaudi_validate_cb(hdev, parser, true);
5397 parser->user_cb = user_cb;
5398
5399 if (rc) {
5400 hl_cb_put(parser->patched_cb);
5401 goto out;
5402 }
5403
5404 if (patched_cb_size != parser->patched_cb_size) {
5405 dev_err(hdev->dev, "user CB size mismatch\n");
5406 hl_cb_put(parser->patched_cb);
5407 rc = -EINVAL;
5408 goto out;
5409 }
5410
5411 out:
5412 	/*
5413 	 * Always call cb destroy here because we still hold one reference
5414 	 * to it from the earlier cb_get. After the job is completed,
5415 	 * cb_put will release it, but here we want to remove it from the
5416 	 * idr.
5417 	 */
5418 hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5419
5420 return rc;
5421 }
5422
5423 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5424 struct hl_cs_parser *parser)
5425 {
5426 u64 handle;
5427 int rc;
5428
5429 rc = gaudi_validate_cb(hdev, parser, false);
5430
5431 if (rc)
5432 goto free_userptr;
5433
5434 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5435 parser->patched_cb_size, false, false,
5436 &handle);
5437 if (rc) {
5438 dev_err(hdev->dev,
5439 "Failed to allocate patched CB for DMA CS %d\n", rc);
5440 goto free_userptr;
5441 }
5442
5443 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5444 /* hl_cb_get should never fail here */
5445 if (!parser->patched_cb) {
5446 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5447 rc = -EFAULT;
5448 goto out;
5449 }
5450
5451 rc = gaudi_patch_cb(hdev, parser);
5452
5453 if (rc)
5454 hl_cb_put(parser->patched_cb);
5455
5456 out:
5457 	/*
5458 	 * Always call cb destroy here because we still hold one reference
5459 	 * to it from the earlier cb_get. After the job is completed,
5460 	 * cb_put will release it, but here we want to remove it from the
5461 	 * idr.
5462 	 */
5463 hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5464
5465 free_userptr:
5466 if (rc)
5467 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5468 return rc;
5469 }
5470
5471 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5472 struct hl_cs_parser *parser)
5473 {
5474 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5475 struct gaudi_device *gaudi = hdev->asic_specific;
5476 u32 nic_queue_offset, nic_mask_q_id;
5477
5478 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5479 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5480 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5481 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5482
5483 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5484 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5485 return -EINVAL;
5486 }
5487 }
5488
5489 /* For internal queue jobs just check if CB address is valid */
5490 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5491 parser->user_cb_size,
5492 asic_prop->sram_user_base_address,
5493 asic_prop->sram_end_address))
5494 return 0;
5495
5496 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5497 parser->user_cb_size,
5498 asic_prop->dram_user_base_address,
5499 asic_prop->dram_end_address))
5500 return 0;
5501
5502 /* PMMU and HPMMU addresses are equal, check only one of them */
5503 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5504 parser->user_cb_size,
5505 asic_prop->pmmu.start_addr,
5506 asic_prop->pmmu.end_addr))
5507 return 0;
5508
5509 dev_err(hdev->dev,
5510 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5511 parser->user_cb, parser->user_cb_size);
5512
5513 return -EFAULT;
5514 }
5515
5516 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5517 {
5518 struct gaudi_device *gaudi = hdev->asic_specific;
5519
5520 if (parser->queue_type == QUEUE_TYPE_INT)
5521 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5522
5523 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5524 return gaudi_parse_cb_mmu(hdev, parser);
5525 else
5526 return gaudi_parse_cb_no_mmu(hdev, parser);
5527 }
5528
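/*
 * Append the end-of-CB packets to a patched CB: NOP padding from the end of
 * the original contents up to the last two packet slots, then a MSG_PROT
 * packet that writes the completion value to the CQ and a second MSG_PROT
 * packet that triggers the MSI interrupt.
 */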
5529 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5530 u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5531 u32 msi_vec, bool eb)
5532 {
5533 struct packet_msg_prot *cq_pkt;
5534 struct packet_nop *cq_padding;
5535 u64 msi_addr;
5536 u32 tmp;
5537
5538 cq_padding = kernel_address + original_len;
5539 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5540
5541 while ((void *)cq_padding < (void *)cq_pkt) {
5542 cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5543 cq_padding++;
5544 }
5545
5546 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5547 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5548
5549 if (eb)
5550 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5551
5552 cq_pkt->ctl = cpu_to_le32(tmp);
5553 cq_pkt->value = cpu_to_le32(cq_val);
5554 cq_pkt->addr = cpu_to_le64(cq_addr);
5555
5556 cq_pkt++;
5557
5558 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5559 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5560 cq_pkt->ctl = cpu_to_le32(tmp);
5561 cq_pkt->value = cpu_to_le32(1);
5562 msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
5563 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5564 }
5565
5566 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5567 {
5568 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5569 }
5570
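/*
 * Fill a device memory range with a 64-bit value by building a memset-mode
 * LIN_DMA packet in a kernel CB and sending it as a job on QMAN0 (DMA
 * channel 0). DMA0 error causes are checked before and after the transfer.
 */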
5571 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5572 u32 size, u64 val)
5573 {
5574 struct packet_lin_dma *lin_dma_pkt;
5575 struct hl_cs_job *job;
5576 u32 cb_size, ctl, err_cause;
5577 struct hl_cb *cb;
5578 int rc;
5579
5580 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5581 if (!cb)
5582 return -EFAULT;
5583
5584 lin_dma_pkt = cb->kernel_address;
5585 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5586 cb_size = sizeof(*lin_dma_pkt);
5587
5588 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5589 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5590 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5591 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5592 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5593
5594 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5595 lin_dma_pkt->src_addr = cpu_to_le64(val);
5596 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5597 lin_dma_pkt->tsize = cpu_to_le32(size);
5598
5599 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5600 if (!job) {
5601 dev_err(hdev->dev, "Failed to allocate a new job\n");
5602 rc = -ENOMEM;
5603 goto release_cb;
5604 }
5605
5606 /* Verify DMA is OK */
5607 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5608 if (err_cause && !hdev->init_done) {
5609 dev_dbg(hdev->dev,
5610 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5611 err_cause);
5612 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5613 }
5614
5615 job->id = 0;
5616 job->user_cb = cb;
5617 atomic_inc(&job->user_cb->cs_cnt);
5618 job->user_cb_size = cb_size;
5619 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5620 job->patched_cb = job->user_cb;
5621 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5622
5623 hl_debugfs_add_job(hdev, job);
5624
5625 rc = gaudi_send_job_on_qman0(hdev, job);
5626 hl_debugfs_remove_job(hdev, job);
5627 kfree(job);
5628 atomic_dec(&cb->cs_cnt);
5629
5630 /* Verify DMA is OK */
5631 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5632 if (err_cause) {
5633 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5634 rc = -EIO;
5635 if (!hdev->init_done) {
5636 dev_dbg(hdev->dev,
5637 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5638 err_cause);
5639 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5640 }
5641 }
5642
5643 release_cb:
5644 hl_cb_put(cb);
5645 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5646
5647 return rc;
5648 }
5649
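/*
 * Write the same value to num_regs consecutive 4-byte registers by building
 * a CB of MSG_LONG packets (one per register) and sending it on QMAN0.
 */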
5650 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5651 u32 num_regs, u32 val)
5652 {
5653 struct packet_msg_long *pkt;
5654 struct hl_cs_job *job;
5655 u32 cb_size, ctl;
5656 struct hl_cb *cb;
5657 int i, rc;
5658
5659 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5660
5661 if (cb_size > SZ_2M) {
5662 		dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M >> 20);
5663 return -ENOMEM;
5664 }
5665
5666 cb = hl_cb_kernel_create(hdev, cb_size, false);
5667 if (!cb)
5668 return -EFAULT;
5669
5670 pkt = cb->kernel_address;
5671
5672 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5673 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5674 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5675 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5676 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5677
5678 for (i = 0; i < num_regs ; i++, pkt++) {
5679 pkt->ctl = cpu_to_le32(ctl);
5680 pkt->value = cpu_to_le32(val);
5681 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5682 }
5683
5684 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5685 if (!job) {
5686 dev_err(hdev->dev, "Failed to allocate a new job\n");
5687 rc = -ENOMEM;
5688 goto release_cb;
5689 }
5690
5691 job->id = 0;
5692 job->user_cb = cb;
5693 atomic_inc(&job->user_cb->cs_cnt);
5694 job->user_cb_size = cb_size;
5695 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5696 job->patched_cb = job->user_cb;
5697 job->job_cb_size = cb_size;
5698
5699 hl_debugfs_add_job(hdev, job);
5700
5701 rc = gaudi_send_job_on_qman0(hdev, job);
5702 hl_debugfs_remove_job(hdev, job);
5703 kfree(job);
5704 atomic_dec(&cb->cs_cnt);
5705
5706 release_cb:
5707 hl_cb_put(cb);
5708 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5709
5710 return rc;
5711 }
5712
5713 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5714 {
5715 u64 base_addr;
5716 u32 num_regs;
5717 int rc;
5718
5719 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5720 num_regs = NUM_OF_SOB_IN_BLOCK;
5721 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5722 if (rc) {
5723 dev_err(hdev->dev, "failed resetting SM registers");
5724 return -ENOMEM;
5725 }
5726
5727 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5728 num_regs = NUM_OF_SOB_IN_BLOCK;
5729 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5730 if (rc) {
5731 dev_err(hdev->dev, "failed resetting SM registers");
5732 return -ENOMEM;
5733 }
5734
5735 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5736 num_regs = NUM_OF_SOB_IN_BLOCK;
5737 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5738 if (rc) {
5739 dev_err(hdev->dev, "failed resetting SM registers");
5740 return -ENOMEM;
5741 }
5742
5743 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5744 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5745 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5746 if (rc) {
5747 dev_err(hdev->dev, "failed resetting SM registers");
5748 return -ENOMEM;
5749 }
5750
5751 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5752 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5753 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5754 if (rc) {
5755 dev_err(hdev->dev, "failed resetting SM registers");
5756 return -ENOMEM;
5757 }
5758
5759 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5760 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5761 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5762 if (rc) {
5763 dev_err(hdev->dev, "failed resetting SM registers");
5764 return -ENOMEM;
5765 }
5766
5767 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5768 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5769 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5770 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5771 if (rc) {
5772 dev_err(hdev->dev, "failed resetting SM registers");
5773 return -ENOMEM;
5774 }
5775
5776 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5777 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5778 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5779 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5780 if (rc) {
5781 dev_err(hdev->dev, "failed resetting SM registers");
5782 return -ENOMEM;
5783 }
5784
5785 return 0;
5786 }
5787
5788 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5789 {
5790 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5791 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5792 int i;
5793
5794 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5795 u64 sob_addr = CFG_BASE +
5796 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5797 (i * sob_delta);
5798 u32 dma_offset = i * DMA_CORE_OFFSET;
5799
5800 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5801 lower_32_bits(sob_addr));
5802 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5803 upper_32_bits(sob_addr));
5804 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5805
5806 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5807 * modified by the user for SRAM reduction
5808 */
5809 if (i > 1)
5810 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5811 0x00000001);
5812 }
5813 }
5814
5815 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5816 {
5817 u32 qman_offset;
5818 int i;
5819
5820 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5821 qman_offset = i * DMA_QMAN_OFFSET;
5822 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5823 }
5824
5825 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5826 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5827 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5828 }
5829
5830 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5831 qman_offset = i * TPC_QMAN_OFFSET;
5832 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5833 }
5834
5835 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5836 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5837 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5838 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5839 }
5840 }
5841
5842 static int gaudi_restore_user_registers(struct hl_device *hdev)
5843 {
5844 int rc;
5845
5846 rc = gaudi_restore_sm_registers(hdev);
5847 if (rc)
5848 return rc;
5849
5850 gaudi_restore_dma_registers(hdev);
5851 gaudi_restore_qm_registers(hdev);
5852
5853 return 0;
5854 }
5855
5856 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5857 {
5858 return 0;
5859 }
5860
5861 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5862 {
5863 u32 size = hdev->asic_prop.mmu_pgt_size +
5864 hdev->asic_prop.mmu_cache_mng_size;
5865 struct gaudi_device *gaudi = hdev->asic_specific;
5866 u64 addr = hdev->asic_prop.mmu_pgt_addr;
5867
5868 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5869 return 0;
5870
5871 return gaudi_memset_device_memory(hdev, addr, size, 0);
5872 }
5873
5874 static void gaudi_restore_phase_topology(struct hl_device *hdev)
5875 {
5876
5877 }
5878
5879 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5880 u32 size_to_dma, dma_addr_t dma_addr)
5881 {
5882 u32 err_cause, val;
5883 u64 dma_offset;
5884 int rc;
5885
5886 dma_offset = dma_id * DMA_CORE_OFFSET;
5887
5888 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5889 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5890 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5891 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5892 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5893 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5894 (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5895
5896 rc = hl_poll_timeout(
5897 hdev,
5898 mmDMA0_CORE_STS0 + dma_offset,
5899 val,
5900 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5901 0,
5902 1000000);
5903
5904 if (rc) {
5905 dev_err(hdev->dev,
5906 			"DMA %d timed out while reading 0x%llx\n",
5907 dma_id, addr);
5908 return -EIO;
5909 }
5910
5911 /* Verify DMA is OK */
5912 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5913 if (err_cause) {
5914 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5915 dev_dbg(hdev->dev,
5916 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5917 err_cause);
5918 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5919
5920 return -EIO;
5921 }
5922
5923 return 0;
5924 }
5925
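/*
 * Read device memory into a host buffer for debugfs. An idle PCI DMA channel
 * is chosen (PCI_DMA_1, falling back to PCI_DMA_2), its QMAN CPs are stopped
 * and the core PROT bit is set for the duration of the transfer, and the
 * data is copied out through a 2MB bounce buffer, chunk by chunk.
 */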
5926 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
5927 void *blob_addr)
5928 {
5929 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
5930 u32 qm_glbl_sts0, qm_cgm_sts;
5931 u64 dma_offset, qm_offset;
5932 dma_addr_t dma_addr;
5933 void *kernel_addr;
5934 bool is_eng_idle;
5935 int rc = 0, dma_id;
5936
5937 kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
5938
5939 if (!kernel_addr)
5940 return -ENOMEM;
5941
5942 hdev->asic_funcs->hw_queues_lock(hdev);
5943
5944 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
5945 dma_offset = dma_id * DMA_CORE_OFFSET;
5946 qm_offset = dma_id * DMA_QMAN_OFFSET;
5947 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5948 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5949 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5950 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5951 IS_DMA_IDLE(dma_core_sts0);
5952
5953 if (!is_eng_idle) {
5954 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
5955 dma_offset = dma_id * DMA_CORE_OFFSET;
5956 qm_offset = dma_id * DMA_QMAN_OFFSET;
5957 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5958 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5959 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5960 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5961 IS_DMA_IDLE(dma_core_sts0);
5962
5963 if (!is_eng_idle) {
5964 dev_err_ratelimited(hdev->dev,
5965 "Can't read via DMA because it is BUSY\n");
5966 rc = -EAGAIN;
5967 goto out;
5968 }
5969 }
5970
5971 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
5972 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
5973 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
5974
5975 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
5976 * using the compute ctx ASID, if exists. If not, use the kernel ctx
5977 * ASID
5978 */
5979 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
5980
5981 /* Verify DMA is OK */
5982 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5983 if (err_cause) {
5984 dev_dbg(hdev->dev,
5985 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5986 err_cause);
5987 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5988 }
5989
5990 pos = 0;
5991 size_left = size;
5992 size_to_dma = SZ_2M;
5993
5994 while (size_left > 0) {
5995
5996 if (size_left < SZ_2M)
5997 size_to_dma = size_left;
5998
5999 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6000 dma_addr);
6001 if (rc)
6002 break;
6003
6004 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6005
6006 if (size_left <= SZ_2M)
6007 break;
6008
6009 pos += SZ_2M;
6010 addr += SZ_2M;
6011 size_left -= SZ_2M;
6012 }
6013
6014 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6015 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6016 * ASID
6017 */
6018 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6019 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6020
6021 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6022
6023 out:
6024 hdev->asic_funcs->hw_queues_unlock(hdev);
6025
6026 hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6027
6028 return rc;
6029 }
6030
6031 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6032 {
6033 struct gaudi_device *gaudi = hdev->asic_specific;
6034
6035 if (hdev->reset_info.hard_reset_pending)
6036 return U64_MAX;
6037
6038 return readq(hdev->pcie_bar[HBM_BAR_ID] +
6039 (addr - gaudi->hbm_bar_cur_addr));
6040 }
6041
6042 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6043 {
6044 struct gaudi_device *gaudi = hdev->asic_specific;
6045
6046 if (hdev->reset_info.hard_reset_pending)
6047 return;
6048
6049 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6050 (addr - gaudi->hbm_bar_cur_addr));
6051 }
6052
6053 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6054 {
6055 /* mask to zero the MMBP and ASID bits */
6056 WREG32_AND(reg, ~0x7FF);
6057 WREG32_OR(reg, asid);
6058 }
6059
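/*
 * Program the given ASID into the non-secure properties / AXI user registers
 * of all engines (DMA QMANs and cores, TPCs, MMEs, the enabled NIC QMANs and
 * the PSOC trace unit) so their transactions are issued under this context.
 */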
6060 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6061 {
6062 struct gaudi_device *gaudi = hdev->asic_specific;
6063
6064 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6065 return;
6066
6067 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6068 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6069 return;
6070 }
6071
6072 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6073 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6074 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6075 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6076 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6077
6078 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6079 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6080 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6081 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6082 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6083
6084 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6085 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6086 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6087 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6088 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6089
6090 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6091 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6092 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6093 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6094 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6095
6096 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6097 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6098 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6099 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6100 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6101
6102 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6103 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6104 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6105 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6106 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6107
6108 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6109 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6110 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6111 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6112 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6113
6114 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6115 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6116 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6117 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6118 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6119
6120 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6121 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6122 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6123 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6124 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6125 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6126 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6127 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6128
6129 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6130 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6131 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6132 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6133 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6134 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6135 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6136
6137 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6138 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6139 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6140 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6141 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6142 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6143 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6144
6145 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6146 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6147 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6148 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6149 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6150 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6151 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6152
6153 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6154 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6155 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6156 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6157 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6158 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6159 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6160
6161 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6162 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6163 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6164 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6165 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6166 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6167 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6168
6169 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6170 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6171 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6172 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6173 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6174 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6175 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6176
6177 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6178 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6179 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6180 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6181 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6182 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6183 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6184
6185 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6186 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6187 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6188 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6189 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6190 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6191 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6192
6193 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6194 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6195 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6196 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6197 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6198 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6199 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6200 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6201 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6202 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6203
6204 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6205 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6206 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6207 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6208 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6209 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6210 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6211 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6212 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6213 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6214 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6215 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6216
6217 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6218 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6219 asid);
6220 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6221 asid);
6222 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6223 asid);
6224 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6225 asid);
6226 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6227 asid);
6228 }
6229
6230 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6231 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6232 asid);
6233 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6234 asid);
6235 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6236 asid);
6237 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6238 asid);
6239 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6240 asid);
6241 }
6242
6243 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6244 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6245 asid);
6246 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6247 asid);
6248 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6249 asid);
6250 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6251 asid);
6252 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6253 asid);
6254 }
6255
6256 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6257 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6258 asid);
6259 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6260 asid);
6261 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6262 asid);
6263 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6264 asid);
6265 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6266 asid);
6267 }
6268
6269 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6270 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6271 asid);
6272 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6273 asid);
6274 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6275 asid);
6276 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6277 asid);
6278 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6279 asid);
6280 }
6281
6282 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6283 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6284 asid);
6285 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6286 asid);
6287 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6288 asid);
6289 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6290 asid);
6291 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6292 asid);
6293 }
6294
6295 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6296 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6297 asid);
6298 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6299 asid);
6300 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6301 asid);
6302 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6303 asid);
6304 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6305 asid);
6306 }
6307
6308 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6309 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6310 asid);
6311 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6312 asid);
6313 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6314 asid);
6315 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6316 asid);
6317 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6318 asid);
6319 }
6320
6321 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6322 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6323 asid);
6324 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6325 asid);
6326 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6327 asid);
6328 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6329 asid);
6330 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6331 asid);
6332 }
6333
6334 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6335 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6336 asid);
6337 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6338 asid);
6339 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6340 asid);
6341 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6342 asid);
6343 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6344 asid);
6345 }
6346
6347 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6348 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6349 }
6350
6351 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6352 struct hl_cs_job *job)
6353 {
6354 struct packet_msg_prot *fence_pkt;
6355 u32 *fence_ptr;
6356 dma_addr_t fence_dma_addr;
6357 struct hl_cb *cb;
6358 u32 tmp, timeout, dma_offset;
6359 int rc;
6360
6361 if (hdev->pldm)
6362 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6363 else
6364 timeout = HL_DEVICE_TIMEOUT_USEC;
6365
6366 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6367 if (!fence_ptr) {
6368 dev_err(hdev->dev,
6369 "Failed to allocate fence memory for QMAN0\n");
6370 return -ENOMEM;
6371 }
6372
6373 cb = job->patched_cb;
6374
6375 fence_pkt = cb->kernel_address +
6376 job->job_cb_size - sizeof(struct packet_msg_prot);
6377
6378 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6379 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6380 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6381
6382 fence_pkt->ctl = cpu_to_le32(tmp);
6383 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6384 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6385
6386 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6387
6388 WREG32(mmDMA0_CORE_PROT + dma_offset,
6389 BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6390
6391 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6392 job->job_cb_size, cb->bus_address);
6393 if (rc) {
6394 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6395 goto free_fence_ptr;
6396 }
6397
6398 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6399 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6400 timeout, true);
6401
6402 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6403
6404 if (rc == -ETIMEDOUT) {
6405 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6406 goto free_fence_ptr;
6407 }
6408
6409 free_fence_ptr:
6410 WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6411
6412 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6413 return rc;
6414 }
6415
6416 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6417 {
6418 if (event_type >= GAUDI_EVENT_SIZE)
6419 goto event_not_supported;
6420
6421 if (!gaudi_irq_map_table[event_type].valid)
6422 goto event_not_supported;
6423
6424 	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6425
6426 return;
6427
6428 event_not_supported:
6429 snprintf(desc, size, "N/A");
6430 }
6431
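/*
 * Two DMA engines sit behind each DMA_IF coordinate, so the RAZWI initiator
 * coordinates alone are ambiguous. Narrow it down by checking which of the
 * two candidates has the relevant HBW read/write error bit set in its
 * ERR_CAUSE register; if both or neither do, report both engines.
 */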
6432 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6433 bool is_write, u16 *engine_id_1,
6434 u16 *engine_id_2)
6435 {
6436 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6437
6438 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6439 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6440
6441 switch (x_y) {
6442 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6443 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6444 dma_id[0] = 0;
6445 dma_id[1] = 2;
6446 break;
6447 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6448 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6449 dma_id[0] = 1;
6450 dma_id[1] = 3;
6451 break;
6452 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6453 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6454 dma_id[0] = 4;
6455 dma_id[1] = 6;
6456 break;
6457 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6458 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6459 dma_id[0] = 5;
6460 dma_id[1] = 7;
6461 break;
6462 default:
6463 goto unknown_initiator;
6464 }
6465
6466 for (i = 0 ; i < 2 ; i++) {
6467 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6468 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6469 }
6470
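/*
 * Each of these initiator IDs is shared by two DMA engines, so the
 * per-engine ERR_CAUSE registers read above are used to determine which
 * engine (or possibly both) actually triggered the RAZWI.
 */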
6471 switch (x_y) {
6472 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6473 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6474 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6475 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6476 return "DMA0";
6477 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6478 *engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6479 return "DMA2";
6480 } else {
6481 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6482 *engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6483 return "DMA0 or DMA2";
6484 }
6485 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6486 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6487 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6488 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6489 return "DMA1";
6490 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6491 *engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6492 return "DMA3";
6493 } else {
6494 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6495 *engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6496 return "DMA1 or DMA3";
6497 }
6498 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6499 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6500 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6501 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6502 return "DMA4";
6503 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6504 *engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6505 return "DMA6";
6506 } else {
6507 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6508 *engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6509 return "DMA4 or DMA6";
6510 }
6511 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6512 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6513 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6514 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6515 return "DMA5";
6516 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6517 *engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6518 return "DMA7";
6519 } else {
6520 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6521 *engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6522 return "DMA5 or DMA7";
6523 }
6524 }
6525
6526 unknown_initiator:
6527 return "unknown initiator";
6528 }
6529
6530 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6531 u16 *engine_id_1, u16 *engine_id_2)
6532 {
6533 u32 val, x_y, axi_id;
6534
6535 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6536 RREG32(mmMMU_UP_RAZWI_READ_ID);
6537 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6538 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6539 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6540 RAZWI_INITIATOR_AXI_ID_SHIFT);
6541
6542 switch (x_y) {
6543 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6544 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6545 *engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6546 return "TPC0";
6547 }
6548 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6549 *engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6550 return "NIC0";
6551 }
6552 break;
6553 case RAZWI_INITIATOR_ID_X_Y_TPC1:
6554 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6555 return "TPC1";
6556 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6557 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6558 *engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6559 return "MME0";
6560 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6561 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6562 *engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6563 return "MME1";
6564 case RAZWI_INITIATOR_ID_X_Y_TPC2:
6565 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6566 return "TPC2";
6567 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6568 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6569 *engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6570 return "TPC3";
6571 }
6572 /* PCI, CPU or PSOC does not have an engine id */
6573 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6574 return "PCI";
6575 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6576 return "CPU";
6577 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6578 return "PSOC";
6579 break;
6580 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6581 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6582 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6583 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6584 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6585 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6586 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6587 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6588 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6589 engine_id_1, engine_id_2);
6590 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6591 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6592 *engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6593 return "TPC4";
6594 }
6595 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6596 *engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6597 return "NIC1";
6598 }
6599 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6600 *engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6601 return "NIC2";
6602 }
6603 break;
6604 case RAZWI_INITIATOR_ID_X_Y_TPC5:
6605 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6606 return "TPC5";
6607 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6608 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6609 *engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6610 return "MME2";
6611 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6612 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6613 *engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6614 return "MME3";
6615 case RAZWI_INITIATOR_ID_X_Y_TPC6:
6616 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6617 return "TPC6";
6618 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6619 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6620 *engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6621 return "TPC7";
6622 }
6623 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6624 *engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6625 return "NIC4";
6626 }
6627 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6628 *engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6629 return "NIC5";
6630 }
6631 break;
6632 default:
6633 break;
6634 }
6635
6636 dev_err(hdev->dev,
6637 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6638 val,
6639 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6640 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6641 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6642 RAZWI_INITIATOR_AXI_ID_MASK);
6643
6644 return "unknown initiator";
6645 }
6646
6647 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
6648 u16 *engine_id_2, bool *is_read, bool *is_write)
6649 {
6650
6651 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6652 dev_err_ratelimited(hdev->dev,
6653 "RAZWI event caused by illegal write of %s\n",
6654 gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6655 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6656 *is_write = true;
6657 }
6658
6659 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6660 dev_err_ratelimited(hdev->dev,
6661 "RAZWI event caused by illegal read of %s\n",
6662 gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6663 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6664 *is_read = true;
6665 }
6666 }
6667
6668 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
6669 {
6670 struct gaudi_device *gaudi = hdev->asic_specific;
6671 u32 val;
6672
6673 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6674 return;
6675
6676 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6677 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6678 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6679 *addr <<= 32;
6680 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6681
6682 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6683 hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
6684
6685 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6686 }
6687
6688 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6689 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6690 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6691 *addr <<= 32;
6692 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6693
6694 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6695
6696 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6697 }
6698 }
6699
6700 /*
6701 * +-------------------+------------------------------------------------------+
6702 * | Configuration Reg | Description |
6703 * | Address | |
6704 * +-------------------+------------------------------------------------------+
6705 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
6706 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
6707 * | |0xF34 memory wrappers 63:32 |
6708 * | |0xF38 memory wrappers 95:64 |
6709 * | |0xF3C memory wrappers 127:96 |
6710 * +-------------------+------------------------------------------------------+
6711 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
6712 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
6713 * | |0xF44 memory wrappers 63:32 |
6714 * | |0xF48 memory wrappers 95:64 |
6715 * | |0xF4C memory wrappers 127:96 |
6716 * +-------------------+------------------------------------------------------+
6717 */
6718 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6719 struct ecc_info_extract_params *params, u64 *ecc_address,
6720 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6721 {
6722 u32 i, num_mem_regs, reg, err_bit;
6723 u64 err_addr, err_word = 0;
6724
6725 num_mem_regs = params->num_memories / 32 +
6726 ((params->num_memories % 32) ? 1 : 0);
6727
6728 if (params->block_address >= CFG_BASE)
6729 params->block_address -= CFG_BASE;
6730
6731 if (params->derr)
6732 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6733 else
6734 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6735
6736 /* Set invalid wrapper index */
6737 *memory_wrapper_idx = 0xFF;
6738
6739 /* Iterate through memory wrappers, a single bit must be set */
6740 for (i = 0 ; i < num_mem_regs ; i++) {
6741 err_addr += i * 4;
6742 err_word = RREG32(err_addr);
6743 if (err_word) {
6744 err_bit = __ffs(err_word);
6745 *memory_wrapper_idx = err_bit + (32 * i);
6746 break;
6747 }
6748 }
6749
6750 if (*memory_wrapper_idx == 0xFF) {
6751 dev_err(hdev->dev, "ECC error information cannot be found\n");
6752 return -EINVAL;
6753 }
6754
6755 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6756 *memory_wrapper_idx);
6757
6758 *ecc_address =
6759 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6760 *ecc_syndrom =
6761 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6762
6763 /* Clear error indication */
6764 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6765 if (params->derr)
6766 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6767 else
6768 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6769
6770 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6771
6772 return 0;
6773 }
6774
6775 /*
6776 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6777 *
6778 * @idx: the current pi/ci value
6779 * @q_len: the queue length (power of 2)
6780 *
6781 * @return the cyclically decremented index
6782 */
6783 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6784 {
6785 u32 mask = q_len - 1;
6786
6787 /*
6788 * Modular decrement is equivalent to adding (q_len - 1);
6789 * taking the LSBs afterwards keeps the value in the
6790 * range [0, q_len - 1].
6791 */
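/* e.g. with q_len = 8: idx 5 -> 4, idx 0 -> 7 */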
6792 return (idx + q_len - 1) & mask;
6793 }
6794
6795 /**
6796 * gaudi_handle_sw_config_stream_data - print SW config stream data
6797 *
6798 * @hdev: pointer to the habanalabs device structure
6799 * @stream: the QMAN's stream
6800 * @qman_base: base address of QMAN registers block
6801 * @event_mask: mask of the last events occurred
6802 */
6803 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6804 u64 qman_base, u64 event_mask)
6805 {
6806 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6807 u32 cq_ptr_lo_off, size;
6808
6809 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6810
6811 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6812 stream * cq_ptr_lo_off;
6813 cq_ptr_hi = cq_ptr_lo +
6814 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6815 cq_tsize = cq_ptr_lo +
6816 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6817
6818 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6819 size = RREG32(cq_tsize);
6820 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6821 stream, cq_ptr, size);
6822
6823 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6824 hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6825 hdev->captured_err_info.undef_opcode.cq_size = size;
6826 hdev->captured_err_info.undef_opcode.stream_id = stream;
6827 }
6828 }
6829
6830 /**
6831 * gaudi_handle_last_pqes_on_err - print last PQEs on error
6832 *
6833 * @hdev: pointer to the habanalabs device structure
6834 * @qid_base: first QID of the QMAN (out of 4 streams)
6835 * @stream: the QMAN's stream
6836 * @qman_base: base address of QMAN registers block
6837 * @event_mask: mask of the last events occurred
6838 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6839 */
6840 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6841 u32 stream, u64 qman_base,
6842 u64 event_mask,
6843 bool pr_sw_conf)
6844 {
6845 u32 ci, qm_ci_stream_off, queue_len;
6846 struct hl_hw_queue *q;
6847 u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6848 int i;
6849
6850 q = &hdev->kernel_queues[qid_base + stream];
6851
6852 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6853 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6854 stream * qm_ci_stream_off;
6855
6856 queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6857 q->int_queue_len : HL_QUEUE_LENGTH;
6858
6859 hdev->asic_funcs->hw_queues_lock(hdev);
6860
6861 if (pr_sw_conf)
6862 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6863
6864 ci = RREG32(pq_ci);
6865
6866 /* we should start printing from ci - 1 */
6867 ci = gaudi_queue_idx_dec(ci, queue_len);
6868 memset(addr, 0, sizeof(addr));
6869
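/*
 * Walk backwards over up to PQ_FETCHER_CACHE_SIZE BDs, printing each one
 * until an uninitialized (len == 0) entry is reached.
 */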
6870 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6871 struct hl_bd *bd;
6872 u32 len;
6873
6874 bd = q->kernel_address;
6875 bd += ci;
6876
6877 len = le32_to_cpu(bd->len);
6878 /* len 0 means an uninitialized entry - break */
6879 if (!len)
6880 break;
6881
6882 addr[i] = le64_to_cpu(bd->ptr);
6883
6884 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6885 stream, ci, addr[i], len);
6886
6887 /* get previous ci, wrap if needed */
6888 ci = gaudi_queue_idx_dec(ci, queue_len);
6889 }
6890
6891 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6892 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6893 u32 arr_idx = undef_opcode->cb_addr_streams_len;
6894
6895 if (arr_idx == 0) {
6896 undef_opcode->timestamp = ktime_get();
6897 undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6898 }
6899
6900 memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6901 undef_opcode->cb_addr_streams_len++;
6902 }
6903
6904 hdev->asic_funcs->hw_queues_unlock(hdev);
6905 }
6906
6907 /**
6908 * handle_qman_data_on_err - extract QMAN data on error
6909 *
6910 * @hdev: pointer to the habanalabs device structure
6911 * @qid_base: first QID of the QMAN (out of 4 streams)
6912 * @stream: the QMAN's stream
6913 * @qman_base: base address of QMAN registers block
6914 * @event_mask: mask of the last events occurred
6915 *
6916 * This function attempts to extract as much data as possible on a QMAN error.
6917 * On an upper CP, print the SW config stream data and the last 8 PQEs.
6918 * On the lower CP, print the SW config data and the last PQEs of ALL 4 upper CPs.
6919 */
6920 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6921 u32 stream, u64 qman_base, u64 event_mask)
6922 {
6923 u32 i;
6924
6925 if (stream != QMAN_STREAMS) {
6926 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
6927 qman_base, event_mask, true);
6928 return;
6929 }
6930
6931 /* handle Lower-CP */
6932 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6933
6934 for (i = 0; i < QMAN_STREAMS; i++)
6935 gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
6936 qman_base, event_mask, false);
6937 }
6938
6939 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6940 const char *qm_name,
6941 u64 qman_base,
6942 u32 qid_base,
6943 u64 *event_mask)
6944 {
6945 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6946 u64 glbl_sts_addr, arb_err_addr;
6947 char reg_desc[32];
6948
6949 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
6950 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
6951
6952 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
6953 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6954 glbl_sts_clr_val = 0;
6955 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6956
6957 if (!glbl_sts_val)
6958 continue;
6959
6960 if (i == QMAN_STREAMS)
6961 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
6962 else
6963 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
6964
6965 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6966 if (glbl_sts_val & BIT(j)) {
6967 dev_err_ratelimited(hdev->dev,
6968 "%s %s. err cause: %s\n",
6969 qm_name, reg_desc,
6970 gaudi_qman_error_cause[j]);
6971 glbl_sts_clr_val |= BIT(j);
6972 }
6973 }
6974 /* check for undefined opcode */
6975 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
6976 hdev->captured_err_info.undef_opcode.write_enable) {
6977 memset(&hdev->captured_err_info.undef_opcode, 0,
6978 sizeof(hdev->captured_err_info.undef_opcode));
6979
6980 hdev->captured_err_info.undef_opcode.write_enable = false;
6981 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
6982 }
6983
6984 /* Write 1 clear errors */
6985 if (!hdev->stop_on_err)
6986 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6987 else
6988 handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
6989 }
6990
6991 arb_err_val = RREG32(arb_err_addr);
6992
6993 if (!arb_err_val)
6994 return;
6995
6996 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
6997 if (arb_err_val & BIT(j)) {
6998 dev_err_ratelimited(hdev->dev,
6999 "%s ARB_ERR. err cause: %s\n",
7000 qm_name,
7001 gaudi_qman_arb_error_cause[j]);
7002 }
7003 }
7004 }
7005
7006 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7007 struct hl_eq_sm_sei_data *sei_data)
7008 {
7009 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7010
7011 /* Flip the bits as the enum is ordered in the opposite way */
7012 index = (index ^ 0x3) & 0x3;
7013
7014 switch (sei_data->sei_cause) {
7015 case SM_SEI_SO_OVERFLOW:
7016 dev_err_ratelimited(hdev->dev,
7017 "%s SEI Error: SOB Group %u overflow/underflow",
7018 gaudi_sync_manager_names[index],
7019 le32_to_cpu(sei_data->sei_log));
7020 break;
7021 case SM_SEI_LBW_4B_UNALIGNED:
7022 dev_err_ratelimited(hdev->dev,
7023 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7024 gaudi_sync_manager_names[index],
7025 le32_to_cpu(sei_data->sei_log));
7026 break;
7027 case SM_SEI_AXI_RESPONSE_ERR:
7028 dev_err_ratelimited(hdev->dev,
7029 "%s SEI Error: AXI ID %u response error",
7030 gaudi_sync_manager_names[index],
7031 le32_to_cpu(sei_data->sei_log));
7032 break;
7033 default:
7034 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7035 le32_to_cpu(sei_data->sei_log));
7036 break;
7037 }
7038 }
7039
7040 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7041 struct hl_eq_ecc_data *ecc_data)
7042 {
7043 struct ecc_info_extract_params params;
7044 u64 ecc_address = 0, ecc_syndrom = 0;
7045 u8 index, memory_wrapper_idx = 0;
7046 bool extract_info_from_fw;
7047 int rc;
7048
7049 if (hdev->asic_prop.fw_security_enabled) {
7050 extract_info_from_fw = true;
7051 goto extract_ecc_info;
7052 }
7053
7054 switch (event_type) {
7055 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7056 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7057 extract_info_from_fw = true;
7058 break;
7059 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7060 index = event_type - GAUDI_EVENT_TPC0_SERR;
7061 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7062 params.num_memories = 90;
7063 params.derr = false;
7064 extract_info_from_fw = false;
7065 break;
7066 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7067 index = event_type - GAUDI_EVENT_TPC0_DERR;
7068 params.block_address =
7069 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7070 params.num_memories = 90;
7071 params.derr = true;
7072 extract_info_from_fw = false;
7073 break;
7074 case GAUDI_EVENT_MME0_ACC_SERR:
7075 case GAUDI_EVENT_MME1_ACC_SERR:
7076 case GAUDI_EVENT_MME2_ACC_SERR:
7077 case GAUDI_EVENT_MME3_ACC_SERR:
7078 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7079 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7080 params.num_memories = 128;
7081 params.derr = false;
7082 extract_info_from_fw = false;
7083 break;
7084 case GAUDI_EVENT_MME0_ACC_DERR:
7085 case GAUDI_EVENT_MME1_ACC_DERR:
7086 case GAUDI_EVENT_MME2_ACC_DERR:
7087 case GAUDI_EVENT_MME3_ACC_DERR:
7088 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7089 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7090 params.num_memories = 128;
7091 params.derr = true;
7092 extract_info_from_fw = false;
7093 break;
7094 case GAUDI_EVENT_MME0_SBAB_SERR:
7095 case GAUDI_EVENT_MME1_SBAB_SERR:
7096 case GAUDI_EVENT_MME2_SBAB_SERR:
7097 case GAUDI_EVENT_MME3_SBAB_SERR:
7098 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7099 params.block_address =
7100 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7101 params.num_memories = 33;
7102 params.derr = false;
7103 extract_info_from_fw = false;
7104 break;
7105 case GAUDI_EVENT_MME0_SBAB_DERR:
7106 case GAUDI_EVENT_MME1_SBAB_DERR:
7107 case GAUDI_EVENT_MME2_SBAB_DERR:
7108 case GAUDI_EVENT_MME3_SBAB_DERR:
7109 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7110 params.block_address =
7111 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7112 params.num_memories = 33;
7113 params.derr = true;
7114 extract_info_from_fw = false;
7115 break;
7116 default:
7117 return;
7118 }
7119
7120 extract_ecc_info:
7121 if (extract_info_from_fw) {
7122 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7123 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7124 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7125 } else {
7126 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7127 &ecc_syndrom, &memory_wrapper_idx);
7128 if (rc)
7129 return;
7130 }
7131
7132 dev_err(hdev->dev,
7133 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7134 ecc_address, ecc_syndrom, memory_wrapper_idx);
7135 }
7136
7137 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7138 {
7139 u64 qman_base;
7140 char desc[32];
7141 u32 qid_base;
7142 u8 index;
7143
7144 switch (event_type) {
7145 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7146 index = event_type - GAUDI_EVENT_TPC0_QM;
7147 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7148 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7149 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7150 break;
7151 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7152 if (event_type == GAUDI_EVENT_MME0_QM) {
7153 index = 0;
7154 qid_base = GAUDI_QUEUE_ID_MME_0_0;
7155 } else { /* event_type == GAUDI_EVENT_MME2_QM */
7156 index = 2;
7157 qid_base = GAUDI_QUEUE_ID_MME_1_0;
7158 }
7159 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7160 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7161 break;
7162 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7163 index = event_type - GAUDI_EVENT_DMA0_QM;
7164 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7165 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7166 if (index > 1)
7167 qid_base++;
7168 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7169 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7170 break;
7171 case GAUDI_EVENT_NIC0_QM0:
7172 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7173 qman_base = mmNIC0_QM0_BASE;
7174 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7175 break;
7176 case GAUDI_EVENT_NIC0_QM1:
7177 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7178 qman_base = mmNIC0_QM1_BASE;
7179 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7180 break;
7181 case GAUDI_EVENT_NIC1_QM0:
7182 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7183 qman_base = mmNIC1_QM0_BASE;
7184 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7185 break;
7186 case GAUDI_EVENT_NIC1_QM1:
7187 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7188 qman_base = mmNIC1_QM1_BASE;
7189 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7190 break;
7191 case GAUDI_EVENT_NIC2_QM0:
7192 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7193 qman_base = mmNIC2_QM0_BASE;
7194 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7195 break;
7196 case GAUDI_EVENT_NIC2_QM1:
7197 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7198 qman_base = mmNIC2_QM1_BASE;
7199 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7200 break;
7201 case GAUDI_EVENT_NIC3_QM0:
7202 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7203 qman_base = mmNIC3_QM0_BASE;
7204 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7205 break;
7206 case GAUDI_EVENT_NIC3_QM1:
7207 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7208 qman_base = mmNIC3_QM1_BASE;
7209 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7210 break;
7211 case GAUDI_EVENT_NIC4_QM0:
7212 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7213 qman_base = mmNIC4_QM0_BASE;
7214 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7215 break;
7216 case GAUDI_EVENT_NIC4_QM1:
7217 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7218 qman_base = mmNIC4_QM1_BASE;
7219 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7220 break;
7221 default:
7222 return;
7223 }
7224
7225 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7226 }
7227
7228 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7229 bool check_razwi, u64 *event_mask)
7230 {
7231 bool is_read = false, is_write = false;
7232 u16 engine_id[2], num_of_razwi_eng = 0;
7233 char desc[64] = "";
7234 u64 razwi_addr = 0;
7235 u8 razwi_flags = 0;
7236
7237 /*
7238 * Initialize the engine ids as invalid by default; they are set to a valid
7239 * value only if the RAZWI was initiated by an engine that has an engine id.
7240 */
7241 engine_id[0] = HL_RAZWI_NA_ENG_ID;
7242 engine_id[1] = HL_RAZWI_NA_ENG_ID;
7243
7244 gaudi_get_event_desc(event_type, desc, sizeof(desc));
7245 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7246 event_type, desc);
7247
7248 if (check_razwi) {
7249 gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
7250 &is_write);
7251 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
7252
7253 if (is_read)
7254 razwi_flags |= HL_RAZWI_READ;
7255 if (is_write)
7256 razwi_flags |= HL_RAZWI_WRITE;
7257
7258 if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7259 if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7260 num_of_razwi_eng = 2;
7261 else
7262 num_of_razwi_eng = 1;
7263 }
7264
7265 if (razwi_flags)
7266 hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
7267 razwi_flags, event_mask);
7268 }
7269 }
7270
7271 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7272 struct cpucp_pkt_sync_err *sync_err)
7273 {
7274 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7275
7276 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7277 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7278 }
7279
7280 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7281 struct hl_eq_fw_alive *fw_alive)
7282 {
7283 dev_err(hdev->dev,
7284 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7285 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7286 le32_to_cpu(fw_alive->process_id),
7287 le32_to_cpu(fw_alive->thread_id),
7288 le64_to_cpu(fw_alive->uptime_seconds));
7289 }
7290
7291 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7292 void *data)
7293 {
7294 char desc[64] = "", *type;
7295 struct eq_nic_sei_event *eq_nic_sei = data;
7296 u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7297
7298 switch (eq_nic_sei->axi_error_cause) {
7299 case RXB:
7300 type = "RXB";
7301 break;
7302 case RXE:
7303 type = "RXE";
7304 break;
7305 case TXS:
7306 type = "TXS";
7307 break;
7308 case TXE:
7309 type = "TXE";
7310 break;
7311 case QPC_RESP:
7312 type = "QPC_RESP";
7313 break;
7314 case NON_AXI_ERR:
7315 type = "NON_AXI_ERR";
7316 break;
7317 case TMR:
7318 type = "TMR";
7319 break;
7320 default:
7321 dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7322 eq_nic_sei->axi_error_cause);
7323 type = "N/A";
7324 break;
7325 }
7326
7327 snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7328 eq_nic_sei->id);
7329 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7330 event_type, desc);
7331 }
7332
7333 static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7334 {
7335 /* GAUDI doesn't support any reset except hard-reset */
7336 return -EPERM;
7337 }
7338
7339 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7340 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7341 {
7342 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7343 int rc = 0;
7344
7345 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7346 CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7347 if (!hbm_ecc_data) {
7348 dev_err(hdev->dev, "No FW ECC data");
7349 return 0;
7350 }
7351
7352 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7353 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7354 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7355 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7356 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7357 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7358 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7359 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7360 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7361 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7362 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7363 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7364 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7365 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7366
7367 dev_err(hdev->dev,
7368 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7369 device, ch, wr_par, rd_par, ca_par, serr, derr);
7370 dev_err(hdev->dev,
7371 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7372 device, ch, hbm_ecc_data->first_addr, type,
7373 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7374 hbm_ecc_data->dec_cnt);
7375 return 0;
7376 }
7377
7378 if (hdev->asic_prop.fw_security_enabled) {
7379 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7380 return 0;
7381 }
7382
7383 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
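/*
 * Without FW assistance, read the HBM MC registers directly. Each channel
 * is reported as two "pc" entries (even/odd): offsets 0x06C/0x07C hold
 * their interrupt status, while 0x060-0x074 hold the ECC info and
 * first-error address printed below (offsets as used by this driver).
 */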
7384 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7385 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7386 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7387 if (val) {
7388 rc = -EIO;
7389 dev_err(hdev->dev,
7390 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7391 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7392 (val >> 2) & 0x1, (val >> 3) & 0x1,
7393 (val >> 4) & 0x1);
7394
7395 val2 = RREG32(base + ch * 0x1000 + 0x060);
7396 dev_err(hdev->dev,
7397 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7398 device, ch * 2,
7399 RREG32(base + ch * 0x1000 + 0x064),
7400 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7401 (val2 & 0xFF0000) >> 16,
7402 (val2 & 0xFF000000) >> 24);
7403 }
7404
7405 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7406 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7407 if (val) {
7408 rc = -EIO;
7409 dev_err(hdev->dev,
7410 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7411 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7412 (val >> 2) & 0x1, (val >> 3) & 0x1,
7413 (val >> 4) & 0x1);
7414
7415 val2 = RREG32(base + ch * 0x1000 + 0x070);
7416 dev_err(hdev->dev,
7417 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7418 device, ch * 2 + 1,
7419 RREG32(base + ch * 0x1000 + 0x074),
7420 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7421 (val2 & 0xFF0000) >> 16,
7422 (val2 & 0xFF000000) >> 24);
7423 }
7424
7425 /* Clear interrupts */
7426 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7427 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7428 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7429 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7430 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7431 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7432 }
7433
7434 val = RREG32(base + 0x8F30);
7435 val2 = RREG32(base + 0x8F34);
7436 if (val | val2) {
7437 rc = -EIO;
7438 dev_err(hdev->dev,
7439 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7440 device, val, val2);
7441 }
7442 val = RREG32(base + 0x8F40);
7443 val2 = RREG32(base + 0x8F44);
7444 if (val | val2) {
7445 rc = -EIO;
7446 dev_err(hdev->dev,
7447 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7448 device, val, val2);
7449 }
7450
7451 return rc;
7452 }
7453
7454 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7455 {
7456 switch (hbm_event_type) {
7457 case GAUDI_EVENT_HBM0_SPI_0:
7458 case GAUDI_EVENT_HBM0_SPI_1:
7459 return 0;
7460 case GAUDI_EVENT_HBM1_SPI_0:
7461 case GAUDI_EVENT_HBM1_SPI_1:
7462 return 1;
7463 case GAUDI_EVENT_HBM2_SPI_0:
7464 case GAUDI_EVENT_HBM2_SPI_1:
7465 return 2;
7466 case GAUDI_EVENT_HBM3_SPI_0:
7467 case GAUDI_EVENT_HBM3_SPI_1:
7468 return 3;
7469 default:
7470 break;
7471 }
7472
7473 /* Should never happen */
7474 return 0;
7475 }
7476
7477 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7478 char *interrupt_name)
7479 {
7480 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7481 bool soft_reset_required = false;
7482
7483 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7484 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7485
7486 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7487 if (tpc_interrupts_cause & BIT(i)) {
7488 dev_err_ratelimited(hdev->dev,
7489 "TPC%d_%s interrupt cause: %s\n",
7490 tpc_id, interrupt_name,
7491 gaudi_tpc_interrupts_cause[i]);
7492 /* If this is QM error, we need to soft-reset */
7493 if (i == 15)
7494 soft_reset_required = true;
7495 }
7496
7497 /* Clear interrupts */
7498 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7499
7500 return soft_reset_required;
7501 }
7502
7503 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7504 {
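/* DEC events of consecutive TPCs are two entries apart in the event table */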
7505 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7506 }
7507
7508 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7509 {
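/* KRN_ERR events of consecutive TPCs are six entries apart in the event table */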
7510 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7511 }
7512
7513 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7514 {
7515 ktime_t zero_time = ktime_set(0, 0);
7516
7517 mutex_lock(&hdev->clk_throttling.lock);
7518
7519 switch (event_type) {
7520 case GAUDI_EVENT_FIX_POWER_ENV_S:
7521 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7522 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7523 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7524 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7525 dev_info_ratelimited(hdev->dev,
7526 "Clock throttling due to power consumption\n");
7527 break;
7528
7529 case GAUDI_EVENT_FIX_POWER_ENV_E:
7530 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7531 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7532 dev_info_ratelimited(hdev->dev,
7533 "Power envelop is safe, back to optimal clock\n");
7534 break;
7535
7536 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7537 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7538 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7539 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7540 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7541 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7542 dev_info_ratelimited(hdev->dev,
7543 "Clock throttling due to overheating\n");
7544 break;
7545
7546 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7547 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7548 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7549 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7550 dev_info_ratelimited(hdev->dev,
7551 "Thermal envelop is safe, back to optimal clock\n");
7552 break;
7553
7554 default:
7555 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7556 event_type);
7557 break;
7558 }
7559
7560 mutex_unlock(&hdev->clk_throttling.lock);
7561 }
7562
7563 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7564 {
7565 struct gaudi_device *gaudi = hdev->asic_specific;
7566 struct hl_info_fw_err_info fw_err_info;
7567 u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7568 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7569 u32 fw_fatal_err_flag = 0, flags = 0;
7570 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7571 >> EQ_CTL_EVENT_TYPE_SHIFT);
7572 bool reset_required, reset_direct = false;
7573 u8 cause;
7574 int rc;
7575
7576 if (event_type >= GAUDI_EVENT_SIZE) {
7577 dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7578 event_type, GAUDI_EVENT_SIZE - 1);
7579 return;
7580 }
7581
7582 gaudi->events_stat[event_type]++;
7583 gaudi->events_stat_aggregate[event_type]++;
7584
7585 switch (event_type) {
7586 case GAUDI_EVENT_PCIE_CORE_DERR:
7587 case GAUDI_EVENT_PCIE_IF_DERR:
7588 case GAUDI_EVENT_PCIE_PHY_DERR:
7589 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7590 case GAUDI_EVENT_MME0_ACC_DERR:
7591 case GAUDI_EVENT_MME0_SBAB_DERR:
7592 case GAUDI_EVENT_MME1_ACC_DERR:
7593 case GAUDI_EVENT_MME1_SBAB_DERR:
7594 case GAUDI_EVENT_MME2_ACC_DERR:
7595 case GAUDI_EVENT_MME2_SBAB_DERR:
7596 case GAUDI_EVENT_MME3_ACC_DERR:
7597 case GAUDI_EVENT_MME3_SBAB_DERR:
7598 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7599 fallthrough;
7600 case GAUDI_EVENT_CPU_IF_ECC_DERR:
7601 case GAUDI_EVENT_PSOC_MEM_DERR:
7602 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7603 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7604 case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7605 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7606 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7607 case GAUDI_EVENT_MMU_DERR:
7608 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7609 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7610 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7611 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7612 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7613 goto reset_device;
7614
7615 case GAUDI_EVENT_GIC500:
7616 case GAUDI_EVENT_AXI_ECC:
7617 case GAUDI_EVENT_L2_RAM_ECC:
7618 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7619 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7620 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7621 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7622 goto reset_device;
7623
7624 case GAUDI_EVENT_HBM0_SPI_0:
7625 case GAUDI_EVENT_HBM1_SPI_0:
7626 case GAUDI_EVENT_HBM2_SPI_0:
7627 case GAUDI_EVENT_HBM3_SPI_0:
7628 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7629 gaudi_hbm_read_interrupts(hdev,
7630 gaudi_hbm_event_to_dev(event_type),
7631 &eq_entry->hbm_ecc_data);
7632 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7633 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7634 goto reset_device;
7635
7636 case GAUDI_EVENT_HBM0_SPI_1:
7637 case GAUDI_EVENT_HBM1_SPI_1:
7638 case GAUDI_EVENT_HBM2_SPI_1:
7639 case GAUDI_EVENT_HBM3_SPI_1:
7640 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7641 gaudi_hbm_read_interrupts(hdev,
7642 gaudi_hbm_event_to_dev(event_type),
7643 &eq_entry->hbm_ecc_data);
7644 hl_fw_unmask_irq(hdev, event_type);
7645 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7646 break;
7647
7648 case GAUDI_EVENT_TPC0_DEC:
7649 case GAUDI_EVENT_TPC1_DEC:
7650 case GAUDI_EVENT_TPC2_DEC:
7651 case GAUDI_EVENT_TPC3_DEC:
7652 case GAUDI_EVENT_TPC4_DEC:
7653 case GAUDI_EVENT_TPC5_DEC:
7654 case GAUDI_EVENT_TPC6_DEC:
7655 case GAUDI_EVENT_TPC7_DEC:
7656 /* On a TPC DEC event, notify on TPC assertion. Since there isn't
7657 * a dedicated assertion event yet, the FW generates a TPC DEC event instead.
7658 * The SW upper layer will inspect an internally mapped area to determine
7659 * whether the event is a TPC assertion or a "real" TPC DEC.
7660 */
7661 event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7662 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7663 reset_required = gaudi_tpc_read_interrupts(hdev,
7664 tpc_dec_event_to_tpc_id(event_type),
7665 "AXI_SLV_DEC_Error");
7666 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7667 if (reset_required) {
7668 dev_err(hdev->dev, "reset required due to %s\n",
7669 gaudi_irq_map_table[event_type].name);
7670
7671 reset_direct = true;
7672 goto reset_device;
7673 } else {
7674 hl_fw_unmask_irq(hdev, event_type);
7675 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7676 }
7677 break;
7678
7679 case GAUDI_EVENT_TPC0_KRN_ERR:
7680 case GAUDI_EVENT_TPC1_KRN_ERR:
7681 case GAUDI_EVENT_TPC2_KRN_ERR:
7682 case GAUDI_EVENT_TPC3_KRN_ERR:
7683 case GAUDI_EVENT_TPC4_KRN_ERR:
7684 case GAUDI_EVENT_TPC5_KRN_ERR:
7685 case GAUDI_EVENT_TPC6_KRN_ERR:
7686 case GAUDI_EVENT_TPC7_KRN_ERR:
7687 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7688 reset_required = gaudi_tpc_read_interrupts(hdev,
7689 tpc_krn_event_to_tpc_id(event_type),
7690 "KRN_ERR");
7691 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7692 if (reset_required) {
7693 dev_err(hdev->dev, "reset required due to %s\n",
7694 gaudi_irq_map_table[event_type].name);
7695
7696 reset_direct = true;
7697 goto reset_device;
7698 } else {
7699 hl_fw_unmask_irq(hdev, event_type);
7700 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7701 }
7702 break;
7703
7704 case GAUDI_EVENT_PCIE_CORE_SERR:
7705 case GAUDI_EVENT_PCIE_IF_SERR:
7706 case GAUDI_EVENT_PCIE_PHY_SERR:
7707 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7708 case GAUDI_EVENT_MME0_ACC_SERR:
7709 case GAUDI_EVENT_MME0_SBAB_SERR:
7710 case GAUDI_EVENT_MME1_ACC_SERR:
7711 case GAUDI_EVENT_MME1_SBAB_SERR:
7712 case GAUDI_EVENT_MME2_ACC_SERR:
7713 case GAUDI_EVENT_MME2_SBAB_SERR:
7714 case GAUDI_EVENT_MME3_ACC_SERR:
7715 case GAUDI_EVENT_MME3_SBAB_SERR:
7716 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7717 case GAUDI_EVENT_CPU_IF_ECC_SERR:
7718 case GAUDI_EVENT_PSOC_MEM_SERR:
7719 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7720 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7721 case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7722 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7723 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7724 fallthrough;
7725 case GAUDI_EVENT_MMU_SERR:
7726 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7727 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7728 hl_fw_unmask_irq(hdev, event_type);
7729 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7730 break;
7731
7732 case GAUDI_EVENT_PCIE_DEC:
7733 case GAUDI_EVENT_CPU_AXI_SPLITTER:
7734 case GAUDI_EVENT_PSOC_AXI_DEC:
7735 case GAUDI_EVENT_PSOC_PRSTN_FALL:
7736 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7737 hl_fw_unmask_irq(hdev, event_type);
7738 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7739 break;
7740
7741 case GAUDI_EVENT_MMU_PAGE_FAULT:
7742 case GAUDI_EVENT_MMU_WR_PERM:
7743 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7744 hl_fw_unmask_irq(hdev, event_type);
7745 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7746 break;
7747
7748 case GAUDI_EVENT_MME0_WBC_RSP:
7749 case GAUDI_EVENT_MME0_SBAB0_RSP:
7750 case GAUDI_EVENT_MME1_WBC_RSP:
7751 case GAUDI_EVENT_MME1_SBAB0_RSP:
7752 case GAUDI_EVENT_MME2_WBC_RSP:
7753 case GAUDI_EVENT_MME2_SBAB0_RSP:
7754 case GAUDI_EVENT_MME3_WBC_RSP:
7755 case GAUDI_EVENT_MME3_SBAB0_RSP:
7756 case GAUDI_EVENT_RAZWI_OR_ADC:
7757 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7758 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7759 fallthrough;
7760 case GAUDI_EVENT_NIC0_QM0:
7761 case GAUDI_EVENT_NIC0_QM1:
7762 case GAUDI_EVENT_NIC1_QM0:
7763 case GAUDI_EVENT_NIC1_QM1:
7764 case GAUDI_EVENT_NIC2_QM0:
7765 case GAUDI_EVENT_NIC2_QM1:
7766 case GAUDI_EVENT_NIC3_QM0:
7767 case GAUDI_EVENT_NIC3_QM1:
7768 case GAUDI_EVENT_NIC4_QM0:
7769 case GAUDI_EVENT_NIC4_QM1:
7770 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7771 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7772 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7773 gaudi_handle_qman_err(hdev, event_type, &event_mask);
7774 hl_fw_unmask_irq(hdev, event_type);
7775 event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7776 break;
7777
7778 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7779 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7780 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7781 goto reset_device;
7782
7783 case GAUDI_EVENT_TPC0_BMON_SPMU:
7784 case GAUDI_EVENT_TPC1_BMON_SPMU:
7785 case GAUDI_EVENT_TPC2_BMON_SPMU:
7786 case GAUDI_EVENT_TPC3_BMON_SPMU:
7787 case GAUDI_EVENT_TPC4_BMON_SPMU:
7788 case GAUDI_EVENT_TPC5_BMON_SPMU:
7789 case GAUDI_EVENT_TPC6_BMON_SPMU:
7790 case GAUDI_EVENT_TPC7_BMON_SPMU:
7791 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7792 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7793 hl_fw_unmask_irq(hdev, event_type);
7794 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7795 break;
7796
7797 case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7798 gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7799 hl_fw_unmask_irq(hdev, event_type);
7800 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7801 break;
7802
7803 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7804 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7805 gaudi_print_sm_sei_info(hdev, event_type,
7806 &eq_entry->sm_sei_data);
7807 rc = hl_state_dump(hdev);
7808 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7809 if (rc)
7810 dev_err(hdev->dev,
7811 "Error during system state dump %d\n", rc);
7812 hl_fw_unmask_irq(hdev, event_type);
7813 break;
7814
7815 case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7816 break;
7817
7818 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7819 gaudi_print_clk_change_info(hdev, event_type, &event_mask);
7820 hl_fw_unmask_irq(hdev, event_type);
7821 break;
7822
7823 case GAUDI_EVENT_PSOC_GPIO_U16_0:
7824 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7825 dev_err(hdev->dev,
7826 "Received high temp H/W interrupt %d (cause %d)\n",
7827 event_type, cause);
7828 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7829 break;
7830
7831 case GAUDI_EVENT_DEV_RESET_REQ:
7832 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7833 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7834 goto reset_device;
7835
7836 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7837 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7838 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7839 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7840 goto reset_device;
7841
7842 case GAUDI_EVENT_FW_ALIVE_S:
7843 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7844 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7845 fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
7846 fw_err_info.event_id = event_type;
7847 fw_err_info.event_mask = &event_mask;
7848 hl_handle_fw_err(hdev, &fw_err_info);
7849 goto reset_device;
7850
7851 default:
7852 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7853 event_type);
7854 break;
7855 }
7856
7857 if (event_mask)
7858 hl_notifier_event_send_all(hdev, event_mask);
7859
7860 return;
7861
7862 reset_device:
7863 reset_required = true;
7864
7865 if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7866 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7867
7868 /* notify on device unavailable while the reset is triggered by FW */
7869 event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7870 HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7871 } else if (hdev->hard_reset_on_fw_events) {
7872 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7873 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7874 } else {
7875 reset_required = false;
7876 }
7877
7878 if (reset_required) {
7879 /* escalate general hw errors to critical/fatal error */
7880 if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
7881 hl_handle_critical_hw_err(hdev, event_type, &event_mask);
7882
7883 hl_device_cond_reset(hdev, flags, event_mask);
7884 } else {
7885 hl_fw_unmask_irq(hdev, event_type);
7886 /* A notification on the event still needs to be sent even though no reset is executed */
7887 if (event_mask)
7888 hl_notifier_event_send_all(hdev, event_mask);
7889 }
7890 }
7891
7892 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7893 {
7894 struct gaudi_device *gaudi = hdev->asic_specific;
7895
7896 if (aggregate) {
7897 *size = (u32) sizeof(gaudi->events_stat_aggregate);
7898 return gaudi->events_stat_aggregate;
7899 }
7900
7901 *size = (u32) sizeof(gaudi->events_stat);
7902 return gaudi->events_stat;
7903 }
7904
7905 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7906 {
7907 struct gaudi_device *gaudi = hdev->asic_specific;
7908 u32 status, timeout_usec;
7909 int rc;
7910
7911 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7912 hdev->reset_info.hard_reset_pending)
7913 return 0;
7914
7915 if (hdev->pldm)
7916 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7917 else
7918 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7919
7920 /* L0 & L1 invalidation */
7921 WREG32(mmSTLB_INV_PS, 3);
7922 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7923 WREG32(mmSTLB_INV_PS, 2);
7924
7925 rc = hl_poll_timeout(
7926 hdev,
7927 mmSTLB_INV_PS,
7928 status,
7929 !status,
7930 1000,
7931 timeout_usec);
7932
7933 WREG32(mmSTLB_INV_SET, 0);
7934
7935 return rc;
7936 }
7937
7938 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7939 bool is_hard, u32 flags,
7940 u32 asid, u64 va, u64 size)
7941 {
7942 /* Treat as invalidate all because there is no range invalidation
7943 * in Gaudi
7944 */
7945 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
7946 }
7947
7948 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
7949 {
7950 u32 status, timeout_usec;
7951 int rc;
7952
7953 if (hdev->pldm)
7954 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7955 else
7956 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7957
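/*
 * Program the hop0 page-table physical address for this ASID and kick the
 * MMU; HW clears bit 31 of MMU_BUSY when the update is done.
 */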
7958 WREG32(MMU_ASID, asid);
7959 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7960 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
7961 WREG32(MMU_BUSY, 0x80000000);
7962
7963 rc = hl_poll_timeout(
7964 hdev,
7965 MMU_BUSY,
7966 status,
7967 !(status & 0x80000000),
7968 1000,
7969 timeout_usec);
7970
7971 if (rc) {
7972 dev_err(hdev->dev,
7973 "Timeout during MMU hop0 config of asid %d\n", asid);
7974 return rc;
7975 }
7976
7977 return 0;
7978 }
7979
7980 static int gaudi_send_heartbeat(struct hl_device *hdev)
7981 {
7982 struct gaudi_device *gaudi = hdev->asic_specific;
7983
7984 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7985 return 0;
7986
7987 return hl_fw_send_heartbeat(hdev);
7988 }
7989
7990 static int gaudi_cpucp_info_get(struct hl_device *hdev)
7991 {
7992 struct gaudi_device *gaudi = hdev->asic_specific;
7993 struct asic_fixed_properties *prop = &hdev->asic_prop;
7994 int rc;
7995
7996 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7997 return 0;
7998
7999 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8000 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8001 mmCPU_BOOT_ERR1);
8002 if (rc)
8003 return rc;
8004
8005 if (!strlen(prop->cpucp_info.card_name))
8006 strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8007 CARD_NAME_MAX_LEN);
8008
8009 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8010
8011 set_default_power_values(hdev);
8012
8013 return 0;
8014 }
8015
8016 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8017 struct engines_data *e)
8018 {
8019 struct gaudi_device *gaudi = hdev->asic_specific;
8020 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8021 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8022 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8023 unsigned long *mask = (unsigned long *)mask_arr;
8024 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8025 bool is_idle = true, is_eng_idle, is_slave;
8026 u64 offset;
8027 int i, dma_id, port;
8028
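/*
 * mask_arr (if provided) is filled as a bitmap of busy engine IDs, while
 * e (if provided) collects a human-readable per-engine status table.
 */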
8029 if (e)
8030 hl_engine_data_sprintf(e,
8031 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
8032 "--- ------- ------------ ---------- -------------\n");
8033
8034 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8035 dma_id = gaudi_dma_assignment[i];
8036 offset = dma_id * DMA_QMAN_OFFSET;
8037
8038 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8039 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8040 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8041 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8042 IS_DMA_IDLE(dma_core_sts0);
8043 is_idle &= is_eng_idle;
8044
8045 if (mask && !is_eng_idle)
8046 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8047 if (e)
8048 hl_engine_data_sprintf(e, fmt, dma_id,
8049 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8050 qm_cgm_sts, dma_core_sts0);
8051 }
8052
8053 if (e)
8054 hl_engine_data_sprintf(e,
8055 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
8056 "--- ------- ------------ ---------- ----------\n");
8057
8058 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8059 offset = i * TPC_QMAN_OFFSET;
8060 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8061 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8062 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8063 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8064 IS_TPC_IDLE(tpc_cfg_sts);
8065 is_idle &= is_eng_idle;
8066
8067 if (mask && !is_eng_idle)
8068 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8069 if (e)
8070 hl_engine_data_sprintf(e, fmt, i,
8071 is_eng_idle ? "Y" : "N",
8072 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8073 }
8074
8075 if (e)
8076 hl_engine_data_sprintf(e,
8077 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
8078 "--- ------- ------------ ---------- -----------\n");
8079
8080 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8081 offset = i * MME_QMAN_OFFSET;
8082 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8083 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8084
8085 /* MME 1 & 3 are slaves, no need to check their QMANs */
8086 is_slave = i % 2;
8087 if (!is_slave) {
8088 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8089 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8090 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8091 }
8092
8093 is_idle &= is_eng_idle;
8094
8095 if (mask && !is_eng_idle)
8096 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8097 if (e) {
8098 if (!is_slave)
8099 hl_engine_data_sprintf(e, fmt, i,
8100 is_eng_idle ? "Y" : "N",
8101 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8102 else
8103 hl_engine_data_sprintf(e, mme_slave_fmt, i,
8104 is_eng_idle ? "Y" : "N", "-",
8105 "-", mme_arch_sts);
8106 }
8107 }
8108
8109 if (e)
8110 hl_engine_data_sprintf(e,
8111 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
8112 "--- ------- ------------ ----------\n");
8113
8114 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8115 offset = i * NIC_MACRO_QMAN_OFFSET;
8116 port = 2 * i;
8117 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8118 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8119 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8120 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8121 is_idle &= is_eng_idle;
8122
8123 if (mask && !is_eng_idle)
8124 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8125 if (e)
8126 hl_engine_data_sprintf(e, nic_fmt, port,
8127 is_eng_idle ? "Y" : "N",
8128 qm_glbl_sts0, qm_cgm_sts);
8129 }
8130
8131 port = 2 * i + 1;
8132 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8133 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8134 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8135 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8136 is_idle &= is_eng_idle;
8137
8138 if (mask && !is_eng_idle)
8139 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8140 if (e)
8141 hl_engine_data_sprintf(e, nic_fmt, port,
8142 is_eng_idle ? "Y" : "N",
8143 qm_glbl_sts0, qm_cgm_sts);
8144 }
8145 }
8146
8147 if (e)
8148 hl_engine_data_sprintf(e, "\n");
8149
8150 return is_idle;
8151 }
8152
8153 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8154 __acquires(&gaudi->hw_queues_lock)
8155 {
8156 struct gaudi_device *gaudi = hdev->asic_specific;
8157
8158 spin_lock(&gaudi->hw_queues_lock);
8159 }
8160
8161 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8162 __releases(&gaudi->hw_queues_lock)
8163 {
8164 struct gaudi_device *gaudi = hdev->asic_specific;
8165
8166 spin_unlock(&gaudi->hw_queues_lock);
8167 }
8168
8169 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8170 {
8171 return hdev->pdev->device;
8172 }
8173
8174 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8175 size_t max_size)
8176 {
8177 struct gaudi_device *gaudi = hdev->asic_specific;
8178
8179 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8180 return 0;
8181
8182 return hl_fw_get_eeprom_data(hdev, data, max_size);
8183 }
8184
8185 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8186 {
8187 struct gaudi_device *gaudi = hdev->asic_specific;
8188
8189 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8190 return 0;
8191
8192 return hl_fw_get_monitor_dump(hdev, data);
8193 }
8194
8195 /*
8196 * this function should be used only during initialization and/or after reset,
8197 * when there are no active users.
8198 */
8199 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8200 {
8201 u64 kernel_timeout;
8202 u32 status, offset;
8203 int rc;
8204
8205 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8206
8207 if (hdev->pldm)
8208 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8209 else
8210 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8211
8212 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8213 lower_32_bits(tpc_kernel));
8214 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8215 upper_32_bits(tpc_kernel));
8216
8217 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8218 lower_32_bits(tpc_kernel));
8219 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8220 upper_32_bits(tpc_kernel));
8221 /* set a valid LUT pointer, content is of no significance */
8222 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8223 lower_32_bits(tpc_kernel));
8224 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8225 upper_32_bits(tpc_kernel));
8226
8227 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8228 lower_32_bits(CFG_BASE +
8229 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8230
8231 WREG32(mmTPC0_CFG_TPC_CMD + offset,
8232 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8233 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8234 /* wait a bit for the engine to start executing */
8235 usleep_range(1000, 1500);
8236
8237 /* wait until engine has finished executing */
8238 rc = hl_poll_timeout(
8239 hdev,
8240 mmTPC0_CFG_STATUS + offset,
8241 status,
8242 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8243 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8244 1000,
8245 kernel_timeout);
8246
8247 if (rc) {
8248 dev_err(hdev->dev,
8249 "Timeout while waiting for TPC%d icache prefetch\n",
8250 tpc_id);
8251 return -EIO;
8252 }
8253
8254 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8255 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8256
8257 /* wait a bit for the engine to start executing */
8258 usleep_range(1000, 1500);
8259
8260 /* wait until engine has finished executing */
8261 rc = hl_poll_timeout(
8262 hdev,
8263 mmTPC0_CFG_STATUS + offset,
8264 status,
8265 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8266 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8267 1000,
8268 kernel_timeout);
8269
8270 if (rc) {
8271 dev_err(hdev->dev,
8272 "Timeout while waiting for TPC%d vector pipe\n",
8273 tpc_id);
8274 return -EIO;
8275 }
8276
8277 rc = hl_poll_timeout(
8278 hdev,
8279 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8280 status,
8281 (status == 0),
8282 1000,
8283 kernel_timeout);
8284
8285 if (rc) {
8286 dev_err(hdev->dev,
8287 "Timeout while waiting for TPC%d kernel to execute\n",
8288 tpc_id);
8289 return -EIO;
8290 }
8291
8292 return 0;
8293 }
8294
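/*
 * Allocate a DMA-coherent host buffer for internal command buffers, wrap it
 * in a gen_pool allocator whose minimum order fits one collective-wait CB
 * (five MSG_SHORT packets plus a fence), reserve a block in the context's
 * host VA range and map the buffer through the device MMU.
 */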
8295 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8296 struct hl_ctx *ctx)
8297 {
8298 struct gaudi_device *gaudi = hdev->asic_specific;
8299 int min_alloc_order, rc, collective_cb_size;
8300
8301 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8302 return 0;
8303
8304 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8305 HOST_SPACE_INTERNAL_CB_SZ,
8306 &hdev->internal_cb_pool_dma_addr,
8307 GFP_KERNEL | __GFP_ZERO);
8308
8309 if (!hdev->internal_cb_pool_virt_addr)
8310 return -ENOMEM;
8311
8312 collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8313 sizeof(struct packet_fence);
8314 min_alloc_order = ilog2(collective_cb_size);
8315
8316 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8317 if (!hdev->internal_cb_pool) {
8318 dev_err(hdev->dev,
8319 "Failed to create internal CB pool\n");
8320 rc = -ENOMEM;
8321 goto free_internal_cb_pool;
8322 }
8323
8324 rc = gen_pool_add(hdev->internal_cb_pool,
8325 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8326 HOST_SPACE_INTERNAL_CB_SZ, -1);
8327 if (rc) {
8328 dev_err(hdev->dev,
8329 "Failed to add memory to internal CB pool\n");
8330 rc = -EFAULT;
8331 goto destroy_internal_cb_pool;
8332 }
8333
8334 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8335 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8336 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8337
8338 if (!hdev->internal_cb_va_base) {
8339 rc = -ENOMEM;
8340 goto destroy_internal_cb_pool;
8341 }
8342
8343 mutex_lock(&hdev->mmu_lock);
8344
8345 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8346 hdev->internal_cb_pool_dma_addr,
8347 HOST_SPACE_INTERNAL_CB_SZ);
8348 if (rc)
8349 goto unreserve_internal_cb_pool;
8350
8351 rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8352 if (rc)
8353 goto unmap_internal_cb_pool;
8354
8355 mutex_unlock(&hdev->mmu_lock);
8356
8357 return 0;
8358
8359 unmap_internal_cb_pool:
8360 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8361 HOST_SPACE_INTERNAL_CB_SZ);
8362 unreserve_internal_cb_pool:
8363 mutex_unlock(&hdev->mmu_lock);
8364 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8365 HOST_SPACE_INTERNAL_CB_SZ);
8366 destroy_internal_cb_pool:
8367 gen_pool_destroy(hdev->internal_cb_pool);
8368 free_internal_cb_pool:
8369 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8370 hdev->internal_cb_pool_dma_addr);
8371
8372 return rc;
8373 }
8374
8375 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8376 struct hl_ctx *ctx)
8377 {
8378 struct gaudi_device *gaudi = hdev->asic_specific;
8379
8380 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8381 return;
8382
8383 mutex_lock(&hdev->mmu_lock);
8384 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8385 HOST_SPACE_INTERNAL_CB_SZ);
8386 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8387 HOST_SPACE_INTERNAL_CB_SZ);
8388 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8389 mutex_unlock(&hdev->mmu_lock);
8390
8391 gen_pool_destroy(hdev->internal_cb_pool);
8392
8393 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8394 hdev->internal_cb_pool_dma_addr);
8395 }
8396
8397 static int gaudi_ctx_init(struct hl_ctx *ctx)
8398 {
8399 int rc;
8400
8401 if (ctx->asid == HL_KERNEL_ASID_ID)
8402 return 0;
8403
8404 rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8405 if (rc)
8406 return rc;
8407
8408 rc = gaudi_restore_user_registers(ctx->hdev);
8409 if (rc)
8410 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8411
8412 return rc;
8413 }
8414
8415 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8416 {
8417 if (ctx->asid == HL_KERNEL_ASID_ID)
8418 return;
8419
8420 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8421 }
8422
8423 static int gaudi_pre_schedule_cs(struct hl_cs *cs)
8424 {
8425 return 0;
8426 }
8427
8428 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8429 {
8430 return gaudi_cq_assignment[cq_idx];
8431 }
8432
8433 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8434 {
8435 return sizeof(struct packet_msg_short) +
8436 sizeof(struct packet_msg_prot) * 2;
8437 }
8438
8439 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8440 {
8441 return sizeof(struct packet_msg_short) * 4 +
8442 sizeof(struct packet_fence) +
8443 sizeof(struct packet_msg_prot) * 2;
8444 }
8445
8446 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8447 {
8448 return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8449 }
8450
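/*
 * Append a single MSG_SHORT packet at offset @size of the CB: it performs an
 * atomic "add 1" on sync object @sob_id in the W_S sync manager, with the
 * engine-barrier bit set according to @eb. Returns the updated CB size.
 */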
8451 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8452 u32 size, bool eb)
8453 {
8454 struct hl_cb *cb = (struct hl_cb *) data;
8455 struct packet_msg_short *pkt;
8456 u32 value, ctl, pkt_size = sizeof(*pkt);
8457
8458 pkt = cb->kernel_address + size;
8459 memset(pkt, 0, pkt_size);
8460
8461 /* Inc by 1, Mode ADD */
8462 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8463 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8464
8465 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8466 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8467 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8468 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8469 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8470 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8471 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8472
8473 pkt->value = cpu_to_le32(value);
8474 pkt->ctl = cpu_to_le32(ctl);
8475
8476 return size + pkt_size;
8477 }
8478
8479 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8480 u16 addr)
8481 {
8482 u32 ctl, pkt_size = sizeof(*pkt);
8483
8484 memset(pkt, 0, pkt_size);
8485
8486 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8487 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8488 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8489 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8490 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8491 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8492
8493 pkt->value = cpu_to_le32(value);
8494 pkt->ctl = cpu_to_le32(ctl);
8495
8496 return pkt_size;
8497 }
8498
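/*
 * Build the MSG_SHORT packet that arms monitor @mon_id: the monitor watches
 * the sync-object group containing @sob_base, filtered by @sob_mask, and
 * fires once the greater-or-equal comparison against @sob_val is satisfied.
 * Returns the packet size, or 0 if the sob_base/sob_mask pair is invalid.
 */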
8499 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8500 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8501 u16 sob_val, u16 mon_id)
8502 {
8503 u64 monitor_base;
8504 u32 ctl, value, pkt_size = sizeof(*pkt);
8505 u16 msg_addr_offset;
8506 u8 mask;
8507
8508 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8509 dev_err(hdev->dev,
8510 "sob_base %u (mask %#x) is not valid\n",
8511 sob_base, sob_mask);
8512 return 0;
8513 }
8514
8515 /*
8516 * monitor_base should be the content of the base0 address registers,
8517 * so it will be added to the msg short offsets
8518 */
8519 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8520
8521 msg_addr_offset =
8522 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8523 monitor_base;
8524
8525 memset(pkt, 0, pkt_size);
8526
8527 /* Monitor config packet: bind the monitor to a sync object */
8528 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8529 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8530 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8531 0); /* GREATER OR EQUAL*/
8532 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8533
8534 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8535 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8536 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8537 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8538 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8539 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8540 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8541
8542 pkt->value = cpu_to_le32(value);
8543 pkt->ctl = cpu_to_le32(ctl);
8544
8545 return pkt_size;
8546 }
8547
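/*
 * Build a FENCE packet on fence ID 2 with a target value of 1 and a
 * decrement value of 1, i.e. the CP stalls on it until the fence counter
 * reaches 1 and then decrements the counter.
 */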
8548 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8549 {
8550 u32 ctl, cfg, pkt_size = sizeof(*pkt);
8551
8552 memset(pkt, 0, pkt_size);
8553
8554 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8555 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8556 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8557
8558 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8559 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8560 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8561 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8562
8563 pkt->cfg = cpu_to_le32(cfg);
8564 pkt->ctl = cpu_to_le32(ctl);
8565
8566 return pkt_size;
8567 }
8568
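/*
 * Translate a queue ID into the CFG-space address of that queue's
 * CP_FENCE2_RDATA register, which is the address the wait-CB monitor writes
 * its payload to. Queue IDs without such a mapping return -EINVAL.
 */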
8569 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8570 {
8571 u32 offset, nic_index;
8572
8573 switch (queue_id) {
8574 case GAUDI_QUEUE_ID_DMA_0_0:
8575 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8576 break;
8577 case GAUDI_QUEUE_ID_DMA_0_1:
8578 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8579 break;
8580 case GAUDI_QUEUE_ID_DMA_0_2:
8581 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8582 break;
8583 case GAUDI_QUEUE_ID_DMA_0_3:
8584 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8585 break;
8586 case GAUDI_QUEUE_ID_DMA_1_0:
8587 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8588 break;
8589 case GAUDI_QUEUE_ID_DMA_1_1:
8590 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8591 break;
8592 case GAUDI_QUEUE_ID_DMA_1_2:
8593 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8594 break;
8595 case GAUDI_QUEUE_ID_DMA_1_3:
8596 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8597 break;
8598 case GAUDI_QUEUE_ID_DMA_5_0:
8599 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8600 break;
8601 case GAUDI_QUEUE_ID_DMA_5_1:
8602 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8603 break;
8604 case GAUDI_QUEUE_ID_DMA_5_2:
8605 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8606 break;
8607 case GAUDI_QUEUE_ID_DMA_5_3:
8608 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8609 break;
8610 case GAUDI_QUEUE_ID_TPC_7_0:
8611 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8612 break;
8613 case GAUDI_QUEUE_ID_TPC_7_1:
8614 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8615 break;
8616 case GAUDI_QUEUE_ID_TPC_7_2:
8617 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8618 break;
8619 case GAUDI_QUEUE_ID_TPC_7_3:
8620 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8621 break;
8622 case GAUDI_QUEUE_ID_NIC_0_0:
8623 case GAUDI_QUEUE_ID_NIC_1_0:
8624 case GAUDI_QUEUE_ID_NIC_2_0:
8625 case GAUDI_QUEUE_ID_NIC_3_0:
8626 case GAUDI_QUEUE_ID_NIC_4_0:
8627 case GAUDI_QUEUE_ID_NIC_5_0:
8628 case GAUDI_QUEUE_ID_NIC_6_0:
8629 case GAUDI_QUEUE_ID_NIC_7_0:
8630 case GAUDI_QUEUE_ID_NIC_8_0:
8631 case GAUDI_QUEUE_ID_NIC_9_0:
8632 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8633 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8634 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8635 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8636 break;
8637 case GAUDI_QUEUE_ID_NIC_0_1:
8638 case GAUDI_QUEUE_ID_NIC_1_1:
8639 case GAUDI_QUEUE_ID_NIC_2_1:
8640 case GAUDI_QUEUE_ID_NIC_3_1:
8641 case GAUDI_QUEUE_ID_NIC_4_1:
8642 case GAUDI_QUEUE_ID_NIC_5_1:
8643 case GAUDI_QUEUE_ID_NIC_6_1:
8644 case GAUDI_QUEUE_ID_NIC_7_1:
8645 case GAUDI_QUEUE_ID_NIC_8_1:
8646 case GAUDI_QUEUE_ID_NIC_9_1:
8647 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8648 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8649 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8650 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8651 break;
8652 case GAUDI_QUEUE_ID_NIC_0_2:
8653 case GAUDI_QUEUE_ID_NIC_1_2:
8654 case GAUDI_QUEUE_ID_NIC_2_2:
8655 case GAUDI_QUEUE_ID_NIC_3_2:
8656 case GAUDI_QUEUE_ID_NIC_4_2:
8657 case GAUDI_QUEUE_ID_NIC_5_2:
8658 case GAUDI_QUEUE_ID_NIC_6_2:
8659 case GAUDI_QUEUE_ID_NIC_7_2:
8660 case GAUDI_QUEUE_ID_NIC_8_2:
8661 case GAUDI_QUEUE_ID_NIC_9_2:
8662 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8663 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8664 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8665 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8666 break;
8667 case GAUDI_QUEUE_ID_NIC_0_3:
8668 case GAUDI_QUEUE_ID_NIC_1_3:
8669 case GAUDI_QUEUE_ID_NIC_2_3:
8670 case GAUDI_QUEUE_ID_NIC_3_3:
8671 case GAUDI_QUEUE_ID_NIC_4_3:
8672 case GAUDI_QUEUE_ID_NIC_5_3:
8673 case GAUDI_QUEUE_ID_NIC_6_3:
8674 case GAUDI_QUEUE_ID_NIC_7_3:
8675 case GAUDI_QUEUE_ID_NIC_8_3:
8676 case GAUDI_QUEUE_ID_NIC_9_3:
8677 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8678 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8679 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8680 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8681 break;
8682 default:
8683 return -EINVAL;
8684 }
8685
8686 *addr = CFG_BASE + offset;
8687
8688 return 0;
8689 }
8690
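/*
 * Write the three monitor-setup MSG_SHORT packets into @buf: the low and
 * high 32 bits of @fence_addr as the monitor payload address, and 1 as the
 * payload data to write when the monitor fires. Returns the total size.
 */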
8691 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8692 {
8693 u64 monitor_base;
8694 u32 size = 0;
8695 u16 msg_addr_offset;
8696
8697 /*
8698 * monitor_base should be the content of the base0 address registers,
8699 * so it will be added to the msg short offsets
8700 */
8701 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8702
8703 /* First monitor config packet: low address of the sync */
8704 msg_addr_offset =
8705 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8706 monitor_base;
8707
8708 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8709 msg_addr_offset);
8710
8711 /* Second monitor config packet: high address of the sync */
8712 msg_addr_offset =
8713 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8714 monitor_base;
8715
8716 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8717 msg_addr_offset);
8718
8719 /*
8720 * Third monitor config packet: the payload, i.e. what to write when the
8721 * sync triggers
8722 */
8723 msg_addr_offset =
8724 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8725 monitor_base;
8726
8727 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8728
8729 return size;
8730 }
8731
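/*
 * Assemble a complete wait CB at offset prop->size: three monitor-setup
 * packets, an arm-monitor packet and a FENCE packet. Returns the updated CB
 * size, or 0 if no fence address exists for the queue.
 */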
8732 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8733 struct hl_gen_wait_properties *prop)
8734 {
8735 struct hl_cb *cb = (struct hl_cb *) prop->data;
8736 void *buf = cb->kernel_address;
8737 u64 fence_addr = 0;
8738 u32 size = prop->size;
8739
8740 if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8741 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8742 prop->q_idx);
8743 return 0;
8744 }
8745
8746 size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8747 size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8748 prop->sob_mask, prop->sob_val, prop->mon_id);
8749 size += gaudi_add_fence_pkt(buf + size);
8750
8751 return size;
8752 }
8753
8754 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8755 {
8756 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8757
8758 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8759 hw_sob->sob_id);
8760
8761 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8762 hw_sob->sob_id * 4, 0);
8763
8764 kref_init(&hw_sob->kref);
8765 }
8766
8767 static u64 gaudi_get_device_time(struct hl_device *hdev)
8768 {
8769 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8770
8771 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8772 }
8773
8774 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8775 u32 *block_size, u32 *block_id)
8776 {
8777 return -EPERM;
8778 }
8779
8780 static int gaudi_block_mmap(struct hl_device *hdev,
8781 struct vm_area_struct *vma,
8782 u32 block_id, u32 block_size)
8783 {
8784 return -EPERM;
8785 }
8786
8787 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8788 {
8789 struct cpu_dyn_regs *dyn_regs =
8790 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8791 u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8792 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8793 le32_to_cpu(dyn_regs->gic_host_ints_irq);
8794
8795 WREG32(irq_handler_offset,
8796 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8797 }
8798
8799 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8800 {
8801 return -EINVAL;
8802 }
8803
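/* Translate a generic HL_GAUDI_*_PLL index into the firmware PLL index */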
8804 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8805 {
8806 switch (pll_idx) {
8807 case HL_GAUDI_CPU_PLL: return CPU_PLL;
8808 case HL_GAUDI_PCI_PLL: return PCI_PLL;
8809 case HL_GAUDI_NIC_PLL: return NIC_PLL;
8810 case HL_GAUDI_DMA_PLL: return DMA_PLL;
8811 case HL_GAUDI_MESH_PLL: return MESH_PLL;
8812 case HL_GAUDI_MME_PLL: return MME_PLL;
8813 case HL_GAUDI_TPC_PLL: return TPC_PLL;
8814 case HL_GAUDI_IF_PLL: return IF_PLL;
8815 case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8816 case HL_GAUDI_HBM_PLL: return HBM_PLL;
8817 default: return -EINVAL;
8818 }
8819 }
8820
8821 static int gaudi_add_sync_to_engine_map_entry(
8822 struct hl_sync_to_engine_map *map, u32 reg_value,
8823 enum hl_sync_engine_type engine_type, u32 engine_id)
8824 {
8825 struct hl_sync_to_engine_map_entry *entry;
8826
8827 	/* Reg value represents a partial address of the sync object;
8828 	 * it is used as a unique identifier, so we need to clear the
8829 	 * cfg base bits from the value.
8830 */
8831 if (reg_value == 0 || reg_value == 0xffffffff)
8832 return 0;
8833 reg_value -= lower_32_bits(CFG_BASE);
8834
8835 /* create a new hash entry */
8836 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8837 if (!entry)
8838 return -ENOMEM;
8839 entry->engine_type = engine_type;
8840 entry->engine_id = engine_id;
8841 entry->sync_id = reg_value;
8842 hash_add(map->tb, &entry->node, reg_value);
8843
8844 return 0;
8845 }
8846
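/*
 * Build the sync-object-to-engine map used by the state dump: read the sync
 * object configuration register of every TPC, MME and DMA engine and record
 * which engine each sync object belongs to.
 */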
8847 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8848 struct hl_sync_to_engine_map *map)
8849 {
8850 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8851 int i, j, rc;
8852 u32 reg_value;
8853
8854 /* Iterate over TPC engines */
8855 for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8856
8857 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8858 sds->props[SP_NEXT_TPC] * i);
8859
8860 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8861 ENGINE_TPC, i);
8862 if (rc)
8863 goto free_sync_to_engine_map;
8864 }
8865
8866 /* Iterate over MME engines */
8867 for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8868 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8869
8870 reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8871 sds->props[SP_NEXT_MME] * i +
8872 j * sizeof(u32));
8873
8874 rc = gaudi_add_sync_to_engine_map_entry(
8875 map, reg_value, ENGINE_MME,
8876 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8877 if (rc)
8878 goto free_sync_to_engine_map;
8879 }
8880 }
8881
8882 /* Iterate over DMA engines */
8883 for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8884 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8885 sds->props[SP_DMA_QUEUES_OFFSET] * i);
8886 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8887 ENGINE_DMA, i);
8888 if (rc)
8889 goto free_sync_to_engine_map;
8890 }
8891
8892 return 0;
8893
8894 free_sync_to_engine_map:
8895 hl_state_dump_free_sync_to_engine_map(map);
8896
8897 return rc;
8898 }
8899
8900 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8901 {
8902 return FIELD_GET(
8903 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8904 mon->status);
8905 }
8906
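/*
 * Format into @sobs a comma-separated list of the sync object IDs the
 * monitor is armed on, derived from its group ID and arm mask.
 */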
8907 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8908 {
8909 const size_t max_write = 10;
8910 u32 gid, mask, sob;
8911 int i, offset;
8912
8913 /* Sync object ID is calculated as follows:
8914 * (8 * group_id + cleared bits in mask)
8915 */
8916 gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8917 mon->arm_data);
8918 mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8919 mon->arm_data);
8920
8921 for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8922 max_write; mask >>= 1, i++) {
8923 if (!(mask & 1)) {
8924 sob = gid * MONITOR_MAX_SOBS + i;
8925
8926 if (offset > 0)
8927 offset += snprintf(sobs + offset, max_write,
8928 ", ");
8929
8930 offset += snprintf(sobs + offset, max_write, "%u", sob);
8931 }
8932 }
8933 }
8934
8935 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8936 struct hl_device *hdev,
8937 struct hl_mon_state_dump *mon)
8938 {
8939 const char *name;
8940 char scratch_buf1[BIN_REG_STRING_SIZE],
8941 scratch_buf2[BIN_REG_STRING_SIZE];
8942 char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
8943
8944 name = hl_state_dump_get_monitor_name(hdev, mon);
8945 if (!name)
8946 name = "";
8947
8948 gaudi_fill_sobs_from_mon(monitored_sobs, mon);
8949
8950 return hl_snprintf_resize(
8951 buf, size, offset,
8952 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
8953 mon->id, name,
8954 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8955 mon->arm_data),
8956 hl_format_as_binary(
8957 scratch_buf1, sizeof(scratch_buf1),
8958 FIELD_GET(
8959 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8960 mon->arm_data)),
8961 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
8962 mon->arm_data),
8963 mon->wr_data,
8964 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
8965 hl_format_as_binary(
8966 scratch_buf2, sizeof(scratch_buf2),
8967 FIELD_GET(
8968 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
8969 mon->status)),
8970 monitored_sobs);
8971 }
8972
8973
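/*
 * Dump the fence state of a single engine QMAN: read the CP status and fence
 * counter registers of every stream and, for each stream with a fence in
 * progress, print the counter/RDATA register addresses and current values.
 */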
8974 static int gaudi_print_fences_single_engine(
8975 struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
8976 enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
8977 size_t *size, size_t *offset)
8978 {
8979 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8980 int rc = -ENOMEM, i;
8981 u32 *statuses, *fences;
8982
8983 statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
8984 sizeof(*statuses), GFP_KERNEL);
8985 if (!statuses)
8986 goto out;
8987
8988 fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
8989 sds->props[SP_ENGINE_NUM_OF_QUEUES],
8990 sizeof(*fences), GFP_KERNEL);
8991 if (!fences)
8992 goto free_status;
8993
8994 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
8995 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
8996
8997 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
8998 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
8999 fences[i] = RREG32(base_offset + i * sizeof(u32));
9000
9001 /* The actual print */
9002 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9003 u32 fence_id;
9004 u64 fence_cnt, fence_rdata;
9005 const char *engine_name;
9006
9007 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9008 statuses[i]))
9009 continue;
9010
9011 fence_id =
9012 FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9013 fence_cnt = base_offset + CFG_BASE +
9014 sizeof(u32) *
9015 (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9016 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9017 sds->props[SP_FENCE0_RDATA_OFFSET];
9018 engine_name = hl_sync_engine_to_string(engine_type);
9019
9020 rc = hl_snprintf_resize(
9021 buf, size, offset,
9022 "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9023 engine_name, engine_id,
9024 i, fence_id,
9025 fence_cnt, engine_name, engine_id, fence_id, i,
9026 fence_rdata, engine_name, engine_id, fence_id, i,
9027 fences[fence_id],
9028 statuses[i]);
9029 if (rc)
9030 goto free_fences;
9031 }
9032
9033 rc = 0;
9034
9035 free_fences:
9036 kfree(fences);
9037 free_status:
9038 kfree(statuses);
9039 out:
9040 return rc;
9041 }
9042
9043
9044 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9045 .monitor_valid = gaudi_monitor_valid,
9046 .print_single_monitor = gaudi_print_single_monitor,
9047 .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9048 .print_fences_single_engine = gaudi_print_fences_single_engine,
9049 };
9050
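/*
 * Populate the state-dump descriptor: hash tables translating SOB and
 * monitor IDs to names, the Gaudi property table, the sync-manager name
 * table and the dump callbacks defined above.
 */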
9051 static void gaudi_state_dump_init(struct hl_device *hdev)
9052 {
9053 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9054 int i;
9055
9056 for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9057 hash_add(sds->so_id_to_str_tb,
9058 &gaudi_so_id_to_str[i].node,
9059 gaudi_so_id_to_str[i].id);
9060
9061 for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9062 hash_add(sds->monitor_id_to_str_tb,
9063 &gaudi_monitor_id_to_str[i].node,
9064 gaudi_monitor_id_to_str[i].id);
9065
9066 sds->props = gaudi_state_dump_specs_props;
9067
9068 sds->sync_namager_names = gaudi_sync_manager_names;
9069
9070 sds->funcs = gaudi_state_dump_funcs;
9071 }
9072
9073 static u32 *gaudi_get_stream_master_qid_arr(void)
9074 {
9075 return gaudi_stream_master;
9076 }
9077
9078 static int gaudi_set_dram_properties(struct hl_device *hdev)
9079 {
9080 return 0;
9081 }
9082
9083 static int gaudi_set_binning_masks(struct hl_device *hdev)
9084 {
9085 return 0;
9086 }
9087
9088 static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
9089 {
9090 }
9091
9092 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9093 {
9094 struct hl_device *hdev = dev_get_drvdata(dev);
9095 struct cpucp_info *cpucp_info;
9096
9097 cpucp_info = &hdev->asic_prop.cpucp_info;
9098
9099 return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9100 }
9101
9102 static DEVICE_ATTR_RO(infineon_ver);
9103
9104 static struct attribute *gaudi_vrm_dev_attrs[] = {
9105 &dev_attr_infineon_ver.attr,
9106 NULL,
9107 };
9108
9109 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9110 struct attribute_group *dev_vrm_attr_grp)
9111 {
9112 hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9113 dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9114 }
9115
9116 static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
9117 {
9118 return 0;
9119 }
9120
9121 static const struct hl_asic_funcs gaudi_funcs = {
9122 .early_init = gaudi_early_init,
9123 .early_fini = gaudi_early_fini,
9124 .late_init = gaudi_late_init,
9125 .late_fini = gaudi_late_fini,
9126 .sw_init = gaudi_sw_init,
9127 .sw_fini = gaudi_sw_fini,
9128 .hw_init = gaudi_hw_init,
9129 .hw_fini = gaudi_hw_fini,
9130 .halt_engines = gaudi_halt_engines,
9131 .suspend = gaudi_suspend,
9132 .resume = gaudi_resume,
9133 .mmap = gaudi_mmap,
9134 .ring_doorbell = gaudi_ring_doorbell,
9135 .pqe_write = gaudi_pqe_write,
9136 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9137 .asic_dma_free_coherent = gaudi_dma_free_coherent,
9138 .scrub_device_mem = gaudi_scrub_device_mem,
9139 .scrub_device_dram = gaudi_scrub_device_dram,
9140 .get_int_queue_base = gaudi_get_int_queue_base,
9141 .test_queues = gaudi_test_queues,
9142 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9143 .asic_dma_pool_free = gaudi_dma_pool_free,
9144 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9145 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9146 .dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
9147 .cs_parser = gaudi_cs_parser,
9148 .dma_map_sgtable = hl_asic_dma_map_sgtable,
9149 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9150 .update_eq_ci = gaudi_update_eq_ci,
9151 .context_switch = gaudi_context_switch,
9152 .restore_phase_topology = gaudi_restore_phase_topology,
9153 .debugfs_read_dma = gaudi_debugfs_read_dma,
9154 .add_device_attr = gaudi_add_device_attr,
9155 .handle_eqe = gaudi_handle_eqe,
9156 .get_events_stat = gaudi_get_events_stat,
9157 .read_pte = gaudi_read_pte,
9158 .write_pte = gaudi_write_pte,
9159 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9160 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9161 .mmu_prefetch_cache_range = NULL,
9162 .send_heartbeat = gaudi_send_heartbeat,
9163 .debug_coresight = gaudi_debug_coresight,
9164 .is_device_idle = gaudi_is_device_idle,
9165 .compute_reset_late_init = gaudi_compute_reset_late_init,
9166 .hw_queues_lock = gaudi_hw_queues_lock,
9167 .hw_queues_unlock = gaudi_hw_queues_unlock,
9168 .get_pci_id = gaudi_get_pci_id,
9169 .get_eeprom_data = gaudi_get_eeprom_data,
9170 .get_monitor_dump = gaudi_get_monitor_dump,
9171 .send_cpu_message = gaudi_send_cpu_message,
9172 .pci_bars_map = gaudi_pci_bars_map,
9173 .init_iatu = gaudi_init_iatu,
9174 .rreg = hl_rreg,
9175 .wreg = hl_wreg,
9176 .halt_coresight = gaudi_halt_coresight,
9177 .ctx_init = gaudi_ctx_init,
9178 .ctx_fini = gaudi_ctx_fini,
9179 .pre_schedule_cs = gaudi_pre_schedule_cs,
9180 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9181 .load_firmware_to_device = gaudi_load_firmware_to_device,
9182 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9183 .get_signal_cb_size = gaudi_get_signal_cb_size,
9184 .get_wait_cb_size = gaudi_get_wait_cb_size,
9185 .gen_signal_cb = gaudi_gen_signal_cb,
9186 .gen_wait_cb = gaudi_gen_wait_cb,
9187 .reset_sob = gaudi_reset_sob,
9188 .reset_sob_group = gaudi_reset_sob_group,
9189 .get_device_time = gaudi_get_device_time,
9190 .pb_print_security_errors = NULL,
9191 .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9192 .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9193 .get_dec_base_addr = NULL,
9194 .scramble_addr = hl_mmu_scramble_addr,
9195 .descramble_addr = hl_mmu_descramble_addr,
9196 .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9197 .get_hw_block_id = gaudi_get_hw_block_id,
9198 .hw_block_mmap = gaudi_block_mmap,
9199 .enable_events_from_fw = gaudi_enable_events_from_fw,
9200 .ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9201 .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9202 .init_firmware_preload_params = gaudi_init_firmware_preload_params,
9203 .init_firmware_loader = gaudi_init_firmware_loader,
9204 .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9205 .state_dump_init = gaudi_state_dump_init,
9206 .get_sob_addr = gaudi_get_sob_addr,
9207 .set_pci_memory_regions = gaudi_set_pci_memory_regions,
9208 .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9209 .check_if_razwi_happened = gaudi_check_if_razwi_happened,
9210 .mmu_get_real_page_size = hl_mmu_get_real_page_size,
9211 .access_dev_mem = hl_access_dev_mem,
9212 .set_dram_bar_base = gaudi_set_hbm_bar_base,
9213 .send_device_activity = gaudi_send_device_activity,
9214 .set_dram_properties = gaudi_set_dram_properties,
9215 .set_binning_masks = gaudi_set_binning_masks,
9216 };
9217
9218 /**
9219 * gaudi_set_asic_funcs - set GAUDI function pointers
9220 *
9221 * @hdev: pointer to hl_device structure
9222 *
9223 */
9224 void gaudi_set_asic_funcs(struct hl_device *hdev)
9225 {
9226 hdev->asic_funcs = &gaudi_funcs;
9227 }
9228