1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright 2016-2022 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24 * Gaudi security scheme:
25 *
26 * 1. Host is protected by:
27 * - Range registers
28 * - MMU
29 *
30 * 2. DDR is protected by:
31 * - Range registers (protect the first 512MB)
32 *
33 * 3. Configuration is protected by:
34 * - Range registers
35 * - Protection bits
36 *
37 * MMU is always enabled.
38 *
 * QMAN DMA channels 0,1 (PCI DMA):
40 * - DMA is not secured.
41 * - PQ and CQ are secured.
42 * - CP is secured: The driver needs to parse CB but WREG should be allowed
43 * because of TDMA (tensor DMA). Hence, WREG is always not
44 * secured.
45 *
46 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47 * channel 0 to be secured, execute the DMA and change it back to not secured.
48 * Currently, the driver doesn't use the DMA while there are compute jobs
49 * running.
50 *
51 * The current use cases for the driver to use the DMA are:
52 * - Clear SRAM on context switch (happens on context switch when device is
53 * idle)
54 * - MMU page tables area clear (happens on init)
55 *
56 * QMAN DMA 2-7, TPC, MME, NIC:
57 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58 * CQ, CP and the engine are not secured
59 *
60 */
61
62 #define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
65
66 MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
67 MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
68 MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);
69
70 #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
71
72 #define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
73 #define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
74 #define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
75 #define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
76
77 #define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
78 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
79 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
80 #define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
81 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
82 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
83 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000 /* 4s */
84 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
85 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */
86
87 #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
88
89 #define GAUDI_MAX_STRING_LEN 20
90
91 #define GAUDI_CB_POOL_CB_CNT 512
92 #define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
93
94 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
95
96 #define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
97
98 #define GAUDI_NUM_OF_QM_ERR_CAUSE 16
99
100 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
101
102 #define GAUDI_ARB_WDT_TIMEOUT 0xEE6b27FF /* 8 seconds */
103
104 #define HBM_SCRUBBING_TIMEOUT_US 1000000 /* 1s */
105
106 #define BIN_REG_STRING_SIZE sizeof("0b10101010101010101010101010101010")
107
108 #define MONITOR_SOB_STRING_SIZE 256
109
/*
 * Stream-master queue IDs: all four streams of the two PCI DMA engines
 * (DMA 0/1), which are the host-managed external queues (see
 * gaudi_queue_type below).
 */
static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
	GAUDI_QUEUE_ID_DMA_0_0,
	GAUDI_QUEUE_ID_DMA_0_1,
	GAUDI_QUEUE_ID_DMA_0_2,
	GAUDI_QUEUE_ID_DMA_0_3,
	GAUDI_QUEUE_ID_DMA_1_0,
	GAUDI_QUEUE_ID_DMA_1_1,
	GAUDI_QUEUE_ID_DMA_1_2,
	GAUDI_QUEUE_ID_DMA_1_3
};
120
/*
 * Logical DMA channel (PCI/HBM role) -> physical DMA engine ID.
 * Engines 0/1 serve host (PCI) transfers, engines 2-7 serve HBM traffic.
 */
static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};
131
/*
 * Completion-queue index -> queue ID. One CQ per stream of the two
 * external (PCI DMA) queues, in order.
 */
static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};
142
/* Size in bytes of each command-buffer packet, indexed by packet ID */
static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};
159
validate_packet_id(enum packet_id id)160 static inline bool validate_packet_id(enum packet_id id)
161 {
162 switch (id) {
163 case PACKET_WREG_32:
164 case PACKET_WREG_BULK:
165 case PACKET_MSG_LONG:
166 case PACKET_MSG_SHORT:
167 case PACKET_CP_DMA:
168 case PACKET_REPEAT:
169 case PACKET_MSG_PROT:
170 case PACKET_FENCE:
171 case PACKET_LIN_DMA:
172 case PACKET_NOP:
173 case PACKET_STOP:
174 case PACKET_ARB_POINT:
175 case PACKET_WAIT:
176 case PACKET_LOAD_AND_EXE:
177 return true;
178 default:
179 return false;
180 }
181 }
182
/* Human-readable cause strings for TPC interrupts, indexed by cause bit */
static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};
206
/* Human-readable cause strings for QMAN errors, indexed by cause bit */
static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};
226
/* Human-readable cause strings for QMAN arbiter errors, indexed by cause bit */
static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};
233
/*
 * Queue type per Gaudi queue ID. Only the two PCI DMA QMANs (DMA 0/1) are
 * external (host-managed) queues; the CPU PQ is driver-only; all other
 * engine queues (HBM DMA, MME, TPC, NIC) are internal queues.
 */
static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};
349
/*
 * Names of statically-assigned sync objects, for pretty-printing
 * (presumably consumed by the state-dump code - id/name pairs only).
 */
static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
};
379
/* Names of statically-assigned monitors, for pretty-printing (id/name pairs) */
static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};
393
/*
 * State-dump specification properties, indexed by the SP_* enum: register
 * base addresses, per-block strides (computed as register-address deltas)
 * and object counts used when dumping sync-manager/engine state.
 */
static s64 gaudi_state_dump_specs_props[] = {
	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
	[SP_MON_OBJ_WR_ADDR_LOW] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
	[SP_MON_OBJ_WR_ADDR_HIGH] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
	[SP_FENCE0_CNT_OFFSET] =
		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_FENCE0_RDATA_OFFSET] =
		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_CORES] = 1,
};
429
/*
 * Queue ID -> engine ID. The CPU PQ has no engine, so it maps to the
 * GAUDI_ENGINE_ID_SIZE sentinel. Note the MME_1 queues map to engine
 * MME_2 - presumably because only even-numbered MMEs expose QMANs
 * (master MMEs) - TODO confirm against the h/w spec.
 */
static const int gaudi_queue_id_to_engine_id[] = {
	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
};
461
/* The order here is opposite to the order of the indexing in the h/w.
 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
 * NULL-terminated so iterators can stop without a count.
 */
static const char * const gaudi_sync_manager_names[] = {
	"SYNC_MGR_E_N",
	"SYNC_MGR_W_N",
	"SYNC_MGR_E_S",
	"SYNC_MGR_W_S",
	NULL
};
472
/* Parameters for extracting ECC error information from a h/w block */
struct ecc_info_extract_params {
	u64 block_address;	/* base address of the block to query */
	u32 num_memories;	/* number of memories inside the block */
	bool derr;		/* true for double-bit (uncorrectable) errors -
				 * presumably, vs. single-bit; confirm at users
				 */
};
478
/* Forward declarations for static helpers defined later in this file */
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
					u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);
497 static inline enum hl_collective_mode
get_collective_mode(struct hl_device * hdev,u32 queue_id)498 get_collective_mode(struct hl_device *hdev, u32 queue_id)
499 {
500 if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
501 return HL_COLLECTIVE_MASTER;
502
503 if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
504 queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
505 return HL_COLLECTIVE_SLAVE;
506
507 if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
508 queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
509 return HL_COLLECTIVE_SLAVE;
510
511 if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
512 queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
513 return HL_COLLECTIVE_SLAVE;
514
515 return HL_COLLECTIVE_NOT_SUPPORTED;
516 }
517
set_default_power_values(struct hl_device * hdev)518 static inline void set_default_power_values(struct hl_device *hdev)
519 {
520 struct asic_fixed_properties *prop = &hdev->asic_prop;
521
522 if (hdev->card_type == cpucp_card_type_pmc) {
523 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
524
525 if (prop->fw_security_enabled)
526 prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
527 else
528 prop->dc_power_default = DC_POWER_DEFAULT_PMC;
529 } else {
530 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
531 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
532 }
533 }
534
/**
 * gaudi_set_fixed_properties - initialize the fixed ASIC properties
 * @hdev: pointer to hl_device structure
 *
 * Fills hdev->asic_prop with Gaudi's compile-time constants: the per-queue
 * property table, address ranges (host/DRAM/SRAM), MMU layout, power
 * defaults, etc. Allocates prop->hw_queues_props; ownership stays with
 * hdev->asic_prop (callers free it on their error paths).
 *
 * Return: 0 on success, -ENOMEM if the queue-properties array allocation
 * fails.
 */
static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	/* Derive each queue's properties from the static queue-type table.
	 * Only external queues support sync streams and are counted for the
	 * first-available SOB/monitor computation below.
	 */
	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;

		}
		prop->hw_queues_props[i].collective_mode =
				get_collective_mode(hdev, i);
	}

	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
	prop->host_base_address = HOST_PHYS_BASE;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->completion_mode = HL_COMPLETION_MODE_JOB;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream are reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address =
			prop->sram_base_address + SRAM_USER_BASE_OFFSET;

	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	/* Simulation platform (PLDM) uses a smaller page-tables area */
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
	prop->dram_supports_virtual_memory = false;

	/* MMU v1.1 5-hop layout for the process MMU (PMMU) */
	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
	/* PMMU covers the lower half of the host VA space */
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
	prop->pmmu.last_mask = LAST_MASK;
	/* TODO: will be duplicated until implementing per-MMU props */
	prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;

	/* PMMU and HPMMU are the same except of page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	/* DMMU covers the upper half of the host VA space */
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;
	prop->dmmu.pgt_size = prop->mmu_pgt_size;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	set_default_power_values(hdev);

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	/* User SOBs/monitors start right after the sync-stream reservations */
	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_interrupt = USHRT_MAX;
	prop->tpc_interrupt_id = USHRT_MAX;

	/* single msi */
	prop->eq_interrupt_id = 0;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	prop->server_type = HL_SERVER_TYPE_UNKNOWN;

	prop->clk_pll_index = HL_GAUDI_MME_PLL;
	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;

	prop->use_get_power_for_reset_history = true;

	prop->configurable_stop_on_err = true;

	prop->set_max_power_on_device_init = true;

	prop->dma_mask = 48;

	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;

	return 0;
}
709
gaudi_pci_bars_map(struct hl_device * hdev)710 static int gaudi_pci_bars_map(struct hl_device *hdev)
711 {
712 static const char * const name[] = {"SRAM", "CFG", "HBM"};
713 bool is_wc[3] = {false, false, true};
714 int rc;
715
716 rc = hl_pci_bars_map(hdev, name, is_wc);
717 if (rc)
718 return rc;
719
720 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
721 (CFG_BASE - SPI_FLASH_BASE_ADDR);
722
723 return 0;
724 }
725
/*
 * gaudi_set_hbm_bar_base - repoint the HBM BAR at a new device address.
 *
 * Returns the previous BAR base so the caller can restore it, or U64_MAX
 * when the iATU is owned by the F/W or the region update fails. If the
 * BAR already points at @addr, returns @addr without touching the h/w.
 */
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;

	/* Nothing to do if the BAR already points at the requested address */
	if (gaudi && gaudi->hbm_bar_cur_addr == addr)
		return old_addr;

	/* When F/W configured the iATU, the driver must not touch it */
	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	if (hl_pci_set_inbound_region(hdev, 2, &pci_region))
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}
754
gaudi_init_iatu(struct hl_device * hdev)755 static int gaudi_init_iatu(struct hl_device *hdev)
756 {
757 struct hl_inbound_pci_region inbound_region;
758 struct hl_outbound_pci_region outbound_region;
759 int rc;
760
761 if (hdev->asic_prop.iatu_done_by_fw)
762 return 0;
763
764 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
765 inbound_region.mode = PCI_BAR_MATCH_MODE;
766 inbound_region.bar = SRAM_BAR_ID;
767 inbound_region.addr = SRAM_BASE_ADDR;
768 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
769 if (rc)
770 goto done;
771
772 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
773 inbound_region.mode = PCI_BAR_MATCH_MODE;
774 inbound_region.bar = CFG_BAR_ID;
775 inbound_region.addr = SPI_FLASH_BASE_ADDR;
776 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
777 if (rc)
778 goto done;
779
780 /* Inbound Region 2 - Bar 4 - Point to HBM */
781 inbound_region.mode = PCI_BAR_MATCH_MODE;
782 inbound_region.bar = HBM_BAR_ID;
783 inbound_region.addr = DRAM_PHYS_BASE;
784 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
785 if (rc)
786 goto done;
787
788 /* Outbound Region 0 - Point to Host */
789 outbound_region.addr = HOST_PHYS_BASE;
790 outbound_region.size = HOST_PHYS_SIZE;
791 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
792
793 done:
794 return rc;
795 }
796
gaudi_get_hw_state(struct hl_device * hdev)797 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
798 {
799 return RREG32(mmHW_STATE);
800 }
801
/**
 * gaudi_early_init() - early ASIC initialization.
 * @hdev: pointer to hl_device structure.
 *
 * Sets the fixed ASIC properties, validates the PCI BAR sizes, determines
 * whether the firmware configures the iATU, initializes PCI access and reads
 * the preboot status. If the H/W is found dirty, a reset is performed before
 * continuing.
 *
 * Return: 0 on success, negative errno on failure.
 */
static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	resource_size_t pci_bar_size;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);

	if (pci_bar_size != SRAM_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);

	if (pci_bar_size != CFG_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
		 * decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			/* we are already on failure flow, so don't check if hw_fini fails. */
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
		if (rc) {
			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
			goto pci_fini;
		}
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}
893
/* Undo gaudi_early_init(): free queue properties and tear down PCI access */
static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}
901
902 /**
903 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
904 *
905 * @hdev: pointer to hl_device structure
906 *
907 */
gaudi_fetch_psoc_frequency(struct hl_device * hdev)908 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
909 {
910 u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
911 struct asic_fixed_properties *prop = &hdev->asic_prop;
912 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
913 int rc;
914
915 if ((hdev->fw_components & FW_TYPE_LINUX) &&
916 (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
917 struct gaudi_device *gaudi = hdev->asic_specific;
918
919 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
920 return 0;
921
922 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
923
924 if (rc)
925 return rc;
926
927 freq = pll_freq_arr[2];
928 } else {
929 /* Backward compatibility */
930 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
931 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
932 nr = RREG32(mmPSOC_CPU_PLL_NR);
933 nf = RREG32(mmPSOC_CPU_PLL_NF);
934 od = RREG32(mmPSOC_CPU_PLL_OD);
935
936 if (div_sel == DIV_SEL_REF_CLK ||
937 div_sel == DIV_SEL_DIVIDED_REF) {
938 if (div_sel == DIV_SEL_REF_CLK)
939 freq = PLL_REF_CLK;
940 else
941 freq = PLL_REF_CLK / (div_fctr + 1);
942 } else if (div_sel == DIV_SEL_PLL_CLK ||
943 div_sel == DIV_SEL_DIVIDED_PLL) {
944 pll_clk = PLL_REF_CLK * (nf + 1) /
945 ((nr + 1) * (od + 1));
946 if (div_sel == DIV_SEL_PLL_CLK)
947 freq = pll_clk;
948 else
949 freq = pll_clk / (div_fctr + 1);
950 } else {
951 dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
952 freq = 0;
953 }
954 }
955
956 prop->psoc_timestamp_frequency = freq;
957 prop->psoc_pci_pll_nr = nr;
958 prop->psoc_pci_pll_nf = nf;
959 prop->psoc_pci_pll_od = od;
960 prop->psoc_pci_pll_div_factor = div_fctr;
961
962 return 0;
963 }
964
/**
 * _gaudi_init_tpc_mem() - DMA the TPC kernel to SRAM and run it on all TPCs.
 * @hdev: pointer to hl_device structure.
 * @tpc_kernel_src_addr: host DMA address of the TPC kernel image.
 * @tpc_kernel_size: size of the TPC kernel image in bytes.
 *
 * Builds a LIN_DMA packet in a kernel CB that copies the TPC kernel from the
 * host to the SRAM user area, submits it on QMAN0, and then executes the
 * kernel on every TPC engine.
 *
 * Return: 0 on success, negative errno on failure.
 */
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	/* LIN_DMA packet with register/memory barrier bits set */
	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);

	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
				round_up(prop->sram_user_base_address, SZ_8K));
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	/* leave room for the completion MSG_PROT packet */
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	/* Kernel is now in SRAM - run it on each TPC engine */
	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}
1041
1042 /*
1043 * gaudi_init_tpc_mem() - Initialize TPC memories.
1044 * @hdev: Pointer to hl_device structure.
1045 *
1046 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1047 *
1048 * Return: 0 for success, negative value for error.
1049 */
gaudi_init_tpc_mem(struct hl_device * hdev)1050 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1051 {
1052 const struct firmware *fw;
1053 size_t fw_size;
1054 void *cpu_addr;
1055 dma_addr_t dma_handle;
1056 int rc, count = 5;
1057
1058 again:
1059 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1060 if (rc == -EINTR && count-- > 0) {
1061 msleep(50);
1062 goto again;
1063 }
1064
1065 if (rc) {
1066 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1067 GAUDI_TPC_FW_FILE);
1068 goto out;
1069 }
1070
1071 fw_size = fw->size;
1072 cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1073 if (!cpu_addr) {
1074 dev_err(hdev->dev,
1075 "Failed to allocate %zu of dma memory for TPC kernel\n",
1076 fw_size);
1077 rc = -ENOMEM;
1078 goto out;
1079 }
1080
1081 memcpy(cpu_addr, fw->data, fw_size);
1082
1083 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1084
1085 hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1086
1087 out:
1088 release_firmware(fw);
1089 return rc;
1090 }
1091
/*
 * Assign the SOBs of the stream's current SOB group to the collective
 * slave queues: one SOB per NIC engine, plus a shared one for the
 * reduction engine (DMA5/TPC7).
 */
static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
	u32 nic, base_sob, group_idx, qid;
	struct hl_hw_queue *queue;

	/* Locate the SOB group currently serving this stream */
	group_idx = stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
	base_sob = cprop->hw_sob_group[group_idx].base_sob_id;

	/* Hand out one SOB from the group to each NIC slave queue */
	qid = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (nic = 0 ; nic < NIC_NUMBER_OF_ENGINES ; nic++) {
		queue = &hdev->kernel_queues[qid + (4 * nic)];
		queue->sync_stream_prop.collective_sob_id = base_sob + nic;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine need to participate in the reduction process
	 */
	queue = &hdev->kernel_queues[GAUDI_QUEUE_ID_DMA_5_0 + stream];
	queue->sync_stream_prop.collective_sob_id =
			base_sob + NIC_NUMBER_OF_ENGINES;

	queue = &hdev->kernel_queues[GAUDI_QUEUE_ID_TPC_7_0 + stream];
	queue->sync_stream_prop.collective_sob_id =
			base_sob + NIC_NUMBER_OF_ENGINES;
}
1123
gaudi_sob_group_hw_reset(struct kref * ref)1124 static void gaudi_sob_group_hw_reset(struct kref *ref)
1125 {
1126 struct gaudi_hw_sob_group *hw_sob_group =
1127 container_of(ref, struct gaudi_hw_sob_group, kref);
1128 struct hl_device *hdev = hw_sob_group->hdev;
1129 int i;
1130
1131 for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1132 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1133 (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1134
1135 kref_init(&hw_sob_group->kref);
1136 }
1137
gaudi_sob_group_reset_error(struct kref * ref)1138 static void gaudi_sob_group_reset_error(struct kref *ref)
1139 {
1140 struct gaudi_hw_sob_group *hw_sob_group =
1141 container_of(ref, struct gaudi_hw_sob_group, kref);
1142 struct hl_device *hdev = hw_sob_group->hdev;
1143
1144 dev_crit(hdev->dev,
1145 "SOB release shouldn't be called here, base_sob_id: %d\n",
1146 hw_sob_group->base_sob_id);
1147 }
1148
/*
 * gaudi_collective_mstr_sob_mask_set() - build the master monitor SOB masks.
 * @gaudi: ASIC-specific device data.
 *
 * Sets one mask bit per enabled NIC engine, plus one extra bit for the
 * reduction (collective) engine right after the NIC bits.
 */
static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *prop;
	int i;

	prop = &gaudi->collective_props;

	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

	/* One bit per enabled NIC port */
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
					BIT(i % HL_MAX_SOBS_PER_MONITOR);
	/* Set collective engine bit - note this deliberately relies on the
	 * loop-exit value of i (== NIC_NUMBER_OF_ENGINES) to address the
	 * reduction engine's slot right after the NIC bits
	 */
	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
			BIT(i % HL_MAX_SOBS_PER_MONITOR);
}
1166
gaudi_collective_init(struct hl_device * hdev)1167 static int gaudi_collective_init(struct hl_device *hdev)
1168 {
1169 u32 i, sob_id, reserved_sobs_per_group;
1170 struct gaudi_collective_properties *prop;
1171 struct gaudi_device *gaudi;
1172
1173 gaudi = hdev->asic_specific;
1174 prop = &gaudi->collective_props;
1175 sob_id = hdev->asic_prop.collective_first_sob;
1176
1177 /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1178 reserved_sobs_per_group =
1179 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1180
1181 /* Init SOB groups */
1182 for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1183 prop->hw_sob_group[i].hdev = hdev;
1184 prop->hw_sob_group[i].base_sob_id = sob_id;
1185 sob_id += reserved_sobs_per_group;
1186 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1187 }
1188
1189 for (i = 0 ; i < QMAN_STREAMS; i++) {
1190 prop->next_sob_group_val[i] = 1;
1191 prop->curr_sob_group_idx[i] = 0;
1192 gaudi_collective_map_sobs(hdev, i);
1193 }
1194
1195 gaudi_collective_mstr_sob_mask_set(gaudi);
1196
1197 return 0;
1198 }
1199
/* Drop a reference on a SOB group; the last put resets the group's SOBs */
static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	kref_put(&gaudi->collective_props.hw_sob_group[sob_group].kref,
			gaudi_sob_group_hw_reset);
}
1208
/**
 * gaudi_collective_master_init_job() - fill the master queue's wait CB.
 * @hdev: pointer to hl_device structure.
 * @job: the collective master job whose patched CB is filled.
 * @stream: QMAN stream index.
 * @sob_group_offset: index of the SOB group used by this collective wait.
 *
 * Generates two wait commands in the master CB - one per master monitor -
 * covering the two halves of the slave SOB range. The wait_prop struct is
 * reused between the two; only the fields that differ are rewritten.
 */
static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	/* Remember which queue owns this SOB group for later reset */
	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	/* First wait: monitor 0 over the first SOB mask word */
	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	/* Second wait: monitor 1 over the next SOB range; all other
	 * wait_prop fields are intentionally kept from the first wait
	 */
	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}
1259
/**
 * gaudi_collective_slave_init_job() - fill a slave queue's CB.
 * @hdev: pointer to hl_device structure.
 * @job: the collective slave job whose user CB is filled.
 * @cs_cmpl: completion object carrying the signal SOB id/value to wait on.
 *
 * Generates a wait command on the signal SOB (via the slave monitor)
 * followed by a signal command on the slave's own collective SOB, which
 * the master queue monitors.
 */
static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	if (job->cs->encaps_signals) {
		/* use the encaps signal handle store earlier in the flow
		 * and set the SOB information from the encaps
		 * signals handle
		 */
		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
						cs_cmpl);

		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
				job->cs->sequence,
				cs_cmpl->hw_sob->sob_id,
				cs_cmpl->sob_val);
	}

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	/* Signal the slave's collective SOB so the master's monitors fire */
	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}
1307
/**
 * gaudi_collective_wait_init_cs() - initialize a collective-wait CS.
 * @cs: the command submission to initialize.
 *
 * Copies the signal SOB info into the wait CS completion (or takes it from
 * the encapsulated signals handle), verifies the signal CS has not already
 * completed, fills the master/slave CBs for all jobs and advances the
 * per-stream SOB group bookkeeping (including wraparound handling).
 *
 * Return: 0 on success, -EINVAL if the signal CS already completed.
 */
static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	if (cs->encaps_signals) {
		cs_cmpl->hw_sob = handle->hw_sob;
		/* at this checkpoint we only need the hw_sob pointer
		 * for the completion check before start going over the jobs
		 * of the master/slaves, the sob_value will be taken later on
		 * in gaudi_collective_slave_init_job depends on each
		 * job wait offset value.
		 */
		cs_cmpl->sob_val = 0;
	} else {
		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/* check again if the signal cs already completed.
	 * if yes then don't send any wait cs since the hw_sob
	 * could be in reset already. if signal is not completed
	 * then get refcount to hw_sob to prevent resetting the sob
	 * while wait cs is not submitted.
	 * note that this check is protected by two locks,
	 * hw queue lock and completion object lock,
	 * and the same completion object lock also protects
	 * the hw_sob reset handler function.
	 * The hw_queue lock prevent out of sync of hw_sob
	 * refcount value, changed by signal/wait flows.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		return -EINVAL;
	}
	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	/* Fill each job's CB per its role (one master, rest are slaves) */
	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}
1414
/*
 * Return the number of extra bytes a patched CB needs beyond the user CB:
 * two MSG_PROT packets, plus cache-line padding when the packets would
 * otherwise straddle the cache-line boundary.
 */
static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
{
	u32 extra = sizeof(struct packet_msg_prot) * 2;
	u32 aligned_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);

	/* Pad up to the cache-line boundary before appending the packets
	 * when they would cross it
	 */
	if (user_cb_size + extra > aligned_end)
		return (aligned_end - user_cb_size) + extra;

	return extra;
}
1427
/**
 * gaudi_collective_wait_create_job() - create one job of a collective wait CS.
 * @hdev: pointer to hl_device structure.
 * @ctx: submitting context (for its CS drop counters).
 * @cs: the collective-wait command submission.
 * @mode: HL_COLLECTIVE_MASTER or HL_COLLECTIVE_SLAVE.
 * @queue_id: H/W queue the job will run on.
 * @wait_queue_id: the collective master wait queue id (unused here).
 * @encaps_signal_offset: wait offset for encapsulated signals, if used.
 *
 * Allocates a job and a kernel CB sized per the role (master CBs are
 * "patched", slave CBs are internal mapped CBs), wires it into the CS job
 * list and updates the per-queue job counters.
 *
 * Return: 0 on success, -ENOMEM/-EFAULT on allocation failure.
 */
static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
		u32 encaps_signal_offset)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/* since its guaranteed to have only one chunk in the collective wait
	 * cs, we can use this chunk to set the encapsulated signal offset
	 * in the jobs.
	 */
	if (cs->encaps_signals)
		job->encaps_sig_wait_offset = encaps_signal_offset;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() out of two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}
1523
/**
 * gaudi_collective_wait_create_jobs() - create all jobs of a collective wait.
 * @hdev: pointer to hl_device structure.
 * @ctx: submitting context.
 * @cs: the collective-wait command submission.
 * @wait_queue_id: queue id of the collective master.
 * @collective_engine_id: reduction engine (must be DMA_5 or TPC_7).
 * @encaps_signal_offset: wait offset for encapsulated signals, if used.
 *
 * Creates one master job on the wait queue, then one slave job per enabled
 * NIC engine and one on the reduction engine, all on the same stream.
 *
 * Return: 0 on success, -EINVAL on bad queue/engine, or a job creation error.
 */
static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		u32 wait_queue_id, u32 collective_engine_id,
		u32 encaps_signal_offset)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	/* All jobs run on the same stream as the master queue */
	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	/* master + one slave per SOB in the group (NICs + reduction engine) */
	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * Rest of the jobs goes to the collective slave queues which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id,
				wait_queue_id, encaps_signal_offset);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				/* Skip disabled NIC ports but still advance
				 * the queue/index bookkeeping
				 */
				if (gaudi->hw_cap_initialized &
						BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id,
				wait_queue_id, encaps_signal_offset);
		}

		if (rc)
			return rc;
	}

	return rc;
}
1608
/**
 * gaudi_late_init() - late ASIC initialization (after CPU queues are up).
 * @hdev: pointer to hl_device structure.
 *
 * Fetches CPUCP info, disables unused NIC QMANs on PCI cards, enables PCI
 * access from the device CPU, scrubs memory, fetches the PSOC frequency,
 * clears the MMU page-table range, initializes TPC memories and the
 * collective infrastructure, and prepares the MMU for the single user ASID.
 *
 * Return: 0 on success, negative errno on failure (PCI access from the
 * device CPU is disabled again on the error path).
 */
static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	/* Scrub both SRAM and DRAM */
	rc = hdev->asic_funcs->scrub_device_mem(hdev);
	if (rc)
		goto disable_pci_access;

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	/* We only support a single ASID for the user, so for the sake of optimization, just
	 * initialize the ASID one time during device initialization with the fixed value of 1
	 */
	gaudi_mmu_prepare(hdev, 1);

	hl_fw_set_pll_profile(hdev);

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);

	return rc;
}
1690
/* Undo gaudi_late_init(): release hwmon resources */
static void gaudi_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}
1695
/**
 * gaudi_alloc_cpu_accessible_dma_mem() - allocate the CPU-accessible DMA pool.
 * @hdev: pointer to hl_device structure.
 *
 * Allocates a coherent host buffer whose whole range shares the same MSB
 * address bits, retrying up to GAUDI_ALLOC_CPU_MEM_RETRY_CNT times (the
 * device CPU address extension register can only hold one set of MSBs).
 * Failed candidate allocations are freed on exit.
 *
 * Return: 0 on success, -ENOMEM/-EFAULT on failure.
 */
static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
	 * The device CPU works with 40-bits addresses, while bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical in all allocated range.
	 */

	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
								&dma_addr_arr[i],
								GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		/* Accept the allocation only if start and end share MSBs */
		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory are not identical in all range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
	hdev->cpu_pci_msb_addr =
		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);

	/* On unsecured FW the driver itself converts to the CPU's view */
	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);

free_dma_mem_arr:
	/* Free every rejected candidate (index i, if valid, is kept) */
	for (j = 0 ; j < i ; j++)
		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}
1748
gaudi_free_internal_qmans_pq_mem(struct hl_device * hdev)1749 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1750 {
1751 struct gaudi_device *gaudi = hdev->asic_specific;
1752 struct gaudi_internal_qman_info *q;
1753 u32 i;
1754
1755 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1756 q = &gaudi->internal_qmans[i];
1757 if (!q->pq_kernel_addr)
1758 continue;
1759 hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1760 }
1761 }
1762
/**
 * gaudi_alloc_internal_qmans_pq_mem() - allocate PQ buffers for internal QMANs.
 * @hdev: pointer to hl_device structure.
 *
 * Allocates a coherent PQ buffer for every internal (non-external) queue,
 * sized per engine family (DMA/MME/TPC/NIC). On any failure all buffers
 * allocated so far are freed.
 *
 * Return: 0 on success, -EINVAL on an unexpected internal queue index,
 * -ENOMEM on allocation failure.
 */
static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	int rc, i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		/* Only internal queues keep their PQ in host memory */
		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
			continue;

		q = &gaudi->internal_qmans[i];

		switch (i) {
		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
			break;
		default:
			dev_err(hdev->dev, "Bad internal queue index %d", i);
			rc = -EINVAL;
			goto free_internal_qmans_pq_mem;
		}

		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
								GFP_KERNEL | __GFP_ZERO);
		if (!q->pq_kernel_addr) {
			rc = -ENOMEM;
			goto free_internal_qmans_pq_mem;
		}
	}

	return 0;

free_internal_qmans_pq_mem:
	gaudi_free_internal_qmans_pq_mem(hdev);
	return rc;
}
1808
gaudi_set_pci_memory_regions(struct hl_device * hdev)1809 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1810 {
1811 struct asic_fixed_properties *prop = &hdev->asic_prop;
1812 struct pci_mem_region *region;
1813
1814 /* CFG */
1815 region = &hdev->pci_mem_region[PCI_REGION_CFG];
1816 region->region_base = CFG_BASE;
1817 region->region_size = CFG_SIZE;
1818 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1819 region->bar_size = CFG_BAR_SIZE;
1820 region->bar_id = CFG_BAR_ID;
1821 region->used = 1;
1822
1823 /* SRAM */
1824 region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1825 region->region_base = SRAM_BASE_ADDR;
1826 region->region_size = SRAM_SIZE;
1827 region->offset_in_bar = 0;
1828 region->bar_size = SRAM_BAR_SIZE;
1829 region->bar_id = SRAM_BAR_ID;
1830 region->used = 1;
1831
1832 /* DRAM */
1833 region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1834 region->region_base = DRAM_PHYS_BASE;
1835 region->region_size = hdev->asic_prop.dram_size;
1836 region->offset_in_bar = 0;
1837 region->bar_size = prop->dram_pci_bar_size;
1838 region->bar_id = HBM_BAR_ID;
1839 region->used = 1;
1840
1841 /* SP SRAM */
1842 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1843 region->region_base = PSOC_SCRATCHPAD_ADDR;
1844 region->region_size = PSOC_SCRATCHPAD_SIZE;
1845 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1846 region->bar_size = CFG_BAR_SIZE;
1847 region->bar_id = CFG_BAR_ID;
1848 region->used = 1;
1849 }
1850
/**
 * gaudi_sw_init() - software-only initialization of the device.
 * @hdev: habanalabs device structure.
 *
 * Allocates the ASIC-specific structure, builds the F/W event ID table from
 * the IRQ map, and creates the DMA pools and internal queue memory needed
 * before H/W initialization.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int gaudi_sw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi;
	u32 i, event_id = 0;
	int rc;

	/* Allocate device structure */
	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
	if (!gaudi)
		return -ENOMEM;

	/* Compact the valid entries of the IRQ map table into the events
	 * array, bounded by GAUDI_EVENT_SIZE.
	 */
	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
		if (gaudi_irq_map_table[i].valid) {
			if (event_id == GAUDI_EVENT_SIZE) {
				dev_err(hdev->dev,
					"Event array exceeds the limit of %u events\n",
					GAUDI_EVENT_SIZE);
				rc = -EINVAL;
				goto free_gaudi_device;
			}

			gaudi->events[event_id++] =
					gaudi_irq_map_table[i].fc_id;
		}
	}

	gaudi->cpucp_info_get = gaudi_cpucp_info_get;

	hdev->asic_specific = gaudi;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi_device;
	}

	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	/* Pool with 32-byte minimal allocation granularity (ilog2(32)) */
	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
	if (rc)
		goto free_cpu_accessible_dma_pool;

	spin_lock_init(&gaudi->hw_queues_lock);

	hdev->supports_sync_stream = true;
	hdev->supports_coresight = true;
	hdev->supports_staged_submission = true;
	hdev->supports_wait_for_multi_cs = true;

	hdev->asic_funcs->set_pci_memory_regions(hdev);
	hdev->stream_master_qid_arr =
				hdev->asic_funcs->get_stream_master_qid_arr();
	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	/* Undo the MSB swap applied at allocation time (when F/W security is
	 * disabled) so the original DMA address is freed.
	 */
	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);
	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi_device:
	kfree(gaudi);
	return rc;
}
1944
/**
 * gaudi_sw_fini() - teardown counterpart of gaudi_sw_init().
 * @hdev: habanalabs device structure.
 *
 * Frees resources in reverse allocation order: internal QMAN PQs, the CPU
 * accessible DMA pool and its backing memory, the small-allocation DMA pool
 * and the ASIC-specific structure.
 *
 * Return: always 0.
 */
static int gaudi_sw_fini(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	gaudi_free_internal_qmans_pq_mem(hdev);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	/* Undo the MSB swap applied at allocation time (when F/W security is
	 * disabled) so the original DMA address is freed.
	 */
	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);

	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(gaudi);

	return 0;
}
1966
gaudi_irq_handler_single(int irq,void * arg)1967 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1968 {
1969 struct hl_device *hdev = arg;
1970 int i;
1971
1972 if (hdev->disabled)
1973 return IRQ_HANDLED;
1974
1975 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1976 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1977
1978 hl_irq_handler_eq(irq, &hdev->event_queue);
1979
1980 return IRQ_HANDLED;
1981 }
1982
1983 /*
1984 * For backward compatibility, new MSI interrupts should be set after the
1985 * existing CPU and NIC interrupts.
1986 */
/*
 * Map a logical interrupt number to its PCI MSI vector. The CPU event queue
 * always uses GAUDI_EVENT_QUEUE_MSI_IDX; interrupt numbers above it are
 * shifted past the NIC engine vectors for backward compatibility.
 */
static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
				bool cpu_eq)
{
	int msi_vec;

	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
			GAUDI_EVENT_QUEUE_MSI_IDX);

	if (cpu_eq || nr < GAUDI_EVENT_QUEUE_MSI_IDX)
		msi_vec = nr;
	else
		msi_vec = nr + NIC_NUMBER_OF_ENGINES + 1;

	return pci_irq_vector(hdev->pdev, msi_vec);
}
2001
gaudi_enable_msi_single(struct hl_device * hdev)2002 static int gaudi_enable_msi_single(struct hl_device *hdev)
2003 {
2004 int rc, irq;
2005
2006 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2007
2008 irq = gaudi_pci_irq_vector(hdev, 0, false);
2009 rc = request_irq(irq, gaudi_irq_handler_single, 0,
2010 "gaudi single msi", hdev);
2011 if (rc)
2012 dev_err(hdev->dev,
2013 "Failed to request single MSI IRQ\n");
2014
2015 return rc;
2016 }
2017
gaudi_enable_msi(struct hl_device * hdev)2018 static int gaudi_enable_msi(struct hl_device *hdev)
2019 {
2020 struct gaudi_device *gaudi = hdev->asic_specific;
2021 int rc;
2022
2023 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2024 return 0;
2025
2026 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2027 if (rc < 0) {
2028 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2029 return rc;
2030 }
2031
2032 rc = gaudi_enable_msi_single(hdev);
2033 if (rc)
2034 goto free_pci_irq_vectors;
2035
2036 gaudi->hw_cap_initialized |= HW_CAP_MSI;
2037
2038 return 0;
2039
2040 free_pci_irq_vectors:
2041 pci_free_irq_vectors(hdev->pdev);
2042 return rc;
2043 }
2044
gaudi_sync_irqs(struct hl_device * hdev)2045 static void gaudi_sync_irqs(struct hl_device *hdev)
2046 {
2047 struct gaudi_device *gaudi = hdev->asic_specific;
2048
2049 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2050 return;
2051
2052 /* Wait for all pending IRQs to be finished */
2053 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2054 }
2055
gaudi_disable_msi(struct hl_device * hdev)2056 static void gaudi_disable_msi(struct hl_device *hdev)
2057 {
2058 struct gaudi_device *gaudi = hdev->asic_specific;
2059
2060 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2061 return;
2062
2063 gaudi_sync_irqs(hdev);
2064 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2065 pci_free_irq_vectors(hdev->pdev);
2066
2067 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2068 }
2069
/**
 * gaudi_init_scrambler_sram() - enable the SRAM scrambler in all routers.
 * @hdev: habanalabs device structure.
 *
 * Skipped when F/W security is enabled (registers are not accessible), when
 * the F/W reports it already enabled SRAM scrambling, or when this driver
 * already did it. Writes the enable bit in every NIF/SIF router and every
 * DMA_IF down-channel.
 */
static void gaudi_init_scrambler_sram(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	/* Secured F/W owns these registers - driver must not touch them */
	if (hdev->asic_prop.fw_security_enabled)
		return;

	/* F/W already enabled the scrambler */
	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
		return;

	/* Already enabled by this driver */
	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
		return;

	/* North interface routers 0-7 */
	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	/* South interface routers 0-7 */
	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	/* DMA interface down-channels, both channels of each corner */
	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
}
2137
/**
 * gaudi_init_scrambler_hbm() - enable the HBM scrambler in all routers.
 * @hdev: habanalabs device structure.
 *
 * Skipped when F/W security is enabled, when the boot-fit F/W reports it
 * already enabled DRAM scrambling, or when this driver already did it.
 * Writes the enable bit in every NIF/SIF router and every DMA_IF
 * down-channel.
 */
static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	/* Secured F/W owns these registers - driver must not touch them */
	if (hdev->asic_prop.fw_security_enabled)
		return;

	/* F/W already enabled the scrambler */
	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
		return;

	/* Already enabled by this driver */
	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
		return;

	/* North interface routers 0-7 */
	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	/* South interface routers 0-7 */
	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	/* DMA interface down-channels, both channels of each corner */
	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
}
2205
/**
 * gaudi_init_e2e() - program end-to-end credits in all routers.
 * @hdev: habanalabs device structure.
 *
 * Writes per-router HBM/PCI read and write credit sizes and then enables
 * E2E credits in every SIF/NIF router and DMA_IF down-channel. Skipped when
 * F/W security is enabled or the boot-fit F/W reports it already configured
 * the credits.
 *
 * NOTE(review): the literal values appear to be H/W-tuned constants; the
 * ">> 3" presumably converts byte sizes into the 8-byte units the H/W
 * expects - confirm against the Gaudi register specification.
 */
static void gaudi_init_e2e(struct hl_device *hdev)
{
	/* Secured F/W owns these registers - driver must not touch them */
	if (hdev->asic_prop.fw_security_enabled)
		return;

	/* F/W already configured the E2E credits */
	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
		return;

	/* Per-router credit sizes: south interface routers 0-7 */
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);

	/* Per-router credit sizes: north interface routers 0-7 */
	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);

	/* Per-channel credit sizes: DMA interface down-channels (all
	 * corners share the same values)
	 */
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	/* Enable the E2E credits only after all sizes are programmed */
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
}
2455
/**
 * gaudi_init_hbm_cred() - program HBM read/write credit counters.
 * @hdev: habanalabs device structure.
 *
 * Writes per-corner (E_N/E_S/W_N/W_S) HBM0/HBM1 read and write credit
 * counts, then enables read and write credits on both credit-enable
 * registers of each corner. Skipped when F/W security is enabled or the
 * boot-fit F/W reports it already configured the credits.
 *
 * NOTE(review): the packed 0x33.../0x77.../0x55.../0xDD... constants look
 * like per-nibble credit values tuned by H/W - confirm against the Gaudi
 * register specification.
 */
static void gaudi_init_hbm_cred(struct hl_device *hdev)
{
	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;

	/* Secured F/W owns these registers - driver must not touch them */
	if (hdev->asic_prop.fw_security_enabled)
		return;

	/* F/W already configured the HBM credits */
	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
						CPU_BOOT_DEV_STS0_HBM_CRED_EN)
		return;

	hbm0_wr = 0x33333333;
	hbm0_rd = 0x77777777;
	hbm1_wr = 0x55555555;
	hbm1_rd = 0xDDDDDDDD;

	/* Credit counts per corner */
	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);

	/* Enable read and write credits, enable-register 0 of each corner */
	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));

	/* Enable read and write credits, enable-register 1 of each corner */
	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
}
2518
/**
 * gaudi_init_golden_registers() - one-time "golden" register configuration.
 * @hdev: habanalabs device structure.
 *
 * Programs E2E credits and HBM credits, masks TPC arithmetic interrupts,
 * sets TPC I-cache fetch width, zeroes the first 128 bytes of SRAM (needed
 * by Tensor DMA) and sets the MME rollup count.
 */
static void gaudi_init_golden_registers(struct hl_device *hdev)
{
	u32 tpc_offset;
	int tpc_id, i;

	gaudi_init_e2e(hdev);
	gaudi_init_hbm_cred(hdev);

	for (tpc_id = 0, tpc_offset = 0;
			tpc_id < TPC_NUMBER_OF_ENGINES;
			tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
		/* Mask all arithmetic interrupts from TPC */
		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
		/* Set 16 cache lines */
		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
				ICACHE_FETCH_LINE_NUM, 2);
	}

	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
	for (i = 0 ; i < 128 ; i += 8)
		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);

	/* Same rollup count for all four MME controllers */
	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
}
2546
/**
 * gaudi_init_pci_dma_qman() - initialize one stream of a PCI DMA QMAN.
 * @hdev: habanalabs device structure.
 * @dma_id: index of the DMA channel.
 * @qman_id: index of the stream (queue) inside the channel's QMAN.
 * @qman_pq_addr: DMA address of the stream's PQ buffer.
 *
 * Programs the PQ base/size/pointers, the LDMA offsets, the four sync
 * manager message bases and the CP barrier config for the given stream.
 * Channel-global error/arbitration registers are written only for stream 0.
 */
static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
				int qman_id, dma_addr_t qman_pq_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 q_off, dma_qm_offset;
	u32 dma_qm_err_cfg, irq_handler_offset;

	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	/* Split the E_N and W_S sync manager addresses into the 32-bit
	 * halves the QMAN message-base registers expect.
	 */
	mtr_base_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Per-stream register stride is 4 bytes */
	q_off = dma_qm_offset + qman_id * 4;

	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));

	/* PQ size register takes log2 of the number of entries */
	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_SRC_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_DST_OFFSET);

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);

	/* The following configuration is needed only once per QMAN */
	if (qman_id == 0) {
		/* Use the GIC directly when its interrupts are enabled,
		 * otherwise route through the F/W-provided dynamic register.
		 */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			dma_qm_err_cfg |=
				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);

		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Error payload identifies the channel by event cpu_id */
		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_EXTERNAL_MAKE_TRUSTED);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
	}
}
2637
/**
 * gaudi_init_dma_core() - initialize one DMA core engine.
 * @hdev: habanalabs device structure.
 * @dma_id: index of the DMA channel.
 *
 * Configures read/LBW outstanding limits, error reporting (address, payload
 * and stop-on-error policy), puts the channel in MMU bypass mode and
 * finally enables the core.
 */
static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
	u32 irq_handler_offset;

	/* Set to maximum possible according to physical size */
	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);

	/* WA for H/W bug H3-2116 */
	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);

	/* STOP_ON bit implies no completion to operation in case of RAZWI */
	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);

	/* Use the GIC directly when its interrupts are enabled, otherwise
	 * route through the F/W-provided dynamic register.
	 */
	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);

	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
		lower_32_bits(CFG_BASE + irq_handler_offset));
	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
		upper_32_bits(CFG_BASE + irq_handler_offset));

	/* Error payload identifies the channel by event cpu_id */
	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
	WREG32(mmDMA0_CORE_PROT + dma_offset,
			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
	/* If the channel is secured, it should be in MMU bypass mode */
	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
}
2677
/* Write @enable_mask into GLBL_CFG0 of the QMAN belonging to @dma_id. */
static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
				u32 enable_mask)
{
	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_id * DMA_QMAN_OFFSET, enable_mask);
}
2685
/*
 * gaudi_init_pci_dma_qmans() - initialize all PCI DMA channels.
 * @hdev: habanalabs device structure.
 *
 * For every PCI DMA channel: configure all QMAN streams, the DMA core
 * itself, and finally enable the QMAN. Idempotent via HW_CAP_PCI_DMA.
 */
static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_hw_queue *q;
	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;

	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
		return;

	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
		/* Translate logical PCI DMA channel to physical engine id */
		dma_id = gaudi_dma_assignment[i];
		/*
		 * For queues after the CPU Q need to add 1 to get the correct
		 * queue. In addition, need to add the CPU EQ and NIC IRQs in
		 * order to get the correct MSI register.
		 */
		if (dma_id > 1) {
			cpu_skip = 1;
			nic_skip = NIC_NUMBER_OF_ENGINES;
		} else {
			cpu_skip = 0;
			nic_skip = 0;
		}

		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			/* 4 kernel queues per DMA engine, one per stream */
			q_idx = 4 * dma_id + j + cpu_skip;
			q = &hdev->kernel_queues[q_idx];
			q->cq_id = cq_id++;
			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
			gaudi_init_pci_dma_qman(hdev, dma_id, j,
						q->bus_address);
		}

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
}
2726
/*
 * gaudi_init_hbm_dma_qman() - configure one stream (or the lower CP) of an
 *                             HBM DMA QMAN.
 * @hdev: habanalabs device structure.
 * @dma_id: physical DMA engine index; selects the QMAN register block.
 * @qman_id: stream index 0-3 for the upper CPs, 4 for the lower CP.
 * @qman_base_addr: DMA address of the PQ buffer (used only for streams 0-3).
 *
 * Streams 0-3 get a PQ plus CPDMA offsets; the lower CP (qman_id == 4) gets
 * LDMA offsets and the per-QMAN RAZWI/error reporting configuration.
 */
static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 dma_qm_err_cfg, irq_handler_offset;
	u32 q_off, dma_qm_offset;

	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	/*
	 * Sync manager monitor payload / sync object addresses:
	 * east-north block ("en") for the regular message bases,
	 * west-south block ("ws") for sync stream collective.
	 */
	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Per-stream registers are spaced 4 bytes apart */
	q_off = dma_qm_offset + qman_id * 4;

	if (qman_id < 4) {
		/* Upper CP stream: program its PQ base/size and CPDMA mode */
		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_DST_OFFSET);
	} else {
		/* Lower CP: LDMA mode plus QMAN-wide error configuration */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			dma_qm_err_cfg |=
				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);

		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Event payload: base event id + engine index */
		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}
2831
/*
 * gaudi_init_hbm_dma_qmans() - initialize all HBM DMA channels.
 * @hdev: habanalabs device structure.
 *
 * For every HBM DMA channel: configure the 4 upper-CP streams (with their
 * PQs), the lower CP, the DMA core, then enable the QMAN. Idempotent via
 * HW_CAP_HBM_DMA.
 */
static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	int i, j, dma_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
		return;

	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
		/* Translate logical HBM DMA channel to physical engine id */
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];

		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			 /*
			  * Add the CPU queue in order to get the correct queue
			  * number as all internal queue are placed after it
			  */
			internal_q_index = dma_id * QMAN_STREAMS + j + 1;

			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
						qman_base_addr);
		}

		/* Initializing lower CP for HBM DMA QMAN */
		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
}
2868
/*
 * gaudi_init_mme_qman() - configure one stream (or the lower CP) of an
 *                         MME QMAN.
 * @hdev: habanalabs device structure.
 * @mme_offset: register offset of the MME QMAN block relative to MME0.
 * @qman_id: stream index 0-3 for the upper CPs, 4 for the lower CP.
 * @qman_base_addr: DMA address of the PQ buffer (used only for streams 0-3).
 */
static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 irq_handler_offset;
	u32 q_off, mme_id;
	u32 mme_qm_err_cfg;

	/* Sync manager monitor payload / sync object addresses (east-north) */
	mtr_base_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Per-stream registers are spaced 4 bytes apart */
	q_off = mme_offset + qman_id * 4;

	if (qman_id < 4) {
		/* Upper CP stream: program its PQ base/size and CPDMA mode */
		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
					QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
					QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
					QMAN_CPDMA_DST_OFFSET);
	} else {
		/* Lower CP: LDMA mode plus QMAN-wide error configuration */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
					QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
					QMAN_LDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
					QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		/* mme_id: MME index derived from the register offset; the
		 * divide by 2 folds the two QMAN blocks of an MME pair.
		 */
		mme_id = mme_offset /
				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;

		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			mme_qm_err_cfg |=
				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);

		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Event payload: base event id + MME index */
		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
									mme_id);

		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
}
2955
/*
 * gaudi_init_mme_qmans() - initialize both MME master QMANs.
 * @hdev: habanalabs device structure.
 *
 * Configures 4 streams on each of the two master MMEs, their lower CPs,
 * and enables both QMANs. Idempotent via HW_CAP_MME.
 */
static void gaudi_init_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 mme_offset;
	int i, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_MME)
		return;

	/*
	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
	 */

	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;

	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
		q = &gaudi->internal_qmans[internal_q_index];
		qman_base_addr = (u64) q->pq_dma_addr;
		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
					qman_base_addr);
		/* First 4 streams go to MME2, the remaining 4 to MME0 */
		if (i == 3)
			mme_offset = 0;
	}

	/* Initializing lower CP for MME QMANs */
	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
	gaudi_init_mme_qman(hdev, 0, 4, 0);

	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);

	gaudi->hw_cap_initialized |= HW_CAP_MME;
}
2994
/*
 * gaudi_init_tpc_qman() - configure one stream (or the lower CP) of a
 *                         TPC QMAN.
 * @hdev: habanalabs device structure.
 * @tpc_offset: register offset of the TPC QMAN block relative to TPC0.
 * @qman_id: stream index 0-3 for the upper CPs, 4 for the lower CP.
 * @qman_base_addr: DMA address of the PQ buffer (used only for streams 0-3).
 */
static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 tpc_qm_err_cfg, irq_handler_offset;
	u32 q_off, tpc_id;

	/*
	 * Sync manager monitor payload / sync object addresses:
	 * east-north ("en") for regular message bases, west-south ("ws")
	 * for sync stream collective.
	 */
	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Per-stream registers are spaced 4 bytes apart */
	q_off = tpc_offset + qman_id * 4;

	/* Recover the TPC index from the register-block offset */
	tpc_id = tpc_offset /
			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);

	if (qman_id < 4) {
		/* Upper CP stream: program its PQ base/size and CPDMA mode */
		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
						QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
						QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
						QMAN_CPDMA_DST_OFFSET);
	} else {
		/* Lower CP: LDMA mode plus QMAN-wide error configuration */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
						QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
						QMAN_LDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
						QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			tpc_qm_err_cfg |=
				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);

		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Event payload: base event id + TPC index */
		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
									tpc_id);

		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
	/* NOTE(review): comment refers to TPC7 but the check is tpc_id == 6
	 * (i.e. the 7th engine, TPC6 by zero-based naming) - confirm the
	 * intended engine against the collective-stream design.
	 */
	if (tpc_id == 6) {
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}
3100
/*
 * gaudi_init_tpc_qmans() - initialize the QMANs of all TPC engines.
 * @hdev: habanalabs device structure.
 *
 * For each TPC: configure the 4 upper-CP streams, the lower CP, enable the
 * QMAN, and program the sync-manager base address in the TPC config block.
 * Idempotent via HW_CAP_TPC_MASK.
 */
static void gaudi_init_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 so_base_hi, tpc_offset = 0;
	/* Stride between consecutive TPC config register blocks */
	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
	int i, tpc_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
		return;

	so_base_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
						tpc_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_tpc_qman(hdev, tpc_offset, i,
						qman_base_addr);

			if (i == 3) {
				/* Initializing lower CP for TPC QMAN */
				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);

				/* Enable the QMAN and TPC channel */
				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
						QMAN_TPC_ENABLE);
			}
		}

		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
				so_base_hi);

		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;

		/* Mark this TPC as initialized in the capability bitmap */
		gaudi->hw_cap_initialized |=
				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
	}
}
3145
/*
 * gaudi_init_nic_qman() - configure one stream of a NIC QMAN.
 * @hdev: habanalabs device structure.
 * @nic_offset: register offset of the NIC QMAN block relative to NIC0 QM0.
 * @qman_id: stream index (0-3).
 * @qman_base_addr: DMA address of the PQ buffer.
 * @nic_id: NIC engine index, used in the error-event payload.
 *
 * Unlike DMA/MME/TPC QMANs, NIC QMANs have no lower CP here; the QMAN-wide
 * error configuration is done once, together with stream 0.
 */
static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
				int qman_id, u64 qman_base_addr, int nic_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 nic_qm_err_cfg, irq_handler_offset;
	u32 q_off;

	/*
	 * Sync manager monitor payload / sync object addresses.
	 * NOTE(review): the "lo" values mask CFG_BASE to 32 bits before the
	 * add, unlike the other QMAN init functions; the result is the same,
	 * presumably the masked form avoids a truncation warning - confirm.
	 */
	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Per-stream registers are spaced 4 bytes apart */
	q_off = nic_offset + qman_id * 4;

	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));

	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);

	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
						QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
						QMAN_LDMA_SRC_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
						QMAN_LDMA_DST_OFFSET);

	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	/* The following configuration is needed only once per QMAN */
	if (qman_id == 0) {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);

		/* Configure RAZWI IRQ */
		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			nic_qm_err_cfg |=
				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);

		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Event payload: base event id + NIC index */
		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
									nic_id);

		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}
}
3233
/*
 * gaudi_init_nic_qmans() - initialize the QMANs of all enabled NIC ports.
 * @hdev: habanalabs device structure.
 *
 * NIC engines are grouped two per NIC macro (QM0/QM1); the register offset
 * therefore advances by a QMAN stride for each engine, then snaps to the
 * next macro after every odd engine. Ports masked out in nic_ports_mask are
 * skipped (but the offset still advances). Idempotent via HW_CAP_NIC_MASK.
 */
static void gaudi_init_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 nic_offset = 0;
	/* Stride between QM0 and QM1 inside one NIC macro */
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	/* Stride between consecutive NIC macros */
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int i, nic_id, internal_q_index;

	if (!hdev->nic_ports_mask)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
		return;

	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
			/* Port disabled - still advance the register offset */
			nic_offset += nic_delta_between_qmans;
			if (nic_id & 1) {
				nic_offset -= (nic_delta_between_qmans * 2);
				nic_offset += nic_delta_between_nics;
			}
			continue;
		}

		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
						nic_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
						qman_base_addr, nic_id);
		}

		/* Enable the QMAN */
		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);

		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}

		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
	}
}
3285
gaudi_disable_pci_dma_qmans(struct hl_device * hdev)3286 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3287 {
3288 struct gaudi_device *gaudi = hdev->asic_specific;
3289
3290 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3291 return;
3292
3293 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3294 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3295 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3296 }
3297
gaudi_disable_hbm_dma_qmans(struct hl_device * hdev)3298 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3299 {
3300 struct gaudi_device *gaudi = hdev->asic_specific;
3301
3302 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3303 return;
3304
3305 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3306 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3307 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3308 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3309 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3310 }
3311
gaudi_disable_mme_qmans(struct hl_device * hdev)3312 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3313 {
3314 struct gaudi_device *gaudi = hdev->asic_specific;
3315
3316 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3317 return;
3318
3319 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3320 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3321 }
3322
gaudi_disable_tpc_qmans(struct hl_device * hdev)3323 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3324 {
3325 struct gaudi_device *gaudi = hdev->asic_specific;
3326 u32 tpc_offset = 0;
3327 int tpc_id;
3328
3329 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3330 return;
3331
3332 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3333 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3334 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3335 }
3336 }
3337
gaudi_disable_nic_qmans(struct hl_device * hdev)3338 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3339 {
3340 struct gaudi_device *gaudi = hdev->asic_specific;
3341 u32 nic_mask, nic_offset = 0;
3342 u32 nic_delta_between_qmans =
3343 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3344 u32 nic_delta_between_nics =
3345 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3346 int nic_id;
3347
3348 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3349 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3350
3351 if (gaudi->hw_cap_initialized & nic_mask)
3352 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3353
3354 nic_offset += nic_delta_between_qmans;
3355 if (nic_id & 1) {
3356 nic_offset -= (nic_delta_between_qmans * 2);
3357 nic_offset += nic_delta_between_nics;
3358 }
3359 }
3360 }
3361
gaudi_stop_pci_dma_qmans(struct hl_device * hdev)3362 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3363 {
3364 struct gaudi_device *gaudi = hdev->asic_specific;
3365
3366 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3367 return;
3368
3369 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3370 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3371 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3372 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3373 }
3374
gaudi_stop_hbm_dma_qmans(struct hl_device * hdev)3375 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3376 {
3377 struct gaudi_device *gaudi = hdev->asic_specific;
3378
3379 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3380 return;
3381
3382 /* Stop CPs of HBM DMA QMANs */
3383
3384 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3385 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3386 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3387 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3388 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3389 }
3390
gaudi_stop_mme_qmans(struct hl_device * hdev)3391 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3392 {
3393 struct gaudi_device *gaudi = hdev->asic_specific;
3394
3395 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3396 return;
3397
3398 /* Stop CPs of MME QMANs */
3399 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3400 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3401 }
3402
gaudi_stop_tpc_qmans(struct hl_device * hdev)3403 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3404 {
3405 struct gaudi_device *gaudi = hdev->asic_specific;
3406
3407 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3408 return;
3409
3410 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3411 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3412 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3416 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3417 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3418 }
3419
gaudi_stop_nic_qmans(struct hl_device * hdev)3420 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3421 {
3422 struct gaudi_device *gaudi = hdev->asic_specific;
3423
3424 /* Stop upper CPs of QMANs */
3425
3426 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3427 WREG32(mmNIC0_QM0_GLBL_CFG1,
3428 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3429 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3430 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3431
3432 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3433 WREG32(mmNIC0_QM1_GLBL_CFG1,
3434 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3435 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3436 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3437
3438 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3439 WREG32(mmNIC1_QM0_GLBL_CFG1,
3440 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3441 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3442 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3443
3444 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3445 WREG32(mmNIC1_QM1_GLBL_CFG1,
3446 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3447 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3448 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3449
3450 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3451 WREG32(mmNIC2_QM0_GLBL_CFG1,
3452 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3453 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3454 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3455
3456 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3457 WREG32(mmNIC2_QM1_GLBL_CFG1,
3458 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3459 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3460 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3461
3462 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3463 WREG32(mmNIC3_QM0_GLBL_CFG1,
3464 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3465 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3466 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3467
3468 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3469 WREG32(mmNIC3_QM1_GLBL_CFG1,
3470 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3471 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3472 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3473
3474 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3475 WREG32(mmNIC4_QM0_GLBL_CFG1,
3476 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3477 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3478 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3479
3480 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3481 WREG32(mmNIC4_QM1_GLBL_CFG1,
3482 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3483 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3484 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3485 }
3486
gaudi_pci_dma_stall(struct hl_device * hdev)3487 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3488 {
3489 struct gaudi_device *gaudi = hdev->asic_specific;
3490
3491 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3492 return;
3493
3494 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3495 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3496 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3497 }
3498
gaudi_hbm_dma_stall(struct hl_device * hdev)3499 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3500 {
3501 struct gaudi_device *gaudi = hdev->asic_specific;
3502
3503 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3504 return;
3505
3506 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3507 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3508 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3509 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3510 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3511 }
3512
gaudi_mme_stall(struct hl_device * hdev)3513 static void gaudi_mme_stall(struct hl_device *hdev)
3514 {
3515 struct gaudi_device *gaudi = hdev->asic_specific;
3516
3517 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3518 return;
3519
3520 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3521 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3522 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3523 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3524 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3525 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3526 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3527 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3528 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3529 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3530 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3531 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3532 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3533 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3534 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3535 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3536 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3537 }
3538
gaudi_tpc_stall(struct hl_device * hdev)3539 static void gaudi_tpc_stall(struct hl_device *hdev)
3540 {
3541 struct gaudi_device *gaudi = hdev->asic_specific;
3542
3543 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3544 return;
3545
3546 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3547 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3552 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3553 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3554 }
3555
gaudi_disable_clock_gating(struct hl_device * hdev)3556 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3557 {
3558 u32 qman_offset;
3559 int i;
3560
3561 if (hdev->asic_prop.fw_security_enabled)
3562 return;
3563
3564 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3565 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3566 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3567
3568 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3569 }
3570
3571 WREG32(mmMME0_QM_CGM_CFG, 0);
3572 WREG32(mmMME0_QM_CGM_CFG1, 0);
3573 WREG32(mmMME2_QM_CGM_CFG, 0);
3574 WREG32(mmMME2_QM_CGM_CFG1, 0);
3575
3576 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3577 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3578 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3579
3580 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3581 }
3582 }
3583
gaudi_enable_timestamp(struct hl_device * hdev)3584 static void gaudi_enable_timestamp(struct hl_device *hdev)
3585 {
3586 /* Disable the timestamp counter */
3587 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3588
3589 /* Zero the lower/upper parts of the 64-bit counter */
3590 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3591 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3592
3593 /* Enable the counter */
3594 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3595 }
3596
gaudi_disable_timestamp(struct hl_device * hdev)3597 static void gaudi_disable_timestamp(struct hl_device *hdev)
3598 {
3599 /* Disable the timestamp counter */
3600 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3601 }
3602
/*
 * gaudi_halt_engines() - bring all compute/DMA/NIC engines to a halt.
 * @hdev: habanalabs device structure.
 * @hard_reset: unused here; kept for the common ASIC callback signature.
 * @fw_reset: true if the firmware performs the reset - then the engines
 *            are left alone and only MSI is disabled.
 *
 * Halting is done in three ordered phases with a settle delay between
 * them: stop the QMANs (no new jobs fetched), stall the engine cores,
 * then fully disable the QMANs.
 */
static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	u32 wait_timeout_ms;

	/* Palladium emulation needs a much longer settle time */
	if (hdev->pldm)
		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
	else
		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;

	if (fw_reset)
		goto skip_engines;

	/* Phase 1: stop the QMANs so no new work is dispatched */
	gaudi_stop_nic_qmans(hdev);
	gaudi_stop_mme_qmans(hdev);
	gaudi_stop_tpc_qmans(hdev);
	gaudi_stop_hbm_dma_qmans(hdev);
	gaudi_stop_pci_dma_qmans(hdev);

	msleep(wait_timeout_ms);

	/* Phase 2: stall the engine cores themselves */
	gaudi_pci_dma_stall(hdev);
	gaudi_hbm_dma_stall(hdev);
	gaudi_tpc_stall(hdev);
	gaudi_mme_stall(hdev);

	msleep(wait_timeout_ms);

	/* Phase 3: fully disable the QMANs */
	gaudi_disable_nic_qmans(hdev);
	gaudi_disable_mme_qmans(hdev);
	gaudi_disable_tpc_qmans(hdev);
	gaudi_disable_hbm_dma_qmans(hdev);
	gaudi_disable_pci_dma_qmans(hdev);

	gaudi_disable_timestamp(hdev);

skip_engines:
	gaudi_disable_msi(hdev);
}
3641
/*
 * gaudi_mmu_init() - initialize the device MMU.
 * @hdev: habanalabs device structure.
 *
 * Programs the hop0 page-table address for every ASID, sets up the STLB
 * cache-management area, invalidates the MMU cache and finally enables
 * the MMU. Idempotent: returns immediately if the MMU was already
 * initialized.
 *
 * Return: 0 on success, negative error code on failure.
 */
static int gaudi_mmu_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hop0_addr;
	int rc, i;

	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
		return 0;

	/* Hop0 tables are laid out consecutively, one per ASID */
	for (i = 0 ; i < prop->max_asid ; i++) {
		hop0_addr = prop->mmu_pgt_addr +
				(i * prop->dmmu.hop_table_size);

		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
		if (rc) {
			dev_err(hdev->dev,
				"failed to set hop0 addr for asid %d\n", i);
			return rc;
		}
	}

	/* init MMU cache manage page (base split across two registers) */
	WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
	WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);

	/* mem cache invalidation */
	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);

	rc = hl_mmu_invalidate_cache(hdev, true, 0);
	if (rc)
		return rc;

	WREG32(mmMMU_UP_MMU_ENABLE, 1);
	WREG32(mmMMU_UP_SPI_MASK, 0xF);

	/* NOTE(review): 0x30440 is a magic hop-configuration value taken
	 * from the H/W spec - confirm against the MMU documentation.
	 */
	WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);

	/*
	 * The H/W expects the first PI after init to be 1. After wraparound
	 * we'll write 0.
	 */
	gaudi->mmu_cache_inv_pi = 1;

	gaudi->hw_cap_initialized |= HW_CAP_MMU;

	return 0;
}
3690
gaudi_load_firmware_to_device(struct hl_device * hdev)3691 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3692 {
3693 void __iomem *dst;
3694
3695 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3696
3697 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3698 }
3699
gaudi_load_boot_fit_to_device(struct hl_device * hdev)3700 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3701 {
3702 void __iomem *dst;
3703
3704 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3705
3706 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3707 }
3708
gaudi_init_dynamic_firmware_loader(struct hl_device * hdev)3709 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3710 {
3711 struct dynamic_fw_load_mgr *dynamic_loader;
3712 struct cpu_dyn_regs *dyn_regs;
3713
3714 dynamic_loader = &hdev->fw_loader.dynamic_loader;
3715
3716 /*
3717 * here we update initial values for few specific dynamic regs (as
3718 * before reading the first descriptor from FW those value has to be
3719 * hard-coded) in later stages of the protocol those values will be
3720 * updated automatically by reading the FW descriptor so data there
3721 * will always be up-to-date
3722 */
3723 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3724 dyn_regs->kmd_msg_to_cpu =
3725 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3726 dyn_regs->cpu_cmd_status_to_host =
3727 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3728
3729 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3730 }
3731
gaudi_init_static_firmware_loader(struct hl_device * hdev)3732 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3733 {
3734 struct static_fw_load_mgr *static_loader;
3735
3736 static_loader = &hdev->fw_loader.static_loader;
3737
3738 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3739 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3740 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3741 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3742 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3743 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3744 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3745 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3746 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3747 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3748 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3749 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3750 static_loader->cpu_reset_wait_msec = hdev->pldm ?
3751 GAUDI_PLDM_RESET_WAIT_MSEC :
3752 GAUDI_CPU_RESET_WAIT_MSEC;
3753 }
3754
gaudi_init_firmware_preload_params(struct hl_device * hdev)3755 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3756 {
3757 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3758
3759 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3760 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3761 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3762 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3763 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3764 pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3765 }
3766
gaudi_init_firmware_loader(struct hl_device * hdev)3767 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3768 {
3769 struct asic_fixed_properties *prop = &hdev->asic_prop;
3770 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3771
3772 /* fill common fields */
3773 fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3774 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3775 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3776 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3777 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3778 fw_loader->skip_bmc = !hdev->bmc_enable;
3779 fw_loader->sram_bar_id = SRAM_BAR_ID;
3780 fw_loader->dram_bar_id = HBM_BAR_ID;
3781
3782 if (prop->dynamic_fw_load)
3783 gaudi_init_dynamic_firmware_loader(hdev);
3784 else
3785 gaudi_init_static_firmware_loader(hdev);
3786 }
3787
gaudi_init_cpu(struct hl_device * hdev)3788 static int gaudi_init_cpu(struct hl_device *hdev)
3789 {
3790 struct gaudi_device *gaudi = hdev->asic_specific;
3791 int rc;
3792
3793 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3794 return 0;
3795
3796 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3797 return 0;
3798
3799 /*
3800 * The device CPU works with 40 bits addresses.
3801 * This register sets the extension to 50 bits.
3802 */
3803 if (!hdev->asic_prop.fw_security_enabled)
3804 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3805
3806 rc = hl_fw_init_cpu(hdev);
3807
3808 if (rc)
3809 return rc;
3810
3811 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3812
3813 return 0;
3814 }
3815
/*
 * gaudi_init_cpu_queues() - set up the CPU PQ/CQ/EQ and handshake with FW.
 * @hdev: habanalabs device structure.
 * @cpu_timeout: max time in usec to wait for the device CPU to ack.
 *
 * Publishes the host addresses/sizes of the CPU packet queue, event queue
 * and CQ area, signals the device CPU (via GIC or dynamic IRQ register)
 * and polls until the FW reports it is ready.
 *
 * Return: 0 on success, -EIO if the device CPU did not respond in time.
 */
static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, irq_handler_offset;
	struct hl_eq *eq;
	struct hl_hw_queue *cpu_pq =
			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	/* Idempotent: skip if the CPU queues were already initialized */
	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	/* Publish host bus addresses of the PQ, EQ and CQ area to the FW */
	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
			lower_32_bits(hdev->cpu_accessible_dma_address));
	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
			upper_32_bits(hdev->cpu_accessible_dma_address));

	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	/* Single-MSI mode is used at this point of the boot flow */
	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);

	/* Kick the device CPU: GIC when enabled, dynamic IRQ reg otherwise */
	irq_handler_offset = prop->gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);

	/* Wait for the FW to flip QUEUE_INIT to "ready for host" */
	err = hl_poll_timeout(
		hdev,
		mmCPU_IF_QUEUE_INIT,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		cpu_timeout);

	if (err) {
		dev_err(hdev->dev,
			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
		return -EIO;
	}

	/* update FW application security bits */
	if (prop->fw_cpu_boot_dev_sts0_valid)
		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
	if (prop->fw_cpu_boot_dev_sts1_valid)
		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);

	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}
3888
gaudi_pre_hw_init(struct hl_device * hdev)3889 static void gaudi_pre_hw_init(struct hl_device *hdev)
3890 {
3891 /* Perform read from the device to make sure device is up */
3892 RREG32(mmHW_STATE);
3893
3894 if (!hdev->asic_prop.fw_security_enabled) {
3895 /* Set the access through PCI bars (Linux driver only) as
3896 * secured
3897 */
3898 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3899 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3900 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3901
3902 /* Perform read to flush the waiting writes to ensure
3903 * configuration was set in the device
3904 */
3905 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3906 }
3907
3908 /*
3909 * Let's mark in the H/W that we have reached this point. We check
3910 * this value in the reset_before_init function to understand whether
3911 * we need to reset the chip before doing H/W init. This register is
3912 * cleared by the H/W upon H/W reset
3913 */
3914 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3915 }
3916
/*
 * gaudi_hw_init() - full H/W initialization sequence for the Gaudi ASIC.
 * @hdev: habanalabs device structure.
 *
 * Order matters throughout: HBM BAR mapping -> device CPU -> clock-gating
 * disable -> scramblers -> golden registers -> MMU -> security -> QMANs ->
 * MSI -> CPU queues. See the inline comments for the individual ordering
 * constraints.
 *
 * Return: 0 on success, negative error code on failure.
 */
static int gaudi_hw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	gaudi_pre_hw_init(hdev);

	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
	 * So we set it here and if anyone tries to move it later to
	 * a different address, there will be an error
	 */
	if (hdev->asic_prop.iatu_done_by_fw)
		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;

	/*
	 * Before pushing u-boot/linux to device, need to set the hbm bar to
	 * base address of dram
	 */
	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map HBM bar to DRAM base address\n");
		return -EIO;
	}

	rc = gaudi_init_cpu(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU\n");
		return rc;
	}

	/* In case the clock gating was enabled in preboot we need to disable
	 * it here before touching the MME/TPC registers.
	 */
	gaudi_disable_clock_gating(hdev);

	/* SRAM scrambler must be initialized after CPU is running from HBM */
	gaudi_init_scrambler_sram(hdev);

	/* This is here just in case we are working without CPU */
	gaudi_init_scrambler_hbm(hdev);

	gaudi_init_golden_registers(hdev);

	rc = gaudi_mmu_init(hdev);
	if (rc)
		return rc;

	/* Security must be configured before the engine QMANs come up */
	gaudi_init_security(hdev);

	gaudi_init_pci_dma_qmans(hdev);

	gaudi_init_hbm_dma_qmans(hdev);

	gaudi_init_mme_qmans(hdev);

	gaudi_init_tpc_qmans(hdev);

	gaudi_init_nic_qmans(hdev);

	gaudi_enable_timestamp(hdev);

	/* MSI must be enabled before CPU queues and NIC are initialized */
	rc = gaudi_enable_msi(hdev);
	if (rc)
		goto disable_queues;

	/* must be called after MSI was enabled */
	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
			rc);
		goto disable_msi;
	}

	/* Perform read from the device to flush all configuration */
	RREG32(mmHW_STATE);

	return 0;

disable_msi:
	gaudi_disable_msi(hdev);
disable_queues:
	gaudi_disable_mme_qmans(hdev);
	gaudi_disable_pci_dma_qmans(hdev);

	return rc;
}
4004
/*
 * gaudi_hw_fini() - perform a hard-reset of the device.
 * @hdev: habanalabs device structure.
 * @hard_reset: must be true; Gaudi does not support soft-reset.
 * @fw_reset: true if the firmware performs the reset itself - then the
 *            driver only waits for the reset to complete.
 *
 * Depending on the security configuration, the reset is triggered either
 * by the driver (direct register writes) or by the firmware (via GIC or
 * the legacy COMMS/MSG_TO_CPU channel).
 *
 * Return: 0 on success, -ETIMEDOUT if the device did not come out of reset.
 */
static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
	struct gaudi_device *gaudi = hdev->asic_specific;
	bool driver_performs_reset;

	if (!hard_reset) {
		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
		return 0;
	}

	/* Palladium emulation needs much longer timeouts */
	if (hdev->pldm) {
		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
	} else {
		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
	}

	if (fw_reset) {
		dev_dbg(hdev->dev,
			"Firmware performs HARD reset, going to wait %dms\n",
			reset_timeout_ms);

		goto skip_reset;
	}

	/* Driver resets directly only when FW security is off and the FW
	 * was not configured to own the hard-reset flow
	 */
	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
					!hdev->asic_prop.hard_reset_done_by_fw);

	/* Set device to handle FLR by H/W as we will put the device CPU to
	 * halt mode
	 */
	if (driver_performs_reset)
		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));

	/* If linux is loaded in the device CPU we need to communicate with it
	 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
	 * registers in case of old F/Ws
	 */
	if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_host_halt_irq);

		WREG32(irq_handler_offset,
			gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);

		/* This is a hail-mary attempt to revive the card in the small chance that the
		 * f/w has experienced a watchdog event, which caused it to return back to preboot.
		 * In that case, triggering reset through GIC won't help. We need to trigger the
		 * reset as if Linux wasn't loaded.
		 *
		 * We do it only if the reset cause was HB, because that would be the indication
		 * of such an event.
		 *
		 * In case watchdog hasn't expired but we still got HB, then this won't do any
		 * damage.
		 */
		if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
			if (hdev->asic_prop.hard_reset_done_by_fw)
				hl_fw_ask_hard_reset_without_linux(hdev);
			else
				hl_fw_ask_halt_machine_without_linux(hdev);
		}
	} else {
		if (hdev->asic_prop.hard_reset_done_by_fw)
			hl_fw_ask_hard_reset_without_linux(hdev);
		else
			hl_fw_ask_halt_machine_without_linux(hdev);
	}

	if (driver_performs_reset) {

		/* Configure the reset registers. Must be done as early as
		 * possible in case we fail during H/W initialization
		 */
		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
						(CFG_RST_H_DMA_MASK |
						CFG_RST_H_MME_MASK |
						CFG_RST_H_SM_MASK |
						CFG_RST_H_TPC_7_MASK));

		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
						(CFG_RST_H_HBM_MASK |
						CFG_RST_H_TPC_7_MASK |
						CFG_RST_H_NIC_MASK |
						CFG_RST_H_SM_MASK |
						CFG_RST_H_DMA_MASK |
						CFG_RST_H_MME_MASK |
						CFG_RST_H_CPU_MASK |
						CFG_RST_H_MMU_MASK));

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
						(CFG_RST_L_IF_MASK |
						CFG_RST_L_PSOC_MASK |
						CFG_RST_L_TPC_MASK));

		/* Give the device CPU time to halt before asserting reset */
		msleep(cpu_timeout_ms);

		/* Tell ASIC not to re-initialize PCIe */
		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);

		/* Restart BTL/BLR upon hard-reset */
		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);

		dev_dbg(hdev->dev,
			"Issued HARD reset command, going to wait %dms\n",
			reset_timeout_ms);
	} else {
		dev_dbg(hdev->dev,
			"Firmware performs HARD reset, going to wait %dms\n",
			reset_timeout_ms);
	}

skip_reset:
	/*
	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
	 * itself is in reset. Need to wait until the reset is deasserted
	 */
	msleep(reset_timeout_ms);

	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
		return -ETIMEDOUT;
	}

	/* All engine capabilities are gone after reset - clear the flags so
	 * the next hw_init re-initializes everything
	 */
	if (gaudi) {
		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
						HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
						HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
						HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
						HW_CAP_HBM_SCRAMBLER);

		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));

		hdev->device_cpu_is_halted = false;
	}
	return 0;
}
4154
gaudi_suspend(struct hl_device * hdev)4155 static int gaudi_suspend(struct hl_device *hdev)
4156 {
4157 int rc;
4158
4159 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4160 if (rc)
4161 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4162
4163 return rc;
4164 }
4165
/* Resume flow: re-program the PCIe iATU regions lost during suspend. */
static int gaudi_resume(struct hl_device *hdev)
{
	return gaudi_init_iatu(hdev);
}
4170
/*
 * gaudi_mmap() - map a coherent DMA buffer into a user-space vma.
 * @hdev: habanalabs device structure.
 * @vma: user vma describing the requested mapping.
 * @cpu_addr: kernel virtual address of the coherent buffer.
 * @dma_addr: device-side DMA address (includes the HOST_PHYS_BASE offset).
 * @size: size of the mapping in bytes.
 *
 * Return: 0 on success, negative error code from dma_mmap_coherent().
 */
static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
	int rc;

	/* Mapping must not be copied on fork, dumped, or expanded */
	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE);

	/* Strip the device-view offset to recover the host DMA address */
	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
				(dma_addr - HOST_PHYS_BASE), size);
	if (rc)
		/* Fixed: printk format strings must end with a newline */
		dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);

	return rc;
}
4186
/*
 * gaudi_ring_doorbell() - publish a new PI to a H/W queue's doorbell.
 * @hdev: habanalabs device structure.
 * @hw_queue_id: queue whose doorbell should be rung.
 * @pi: new producer index value to write.
 *
 * Maps the queue ID to its QMAN PQ_PI register and writes the PI there.
 * For the CPU queue an interrupt is also triggered towards the device
 * CPU (via GIC or the dynamic IRQ register) so it notices the update.
 */
static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
	struct gaudi_device *gaudi = hdev->asic_specific;
	bool invalid_queue = false;
	int dma_id;

	switch (hw_queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	/* NOTE(review): for DMA 2-7 the "- 1" appears to compensate for a
	 * queue ID (presumably the CPU PQ) sitting between DMA1 and DMA2 in
	 * the enum - confirm against the GAUDI_QUEUE_ID_* definitions.
	 */
	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_CPU_PQ:
		/* CPU queue is usable only after the CPU queues handshake */
		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
			db_reg_offset = mmCPU_IF_PF_PQ_PI;
		else
			invalid_queue = true;
		break;

	/* NOTE(review): logical MME 0 maps to the MME2 QMAN and logical
	 * MME 1 to the MME0 QMAN - presumably a master/slave arrangement;
	 * confirm against the MME init code.
	 */
	case GAUDI_QUEUE_ID_MME_0_0:
		db_reg_offset = mmMME2_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_MME_0_1:
		db_reg_offset = mmMME2_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_MME_0_2:
		db_reg_offset = mmMME2_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_MME_0_3:
		db_reg_offset = mmMME2_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_MME_1_0:
		db_reg_offset = mmMME0_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_MME_1_1:
		db_reg_offset = mmMME0_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_MME_1_2:
		db_reg_offset = mmMME0_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_MME_1_3:
		db_reg_offset = mmMME0_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_0_0:
		db_reg_offset = mmTPC0_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_0_1:
		db_reg_offset = mmTPC0_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_0_2:
		db_reg_offset = mmTPC0_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_0_3:
		db_reg_offset = mmTPC0_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_1_0:
		db_reg_offset = mmTPC1_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_1_1:
		db_reg_offset = mmTPC1_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_1_2:
		db_reg_offset = mmTPC1_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_1_3:
		db_reg_offset = mmTPC1_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_2_0:
		db_reg_offset = mmTPC2_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_2_1:
		db_reg_offset = mmTPC2_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_2_2:
		db_reg_offset = mmTPC2_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_2_3:
		db_reg_offset = mmTPC2_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_3_0:
		db_reg_offset = mmTPC3_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_3_1:
		db_reg_offset = mmTPC3_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_3_2:
		db_reg_offset = mmTPC3_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_3_3:
		db_reg_offset = mmTPC3_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_4_0:
		db_reg_offset = mmTPC4_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_4_1:
		db_reg_offset = mmTPC4_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_4_2:
		db_reg_offset = mmTPC4_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_4_3:
		db_reg_offset = mmTPC4_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_5_0:
		db_reg_offset = mmTPC5_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_5_1:
		db_reg_offset = mmTPC5_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_5_2:
		db_reg_offset = mmTPC5_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_5_3:
		db_reg_offset = mmTPC5_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_6_0:
		db_reg_offset = mmTPC6_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_6_1:
		db_reg_offset = mmTPC6_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_6_2:
		db_reg_offset = mmTPC6_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_6_3:
		db_reg_offset = mmTPC6_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_7_0:
		db_reg_offset = mmTPC7_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_7_1:
		db_reg_offset = mmTPC7_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_7_2:
		db_reg_offset = mmTPC7_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_7_3:
		db_reg_offset = mmTPC7_QM_PQ_PI_3;
		break;

	/* NIC queues: when the corresponding NIC port was not initialized,
	 * invalid_queue is set; db_reg_offset is still computed below but
	 * the invalid_queue guard after the switch prevents its use.
	 */
	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
		break;

	default:
		invalid_queue = true;
	}

	if (invalid_queue) {
		/* Should never get here */
		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
			hw_queue_id);
		return;
	}

	db_value = pi;

	/* ring the doorbell */
	WREG32(db_reg_offset, db_value);

	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
		/* make sure device CPU will read latest data from host */
		mb();

		/* Signal the device CPU (GIC or dynamic IRQ register) */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);

		WREG32(irq_handler_offset,
			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
	}
}
4528
/*
 * gaudi_pqe_write() - copy a buffer descriptor into a PQ entry.
 * @hdev: habanalabs device structure (unused here).
 * @pqe: destination PQ entry, viewed as two little-endian 64-bit words.
 * @bd: source buffer descriptor to publish.
 *
 * The QMAN PQs reside in host memory, so a plain 128-bit copy suffices.
 */
static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
				struct hl_bd *bd)
{
	__le64 *src = (__le64 *) bd;

	pqe[0] = src[0];
	pqe[1] = src[1];
}
4538
/*
 * gaudi_dma_alloc_coherent() - allocate coherent DMA memory on the host.
 * @hdev: habanalabs device structure.
 * @size: number of bytes to allocate.
 * @dma_handle: filled with the DMA address as seen by the device.
 * @flags: GFP allocation flags.
 *
 * On success the returned handle is shifted by HOST_PHYS_BASE, the device's
 * base physical address of host memory.
 *
 * Return: kernel virtual address of the allocation, or NULL on failure.
 */
static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle, gfp_t flags)
{
	void *vaddr;

	vaddr = dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
	if (!vaddr)
		return NULL;

	/* Shift to the device's base physical address of host memory */
	*dma_handle += HOST_PHYS_BASE;

	return vaddr;
}
4551
/*
 * gaudi_dma_free_coherent() - free memory from gaudi_dma_alloc_coherent().
 * @hdev: habanalabs device structure.
 * @size: size of the original allocation.
 * @cpu_addr: kernel virtual address of the allocation.
 * @dma_handle: device-view DMA address (still carrying HOST_PHYS_BASE).
 */
static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
					void *cpu_addr, dma_addr_t dma_handle)
{
	/* Cancel the device's base physical address of host memory */
	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr,
				dma_handle - HOST_PHYS_BASE);
}
4560
/*
 * gaudi_scrub_device_dram() - fill the user DRAM (HBM) range with a value.
 * @hdev: habanalabs device structure.
 * @val: 64-bit pattern to write across the whole user DRAM range.
 *
 * Splits [dram_user_base_address, dram_end_address) into chunks of up to 2GB
 * and hands one chunk to each DMA core in mem-set mode, so all channels scrub
 * in parallel.  After each batch is kicked, polls every engine's status
 * register until it goes idle.
 *
 * Return: 0 on success, -EIO if any DMA engine stays busy past
 * HBM_SCRUBBING_TIMEOUT_US.
 */
static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 cur_addr = prop->dram_user_base_address;
	u32 chunk_size, busy;
	int rc, dma_id;

	while (cur_addr < prop->dram_end_address) {
		/* Kick one chunk per DMA core; each core register block is
		 * DMA_CORE_OFFSET apart from the previous one.
		 */
		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
			u32 dma_offset = dma_id * DMA_CORE_OFFSET;

			chunk_size =
			min((u64)SZ_2G, prop->dram_end_address - cur_addr);

			dev_dbg(hdev->dev,
				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
				cur_addr, cur_addr + chunk_size);

			/* In mem-set mode the "source" registers hold the
			 * fill pattern, not an address.
			 */
			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
				lower_32_bits(val));
			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
				upper_32_bits(val));
			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
				lower_32_bits(cur_addr));
			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
				upper_32_bits(cur_addr));
			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
				chunk_size);
			/* Commit: linear transfer + mem-set mode */
			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
				((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));

			cur_addr += chunk_size;

			/* Last chunk may land before all channels are used */
			if (cur_addr == prop->dram_end_address)
				break;
		}

		/* Wait for every engine of this batch to drain before
		 * programming the next batch.
		 */
		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
			u32 dma_offset = dma_id * DMA_CORE_OFFSET;

			rc = hl_poll_timeout(
				hdev,
				mmDMA0_CORE_STS0 + dma_offset,
				busy,
				((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
				1000,
				HBM_SCRUBBING_TIMEOUT_US);

			if (rc) {
				dev_err(hdev->dev,
					"DMA Timeout during HBM scrubbing of DMA #%d\n",
					dma_id);
				return -EIO;
			}
		}
	}

	return 0;
}
4621
/*
 * gaudi_scrub_device_mem() - scrub SRAM and HBM with the configured value.
 * @hdev: habanalabs device structure.
 *
 * No-op unless memory scrubbing was enabled (hdev->memory_scrub).  Waits for
 * the device to become idle first, since the scrub drives the DMA engines
 * directly, then scrubs the user SRAM region followed by all of HBM.
 *
 * Return: 0 on success, -ETIMEDOUT if the device never went idle, or the
 * error code of the failing scrub step.
 */
static int gaudi_scrub_device_mem(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
	u64 addr, size, val = hdev->memory_scrub_val;
	ktime_t timeout;
	int rc = 0;

	if (!hdev->memory_scrub)
		return 0;

	/* Busy-wait (with sleeps) for device idleness up to the timeout */
	timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
	while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
		if (ktime_compare(ktime_get(), timeout) > 0) {
			dev_err(hdev->dev, "waiting for idle timeout\n");
			return -ETIMEDOUT;
		}
		usleep_range((1000 >> 2) + 1, 1000);
	}

	/* Scrub SRAM */
	addr = prop->sram_user_base_address;
	/* On Palladium (simulation) scrub only a token 64KB to save time */
	size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;

	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
			addr, addr + size, val);
	rc = gaudi_memset_device_memory(hdev, addr, size, val);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
		return rc;
	}

	/* Scrub HBM using all DMA channels in parallel */
	rc = gaudi_scrub_device_dram(hdev, val);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
		return rc;
	}

	return 0;
}
4663
/*
 * gaudi_get_int_queue_base() - look up an internal queue's PQ buffer.
 * @hdev: habanalabs device structure.
 * @queue_id: GAUDI_QUEUE_ID_* of an internal (QUEUE_TYPE_INT) queue.
 * @dma_handle: filled with the PQ's DMA address.
 * @queue_len: filled with the PQ length in entries.
 *
 * Return: kernel virtual address of the PQ, or NULL for an invalid or
 * non-internal queue id.
 */
static void *gaudi_get_int_queue_base(struct hl_device *hdev,
				u32 queue_id, dma_addr_t *dma_handle,
				u16 *queue_len)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *qman;

	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
		return NULL;
	}

	qman = &gaudi->internal_qmans[queue_id];
	*dma_handle = qman->pq_dma_addr;
	*queue_len = qman->pq_size / QMAN_PQ_ENTRY_SIZE;

	return qman->pq_kernel_addr;
}
4683
/*
 * gaudi_send_cpu_message() - send a message to the device CPU (firmware).
 * @hdev: habanalabs device structure.
 * @msg: message buffer to send.
 * @len: message length in bytes.
 * @timeout: timeout in usec, 0 selects the default.
 * @result: optional out parameter for the firmware's result value.
 *
 * Return: 0 on success (including the no-CPU-queue case, where *result is
 * zeroed), error code otherwise.
 */
static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
				u16 len, u32 timeout, u64 *result)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	/* No CPU queue means nobody to talk to - report success */
	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
		if (result)
			*result = 0;
		return 0;
	}

	if (!timeout)
		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;

	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
					timeout, result);
}
4701
/*
 * gaudi_test_queue() - sanity-test one external H/W queue.
 * @hdev: habanalabs device structure.
 * @hw_queue_id: id of the queue to test.
 *
 * Sends a MSG_PROT "fence" packet through the queue that instructs the
 * engine to write GAUDI_QMAN0_FENCE_VAL to a scratch DMA buffer, then polls
 * that buffer until the value appears or the timeout expires.
 *
 * Return: 0 on success, -ENOMEM on allocation failure, -EIO on timeout, or
 * the error of the submission itself.
 */
static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
{
	struct packet_msg_prot *fence_pkt;
	dma_addr_t pkt_dma_addr;
	u32 fence_val, tmp, timeout_usec;
	dma_addr_t fence_dma_addr;
	u32 *fence_ptr;
	int rc;

	/* Palladium (simulation) is far slower than silicon */
	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
	else
		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;

	fence_val = GAUDI_QMAN0_FENCE_VAL;

	/* Scratch word the engine will write the fence value into */
	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate memory for H/W queue %d testing\n",
			hw_queue_id);
		return -ENOMEM;
	}

	*fence_ptr = 0;

	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
						&pkt_dma_addr);
	if (!fence_pkt) {
		dev_err(hdev->dev,
			"Failed to allocate packet for H/W queue %d testing\n",
			hw_queue_id);
		rc = -ENOMEM;
		goto free_fence_ptr;
	}

	/* MSG_PROT with engine-barrier and message-barrier set */
	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(fence_val);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
					sizeof(struct packet_msg_prot),
					pkt_dma_addr);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to send fence packet to H/W queue %d\n",
			hw_queue_id);
		goto free_pkt;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
					1000, timeout_usec, true);

	/* Reclaim the PQ slot regardless of the poll outcome */
	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev,
			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
		rc = -EIO;
	}

free_pkt:
	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
free_fence_ptr:
	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
	return rc;
}
4774
gaudi_test_cpu_queue(struct hl_device * hdev)4775 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4776 {
4777 struct gaudi_device *gaudi = hdev->asic_specific;
4778
4779 /*
4780 * check capability here as send_cpu_message() won't update the result
4781 * value if no capability
4782 */
4783 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4784 return 0;
4785
4786 return hl_fw_test_cpu_queue(hdev);
4787 }
4788
gaudi_test_queues(struct hl_device * hdev)4789 static int gaudi_test_queues(struct hl_device *hdev)
4790 {
4791 int i, rc, ret_val = 0;
4792
4793 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4794 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4795 rc = gaudi_test_queue(hdev, i);
4796 if (rc)
4797 ret_val = -EINVAL;
4798 }
4799 }
4800
4801 rc = gaudi_test_cpu_queue(hdev);
4802 if (rc)
4803 ret_val = -EINVAL;
4804
4805 return ret_val;
4806 }
4807
/*
 * gaudi_dma_pool_zalloc() - allocate a zeroed block from the device DMA pool.
 * @hdev: habanalabs device structure.
 * @size: requested size; must not exceed GAUDI_DMA_POOL_BLK_SIZE.
 * @mem_flags: GFP allocation flags.
 * @dma_handle: filled with the DMA address as seen by the device.
 *
 * Return: kernel virtual address, or NULL on failure/oversized request.
 */
static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
			gfp_t mem_flags, dma_addr_t *dma_handle)
{
	void *vaddr;

	if (size > GAUDI_DMA_POOL_BLK_SIZE)
		return NULL;

	vaddr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
	if (!vaddr)
		return NULL;

	/* Shift to the device's base physical address of host memory */
	*dma_handle += HOST_PHYS_BASE;

	return vaddr;
}
4824
/*
 * gaudi_dma_pool_free() - return a block to the device DMA pool.
 * @hdev: habanalabs device structure.
 * @vaddr: kernel virtual address of the block.
 * @dma_addr: device-view DMA address (still carrying HOST_PHYS_BASE).
 */
static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
			dma_addr_t dma_addr)
{
	/* Cancel the device's base physical address of host memory */
	dma_pool_free(hdev->dma_pool, vaddr, dma_addr - HOST_PHYS_BASE);
}
4833
/*
 * gaudi_cpu_accessible_dma_pool_alloc() - allocate from the pool the device
 * CPU can access.  Thin ASIC-ops wrapper around the common FW helper.
 */
static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
						size_t size, dma_addr_t *dma_handle)
{
	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
}
4839
/*
 * gaudi_cpu_accessible_dma_pool_free() - free memory obtained from
 * gaudi_cpu_accessible_dma_pool_alloc().  Thin ASIC-ops wrapper.
 */
static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
						size_t size, void *vaddr)
{
	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
4845
/*
 * gaudi_get_dma_desc_list_size() - size of the LIN_DMA packets needed for an
 * sg table.
 * @hdev: habanalabs device structure.
 * @sgt: DMA-mapped scatter-gather table of the pinned host memory.
 *
 * Walks the sg list the same way gaudi_patch_dma_packet() does, merging
 * DMA-contiguous neighbors up to DMA_MAX_TRANSFER_SIZE into one descriptor,
 * and returns the total byte size of the resulting packet_lin_dma array.
 * Must stay in lockstep with gaudi_patch_dma_packet() or the patched CB size
 * will be wrong.
 */
static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
{
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t addr, addr_next;

	dma_desc_cnt = 0;

	for_each_sgtable_dma_sg(sgt, sg, count) {
		len = sg_dma_len(sg);
		addr = sg_dma_address(sg);

		/* A zero-length entry terminates the mapped list */
		if (len == 0)
			break;

		/* Fold following entries into this descriptor while they are
		 * physically contiguous and the merged size stays legal.
		 */
		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((addr + len == addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		dma_desc_cnt++;
	}

	return dma_desc_cnt * sizeof(struct packet_lin_dma);
}
4885
/*
 * gaudi_pin_memory_before_cs() - pin and DMA-map the host buffer referenced
 * by a user LIN_DMA packet.
 * @hdev: habanalabs device structure.
 * @parser: command submission parser state.
 * @user_dma_pkt: the user's LIN_DMA packet.
 * @addr: host virtual address the packet references.
 * @dir: DMA direction of the transfer.
 *
 * If the range is already pinned for this job it is reused; otherwise it is
 * pinned, mapped and recorded on parser->job_userptr_list (which owns the
 * userptr from then on).  In both cases the patched CB size is grown by the
 * LIN_DMA descriptors the mapped sg table will require.
 *
 * Return: 0 on success, -ENOMEM or the pin/map error otherwise.
 */
static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				u64 addr, enum dma_data_direction dir)
{
	struct hl_userptr *userptr;
	int rc;

	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
			parser->job_userptr_list, &userptr))
		goto already_pinned;

	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
	if (!userptr)
		return -ENOMEM;

	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
				userptr);
	if (rc)
		goto free_userptr;

	/* List takes ownership; removed again on the error path below */
	list_add_tail(&userptr->job_node, parser->job_userptr_list);

	rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
	if (rc) {
		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
		goto unpin_memory;
	}

	userptr->dma_mapped = true;
	userptr->dir = dir;

already_pinned:
	parser->patched_cb_size +=
			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);

	return 0;

unpin_memory:
	list_del(&userptr->job_node);
	hl_unpin_host_memory(hdev, userptr);
free_userptr:
	kfree(userptr);
	return rc;
}
4931
/*
 * gaudi_validate_dma_pkt_host() - validate a LIN_DMA packet that touches
 * host memory and account for its patched size.
 * @hdev: habanalabs device structure.
 * @parser: command submission parser state.
 * @user_dma_pkt: the user's LIN_DMA packet.
 * @src_in_host: true when the transfer source is host memory.
 *
 * A host-sourced memset reads no host memory, so pinning is skipped for it;
 * otherwise the referenced host range is pinned and mapped.
 *
 * Return: 0 on success, error code from the pinning path otherwise.
 */
static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				bool src_in_host)
{
	enum dma_data_direction dir;
	bool user_memset, skip_host_mem_pin = false;
	u64 addr;

	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

	if (src_in_host) {
		/* Memset carries its pattern in-packet - nothing to pin */
		skip_host_mem_pin = user_memset;

		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
		dir = DMA_TO_DEVICE;
		addr = le64_to_cpu(user_dma_pkt->src_addr);
	} else {
		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
		dir = DMA_FROM_DEVICE;
		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	}

	if (skip_host_mem_pin) {
		parser->patched_cb_size += sizeof(*user_dma_pkt);
		return 0;
	}

	return gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
						addr, dir);
}
4969
gaudi_validate_dma_pkt_no_mmu(struct hl_device * hdev,struct hl_cs_parser * parser,struct packet_lin_dma * user_dma_pkt)4970 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4971 struct hl_cs_parser *parser,
4972 struct packet_lin_dma *user_dma_pkt)
4973 {
4974 bool src_in_host = false;
4975 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4976 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4977 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4978
4979 dev_dbg(hdev->dev, "DMA packet details:\n");
4980 dev_dbg(hdev->dev, "source == 0x%llx\n",
4981 le64_to_cpu(user_dma_pkt->src_addr));
4982 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4983 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4984
4985 /*
4986 * Special handling for DMA with size 0. Bypass all validations
4987 * because no transactions will be done except for WR_COMP, which
4988 * is not a security issue
4989 */
4990 if (!le32_to_cpu(user_dma_pkt->tsize)) {
4991 parser->patched_cb_size += sizeof(*user_dma_pkt);
4992 return 0;
4993 }
4994
4995 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4996 src_in_host = true;
4997
4998 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
4999 src_in_host);
5000 }
5001
gaudi_validate_load_and_exe_pkt(struct hl_device * hdev,struct hl_cs_parser * parser,struct packet_load_and_exe * user_pkt)5002 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5003 struct hl_cs_parser *parser,
5004 struct packet_load_and_exe *user_pkt)
5005 {
5006 u32 cfg;
5007
5008 cfg = le32_to_cpu(user_pkt->cfg);
5009
5010 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5011 dev_err(hdev->dev,
5012 "User not allowed to use Load and Execute\n");
5013 return -EPERM;
5014 }
5015
5016 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5017
5018 return 0;
5019 }
5020
/*
 * gaudi_validate_cb() - walk a user command buffer, rejecting privileged
 * packets and computing the patched CB size.
 * @hdev: habanalabs device structure.
 * @parser: command submission parser state; patched_cb_size is (re)computed.
 * @is_mmu: true when the submission goes through the MMU, in which case
 *          LIN_DMA packets are passed through unmodified.
 *
 * Return: 0 on success, -EPERM for forbidden packets, -EINVAL for malformed
 * ones.
 */
static int gaudi_validate_cb(struct hl_device *hdev,
			struct hl_cs_parser *parser, bool is_mmu)
{
	u32 cb_parsed_length = 0;
	int rc = 0;

	parser->patched_cb_size = 0;

	/* cb_user_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		struct gaudi_packet *user_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		/* Make sure the packet lies entirely inside the user CB */
		pkt_size = gaudi_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		/* Privileged packets users may never submit */
		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_LOAD_AND_EXE:
			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
				(struct packet_load_and_exe *) user_pkt);
			break;

		case PACKET_LIN_DMA:
			parser->contains_dma_pkt = true;
			if (is_mmu)
				parser->patched_cb_size += pkt_size;
			else
				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			break;

		/* Benign packets copied through as-is */
		case PACKET_WREG_32:
		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_REPEAT:
		case PACKET_FENCE:
		case PACKET_NOP:
		case PACKET_ARB_POINT:
			parser->patched_cb_size += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	/*
	 * The new CB should have space at the end for two MSG_PROT packets
	 * plus optional NOP padding:
	 * 1. Optional NOP padding for cacheline alignment
	 * 2. A packet that will act as a completion packet
	 * 3. A packet that will generate MSI interrupt
	 */
	if (parser->completion)
		parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
			parser->patched_cb_size);

	return rc;
}
5127
/*
 * gaudi_patch_dma_packet() - expand one user LIN_DMA packet into packets
 * that address the pinned host memory.
 * @hdev: habanalabs device structure.
 * @parser: command submission parser state.
 * @user_dma_pkt: the user's original LIN_DMA packet.
 * @new_dma_pkt: destination array in the patched CB.
 * @new_dma_pkt_size: filled with the total byte size written.
 *
 * Emits one LIN_DMA packet per merged sg-table segment, mirroring the merge
 * logic of gaudi_get_dma_desc_list_size().  WR_COMP is disabled on all but
 * the last emitted packet, where the user's original setting is restored;
 * the engine barrier is kept only on the first one.
 *
 * Return: 0 on success, -EFAULT when the referenced host range was not
 * pinned or the sg table is empty.
 */
static int gaudi_patch_dma_packet(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				struct packet_lin_dma *new_dma_pkt,
				u32 *new_dma_pkt_size)
{
	struct hl_userptr *userptr;
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
	u64 len, len_next;
	dma_addr_t dma_addr, dma_addr_next;
	u64 device_memory_addr, addr;
	enum dma_data_direction dir;
	struct sg_table *sgt;
	bool src_in_host = false;
	bool skip_host_mem_pin = false;
	bool user_memset;

	ctl = le32_to_cpu(user_dma_pkt->ctl);

	/* Queues DMA_0_0..DMA_0_3 are the host-to-device (PCI) channels */
	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
		src_in_host = true;

	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

	if (src_in_host) {
		addr = le64_to_cpu(user_dma_pkt->src_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
		dir = DMA_TO_DEVICE;
		if (user_memset)
			skip_host_mem_pin = true;
	} else {
		addr = le64_to_cpu(user_dma_pkt->dst_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		dir = DMA_FROM_DEVICE;
	}

	if ((!skip_host_mem_pin) &&
		(!hl_userptr_is_pinned(hdev, addr,
					le32_to_cpu(user_dma_pkt->tsize),
					parser->job_userptr_list, &userptr))) {
		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
				addr, le32_to_cpu(user_dma_pkt->tsize));
		return -EFAULT;
	}

	/* Host-sourced memset touches no host memory - copy it verbatim */
	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
		*new_dma_pkt_size = sizeof(*user_dma_pkt);
		return 0;
	}

	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;

	sgt = userptr->sgt;
	dma_desc_cnt = 0;

	for_each_sgtable_dma_sg(sgt, sg, count) {
		len = sg_dma_len(sg);
		dma_addr = sg_dma_address(sg);

		if (len == 0)
			break;

		/* Merge DMA-contiguous neighbors, same as
		 * gaudi_get_dma_desc_list_size()
		 */
		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			dma_addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((dma_addr + len == dma_addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		ctl = le32_to_cpu(user_dma_pkt->ctl);
		/* Engine barrier is needed only on the first descriptor;
		 * completion is signaled only by the last one.
		 */
		if (likely(dma_desc_cnt))
			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
		new_dma_pkt->ctl = cpu_to_le32(ctl);
		new_dma_pkt->tsize = cpu_to_le32(len);

		if (dir == DMA_TO_DEVICE) {
			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
		} else {
			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
		}

		/* A memset re-writes the same device range every chunk */
		if (!user_memset)
			device_memory_addr += len;
		dma_desc_cnt++;
		new_dma_pkt++;
	}

	if (!dma_desc_cnt) {
		dev_err(hdev->dev,
			"Error of 0 SG entries when patching DMA packet\n");
		return -EFAULT;
	}

	/* Fix the last dma packet - wrcomp must be as user set it */
	new_dma_pkt--;
	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);

	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);

	return 0;
}
5246
/*
 * gaudi_patch_cb() - copy the user CB into the patched CB, expanding LIN_DMA
 * packets to target the pinned host memory.
 * @hdev: habanalabs device structure.
 * @parser: command submission parser state; user_cb and patched_cb are set.
 *
 * Must mirror gaudi_validate_cb() case-for-case so the sizes it accounted
 * for match what is actually written here.
 *
 * Return: 0 on success, -EPERM/-EINVAL like the validation pass.
 */
static int gaudi_patch_cb(struct hl_device *hdev,
			struct hl_cs_parser *parser)
{
	u32 cb_parsed_length = 0;
	u32 cb_patched_cur_length = 0;
	int rc = 0;

	/* cb_user_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		u32 new_pkt_size = 0;
		struct gaudi_packet *user_pkt, *kernel_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
		kernel_pkt = parser->patched_cb->kernel_address +
					cb_patched_cur_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		/* Make sure the packet lies entirely inside the user CB */
		pkt_size = gaudi_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		/* LIN_DMA may expand into several packets in the patched CB */
		case PACKET_LIN_DMA:
			rc = gaudi_patch_dma_packet(hdev, parser,
					(struct packet_lin_dma *) user_pkt,
					(struct packet_lin_dma *) kernel_pkt,
					&new_pkt_size);
			cb_patched_cur_length += new_pkt_size;
			break;

		/* Privileged packets users may never submit */
		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		/* Benign packets copied through verbatim */
		case PACKET_WREG_32:
		case PACKET_WREG_BULK:
		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_REPEAT:
		case PACKET_FENCE:
		case PACKET_NOP:
		case PACKET_ARB_POINT:
		case PACKET_LOAD_AND_EXE:
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	return rc;
}
5336
/*
 * gaudi_parse_cb_mmu() - parse a CB submitted to an external queue when the
 * MMU is enabled.
 * @hdev: habanalabs device structure.
 * @parser: command submission parser state.
 *
 * With the MMU on, user addresses need no patching, so the user CB is copied
 * as-is into a kernel-owned patched CB (with room for the trailing
 * completion packets) and then validated in place.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gaudi_parse_cb_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 handle;
	u32 patched_cb_size;
	struct hl_cb *user_cb;
	int rc;

	/*
	 * The new CB should have space at the end for two MSG_PROT packets
	 * plus optional NOP padding:
	 * 1. Optional NOP padding for cacheline alignment
	 * 2. A packet that will act as a completion packet
	 * 3. A packet that will generate MSI interrupt
	 */
	if (parser->completion)
		parser->patched_cb_size = parser->user_cb_size +
				gaudi_get_patched_cb_extra_size(parser->user_cb_size);
	else
		parser->patched_cb_size = parser->user_cb_size;

	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&handle);

	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n",
			rc);
		return rc;
	}

	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
	/* hl_cb_get should never fail */
	if (!parser->patched_cb) {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
		rc = -EFAULT;
		goto out;
	}

	/*
	 * We are protected from overflow because the check
	 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
	 * in the common code. That check is done only if is_kernel_allocated_cb is true.
	 *
	 * There is no option to reach here without going through that check because:
	 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
	 *    an external queue.
	 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
	 */
	memcpy(parser->patched_cb->kernel_address,
			parser->user_cb->kernel_address,
			parser->user_cb_size);

	patched_cb_size = parser->patched_cb_size;

	/* Validate patched CB instead of user CB */
	user_cb = parser->user_cb;
	parser->user_cb = parser->patched_cb;
	rc = gaudi_validate_cb(hdev, parser, true);
	parser->user_cb = user_cb;

	if (rc) {
		hl_cb_put(parser->patched_cb);
		goto out;
	}

	/* Validation recomputes the size; a mismatch means a corrupted CB */
	if (patched_cb_size != parser->patched_cb_size) {
		dev_err(hdev->dev, "user CB size mismatch\n");
		hl_cb_put(parser->patched_cb);
		rc = -EINVAL;
		goto out;
	}

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job will be completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);

	return rc;
}
5421
/*
 * gaudi_parse_cb_no_mmu() - parse a CB submitted to an external queue when
 * the MMU is not used.
 * @hdev: habanalabs device structure.
 * @parser: command submission parser state.
 *
 * Validates the user CB (which also computes the patched size and pins the
 * referenced host memory), allocates a patched CB, and patches the user
 * packets into it.  On any failure the job's pinned userptr list is freed.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 handle;
	int rc;

	rc = gaudi_validate_cb(hdev, parser, false);

	if (rc)
		goto free_userptr;

	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n", rc);
		goto free_userptr;
	}

	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
	/* hl_cb_get should never fail here */
	if (!parser->patched_cb) {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
		rc = -EFAULT;
		goto out;
	}

	rc = gaudi_patch_cb(hdev, parser);

	if (rc)
		hl_cb_put(parser->patched_cb);

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job will be completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);

free_userptr:
	if (rc)
		hl_userptr_delete_list(hdev, parser->job_userptr_list);
	return rc;
}
5469
gaudi_parse_cb_no_ext_queue(struct hl_device * hdev,struct hl_cs_parser * parser)5470 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5471 struct hl_cs_parser *parser)
5472 {
5473 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5474 struct gaudi_device *gaudi = hdev->asic_specific;
5475 u32 nic_queue_offset, nic_mask_q_id;
5476
5477 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5478 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5479 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5480 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5481
5482 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5483 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5484 return -EINVAL;
5485 }
5486 }
5487
5488 /* For internal queue jobs just check if CB address is valid */
5489 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5490 parser->user_cb_size,
5491 asic_prop->sram_user_base_address,
5492 asic_prop->sram_end_address))
5493 return 0;
5494
5495 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5496 parser->user_cb_size,
5497 asic_prop->dram_user_base_address,
5498 asic_prop->dram_end_address))
5499 return 0;
5500
5501 /* PMMU and HPMMU addresses are equal, check only one of them */
5502 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5503 parser->user_cb_size,
5504 asic_prop->pmmu.start_addr,
5505 asic_prop->pmmu.end_addr))
5506 return 0;
5507
5508 dev_err(hdev->dev,
5509 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5510 parser->user_cb, parser->user_cb_size);
5511
5512 return -EFAULT;
5513 }
5514
gaudi_cs_parser(struct hl_device * hdev,struct hl_cs_parser * parser)5515 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5516 {
5517 struct gaudi_device *gaudi = hdev->asic_specific;
5518
5519 if (parser->queue_type == QUEUE_TYPE_INT)
5520 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5521
5522 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5523 return gaudi_parse_cb_mmu(hdev, parser);
5524 else
5525 return gaudi_parse_cb_no_mmu(hdev, parser);
5526 }
5527
/*
 * gaudi_add_end_of_cb_packets() - append completion packets to a CB.
 * @hdev: habanalabs device structure.
 * @kernel_address: kernel-mapped address of the CB.
 * @len: total CB length, including the two trailing MSG_PROT packets.
 * @original_len: length of the user part of the CB; NOP padding starts here.
 * @cq_addr: address of the completion queue entry to update.
 * @cq_val: value to write to the CQ entry.
 * @msi_vec: MSI vector to trigger after the CQ update.
 * @eb: whether to set the engine-barrier bit on the CQ packet.
 *
 * Fills the gap between the end of the user content and the two trailing
 * packets with NOPs, then writes two MSG_PROT packets: the first updates
 * the completion queue, the second fires the MSI interrupt.
 */
static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
				u32 msi_vec, bool eb)
{
	struct packet_msg_prot *cq_pkt;
	struct packet_nop *cq_padding;
	u64 msi_addr;
	u32 tmp;

	/* Padding runs from end of user content up to the two MSG_PROT packets */
	cq_padding = kernel_address + original_len;
	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);

	while ((void *)cq_padding < (void *)cq_pkt) {
		cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
		cq_padding++;
	}

	/* First MSG_PROT: write cq_val to the completion queue entry */
	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	if (eb)
		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);

	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(cq_val);
	cq_pkt->addr = cpu_to_le64(cq_addr);

	cq_pkt++;

	/* Second MSG_PROT: trigger the MSI interrupt */
	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(1);
	/* Real PCI device uses the core MSI register; otherwise per-vector regs */
	msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
}
5564
/* Publish the event-queue consumer index so the device can reuse EQ entries. */
static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
{
	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
}
5569
/*
 * gaudi_memset_device_memory() - fill a device memory region with a value.
 * @hdev: habanalabs device structure.
 * @addr: device address to start filling at.
 * @size: number of bytes to fill.
 * @val: fill value (used as the LIN_DMA memset source).
 *
 * Builds a single LIN_DMA memset packet in a kernel CB and runs it as a job
 * on QMAN0 of DMA channel 0. Stale DMA0 errors are cleared before the job
 * (only before init is done) and the error cause is re-checked afterwards.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val)
{
	struct packet_lin_dma *lin_dma_pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl, err_cause;
	struct hl_cb *cb;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	lin_dma_pkt = cb->kernel_address;
	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
	cb_size = sizeof(*lin_dma_pkt);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);

	lin_dma_pkt->ctl = cpu_to_le32(ctl);
	lin_dma_pkt->src_addr = cpu_to_le64(val);
	/* Plain assignment - the packet was just zeroed, so OR-ing in the
	 * address was equivalent but misleading.
	 */
	lin_dma_pkt->dst_addr = cpu_to_le64(addr);
	lin_dma_pkt->tsize = cpu_to_le32(size);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
	if (err_cause && !hdev->init_done) {
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	/* Reserve room for the fence MSG_PROT appended by the QMAN0 sender */
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
	if (err_cause) {
		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
		rc = -EIO;
		if (!hdev->init_done) {
			dev_dbg(hdev->dev,
				"Clearing DMA0 engine from errors (cause 0x%x)\n",
				err_cause);
			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
		}
	}

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}
5648
/*
 * gaudi_memset_registers() - write a value to a range of registers via QMAN0.
 * @hdev: habanalabs device structure.
 * @reg_base: address of the first register.
 * @num_regs: number of consecutive 32-bit registers to write.
 * @val: value to write to each register.
 *
 * Builds one MSG_LONG packet per register in a kernel CB and executes the
 * CB as a job on QMAN0 of DMA channel 0.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val)
{
	struct packet_msg_long *pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl;
	struct hl_cb *cb;
	int i, rc;

	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);

	if (cb_size > SZ_2M) {
		/* Print the limit in MB - SZ_2M is in bytes */
		dev_err(hdev->dev, "CB size must be smaller than %uMB\n",
			SZ_2M >> 20);
		return -ENOMEM;
	}

	cb = hl_cb_kernel_create(hdev, cb_size, false);
	if (!cb)
		return -EFAULT;

	pkt = cb->kernel_address;

	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	for (i = 0; i < num_regs ; i++, pkt++) {
		pkt->ctl = cpu_to_le32(ctl);
		pkt->value = cpu_to_le32(val);
		pkt->addr = cpu_to_le64(reg_base + (i * 4));
	}

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = cb_size;

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}
5711
gaudi_restore_sm_registers(struct hl_device * hdev)5712 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5713 {
5714 u64 base_addr;
5715 u32 num_regs;
5716 int rc;
5717
5718 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5719 num_regs = NUM_OF_SOB_IN_BLOCK;
5720 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5721 if (rc) {
5722 dev_err(hdev->dev, "failed resetting SM registers");
5723 return -ENOMEM;
5724 }
5725
5726 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5727 num_regs = NUM_OF_SOB_IN_BLOCK;
5728 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5729 if (rc) {
5730 dev_err(hdev->dev, "failed resetting SM registers");
5731 return -ENOMEM;
5732 }
5733
5734 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5735 num_regs = NUM_OF_SOB_IN_BLOCK;
5736 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5737 if (rc) {
5738 dev_err(hdev->dev, "failed resetting SM registers");
5739 return -ENOMEM;
5740 }
5741
5742 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5743 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5744 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5745 if (rc) {
5746 dev_err(hdev->dev, "failed resetting SM registers");
5747 return -ENOMEM;
5748 }
5749
5750 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5751 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5752 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5753 if (rc) {
5754 dev_err(hdev->dev, "failed resetting SM registers");
5755 return -ENOMEM;
5756 }
5757
5758 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5759 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5760 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5761 if (rc) {
5762 dev_err(hdev->dev, "failed resetting SM registers");
5763 return -ENOMEM;
5764 }
5765
5766 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5767 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5768 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5769 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5770 if (rc) {
5771 dev_err(hdev->dev, "failed resetting SM registers");
5772 return -ENOMEM;
5773 }
5774
5775 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5776 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5777 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5778 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5779 if (rc) {
5780 dev_err(hdev->dev, "failed resetting SM registers");
5781 return -ENOMEM;
5782 }
5783
5784 return 0;
5785 }
5786
gaudi_restore_dma_registers(struct hl_device * hdev)5787 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5788 {
5789 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5790 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5791 int i;
5792
5793 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5794 u64 sob_addr = CFG_BASE +
5795 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5796 (i * sob_delta);
5797 u32 dma_offset = i * DMA_CORE_OFFSET;
5798
5799 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5800 lower_32_bits(sob_addr));
5801 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5802 upper_32_bits(sob_addr));
5803 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5804
5805 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5806 * modified by the user for SRAM reduction
5807 */
5808 if (i > 1)
5809 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5810 0x00000001);
5811 }
5812 }
5813
gaudi_restore_qm_registers(struct hl_device * hdev)5814 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5815 {
5816 u32 qman_offset;
5817 int i;
5818
5819 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5820 qman_offset = i * DMA_QMAN_OFFSET;
5821 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5822 }
5823
5824 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5825 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5826 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5827 }
5828
5829 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5830 qman_offset = i * TPC_QMAN_OFFSET;
5831 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5832 }
5833
5834 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5835 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5836 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5837 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5838 }
5839 }
5840
/*
 * gaudi_restore_user_registers() - restore all user-accessible registers:
 * sync manager objects first, then DMA and QMAN configuration.
 */
static int gaudi_restore_user_registers(struct hl_device *hdev)
{
	int rc = gaudi_restore_sm_registers(hdev);

	if (!rc) {
		gaudi_restore_dma_registers(hdev);
		gaudi_restore_qm_registers(hdev);
	}

	return rc;
}
5854
/* No per-ASID context switch work is needed on Gaudi; always succeeds. */
static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
{
	return 0;
}
5859
gaudi_mmu_clear_pgt_range(struct hl_device * hdev)5860 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5861 {
5862 u32 size = hdev->asic_prop.mmu_pgt_size +
5863 hdev->asic_prop.mmu_cache_mng_size;
5864 struct gaudi_device *gaudi = hdev->asic_specific;
5865 u64 addr = hdev->asic_prop.mmu_pgt_addr;
5866
5867 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5868 return 0;
5869
5870 return gaudi_memset_device_memory(hdev, addr, size, 0);
5871 }
5872
/* Intentionally empty - Gaudi has no phase topology to restore. */
static void gaudi_restore_phase_topology(struct hl_device *hdev)
{

}
5877
/*
 * gaudi_dma_core_transfer() - run one transfer directly on a DMA core.
 * @hdev: habanalabs device structure.
 * @dma_id: index of the DMA channel to use.
 * @addr: device source address.
 * @size_to_dma: transfer size in bytes.
 * @dma_addr: host (bus) destination address.
 *
 * Programs the DMA core registers directly (bypassing the QMAN), commits
 * the transfer and polls up to one second for the engine to go idle, then
 * checks the error-cause register.
 *
 * Return: 0 on success, -EIO on timeout or DMA error.
 */
static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
					u32 size_to_dma, dma_addr_t dma_addr)
{
	u32 err_cause, val;
	u64 dma_offset;
	int rc;

	dma_offset = dma_id * DMA_CORE_OFFSET;

	/* Program source, destination and size, then commit a linear DMA */
	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));

	/* Wait for the engine to clear its BUSY bit (1 second timeout) */
	rc = hl_poll_timeout(
		hdev,
		mmDMA0_CORE_STS0 + dma_offset,
		val,
		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
		0,
		1000000);

	if (rc) {
		dev_err(hdev->dev,
			"DMA %d timed-out during reading of 0x%llx\n",
			dma_id, addr);
		return -EIO;
	}

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	if (err_cause) {
		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);

		return -EIO;
	}

	return 0;
}
5924
/*
 * gaudi_debugfs_read_dma() - read device memory into a host buffer via DMA.
 * @hdev: habanalabs device structure.
 * @addr: device address to read from.
 * @size: number of bytes to read.
 * @blob_addr: kernel buffer to copy the data into.
 *
 * Picks an idle PCI DMA engine (tries PCI_DMA_1, falls back to PCI_DMA_2),
 * stops its QMAN CPs, opens the DMA core protection bit and streams the
 * region through a 2MB DMA-coherent bounce buffer in chunks. Runs under the
 * hw_queues lock for the whole transfer.
 *
 * Return: 0 on success, -ENOMEM / -EAGAIN / -EIO on failure.
 */
static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
				void *blob_addr)
{
	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
	u32 qm_glbl_sts0, qm_cgm_sts;
	u64 dma_offset, qm_offset;
	dma_addr_t dma_addr;
	void *kernel_addr;
	bool is_eng_idle;
	int rc = 0, dma_id;

	/* 2MB bounce buffer - also the chunk size of the copy loop below */
	kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);

	if (!kernel_addr)
		return -ENOMEM;

	hdev->asic_funcs->hw_queues_lock(hdev);

	/* Prefer the first PCI DMA engine, but only if it is fully idle */
	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
	dma_offset = dma_id * DMA_CORE_OFFSET;
	qm_offset = dma_id * DMA_QMAN_OFFSET;
	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
	qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
	qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
		      IS_DMA_IDLE(dma_core_sts0);

	if (!is_eng_idle) {
		/* Fall back to the second PCI DMA engine */
		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
		dma_offset = dma_id * DMA_CORE_OFFSET;
		qm_offset = dma_id * DMA_QMAN_OFFSET;
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
			      IS_DMA_IDLE(dma_core_sts0);

		if (!is_eng_idle) {
			dev_err_ratelimited(hdev->dev,
				"Can't read via DMA because it is BUSY\n");
			rc = -EAGAIN;
			goto out;
		}
	}

	/* Stop the QMAN CPs while we drive the DMA core directly */
	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
	 * ASID
	 */
	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	if (err_cause) {
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
	}

	pos = 0;
	size_left = size;
	size_to_dma = SZ_2M;

	/* Stream the region through the bounce buffer, 2MB at a time */
	while (size_left > 0) {

		if (size_left < SZ_2M)
			size_to_dma = size_left;

		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
						dma_addr);
		if (rc)
			break;

		memcpy(blob_addr + pos, kernel_addr, size_to_dma);

		if (size_left <= SZ_2M)
			break;

		pos += SZ_2M;
		addr += SZ_2M;
		size_left -= SZ_2M;
	}

	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
	 * ASID
	 */
	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
			~BIT(DMA0_CORE_PROT_VAL_SHIFT));

	/* Restore the QMAN CP configuration */
	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);

out:
	hdev->asic_funcs->hw_queues_unlock(hdev);

	hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);

	return rc;
}
6029
/*
 * gaudi_read_pte() - read a page-table entry through the HBM BAR.
 * Returns U64_MAX if a hard reset is pending (the BAR may be unusable).
 */
static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 offset_in_bar;

	if (hdev->reset_info.hard_reset_pending)
		return U64_MAX;

	offset_in_bar = addr - gaudi->hbm_bar_cur_addr;

	return readq(hdev->pcie_bar[HBM_BAR_ID] + offset_in_bar);
}
6040
/*
 * gaudi_write_pte() - write a page-table entry through the HBM BAR.
 * Silently skipped if a hard reset is pending (the BAR may be unusable).
 */
static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 offset_in_bar;

	if (hdev->reset_info.hard_reset_pending)
		return;

	offset_in_bar = addr - gaudi->hbm_bar_cur_addr;

	writeq(val, hdev->pcie_bar[HBM_BAR_ID] + offset_in_bar);
}
6051
/*
 * gaudi_mmu_prepare_reg() - program an ASID into a single engine register.
 * Clears the low 11 bits (MMBP + ASID fields) and then ORs in the new asid.
 * Note: two separate read-modify-write accesses, so the register briefly
 * holds asid 0 in between.
 */
void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}
6058
/*
 * gaudi_mmu_prepare() - program an ASID into every engine register that
 * carries one, so subsequent engine transactions are translated in that
 * address space.
 * @hdev: habanalabs device structure.
 * @asid: address-space ID; must fit the HW ASID field (checked below).
 *
 * Covers all DMA QMANs and cores, all TPC QMANs and CFG ports, the MME
 * QMANs/SBAB/WBC, the PSOC trace ports, and the QMANs of every NIC that
 * was actually initialized (guarded by the per-NIC HW_CAP bits).
 * Does nothing if the MMU was never brought up.
 */
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
		dev_crit(hdev->dev, "asid %u is too big\n", asid);
		return;
	}

	/* DMA QMANs: 5 non-secure-props registers per channel */
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);

	/* DMA cores */
	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);

	/* TPCs: QMAN props plus the CFG read/write user ports */
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);

	/* MME master QMANs (MME0 and MME2) */
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);

	/* MME read (SBAB) and write-back (ACC WBC) ports of all 4 MMEs */
	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);

	/* NIC QMANs - only the ones whose engines were initialized */
	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	/* PSOC trace read/write ports */
	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
}
6349
/*
 * gaudi_send_job_on_qman0() - execute a kernel job on QMAN0 of DMA channel 0.
 * @hdev: habanalabs device structure.
 * @job: job whose patched CB will be sent; job->job_cb_size must already
 *       reserve room for the trailing fence MSG_PROT packet.
 *
 * Appends a MSG_PROT fence packet to the CB, opens the DMA0 core protection
 * bits so the job can execute (see the security scheme note at the top of
 * this file), sends the CB without completion and busy-waits on the fence
 * value in host memory. The protection is restored to ERR_VAL-only before
 * returning.
 *
 * Return: 0 on success, negative errno on allocation/send/timeout failure.
 */
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job)
{
	struct packet_msg_prot *fence_pkt;
	u32 *fence_ptr;
	dma_addr_t fence_dma_addr;
	struct hl_cb *cb;
	u32 tmp, timeout, dma_offset;
	int rc;

	/* Simulation platform needs a much longer completion timeout */
	if (hdev->pldm)
		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
	else
		timeout = HL_DEVICE_TIMEOUT_USEC;

	/* 4-byte fence in coherent memory, polled after the job is sent */
	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate fence memory for QMAN0\n");
		return -ENOMEM;
	}

	cb = job->patched_cb;

	/* The fence packet occupies the pre-reserved tail of the CB */
	fence_pkt = cb->kernel_address +
			job->job_cb_size - sizeof(struct packet_msg_prot);

	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;

	/* Open the core protection so the kernel job can execute */
	WREG32(mmDMA0_CORE_PROT + dma_offset,
		BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
					job->job_cb_size, cb->bus_address);
	if (rc) {
		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
		goto free_fence_ptr;
	}

	/* Poll (1ms interval) until the fence value lands in host memory */
	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
				timeout, true);

	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
		goto free_fence_ptr;
	}

free_fence_ptr:
	/* Restore protection to ERR_VAL only, regardless of outcome */
	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));

	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
	return rc;
}
6414
/*
 * gaudi_get_event_desc - copy the printable name of an event into a buffer
 *
 * @event_type: H/W event index received from FW
 * @desc: destination buffer for the event name
 * @size: size of @desc in bytes
 *
 * Writes "N/A" for out-of-range or invalid (unmapped) events.
 */
static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
{
	if (event_type >= GAUDI_EVENT_SIZE)
		goto event_not_supported;

	if (!gaudi_irq_map_table[event_type].valid)
		goto event_not_supported;

	/*
	 * The event name is data, not a format string - print it through
	 * "%s" so a stray '%' in the table can never be interpreted as a
	 * conversion specifier.
	 */
	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);

	return;

event_not_supported:
	snprintf(desc, size, "N/A");
}
6430
/*
 * gaudi_get_razwi_initiator_dma_name - resolve which DMA engine caused a RAZWI
 *
 * Each DMA_IF routing point is shared by two DMA cores, so the captured
 * initiator coordinates alone are ambiguous. The per-core ERR_CAUSE
 * registers are read to decide which core of the pair (or possibly both)
 * raised the HBW read/write error. Engine ids are returned through
 * @engine_id_1 (and @engine_id_2 when both cores are suspect).
 */
static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
						bool is_write, u16 *engine_id_1,
						u16 *engine_id_2)
{
	u32 dma_id[2], dma_offset, err_cause[2], mask, i;

	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;

	/* Map the DMA_IF coordinates to the pair of DMA cores behind it */
	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		dma_id[0] = 0;
		dma_id[1] = 2;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		dma_id[0] = 1;
		dma_id[1] = 3;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		dma_id[0] = 4;
		dma_id[1] = 6;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		dma_id[0] = 5;
		dma_id[1] = 7;
		break;
	default:
		goto unknown_initiator;
	}

	/* Read both cores' error-cause registers to disambiguate */
	for (i = 0 ; i < 2 ; i++) {
		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	}

	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
			return "DMA0";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
			return "DMA2";
		} else {
			/* Neither (or both) flagged - report both candidates */
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
			return "DMA0 or DMA2";
		}
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
			return "DMA1";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
			return "DMA3";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
			return "DMA1 or DMA3";
		}
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
			return "DMA4";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
			return "DMA6";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
			return "DMA4 or DMA6";
		}
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
			return "DMA5";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
			return "DMA7";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
			return "DMA5 or DMA7";
		}
	}

unknown_initiator:
	return "unknown initiator";
}
6528
/*
 * gaudi_get_razwi_initiator_name - resolve the engine that initiated a RAZWI
 *
 * Decodes the captured RAZWI initiator-ID register (read or write side,
 * per @is_write) into router coordinates (X,Y) and an AXI ID, then maps
 * them to a printable engine name. When the initiator is an engine its id
 * is returned through @engine_id_1 (and @engine_id_2 for ambiguous DMA
 * pairs); PCI, CPU and PSOC initiators have no engine id.
 */
static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
					u16 *engine_id_1, u16 *engine_id_2)
{
	u32 val, x_y, axi_id;

	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
				RREG32(mmMMU_UP_RAZWI_READ_ID);
	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
			RAZWI_INITIATOR_AXI_ID_SHIFT);

	switch (x_y) {
	/* Some routers are shared; the AXI ID tells the units apart */
	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
			return "TPC0";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
			return "NIC0";
		}
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC1:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
		return "TPC1";
	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
		return "MME0";
	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
		return "MME1";
	case RAZWI_INITIATOR_ID_X_Y_TPC2:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
		return "TPC2";
	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
			return "TPC3";
		}
		/* PCI, CPU or PSOC does not have engine id */
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
			return "PCI";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
			return "CPU";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
			return "PSOC";
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		/* DMA_IF is shared by two DMA cores - needs extra decoding */
		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
				engine_id_1, engine_id_2);
	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
			return "TPC4";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
			return "NIC1";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
			return "NIC2";
		}
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC5:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
		return "TPC5";
	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
		return "MME2";
	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
		return "MME3";
	case RAZWI_INITIATOR_ID_X_Y_TPC6:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
		return "TPC6";
	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
			return "TPC7";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
			return "NIC4";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
			return "NIC5";
		}
		break;
	default:
		break;
	}

	dev_err(hdev->dev,
		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
		val,
		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
			RAZWI_INITIATOR_AXI_ID_MASK);

	return "unknown initiator";
}
6645
/*
 * gaudi_print_and_get_razwi_info - report pending RAZWI events and clear them
 *
 * Checks the MMU RAZWI write/read valid registers. For each pending event
 * it prints the resolved initiator, clears the valid bit so the capture
 * logic can latch a new event, and sets *is_write / *is_read accordingly.
 * Engine ids (when resolvable) are returned via @engine_id_1/@engine_id_2.
 */
static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
						u16 *engine_id_2, bool *is_read, bool *is_write)
{

	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal write of %s\n",
			gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
		/* Clear the valid bit to re-arm the capture logic */
		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
		*is_write = true;
	}

	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal read of %s\n",
			gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
		*is_read = true;
	}
}
6666
/*
 * gaudi_print_and_get_mmu_error_info - report latched MMU page/access errors
 *
 * Reads the MMU error capture registers. For a valid latched page fault or
 * access error, reconstructs the 50-bit faulting VA into *addr, reports it
 * and clears the capture register to re-arm it. Page faults are also
 * forwarded to hl_handle_page_fault() which may update @event_mask.
 */
static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 val;

	/* Capture registers are meaningless if the MMU was never enabled */
	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		/* VA bits 49:32 live in the capture register, bits 31:0 in
		 * the dedicated VA register
		 */
		*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
		*addr <<= 32;
		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
		hl_handle_page_fault(hdev, *addr, 0, true, event_mask);

		/* Clear the valid bit to re-arm the capture logic */
		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
	}

	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
		*addr <<= 32;
		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);

		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
	}
}
6698
6699 /*
6700 * +-------------------+------------------------------------------------------+
6701 * | Configuration Reg | Description |
6702 * | Address | |
6703 * +-------------------+------------------------------------------------------+
6704 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
6705 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
6706 * | |0xF34 memory wrappers 63:32 |
6707 * | |0xF38 memory wrappers 95:64 |
6708 * | |0xF3C memory wrappers 127:96 |
6709 * +-------------------+------------------------------------------------------+
6710 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
6711 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
6712 * | |0xF44 memory wrappers 63:32 |
6713 * | |0xF48 memory wrappers 95:64 |
6714 * | |0xF4C memory wrappers 127:96 |
6715 * +-------------------+------------------------------------------------------+
6716 */
/*
 * gaudi_extract_ecc_info - extract ECC error details from a block's registers
 *
 * @hdev: habanalabs device structure
 * @params: block base address, number of memory wrappers and error type
 * @ecc_address: returned ECC error address
 * @ecc_syndrom: returned ECC error syndrome
 * @memory_wrapper_idx: returned index of the failing memory wrapper
 *
 * Scans the per-wrapper SERR/DERR indication registers (see the register
 * map above) for the single set bit, selects that wrapper, reads its
 * address and syndrome, and clears the error indication.
 *
 * Return: 0 on success, -EINVAL if no error indication bit is set.
 */
static int gaudi_extract_ecc_info(struct hl_device *hdev,
		struct ecc_info_extract_params *params, u64 *ecc_address,
		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
{
	u32 i, num_mem_regs, reg, err_bit;
	u64 err_addr, err_word = 0;

	/* Each 32-bit indication register covers up to 32 memory wrappers */
	num_mem_regs = params->num_memories / 32 +
			((params->num_memories % 32) ? 1 : 0);

	if (params->block_address >= CFG_BASE)
		params->block_address -= CFG_BASE;

	if (params->derr)
		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
	else
		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;

	/* Set invalid wrapper index */
	*memory_wrapper_idx = 0xFF;

	/* Iterate through memory wrappers, a single bit must be set */
	for (i = 0 ; i < num_mem_regs ; i++) {
		/*
		 * Compute each register address from the base. The previous
		 * code accumulated the offset into err_addr (err_addr += i * 4)
		 * which read offsets 0, 4, 12, 24, ... and thus skipped
		 * indication registers from the third iteration onward.
		 */
		err_word = RREG32(err_addr + i * 4);
		if (err_word) {
			err_bit = __ffs(err_word);
			*memory_wrapper_idx = err_bit + (32 * i);
			break;
		}
	}

	if (*memory_wrapper_idx == 0xFF) {
		dev_err(hdev->dev, "ECC error information cannot be found\n");
		return -EINVAL;
	}

	/* Select the failing wrapper before reading its address/syndrome */
	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
		*memory_wrapper_idx);

	*ecc_address =
		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
	*ecc_syndrom =
		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);

	/* Clear error indication */
	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
	if (params->derr)
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
	else
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);

	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);

	return 0;
}
6773
6774 /*
6775 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6776 *
6777 * @idx: the current pi/ci value
6778 * @q_len: the queue length (power of 2)
6779 *
6780 * @return the cyclically decremented index
6781 */
static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
{
	/*
	 * q_len is a power of 2, so adding (q_len - 1) is equivalent to a
	 * modular decrement, and masking with (q_len - 1) keeps the result
	 * inside [0, q_len - 1] - no explicit wrap-around check needed.
	 */
	return (idx + (q_len - 1)) & (q_len - 1);
}
6793
6794 /**
6795 * gaudi_handle_sw_config_stream_data - print SW config stream data
6796 *
6797 * @hdev: pointer to the habanalabs device structure
6798 * @stream: the QMAN's stream
6799 * @qman_base: base address of QMAN registers block
6800 * @event_mask: mask of the last events occurred
6801 */
static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
						u64 qman_base, u64 event_mask)
{
	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
	u32 cq_ptr_lo_off, size;

	/* Stride between consecutive per-stream CQ_PTR_LO registers */
	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;

	/*
	 * TPC0_QM register offsets are used as a template; adding them to
	 * qman_base addresses the same registers in any QMAN block.
	 */
	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
					stream * cq_ptr_lo_off;
	cq_ptr_hi = cq_ptr_lo +
			(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
	cq_tsize = cq_ptr_lo +
			(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);

	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
	size = RREG32(cq_tsize);
	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
		stream, cq_ptr, size);

	/* Preserve the CQ state for the undefined-opcode error capture */
	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
		hdev->captured_err_info.undef_opcode.cq_size = size;
		hdev->captured_err_info.undef_opcode.stream_id = stream;
	}
}
6828
6829 /**
6830 * gaudi_handle_last_pqes_on_err - print last PQEs on error
6831 *
6832 * @hdev: pointer to the habanalabs device structure
6833 * @qid_base: first QID of the QMAN (out of 4 streams)
6834 * @stream: the QMAN's stream
6835 * @qman_base: base address of QMAN registers block
6836 * @event_mask: mask of the last events occurred
6837 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6838 */
static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
						u32 stream, u64 qman_base,
						u64 event_mask,
						bool pr_sw_conf)
{
	u32 ci, qm_ci_stream_off, queue_len;
	struct hl_hw_queue *q;
	u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
	int i;

	q = &hdev->kernel_queues[qid_base + stream];

	/* TPC0_QM register offsets serve as a template for every QMAN block */
	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
			stream * qm_ci_stream_off;

	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
			q->int_queue_len : HL_QUEUE_LENGTH;

	/* Lock out concurrent submissions while we walk the PQ */
	hdev->asic_funcs->hw_queues_lock(hdev);

	if (pr_sw_conf)
		gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);

	ci = RREG32(pq_ci);

	/* we should start printing from ci - 1 */
	ci = gaudi_queue_idx_dec(ci, queue_len);
	memset(addr, 0, sizeof(addr));

	/* Walk backwards over the last PQ_FETCHER_CACHE_SIZE descriptors */
	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
		struct hl_bd *bd;
		u32 len;

		bd = q->kernel_address;
		bd += ci;

		len = le32_to_cpu(bd->len);
		/* len 0 means uninitialized entry- break */
		if (!len)
			break;

		addr[i] = le64_to_cpu(bd->ptr);

		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
			stream, ci, addr[i], len);

		/* get previous ci, wrap if needed */
		ci = gaudi_queue_idx_dec(ci, queue_len);
	}

	/* Record the collected CB addresses for the undefined-opcode report */
	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
		struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
		u32 arr_idx = undef_opcode->cb_addr_streams_len;

		/* First stream recorded also stamps time and engine id */
		if (arr_idx == 0) {
			undef_opcode->timestamp = ktime_get();
			undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
		}

		memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
		undef_opcode->cb_addr_streams_len++;
	}

	hdev->asic_funcs->hw_queues_unlock(hdev);
}
6905
6906 /**
6907 * handle_qman_data_on_err - extract QMAN data on error
6908 *
6909 * @hdev: pointer to the habanalabs device structure
6910 * @qid_base: first QID of the QMAN (out of 4 streams)
6911 * @stream: the QMAN's stream
6912 * @qman_base: base address of QMAN registers block
6913 * @event_mask: mask of the last events occurred
6914 *
 * This function attempts to extract as much data as possible on QMAN error.
6916 * On upper CP print the SW config stream data and last 8 PQEs.
6917 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
6918 */
static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
					u32 stream, u64 qman_base, u64 event_mask)
{
	u32 i;

	/* Upper CP (a real stream): dump its SW config and last PQEs */
	if (stream != QMAN_STREAMS) {
		gaudi_handle_last_pqes_on_err(hdev, qid_base, stream, qman_base,
						event_mask, true);
		return;
	}

	/*
	 * Lower CP: dump the SW config once, then the last PQEs of all four
	 * upper CPs.
	 */
	gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);

	for (i = 0 ; i < QMAN_STREAMS ; i++)
		gaudi_handle_last_pqes_on_err(hdev, qid_base, i, qman_base,
						event_mask, false);
}
6937
/*
 * gaudi_handle_qman_err_generic - decode and report QMAN error causes
 *
 * @hdev: habanalabs device structure
 * @qm_name: printable QMAN name for log messages
 * @qman_base: base address of the QMAN registers block
 * @qid_base: first QID of the QMAN's four streams
 * @event_mask: in/out event mask; UNDEFINED_OPCODE may be added here
 *
 * Walks GLBL_STS1 of the four upper CPs plus the lower CP, logs every set
 * error-cause bit, detects undefined-opcode errors, and either clears the
 * sticky bits (normal mode) or dumps queue data (stop_on_err mode).
 * Finally logs any arbiter errors.
 */
static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
					const char *qm_name,
					u64 qman_base,
					u32 qid_base,
					u64 *event_mask)
{
	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
	u64 glbl_sts_addr, arb_err_addr;
	char reg_desc[32];

	/* TPC0_QM register offsets serve as a template for every QMAN block */
	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);

	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
		glbl_sts_clr_val = 0;
		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);

		if (!glbl_sts_val)
			continue;

		if (i == QMAN_STREAMS)
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
		else
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);

		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
			if (glbl_sts_val & BIT(j)) {
				dev_err_ratelimited(hdev->dev,
						"%s %s. err cause: %s\n",
						qm_name, reg_desc,
						gaudi_qman_error_cause[j]);
				glbl_sts_clr_val |= BIT(j);
			}
		}
		/* check for undefined opcode */
		if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
				hdev->captured_err_info.undef_opcode.write_enable) {
			/* Only the first undefined opcode is captured until
			 * write_enable is re-armed
			 */
			memset(&hdev->captured_err_info.undef_opcode, 0,
				sizeof(hdev->captured_err_info.undef_opcode));

			hdev->captured_err_info.undef_opcode.write_enable = false;
			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
		}

		/* Write 1 clear errors */
		if (!hdev->stop_on_err)
			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
		else
			handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
	}

	arb_err_val = RREG32(arb_err_addr);

	if (!arb_err_val)
		return;

	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
		if (arb_err_val & BIT(j)) {
			dev_err_ratelimited(hdev->dev,
					"%s ARB_ERR. err cause: %s\n",
					qm_name,
					gaudi_qman_arb_error_cause[j]);
		}
	}
}
7004
/*
 * gaudi_print_sm_sei_info - report a sync-manager SEI error
 *
 * @hdev: habanalabs device structure
 * @event_type: the GAUDI_EVENT_DMA_IF_SEI_0..3 event that was received
 * @sei_data: FW-provided cause and log data for the error
 */
static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
		struct hl_eq_sm_sei_data *sei_data)
{
	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;

	/* Flip the bits as the enum is ordered in the opposite way */
	index = (index ^ 0x3) & 0x3;

	switch (sei_data->sei_cause) {
	case SM_SEI_SO_OVERFLOW:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: SOB Group %u overflow/underflow",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	case SM_SEI_LBW_4B_UNALIGNED:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	case SM_SEI_AXI_RESPONSE_ERR:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: AXI ID %u response error",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	default:
		/* Print the cause value itself - the message refers to the
		 * cause, and sei_log has no defined meaning for an unknown
		 * cause (the old code misleadingly printed sei_log here).
		 */
		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
					sei_data->sei_cause);
		break;
	}
}
7038
/*
 * gaudi_handle_ecc_event - report an ECC error's address, syndrome and wrapper
 *
 * Either takes the ECC details directly from the FW event data (when FW
 * security is enabled, or when the event belongs to a block the FW handles),
 * or extracts them from the block's registers via gaudi_extract_ecc_info().
 */
static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
		struct hl_eq_ecc_data *ecc_data)
{
	struct ecc_info_extract_params params;
	u64 ecc_address = 0, ecc_syndrom = 0;
	u8 index, memory_wrapper_idx = 0;
	bool extract_info_from_fw;
	int rc;

	/* Driver has no register access when FW security is enabled */
	if (hdev->asic_prop.fw_security_enabled) {
		extract_info_from_fw = true;
		goto extract_ecc_info;
	}

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
		extract_info_from_fw = true;
		break;
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
		index = event_type - GAUDI_EVENT_TPC0_SERR;
		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
		index = event_type - GAUDI_EVENT_TPC0_DERR;
		params.block_address =
			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
		/* MME events are spaced 4 apart per engine */
		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	default:
		return;
	}

extract_ecc_info:
	if (extract_info_from_fw) {
		ecc_address = le64_to_cpu(ecc_data->ecc_address);
		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
	} else {
		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
				&ecc_syndrom, &memory_wrapper_idx);
		if (rc)
			return;
	}

	dev_err(hdev->dev,
		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
		ecc_address, ecc_syndrom, memory_wrapper_idx);
}
7135
/*
 * gaudi_handle_qman_err - map a QMAN error event to its queue/block and report
 *
 * Translates the event type into the QMAN block base address, the base QID
 * of its streams and a printable name, then delegates the actual decoding
 * to gaudi_handle_qman_err_generic().
 */
static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	u64 qman_base;
	char desc[32];
	u32 qid_base;
	u8 index;

	switch (event_type) {
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		index = event_type - GAUDI_EVENT_TPC0_QM;
		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
		break;
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
		if (event_type == GAUDI_EVENT_MME0_QM) {
			index = 0;
			qid_base = GAUDI_QUEUE_ID_MME_0_0;
		} else { /* event_type == GAUDI_EVENT_MME2_QM */
			index = 2;
			qid_base = GAUDI_QUEUE_ID_MME_1_0;
		}
		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
		break;
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		index = event_type - GAUDI_EVENT_DMA0_QM;
		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
		if (index > 1)
			qid_base++;
		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
		break;
	case GAUDI_EVENT_NIC0_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
		qman_base = mmNIC0_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
		break;
	case GAUDI_EVENT_NIC0_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
		qman_base = mmNIC0_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
		break;
	case GAUDI_EVENT_NIC1_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
		qman_base = mmNIC1_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
		break;
	case GAUDI_EVENT_NIC1_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
		qman_base = mmNIC1_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
		break;
	case GAUDI_EVENT_NIC2_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
		qman_base = mmNIC2_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
		break;
	case GAUDI_EVENT_NIC2_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
		qman_base = mmNIC2_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
		break;
	case GAUDI_EVENT_NIC3_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
		qman_base = mmNIC3_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
		break;
	case GAUDI_EVENT_NIC3_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
		qman_base = mmNIC3_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
		break;
	case GAUDI_EVENT_NIC4_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
		qman_base = mmNIC4_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
		break;
	case GAUDI_EVENT_NIC4_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
		qman_base = mmNIC4_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
		break;
	default:
		return;
	}

	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
}
7226
/*
 * gaudi_print_irq_info - log a H/W interrupt and optionally its RAZWI data
 *
 * Prints the event name and, when @check_razwi is set, queries and reports
 * any pending RAZWI / MMU error information, forwarding it to
 * hl_handle_razwi() when an actual access violation was latched.
 */
static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
					bool check_razwi, u64 *event_mask)
{
	u16 razwi_engines[2] = {HL_RAZWI_NA_ENG_ID, HL_RAZWI_NA_ENG_ID};
	bool rd_hit = false, wr_hit = false;
	u16 num_engines = 0;
	char desc[64] = "";
	u64 razwi_addr = 0;
	u8 flags = 0;

	gaudi_get_event_desc(event_type, desc, sizeof(desc));
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
				event_type, desc);

	if (!check_razwi)
		return;

	gaudi_print_and_get_razwi_info(hdev, &razwi_engines[0], &razwi_engines[1],
					&rd_hit, &wr_hit);
	gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);

	if (rd_hit)
		flags |= HL_RAZWI_READ;
	if (wr_hit)
		flags |= HL_RAZWI_WRITE;

	/* Engine ids stay HL_RAZWI_NA_ENG_ID unless an engine was resolved */
	if (razwi_engines[0] != HL_RAZWI_NA_ENG_ID)
		num_engines = (razwi_engines[1] != HL_RAZWI_NA_ENG_ID) ? 2 : 1;

	if (flags)
		hl_handle_razwi(hdev, razwi_addr, razwi_engines, num_engines,
				flags, event_mask);
}
7269
/*
 * Print the FW-side vs. driver-side producer/consumer indices of the CPU
 * queue when FW reports the queue went out of sync, to show which side
 * diverged.
 */
static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
			struct cpucp_pkt_sync_err *sync_err)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];

	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
}
7278
gaudi_print_fw_alive_info(struct hl_device * hdev,struct hl_eq_fw_alive * fw_alive)7279 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7280 struct hl_eq_fw_alive *fw_alive)
7281 {
7282 dev_err(hdev->dev,
7283 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7284 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7285 le32_to_cpu(fw_alive->process_id),
7286 le32_to_cpu(fw_alive->thread_id),
7287 le64_to_cpu(fw_alive->uptime_seconds));
7288 }
7289
/*
 * Decode a NIC SEI event and log it, naming the AXI error cause and the
 * NIC id derived from the event type.
 */
static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
			void *data)
{
	struct eq_nic_sei_event *eq_nic_sei = data;
	u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
	char desc[64] = "";
	char *cause_str;

	/* Translate the reported AXI error cause into a printable name */
	switch (eq_nic_sei->axi_error_cause) {
	case RXB:
		cause_str = "RXB";
		break;
	case RXE:
		cause_str = "RXE";
		break;
	case TXS:
		cause_str = "TXS";
		break;
	case TXE:
		cause_str = "TXE";
		break;
	case QPC_RESP:
		cause_str = "QPC_RESP";
		break;
	case NON_AXI_ERR:
		cause_str = "NON_AXI_ERR";
		break;
	case TMR:
		cause_str = "TMR";
		break;
	default:
		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
			eq_nic_sei->axi_error_cause);
		cause_str = "N/A";
		break;
	}

	snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, cause_str,
		eq_nic_sei->id);
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);
}
7331
/*
 * Late-init hook of the compute (non-hard) reset flow.
 *
 * GAUDI doesn't support any reset except hard-reset, so this flow is
 * rejected unconditionally.
 */
static int gaudi_compute_reset_late_init(struct hl_device *hdev)
{
	return -EPERM;
}
7337
gaudi_hbm_read_interrupts(struct hl_device * hdev,int device,struct hl_eq_hbm_ecc_data * hbm_ecc_data)7338 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7339 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7340 {
7341 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7342 int rc = 0;
7343
7344 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7345 CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7346 if (!hbm_ecc_data) {
7347 dev_err(hdev->dev, "No FW ECC data");
7348 return 0;
7349 }
7350
7351 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7352 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7353 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7354 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7355 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7356 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7357 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7358 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7359 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7360 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7361 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7362 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7363 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7364 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7365
7366 dev_err(hdev->dev,
7367 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7368 device, ch, wr_par, rd_par, ca_par, serr, derr);
7369 dev_err(hdev->dev,
7370 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7371 device, ch, hbm_ecc_data->first_addr, type,
7372 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7373 hbm_ecc_data->dec_cnt);
7374 return 0;
7375 }
7376
7377 if (hdev->asic_prop.fw_security_enabled) {
7378 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7379 return 0;
7380 }
7381
7382 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7383 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7384 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7385 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7386 if (val) {
7387 rc = -EIO;
7388 dev_err(hdev->dev,
7389 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7390 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7391 (val >> 2) & 0x1, (val >> 3) & 0x1,
7392 (val >> 4) & 0x1);
7393
7394 val2 = RREG32(base + ch * 0x1000 + 0x060);
7395 dev_err(hdev->dev,
7396 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7397 device, ch * 2,
7398 RREG32(base + ch * 0x1000 + 0x064),
7399 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7400 (val2 & 0xFF0000) >> 16,
7401 (val2 & 0xFF000000) >> 24);
7402 }
7403
7404 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7405 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7406 if (val) {
7407 rc = -EIO;
7408 dev_err(hdev->dev,
7409 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7410 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7411 (val >> 2) & 0x1, (val >> 3) & 0x1,
7412 (val >> 4) & 0x1);
7413
7414 val2 = RREG32(base + ch * 0x1000 + 0x070);
7415 dev_err(hdev->dev,
7416 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7417 device, ch * 2 + 1,
7418 RREG32(base + ch * 0x1000 + 0x074),
7419 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7420 (val2 & 0xFF0000) >> 16,
7421 (val2 & 0xFF000000) >> 24);
7422 }
7423
7424 /* Clear interrupts */
7425 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7426 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7427 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7428 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7429 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7430 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7431 }
7432
7433 val = RREG32(base + 0x8F30);
7434 val2 = RREG32(base + 0x8F34);
7435 if (val | val2) {
7436 rc = -EIO;
7437 dev_err(hdev->dev,
7438 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7439 device, val, val2);
7440 }
7441 val = RREG32(base + 0x8F40);
7442 val2 = RREG32(base + 0x8F44);
7443 if (val | val2) {
7444 rc = -EIO;
7445 dev_err(hdev->dev,
7446 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7447 device, val, val2);
7448 }
7449
7450 return rc;
7451 }
7452
/*
 * Map an HBM SPI event type to the index (0-3) of the HBM device that
 * raised it. Unknown events map to device 0 (should never happen).
 */
static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
{
	int hbm_id;

	switch (hbm_event_type) {
	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM0_SPI_1:
		hbm_id = 0;
		break;
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_1:
		hbm_id = 1;
		break;
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_1:
		hbm_id = 2;
		break;
	case GAUDI_EVENT_HBM3_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_1:
		hbm_id = 3;
		break;
	default:
		/* Should never happen */
		hbm_id = 0;
		break;
	}

	return hbm_id;
}
7475
gaudi_tpc_read_interrupts(struct hl_device * hdev,u8 tpc_id,char * interrupt_name)7476 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7477 char *interrupt_name)
7478 {
7479 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7480 bool soft_reset_required = false;
7481
7482 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7483 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7484
7485 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7486 if (tpc_interrupts_cause & BIT(i)) {
7487 dev_err_ratelimited(hdev->dev,
7488 "TPC%d_%s interrupt cause: %s\n",
7489 tpc_id, interrupt_name,
7490 gaudi_tpc_interrupts_cause[i]);
7491 /* If this is QM error, we need to soft-reset */
7492 if (i == 15)
7493 soft_reset_required = true;
7494 }
7495
7496 /* Clear interrupts */
7497 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7498
7499 return soft_reset_required;
7500 }
7501
/* Translate a TPC DEC event type to its TPC engine index (two events per TPC). */
static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
{
	u16 event_offset = tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC;

	return event_offset >> 1;
}
7506
/* Translate a TPC kernel-error event type to its TPC engine index (six events per TPC). */
static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
{
	u16 event_offset = tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR;

	return event_offset / 6;
}
7511
/*
 * gaudi_print_clk_change_info() - track and log clock-throttling events.
 * @hdev: habanalabs device structure.
 * @event_type: start/end event of power or thermal throttling.
 * @event_mask: accumulator of notifier event bits for the caller.
 *
 * Updates the shared clk_throttling state (reason bits and start/end
 * timestamps) under its lock. Thermal events also raise the user
 * engine-error notification bit.
 */
static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	ktime_t zero_time = ktime_set(0, 0);

	mutex_lock(&hdev->clk_throttling.lock);

	switch (event_type) {
	case GAUDI_EVENT_FIX_POWER_ENV_S:
		/* Power throttling started: record start time, clear end time */
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_E:
		/* Power throttling ended */
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
		dev_info_ratelimited(hdev->dev,
			"Power envelop is safe, back to optimal clock\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
		/* Thermal throttling started: also notify user space */
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
		/* Thermal throttling ended: also notify user space */
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev,
			"Thermal envelop is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}

	mutex_unlock(&hdev->clk_throttling.lock);
}
7561
/*
 * gaudi_handle_eqe() - main handler for event-queue entries received from FW.
 * @hdev: habanalabs device structure.
 * @eq_entry: the event queue entry to handle.
 *
 * Decodes the event type from the entry header, updates the event
 * statistics, dispatches the event to its per-type handling (printing,
 * ECC/RAZWI collection, interrupt unmasking) and decides whether a device
 * reset is required. Notifier bits accumulated in event_mask are sent to
 * user space at the end of the flow.
 */
static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_info_fw_err_info fw_err_info;
	u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
	u32 fw_fatal_err_flag = 0, flags = 0;
	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
			>> EQ_CTL_EVENT_TYPE_SHIFT);
	bool reset_required, reset_direct = false;
	u8 cause;
	int rc;

	/* Reject event indices beyond the event table */
	if (event_type >= GAUDI_EVENT_SIZE) {
		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
				event_type, GAUDI_EVENT_SIZE - 1);
		return;
	}

	gaudi->events_stat[event_type]++;
	gaudi->events_stat_aggregate[event_type]++;

	switch (event_type) {
	/* Double-error (uncorrectable) ECC events: fatal, reset the device */
	case GAUDI_EVENT_PCIE_CORE_DERR:
	case GAUDI_EVENT_PCIE_IF_DERR:
	case GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
		fallthrough;
	case GAUDI_EVENT_CPU_IF_ECC_DERR:
	case GAUDI_EVENT_PSOC_MEM_DERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
	case GAUDI_EVENT_MMU_DERR:
	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		goto reset_device;

	/* Fatal infrastructure errors: reset the device */
	case GAUDI_EVENT_GIC500:
	case GAUDI_EVENT_AXI_ECC:
	case GAUDI_EVENT_L2_RAM_ECC:
	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	/* HBM SPI_0 events: print the ECC info and reset */
	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_0:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	/* HBM SPI_1 events: print the ECC info but keep the device running */
	case GAUDI_EVENT_HBM0_SPI_1:
	case GAUDI_EVENT_HBM1_SPI_1:
	case GAUDI_EVENT_HBM2_SPI_1:
	case GAUDI_EVENT_HBM3_SPI_1:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI_EVENT_TPC0_DEC:
	case GAUDI_EVENT_TPC1_DEC:
	case GAUDI_EVENT_TPC2_DEC:
	case GAUDI_EVENT_TPC3_DEC:
	case GAUDI_EVENT_TPC4_DEC:
	case GAUDI_EVENT_TPC5_DEC:
	case GAUDI_EVENT_TPC6_DEC:
	case GAUDI_EVENT_TPC7_DEC:
		/* In TPC DEC event, notify on TPC assertion. While there isn't
		 * a specific event for assertion yet, the FW generates TPC DEC event.
		 * The SW upper layer will inspect an internal mapped area to indicate
		 * if the event is a TPC Assertion or a "real" TPC DEC.
		 */
		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_dec_event_to_tpc_id(event_type),
					"AXI_SLV_DEC_Error");
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (reset_required) {
			dev_err(hdev->dev, "reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			/* QM error - go straight to reset flow */
			reset_direct = true;
			goto reset_device;
		} else {
			hl_fw_unmask_irq(hdev, event_type);
			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
		}
		break;

	case GAUDI_EVENT_TPC0_KRN_ERR:
	case GAUDI_EVENT_TPC1_KRN_ERR:
	case GAUDI_EVENT_TPC2_KRN_ERR:
	case GAUDI_EVENT_TPC3_KRN_ERR:
	case GAUDI_EVENT_TPC4_KRN_ERR:
	case GAUDI_EVENT_TPC5_KRN_ERR:
	case GAUDI_EVENT_TPC6_KRN_ERR:
	case GAUDI_EVENT_TPC7_KRN_ERR:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_krn_event_to_tpc_id(event_type),
					"KRN_ERR");
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (reset_required) {
			dev_err(hdev->dev, "reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			/* QM error - go straight to reset flow */
			reset_direct = true;
			goto reset_device;
		} else {
			hl_fw_unmask_irq(hdev, event_type);
			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
		}
		break;

	/* Single-error (correctable) ECC events: print but no reset */
	case GAUDI_EVENT_PCIE_CORE_SERR:
	case GAUDI_EVENT_PCIE_IF_SERR:
	case GAUDI_EVENT_PCIE_PHY_SERR:
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
	case GAUDI_EVENT_CPU_IF_ECC_SERR:
	case GAUDI_EVENT_PSOC_MEM_SERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
		fallthrough;
	case GAUDI_EVENT_MMU_SERR:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI_EVENT_PCIE_DEC:
	case GAUDI_EVENT_CPU_AXI_SPLITTER:
	case GAUDI_EVENT_PSOC_AXI_DEC:
	case GAUDI_EVENT_PSOC_PRSTN_FALL:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI_EVENT_MMU_PAGE_FAULT:
	case GAUDI_EVENT_MMU_WR_PERM:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	/* Engine / QMAN errors: collect QMAN error info, no immediate reset */
	case GAUDI_EVENT_MME0_WBC_RSP:
	case GAUDI_EVENT_MME0_SBAB0_RSP:
	case GAUDI_EVENT_MME1_WBC_RSP:
	case GAUDI_EVENT_MME1_SBAB0_RSP:
	case GAUDI_EVENT_MME2_WBC_RSP:
	case GAUDI_EVENT_MME2_SBAB0_RSP:
	case GAUDI_EVENT_MME3_WBC_RSP:
	case GAUDI_EVENT_MME3_SBAB0_RSP:
	case GAUDI_EVENT_RAZWI_OR_ADC:
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		fallthrough;
	case GAUDI_EVENT_NIC0_QM0:
	case GAUDI_EVENT_NIC0_QM1:
	case GAUDI_EVENT_NIC1_QM0:
	case GAUDI_EVENT_NIC1_QM1:
	case GAUDI_EVENT_NIC2_QM0:
	case GAUDI_EVENT_NIC2_QM1:
	case GAUDI_EVENT_NIC3_QM0:
	case GAUDI_EVENT_NIC3_QM1:
	case GAUDI_EVENT_NIC4_QM0:
	case GAUDI_EVENT_NIC4_QM1:
	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		gaudi_handle_qman_err(hdev, event_type, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
		break;

	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		goto reset_device;

	/* Bus-monitor events: informational only */
	case GAUDI_EVENT_TPC0_BMON_SPMU:
	case GAUDI_EVENT_TPC1_BMON_SPMU:
	case GAUDI_EVENT_TPC2_BMON_SPMU:
	case GAUDI_EVENT_TPC3_BMON_SPMU:
	case GAUDI_EVENT_TPC4_BMON_SPMU:
	case GAUDI_EVENT_TPC5_BMON_SPMU:
	case GAUDI_EVENT_TPC6_BMON_SPMU:
	case GAUDI_EVENT_TPC7_BMON_SPMU:
	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_print_sm_sei_info(hdev, event_type,
					&eq_entry->sm_sei_data);
		rc = hl_state_dump(hdev);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (rc)
			dev_err(hdev->dev,
				"Error during system state dump %d\n", rc);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	/* NIC status events are intentionally ignored */
	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
		gaudi_print_clk_change_info(hdev, event_type, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PSOC_GPIO_U16_0:
		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
		dev_err(hdev->dev,
			"Received high temp H/W interrupt %d (cause %d)\n",
			event_type, cause);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_DEV_RESET_REQ:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	case GAUDI_EVENT_FW_ALIVE_S:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
		fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
		fw_err_info.event_id = event_type;
		fw_err_info.event_mask = &event_mask;
		hl_handle_fw_err(hdev, &fw_err_info);
		goto reset_device;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
			event_type);
		break;
	}

	if (event_mask)
		hl_notifier_event_send_all(hdev, event_mask);

	return;

	/* Common exit path for events that require a device reset */
reset_device:
	reset_required = true;

	if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;

		/* notify on device unavailable while the reset triggered by fw */
		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
				HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
	} else if (hdev->hard_reset_on_fw_events) {
		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
	} else {
		reset_required = false;
	}

	if (reset_required) {
		/* escalate general hw errors to critical/fatal error */
		if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
			hl_handle_critical_hw_err(hdev, event_type, &event_mask);

		hl_device_cond_reset(hdev, flags, event_mask);
	} else {
		hl_fw_unmask_irq(hdev, event_type);
		/* Notification on occurred event needs to be sent although reset is not executed */
		if (event_mask)
			hl_notifier_event_send_all(hdev, event_mask);
	}
}
7890
/*
 * Return a pointer to the events statistics array (aggregated across
 * resets or per-reset) and report its size in bytes through @size.
 */
static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	void *stat_arr;

	if (aggregate) {
		*size = (u32) sizeof(gaudi->events_stat_aggregate);
		stat_arr = gaudi->events_stat_aggregate;
	} else {
		*size = (u32) sizeof(gaudi->events_stat);
		stat_arr = gaudi->events_stat;
	}

	return stat_arr;
}
7903
/*
 * gaudi_mmu_invalidate_cache() - invalidate the device MMU STLB cache.
 * @hdev: habanalabs device structure.
 * @is_hard: unused on Gaudi.
 * @flags: unused on Gaudi.
 *
 * Skipped (returns 0) when the MMU was never initialized or a hard reset
 * is pending. Triggers an L0 & L1 invalidation via the STLB registers and
 * polls until the operation completes.
 *
 * Return: 0 on success, error code from the poll on timeout.
 */
static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->reset_info.hard_reset_pending)
		return 0;

	/* Simulation platform (pldm) is much slower - use a longer timeout */
	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_PS, 3);
	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
	WREG32(mmSTLB_INV_PS, 2);

	/* Wait for the invalidation status register to clear */
	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_PS,
		status,
		!status,
		1000,
		timeout_usec);

	WREG32(mmSTLB_INV_SET, 0);

	return rc;
}
7936
/*
 * gaudi_mmu_invalidate_cache_range() - ranged MMU cache invalidation hook.
 *
 * Gaudi has no ranged invalidation support, so the requested range
 * (@asid, @va, @size) is ignored and a full-cache invalidation is
 * performed instead.
 */
static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
				bool is_hard, u32 flags,
				u32 asid, u64 va, u64 size)
{
	/* Treat as invalidate all because there is no range invalidation
	 * in Gaudi
	 */
	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
}
7946
/*
 * gaudi_mmu_update_asid_hop0_addr() - program the hop-0 page-table address
 * of an ASID into the MMU.
 * @hdev: habanalabs device structure.
 * @asid: the address-space id to configure.
 * @phys_addr: physical address of the hop-0 table for this ASID.
 *
 * Writes the ASID and the split physical address (bits 43:12 and 49:44)
 * to the MMU registers, kicks the operation via MMU_BUSY and polls until
 * the busy bit clears.
 *
 * Return: 0 on success, error code from the poll on timeout.
 */
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	/* Simulation platform (pldm) is much slower - use a longer timeout */
	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(MMU_ASID, asid);
	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	WREG32(MMU_BUSY, 0x80000000);

	/* Poll until the MMU busy bit (bit 31) is cleared by H/W */
	rc = hl_poll_timeout(
		hdev,
		MMU_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}
7978
gaudi_send_heartbeat(struct hl_device * hdev)7979 static int gaudi_send_heartbeat(struct hl_device *hdev)
7980 {
7981 struct gaudi_device *gaudi = hdev->asic_specific;
7982
7983 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7984 return 0;
7985
7986 return hl_fw_send_heartbeat(hdev);
7987 }
7988
/*
 * gaudi_cpucp_info_get() - fetch device info from the CPU-CP FW.
 * @hdev: habanalabs device structure.
 *
 * Performs the CPU-CP handshake, fills in a default card name when FW did
 * not provide one, caches the card type and sets the default power values.
 * Skipped (returns 0) before the CPU queue is initialized.
 *
 * Return: 0 on success, error code from the handshake otherwise.
 */
static int gaudi_cpucp_info_get(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
					mmCPU_BOOT_ERR1);
	if (rc)
		return rc;

	/* Fall back to a default card name if FW reported an empty one */
	if (!strlen(prop->cpucp_info.card_name))
		strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
				CARD_NAME_MAX_LEN);

	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);

	set_default_power_values(hdev);

	return 0;
}
8014
/*
 * gaudi_is_device_idle() - check whether all device engines are idle.
 * @hdev: habanalabs device structure.
 * @mask_arr: optional bitmap; a bit is set for every busy engine.
 * @mask_len: length of @mask_arr (in u64 words).
 * @e: optional buffer that receives a human-readable per-engine table.
 *
 * Scans the DMA, TPC, MME and NIC engines, reading each engine's QMAN and
 * core/config status registers. MME 1 & 3 are slaves so only their
 * ARCH_STATUS is checked. NIC engines are checked only if their capability
 * bit was initialized.
 *
 * Return: true when every scanned engine is idle.
 */
static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
		struct engines_data *e)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
	unsigned long *mask = (unsigned long *)mask_arr;
	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
	bool is_idle = true, is_eng_idle, is_slave;
	u64 offset;
	int i, dma_id, port;

	if (e)
		hl_engine_data_sprintf(e,
			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
			"---  -------  ------------  ----------  -------------\n");

	/* DMA engines: idle when both the QMAN and the DMA core are idle */
	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		offset = dma_id * DMA_QMAN_OFFSET;

		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
		if (e)
			hl_engine_data_sprintf(e, fmt, dma_id,
				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				qm_cgm_sts, dma_core_sts0);
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
			"---  -------  ------------  ----------  ----------\n");

	/* TPC engines: idle when both the QMAN and the TPC core are idle */
	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		offset = i * TPC_QMAN_OFFSET;
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
		if (e)
			hl_engine_data_sprintf(e, fmt, i,
				is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
			"---  -------  ------------  ----------  -----------\n");

	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
		offset = i * MME_QMAN_OFFSET;
		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
		is_eng_idle = IS_MME_IDLE(mme_arch_sts);

		/* MME 1 & 3 are slaves, no need to check their QMANs */
		is_slave = i % 2;
		if (!is_slave) {
			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
		}

		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
		if (e) {
			if (!is_slave)
				hl_engine_data_sprintf(e, fmt, i,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
			else
				hl_engine_data_sprintf(e, mme_slave_fmt, i,
					is_eng_idle ? "Y" : "N", "-",
					"-", mme_arch_sts);
		}
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
			"---  -------  ------------  ----------\n");

	/* Each NIC macro holds two ports (QM0/QM1); check each enabled port */
	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
		offset = i * NIC_MACRO_QMAN_OFFSET;
		port = 2 * i;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (e)
				hl_engine_data_sprintf(e, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}

		port = 2 * i + 1;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (e)
				hl_engine_data_sprintf(e, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}
	}

	if (e)
		hl_engine_data_sprintf(e, "\n");

	return is_idle;
}
8151
/* Acquire the ASIC-specific H/W queues spinlock (sparse-annotated). */
static void gaudi_hw_queues_lock(struct hl_device *hdev)
	__acquires(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_lock(&gaudi->hw_queues_lock);
}
8159
/* Release the ASIC-specific H/W queues spinlock (sparse-annotated). */
static void gaudi_hw_queues_unlock(struct hl_device *hdev)
	__releases(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_unlock(&gaudi->hw_queues_lock);
}
8167
/* Return the PCI device id of the underlying PCI device. */
static u32 gaudi_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}
8172
/* Read up to @max_size bytes of EEPROM data via FW; a no-op until the CPU queue is up. */
static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
			size_t max_size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	return (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) ?
			hl_fw_get_eeprom_data(hdev, data, max_size) : 0;
}
8183
gaudi_get_monitor_dump(struct hl_device * hdev,void * data)8184 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8185 {
8186 struct gaudi_device *gaudi = hdev->asic_specific;
8187
8188 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8189 return 0;
8190
8191 return hl_fw_get_monitor_dump(hdev, data);
8192 }
8193
8194 /*
8195 * this function should be used only during initialization and/or after reset,
8196 * when there are no active users.
8197 */
gaudi_run_tpc_kernel(struct hl_device * hdev,u64 tpc_kernel,u32 tpc_id)8198 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8199 {
8200 u64 kernel_timeout;
8201 u32 status, offset;
8202 int rc;
8203
8204 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8205
8206 if (hdev->pldm)
8207 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8208 else
8209 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8210
8211 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8212 lower_32_bits(tpc_kernel));
8213 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8214 upper_32_bits(tpc_kernel));
8215
8216 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8217 lower_32_bits(tpc_kernel));
8218 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8219 upper_32_bits(tpc_kernel));
8220 /* set a valid LUT pointer, content is of no significance */
8221 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8222 lower_32_bits(tpc_kernel));
8223 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8224 upper_32_bits(tpc_kernel));
8225
8226 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8227 lower_32_bits(CFG_BASE +
8228 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8229
8230 WREG32(mmTPC0_CFG_TPC_CMD + offset,
8231 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8232 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8233 /* wait a bit for the engine to start executing */
8234 usleep_range(1000, 1500);
8235
8236 /* wait until engine has finished executing */
8237 rc = hl_poll_timeout(
8238 hdev,
8239 mmTPC0_CFG_STATUS + offset,
8240 status,
8241 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8242 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8243 1000,
8244 kernel_timeout);
8245
8246 if (rc) {
8247 dev_err(hdev->dev,
8248 "Timeout while waiting for TPC%d icache prefetch\n",
8249 tpc_id);
8250 return -EIO;
8251 }
8252
8253 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8254 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8255
8256 /* wait a bit for the engine to start executing */
8257 usleep_range(1000, 1500);
8258
8259 /* wait until engine has finished executing */
8260 rc = hl_poll_timeout(
8261 hdev,
8262 mmTPC0_CFG_STATUS + offset,
8263 status,
8264 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8265 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8266 1000,
8267 kernel_timeout);
8268
8269 if (rc) {
8270 dev_err(hdev->dev,
8271 "Timeout while waiting for TPC%d vector pipe\n",
8272 tpc_id);
8273 return -EIO;
8274 }
8275
8276 rc = hl_poll_timeout(
8277 hdev,
8278 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8279 status,
8280 (status == 0),
8281 1000,
8282 kernel_timeout);
8283
8284 if (rc) {
8285 dev_err(hdev->dev,
8286 "Timeout while waiting for TPC%d kernel to execute\n",
8287 tpc_id);
8288 return -EIO;
8289 }
8290
8291 return 0;
8292 }
8293
/*
 * gaudi_internal_cb_pool_init() - set up the per-context internal CB pool.
 * @hdev: habanalabs device structure.
 * @ctx: context for which the pool is created.
 *
 * Allocates a DMA-coherent host buffer, wraps it in a genalloc pool sized
 * for collective-wait CBs, reserves a device VA block and maps the buffer
 * through the MMU. No-op (success) when the MMU is not initialized.
 *
 * On failure, resources are released in reverse order via the goto chain.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
				struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int min_alloc_order, rc, collective_cb_size;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
							HOST_SPACE_INTERNAL_CB_SZ,
							&hdev->internal_cb_pool_dma_addr,
							GFP_KERNEL | __GFP_ZERO);

	if (!hdev->internal_cb_pool_virt_addr)
		return -ENOMEM;

	/* Pool granularity = size of one collective CB:
	 * 5 MSG_SHORT packets plus one FENCE packet.
	 */
	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
			sizeof(struct packet_fence);
	min_alloc_order = ilog2(collective_cb_size);

	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
	if (!hdev->internal_cb_pool) {
		dev_err(hdev->dev,
			"Failed to create internal CB pool\n");
		rc = -ENOMEM;
		goto free_internal_cb_pool;
	}

	rc = gen_pool_add(hdev->internal_cb_pool,
				(uintptr_t) hdev->internal_cb_pool_virt_addr,
				HOST_SPACE_INTERNAL_CB_SZ, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to internal CB pool\n");
		rc = -EFAULT;
		goto destroy_internal_cb_pool;
	}

	/* Reserve a host-range device VA block for the pool */
	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);

	if (!hdev->internal_cb_va_base) {
		rc = -ENOMEM;
		goto destroy_internal_cb_pool;
	}

	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
			hdev->internal_cb_pool_dma_addr,
			HOST_SPACE_INTERNAL_CB_SZ);
	if (rc)
		goto unreserve_internal_cb_pool;

	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
	if (rc)
		goto unmap_internal_cb_pool;

	mutex_unlock(&hdev->mmu_lock);

	return 0;

unmap_internal_cb_pool:
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
unreserve_internal_cb_pool:
	mutex_unlock(&hdev->mmu_lock);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
	gen_pool_destroy(hdev->internal_cb_pool);
free_internal_cb_pool:
	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);

	return rc;
}
8373
/*
 * gaudi_internal_cb_pool_fini() - tear down the per-context internal CB pool.
 * @hdev: habanalabs device structure.
 * @ctx: context whose pool is destroyed.
 *
 * Reverses gaudi_internal_cb_pool_init(): unmap the MMU mapping, release the
 * reserved VA block, invalidate the MMU cache, destroy the genalloc pool and
 * free the DMA-coherent buffer. No-op when the MMU is not initialized.
 */
static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
		struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	gen_pool_destroy(hdev->internal_cb_pool);

	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);
}
8395
/*
 * gaudi_ctx_init() - ASIC-specific user context initialization.
 * @ctx: the newly created context.
 *
 * Skipped for the kernel context. Creates the internal CB pool and restores
 * user-accessible registers to their default values; the pool is torn down
 * if the register restore fails.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int gaudi_ctx_init(struct hl_ctx *ctx)
{
	int rc;

	if (ctx->asid == HL_KERNEL_ASID_ID)
		return 0;

	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
	if (rc)
		return rc;

	rc = gaudi_restore_user_registers(ctx->hdev);
	if (rc)
		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);

	return rc;
}
8413
/* ASIC-specific user context teardown; kernel context needs no cleanup */
static void gaudi_ctx_fini(struct hl_ctx *ctx)
{
	if (ctx->asid == HL_KERNEL_ASID_ID)
		return;

	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
}
8421
/* No ASIC-specific work is required on Gaudi before scheduling a CS */
static int gaudi_pre_schedule_cs(struct hl_cs *cs)
{
	return 0;
}
8426
/* Map a completion queue index to its assigned HW queue ID */
static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return gaudi_cq_assignment[cq_idx];
}
8431
/* Size of a signal CB: one MSG_SHORT packet plus two MSG_PROT packets */
static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) +
			sizeof(struct packet_msg_prot) * 2;
}
8437
/* Size of a wait CB: four MSG_SHORTs, one FENCE and two MSG_PROT packets */
static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) * 4 +
			sizeof(struct packet_fence) +
			sizeof(struct packet_msg_prot) * 2;
}
8444
/* Register address of a sync object in the W_S sync manager (4 bytes each) */
static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
}
8449
/*
 * gaudi_gen_signal_cb() - append a signal packet to a CB.
 * @hdev: habanalabs device structure.
 * @data: the target CB (cast from void * as used by the common code).
 * @sob_id: sync object to increment.
 * @size: current write offset inside the CB.
 * @eb: whether to set the engine-barrier bit in the packet.
 *
 * Builds a MSG_SHORT packet that atomically adds 1 to the given W_S sync
 * object and writes it at @size inside the CB.
 *
 * Return: the new CB offset (@size plus the packet size).
 */
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
		u32 size, bool eb)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = cb->kernel_address + size;
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}
8477
/*
 * gaudi_add_mon_msg_short() - build a MSG_SHORT targeting the W_S monitor base.
 * @pkt: packet buffer to fill.
 * @value: payload value to write.
 * @addr: offset from the monitor base registers.
 *
 * Return: the packet size in bytes.
 */
static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
				u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
8497
/*
 * gaudi_add_arm_monitor_pkt() - build the packet that arms a monitor.
 * @hdev: habanalabs device structure.
 * @pkt: packet buffer to fill.
 * @sob_base: first sync object in the monitored group.
 * @sob_mask: bitmask of sync objects within the group.
 * @sob_val: value the sync objects must reach (GREATER OR EQUAL).
 * @mon_id: monitor to arm.
 *
 * Return: the packet size in bytes, or 0 if @sob_base/@sob_mask are invalid.
 */
static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
		u16 sob_val, u16 mon_id)
{
	u64 monitor_base;
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u16 msg_addr_offset;
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev,
			"sob_base %u (mask %#x) is not valid\n",
			sob_base, sob_mask);
		return 0;
	}

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;

	memset(pkt, 0, pkt_size);

	/* Monitor config packet: bind the monitor to a sync object */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL*/
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
8546
/*
 * gaudi_add_fence_pkt() - build a FENCE packet on fence ID 2.
 * @pkt: packet buffer to fill.
 *
 * The packet blocks the stream until the fence counter reaches 1, then
 * decrements it by 1.
 *
 * Return: the packet size in bytes.
 */
static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
8567
/*
 * gaudi_get_fence_addr() - resolve the CP_FENCE2_RDATA register for a queue.
 * @hdev: habanalabs device structure.
 * @queue_id: the HW queue whose fence register is requested.
 * @addr: output - full device address (CFG_BASE relative) of the register.
 *
 * Only the queues that may participate in signal/wait (DMA 0/1/5, TPC7 and
 * the NIC queues) are supported.
 *
 * Return: 0 on success, -EINVAL for an unsupported queue.
 */
static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	/*
	 * NIC queues: two engines share a NIC macro, so the register is at
	 * macro base + (engine within macro) * engine stride. nic_index is
	 * the engine number derived from the queue ID (4 queues per engine).
	 */
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}
8689
/*
 * gaudi_add_mon_pkts() - build the three monitor-configuration packets.
 * @buf: destination buffer for the packets.
 * @mon_id: the monitor to configure.
 * @fence_addr: device address the monitor writes to when it triggers.
 *
 * Return: total size in bytes of the packets written to @buf.
 */
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}
8730
/*
 * gaudi_gen_wait_cb() - append a complete wait sequence to a CB.
 * @hdev: habanalabs device structure.
 * @prop: wait parameters (CB, queue, monitor, SOB group/mask/value, offset).
 *
 * Emits three monitor-config packets, one ARM packet and one FENCE packet.
 *
 * Return: the new CB offset, or 0 if the queue has no fence register.
 */
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
		struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
				prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}
8752
/* Zero a sync object's HW register and reinitialize its refcount */
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
		hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}
8765
gaudi_get_device_time(struct hl_device * hdev)8766 static u64 gaudi_get_device_time(struct hl_device *hdev)
8767 {
8768 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8769
8770 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8771 }
8772
/* HW block mapping to user space is not supported on Gaudi */
static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
			u32 *block_size, u32 *block_id)
{
	return -EPERM;
}
8778
/* HW block mmap is not supported on Gaudi */
static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}
8785
/*
 * Notify the device CPU firmware that the driver is ready to receive events.
 * The interrupt register is taken either from the static GIC map or from the
 * dynamic registers the firmware loader negotiated, depending on FW support.
 */
static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}
8797
/* MMU page fault / access error ack is not supported on Gaudi */
static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return -EINVAL;
}
8802
/*
 * Translate a user-facing (uapi) PLL index to the firmware's PLL index.
 * Returns -EINVAL for an index with no firmware counterpart.
 */
static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	int fw_pll_idx;

	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL:
		fw_pll_idx = CPU_PLL;
		break;
	case HL_GAUDI_PCI_PLL:
		fw_pll_idx = PCI_PLL;
		break;
	case HL_GAUDI_NIC_PLL:
		fw_pll_idx = NIC_PLL;
		break;
	case HL_GAUDI_DMA_PLL:
		fw_pll_idx = DMA_PLL;
		break;
	case HL_GAUDI_MESH_PLL:
		fw_pll_idx = MESH_PLL;
		break;
	case HL_GAUDI_MME_PLL:
		fw_pll_idx = MME_PLL;
		break;
	case HL_GAUDI_TPC_PLL:
		fw_pll_idx = TPC_PLL;
		break;
	case HL_GAUDI_IF_PLL:
		fw_pll_idx = IF_PLL;
		break;
	case HL_GAUDI_SRAM_PLL:
		fw_pll_idx = SRAM_PLL;
		break;
	case HL_GAUDI_HBM_PLL:
		fw_pll_idx = HBM_PLL;
		break;
	default:
		fw_pll_idx = -EINVAL;
		break;
	}

	return fw_pll_idx;
}
8819
/*
 * gaudi_add_sync_to_engine_map_entry() - record one engine's sync object.
 * @map: the sync-object-to-engine hash map being built.
 * @reg_value: raw register content holding the sync object address.
 * @engine_type: type of the engine owning the sync object.
 * @engine_id: index of the engine within its type.
 *
 * 0 and 0xffffffff register values are treated as "no sync object" and
 * silently skipped.
 *
 * Return: 0 on success (or skip), -ENOMEM on allocation failure.
 */
static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* Reg value represents a partial address of sync object,
	 * it is used as unique identifier. For this we need to
	 * clear the cutoff cfg base bits from the value.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= lower_32_bits(CFG_BASE);

	/* create a new hash entry */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}
8845
/*
 * gaudi_gen_sync_to_engine_map() - build the sync-object-to-engine map.
 * @hdev: habanalabs device structure.
 * @map: output hash map.
 *
 * Reads each TPC, MME (including sub-engines) and DMA engine's configured
 * sync object register and records it in @map. On any failure the partial
 * map is freed before returning.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i, j, rc;
	u32 reg_value;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}
8898
/* Non-zero if the monitor's VALID status bit is set */
static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}
8905
/*
 * gaudi_fill_sobs_from_mon() - format the list of SOBs a monitor watches.
 * @sobs: output string buffer (MONITOR_SOB_STRING_SIZE bytes).
 * @mon: the monitor state to decode.
 *
 * Decodes the monitor's ARM data into group ID and mask; each cleared mask
 * bit corresponds to one monitored sync object. Output stops early when less
 * than max_write bytes of space remain, so a full mask may be truncated.
 */
static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
{
	/* NOTE(review): max_write caps each snprintf at 10 bytes (incl. NUL);
	 * assumes formatted SOB ids and ", " always fit - confirm id range.
	 */
	const size_t max_write = 10;
	u32 gid, mask, sob;
	int i, offset;

	/* Sync object ID is calculated as follows:
	 * (8 * group_id + cleared bits in mask)
	 */
	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
			mon->arm_data);
	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
			mon->arm_data);

	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
		max_write; mask >>= 1, i++) {
		if (!(mask & 1)) {
			sob = gid * MONITOR_MAX_SOBS + i;

			if (offset > 0)
				offset += snprintf(sobs + offset, max_write,
							", ");

			offset += snprintf(sobs + offset, max_write, "%u", sob);
		}
	}
}
8933
/*
 * gaudi_print_single_monitor() - append one monitor's state to the dump.
 * @buf: pointer to the (resizable) output buffer.
 * @size: pointer to the buffer's allocated size.
 * @offset: pointer to the current write offset in the buffer.
 * @hdev: habanalabs device structure.
 * @mon: the monitor state to print.
 *
 * Return: 0 on success, negative errno from hl_snprintf_resize().
 */
static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	const char *name;
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];
	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	gaudi_fill_sobs_from_mon(monitored_sobs, mon);

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)),
		monitored_sobs);
}
8971
8972
/*
 * gaudi_print_fences_single_engine() - dump fence state of one engine's QMAN.
 * @hdev: habanalabs device structure.
 * @base_offset: register offset of the engine's fence counter array.
 * @status_base_offset: register offset of the engine's CP status array.
 * @engine_type: engine type, for pretty-printing.
 * @engine_id: engine index, for pretty-printing.
 * @buf: pointer to the (resizable) output buffer.
 * @size: pointer to the buffer's allocated size.
 * @offset: pointer to the current write offset in the buffer.
 *
 * For every queue (stream) with a fence in progress, prints the fence id,
 * counter/rdata register addresses and current values.
 *
 * Return: 0 on success, negative errno on allocation/print failure.
 */
static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			 sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	/* statuses[] holds one CP status register per queue; iterate by the
	 * number of queues it was allocated for (iterating by the number of
	 * fences would over/under-fill it whenever the two counts differ).
	 */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
			sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			fences[fence_id],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}
9041
9042
/* Gaudi callbacks for the common state-dump (debugfs) infrastructure */
static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};
9049
/*
 * gaudi_state_dump_init() - register Gaudi specifics with the state-dump core.
 * @hdev: habanalabs device structure.
 *
 * Populates the SOB-id and monitor-id name hash tables and installs the
 * Gaudi property table, sync manager names and callback functions.
 */
static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}
9071
/* Return the static array of stream-master queue IDs */
static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}
9076
/* No dynamic DRAM properties to set on Gaudi */
static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}
9081
/* Engine binning is not applicable on Gaudi */
static int gaudi_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}
9086
/* RAZWI polling on this path is not implemented for Gaudi */
static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}
9090
infineon_ver_show(struct device * dev,struct device_attribute * attr,char * buf)9091 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9092 {
9093 struct hl_device *hdev = dev_get_drvdata(dev);
9094 struct cpucp_info *cpucp_info;
9095
9096 cpucp_info = &hdev->asic_prop.cpucp_info;
9097
9098 return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9099 }
9100
/* Read-only sysfs attribute backed by infineon_ver_show() */
static DEVICE_ATTR_RO(infineon_ver);

/* NULL-terminated attribute list for the VRM sysfs group */
static struct attribute *gaudi_vrm_dev_attrs[] = {
	&dev_attr_infineon_ver.attr,
	NULL,
};
9107
/* Register the common clock attributes and the Gaudi VRM attribute group */
static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
}
9114
/* Device activity notification to FW is not needed on Gaudi */
static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}
9119
/*
 * gaudi_funcs - ASIC-specific callback table for Gaudi.
 *
 * Installed into hdev->asic_funcs by gaudi_set_asic_funcs(); the common
 * habanalabs code dispatches all ASIC-dependent operations through it.
 * Entries set to NULL are operations not implemented for this ASIC.
 */
static const struct hl_asic_funcs gaudi_funcs = {
	/* Device bring-up / teardown */
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	/* Queue / doorbell handling */
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	/* DMA memory management */
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.scrub_device_dram = gaudi_scrub_device_dram,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
	/* Command submission */
	.cs_parser = gaudi_cs_parser,
	.dma_map_sgtable = hl_asic_dma_map_sgtable,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	/* Event / error handling */
	.handle_eqe = gaudi_handle_eqe,
	.get_events_stat = gaudi_get_events_stat,
	/* MMU operations */
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi_send_heartbeat,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.compute_reset_late_init = gaudi_compute_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.get_monitor_dump = gaudi_get_monitor_dump,
	.send_cpu_message = gaudi_send_cpu_message,
	/* PCI / register access */
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.pre_schedule_cs = gaudi_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	/* Firmware loading */
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	/* Signal/wait command buffers (sync streams) */
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.get_device_time = gaudi_get_device_time,
	.pb_print_security_errors = NULL,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.get_dec_base_addr = NULL,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi_set_hbm_bar_base,
	.send_device_activity = gaudi_send_device_activity,
	.set_dram_properties = gaudi_set_dram_properties,
	.set_binning_masks = gaudi_set_binning_masks,
};
9216
/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 * Installs the Gaudi-specific callback table so the common habanalabs
 * code dispatches all ASIC-dependent operations through gaudi_funcs.
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}
9227