1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright 2016-2020 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24 * Gaudi security scheme:
25 *
26 * 1. Host is protected by:
27 * - Range registers
28 * - MMU
29 *
30 * 2. DDR is protected by:
31 * - Range registers (protect the first 512MB)
32 *
33 * 3. Configuration is protected by:
34 * - Range registers
35 * - Protection bits
36 *
37 * MMU is always enabled.
38 *
39 * QMAN DMA channels 0,1 (PCI DMA):
40 * - DMA is not secured.
41 * - PQ and CQ are secured.
42 * - CP is secured: The driver needs to parse CB but WREG should be allowed
43 * because of TDMA (tensor DMA). Hence, WREG is never
44 * secured.
45 *
46 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47 * channel 0 to be secured, execute the DMA and change it back to not secured.
48 * Currently, the driver doesn't use the DMA while there are compute jobs
49 * running.
50 *
51 * The current use cases for the driver to use the DMA are:
52 * - Clear SRAM on context switch (happens on context switch when device is
53 * idle)
54 * - MMU page tables area clear (happens on init)
55 *
56 * QMAN DMA 2-7, TPC, MME, NIC:
57 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58 * CQ, CP and the engine are not secured
59 *
60 */
61
62 #define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
81
82 #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
83
84 #define GAUDI_MAX_STRING_LEN 20
85
86 #define GAUDI_CB_POOL_CB_CNT 512
87 #define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
88
89 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
90
91 #define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
92
93 #define GAUDI_NUM_OF_QM_ERR_CAUSE 16
94
95 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
96
97 #define GAUDI_ARB_WDT_TIMEOUT 0x1000000
98
99 #define GAUDI_CLK_GATE_DEBUGFS_MASK (\
100 BIT(GAUDI_ENGINE_ID_MME_0) |\
101 BIT(GAUDI_ENGINE_ID_MME_2) |\
102 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
103
104 #define HBM_SCRUBBING_TIMEOUT_US 1000000 /* 1s */
105
106 #define GAUDI_PLL_MAX 10
107
108 /*
109 * This enum is kept here for compatibility with old FW (in which each ASIC
110 * has its own unique PLL numbering).
111 */
112 enum gaudi_pll_index {
113 GAUDI_CPU_PLL = 0,
114 GAUDI_PCI_PLL,
115 GAUDI_SRAM_PLL,
116 GAUDI_HBM_PLL,
117 GAUDI_NIC_PLL,
118 GAUDI_DMA_PLL,
119 GAUDI_MESH_PLL,
120 GAUDI_MME_PLL,
121 GAUDI_TPC_PLL,
122 GAUDI_IF_PLL,
123 };
124
125 static enum pll_index gaudi_pll_map[PLL_MAX] = {
126 [CPU_PLL] = GAUDI_CPU_PLL,
127 [PCI_PLL] = GAUDI_PCI_PLL,
128 [SRAM_PLL] = GAUDI_SRAM_PLL,
129 [HBM_PLL] = GAUDI_HBM_PLL,
130 [NIC_PLL] = GAUDI_NIC_PLL,
131 [DMA_PLL] = GAUDI_DMA_PLL,
132 [MESH_PLL] = GAUDI_MESH_PLL,
133 [MME_PLL] = GAUDI_MME_PLL,
134 [TPC_PLL] = GAUDI_TPC_PLL,
135 [IF_PLL] = GAUDI_IF_PLL,
136 };
137
138 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
139 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
140 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
141 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
142 "gaudi cpu eq"
143 };
144
145 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
146 [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
147 [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
148 [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
149 [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
150 [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
151 [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
152 [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
153 [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
154 };
155
156 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
157 [0] = GAUDI_QUEUE_ID_DMA_0_0,
158 [1] = GAUDI_QUEUE_ID_DMA_0_1,
159 [2] = GAUDI_QUEUE_ID_DMA_0_2,
160 [3] = GAUDI_QUEUE_ID_DMA_0_3,
161 [4] = GAUDI_QUEUE_ID_DMA_1_0,
162 [5] = GAUDI_QUEUE_ID_DMA_1_1,
163 [6] = GAUDI_QUEUE_ID_DMA_1_2,
164 [7] = GAUDI_QUEUE_ID_DMA_1_3,
165 };
166
167 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
168 [PACKET_WREG_32] = sizeof(struct packet_wreg32),
169 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
170 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
171 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
172 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
173 [PACKET_REPEAT] = sizeof(struct packet_repeat),
174 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
175 [PACKET_FENCE] = sizeof(struct packet_fence),
176 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
177 [PACKET_NOP] = sizeof(struct packet_nop),
178 [PACKET_STOP] = sizeof(struct packet_stop),
179 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
180 [PACKET_WAIT] = sizeof(struct packet_wait),
181 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
182 };
183
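/* Return true if @id is a packet type the Gaudi driver recognizes */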
184 static inline bool validate_packet_id(enum packet_id id)
185 {
186 switch (id) {
187 case PACKET_WREG_32:
188 case PACKET_WREG_BULK:
189 case PACKET_MSG_LONG:
190 case PACKET_MSG_SHORT:
191 case PACKET_CP_DMA:
192 case PACKET_REPEAT:
193 case PACKET_MSG_PROT:
194 case PACKET_FENCE:
195 case PACKET_LIN_DMA:
196 case PACKET_NOP:
197 case PACKET_STOP:
198 case PACKET_ARB_POINT:
199 case PACKET_WAIT:
200 case PACKET_LOAD_AND_EXE:
201 return true;
202 default:
203 return false;
204 }
205 }
206
207 static const char * const
208 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
209 "tpc_address_exceed_slm",
210 "tpc_div_by_0",
211 "tpc_spu_mac_overflow",
212 "tpc_spu_addsub_overflow",
213 "tpc_spu_abs_overflow",
214 "tpc_spu_fp_dst_nan_inf",
215 "tpc_spu_fp_dst_denorm",
216 "tpc_vpu_mac_overflow",
217 "tpc_vpu_addsub_overflow",
218 "tpc_vpu_abs_overflow",
219 "tpc_vpu_fp_dst_nan_inf",
220 "tpc_vpu_fp_dst_denorm",
221 "tpc_assertions",
222 "tpc_illegal_instruction",
223 "tpc_pc_wrap_around",
224 "tpc_qm_sw_err",
225 "tpc_hbw_rresp_err",
226 "tpc_hbw_bresp_err",
227 "tpc_lbw_rresp_err",
228 "tpc_lbw_bresp_err"
229 };
230
231 static const char * const
232 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
233 "PQ AXI HBW error",
234 "CQ AXI HBW error",
235 "CP AXI HBW error",
236 "CP error due to undefined OPCODE",
237 "CP encountered STOP OPCODE",
238 "CP AXI LBW error",
239 "CP WRREG32 or WRBULK returned error",
240 "N/A",
241 "FENCE 0 inc over max value and clipped",
242 "FENCE 1 inc over max value and clipped",
243 "FENCE 2 inc over max value and clipped",
244 "FENCE 3 inc over max value and clipped",
245 "FENCE 0 dec under min value and clipped",
246 "FENCE 1 dec under min value and clipped",
247 "FENCE 2 dec under min value and clipped",
248 "FENCE 3 dec under min value and clipped"
249 };
250
251 static const char * const
252 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
253 "Choice push while full error",
254 "Choice Q watchdog error",
255 "MSG AXI LBW returned with error"
256 };
257
258 enum gaudi_sm_sei_cause {
259 GAUDI_SM_SEI_SO_OVERFLOW,
260 GAUDI_SM_SEI_LBW_4B_UNALIGNED,
261 GAUDI_SM_SEI_AXI_RESPONSE_ERR
262 };
263
264 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
265 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
266 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
267 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
268 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
269 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
270 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
271 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
272 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
273 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
296 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
302 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
304 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
305 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
306 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
307 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
308 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
309 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
310 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
311 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
312 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
313 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
314 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
315 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
316 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
317 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
318 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
319 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
320 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
321 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
322 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
323 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
324 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
325 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
326 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
327 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
328 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
329 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
330 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
331 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
332 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
333 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
334 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
335 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
336 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
337 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
338 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
339 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
340 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
341 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
342 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
343 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
344 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
345 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
346 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
347 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
348 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
349 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
350 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
351 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
352 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
353 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
354 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
355 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
356 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
357 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
358 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
359 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
360 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
361 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
362 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
363 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
364 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
365 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
366 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
367 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
368 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
369 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
370 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
371 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
372 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
373 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
374 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
375 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
376 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
377 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
378 };
379
380 struct ecc_info_extract_params {
381 u64 block_address;
382 u32 num_memories;
383 bool derr;
384 bool disable_clock_gating;
385 };
386
387 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
388 u64 phys_addr);
389 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
390 struct hl_cs_job *job);
391 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
392 u32 size, u64 val);
393 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
394 u32 num_regs, u32 val);
395 static int gaudi_schedule_register_memset(struct hl_device *hdev,
396 u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
397 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
398 u32 tpc_id);
399 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
400 static int gaudi_cpucp_info_get(struct hl_device *hdev);
401 static void gaudi_disable_clock_gating(struct hl_device *hdev);
402 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
403 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
404 u32 size, bool eb);
405 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
406 struct hl_gen_wait_properties *prop);
407
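/* Classify a queue for collective operations: external queues serve as
 * collective master, while the DMA5, TPC7 and NIC queues serve as slaves.
 */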
408 static inline enum hl_collective_mode
409 get_collective_mode(struct hl_device *hdev, u32 queue_id)
410 {
411 if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
412 return HL_COLLECTIVE_MASTER;
413
414 if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
415 queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
416 return HL_COLLECTIVE_SLAVE;
417
418 if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
419 queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
420 return HL_COLLECTIVE_SLAVE;
421
422 if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
423 queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
424 return HL_COLLECTIVE_SLAVE;
425
426 return HL_COLLECTIVE_NOT_SUPPORTED;
427 }
428
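/* Set the default max/DC power values according to the card type (PMC/PCI) */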
429 static inline void set_default_power_values(struct hl_device *hdev)
430 {
431 struct asic_fixed_properties *prop = &hdev->asic_prop;
432
433 if (hdev->card_type == cpucp_card_type_pmc) {
434 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
435 prop->dc_power_default = DC_POWER_DEFAULT_PMC;
436 } else {
437 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
438 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
439 }
440 }
441
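/* Fill the ASIC fixed properties: queue types, DRAM/SRAM ranges, MMU
 * parameters and sync-stream SOB/monitor reservations.
 */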
442 static int gaudi_get_fixed_properties(struct hl_device *hdev)
443 {
444 struct asic_fixed_properties *prop = &hdev->asic_prop;
445 u32 num_sync_stream_queues = 0;
446 int i;
447
448 prop->max_queues = GAUDI_QUEUE_ID_SIZE;
449 prop->hw_queues_props = kcalloc(prop->max_queues,
450 sizeof(struct hw_queue_properties),
451 GFP_KERNEL);
452
453 if (!prop->hw_queues_props)
454 return -ENOMEM;
455
456 for (i = 0 ; i < prop->max_queues ; i++) {
457 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
458 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
459 prop->hw_queues_props[i].driver_only = 0;
460 prop->hw_queues_props[i].supports_sync_stream = 1;
461 prop->hw_queues_props[i].cb_alloc_flags =
462 CB_ALLOC_KERNEL;
463 num_sync_stream_queues++;
464 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
465 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
466 prop->hw_queues_props[i].driver_only = 1;
467 prop->hw_queues_props[i].supports_sync_stream = 0;
468 prop->hw_queues_props[i].cb_alloc_flags =
469 CB_ALLOC_KERNEL;
470 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
471 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
472 prop->hw_queues_props[i].driver_only = 0;
473 prop->hw_queues_props[i].supports_sync_stream = 0;
474 prop->hw_queues_props[i].cb_alloc_flags =
475 CB_ALLOC_USER;
476
477 }
478 prop->hw_queues_props[i].collective_mode =
479 get_collective_mode(hdev, i);
480 }
481
482 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
483 prop->collective_first_sob = 0;
484 prop->collective_first_mon = 0;
485
486 /* 2 SOBs per internal queue stream are reserved for collective */
487 prop->sync_stream_first_sob =
488 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
489 * QMAN_STREAMS * HL_RSVD_SOBS;
490
491 /* 1 monitor per internal queue stream is reserved for collective
492 * 2 monitors per external queue stream are reserved for collective
493 */
494 prop->sync_stream_first_mon =
495 (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
496 (NUMBER_OF_EXT_HW_QUEUES * 2);
497
498 prop->dram_base_address = DRAM_PHYS_BASE;
499 prop->dram_size = GAUDI_HBM_SIZE_32GB;
500 prop->dram_end_address = prop->dram_base_address +
501 prop->dram_size;
502 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
503
504 prop->sram_base_address = SRAM_BASE_ADDR;
505 prop->sram_size = SRAM_SIZE;
506 prop->sram_end_address = prop->sram_base_address +
507 prop->sram_size;
508 prop->sram_user_base_address = prop->sram_base_address +
509 SRAM_USER_BASE_OFFSET;
510
511 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
512 if (hdev->pldm)
513 prop->mmu_pgt_size = 0x800000; /* 8MB */
514 else
515 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
516 prop->mmu_pte_size = HL_PTE_SIZE;
517 prop->mmu_hop_table_size = HOP_TABLE_SIZE;
518 prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
519 prop->dram_page_size = PAGE_SIZE_2MB;
520 prop->dram_supports_virtual_memory = false;
521
522 prop->pmmu.hop0_shift = HOP0_SHIFT;
523 prop->pmmu.hop1_shift = HOP1_SHIFT;
524 prop->pmmu.hop2_shift = HOP2_SHIFT;
525 prop->pmmu.hop3_shift = HOP3_SHIFT;
526 prop->pmmu.hop4_shift = HOP4_SHIFT;
527 prop->pmmu.hop0_mask = HOP0_MASK;
528 prop->pmmu.hop1_mask = HOP1_MASK;
529 prop->pmmu.hop2_mask = HOP2_MASK;
530 prop->pmmu.hop3_mask = HOP3_MASK;
531 prop->pmmu.hop4_mask = HOP4_MASK;
532 prop->pmmu.start_addr = VA_HOST_SPACE_START;
533 prop->pmmu.end_addr =
534 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
535 prop->pmmu.page_size = PAGE_SIZE_4KB;
536 prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
537
538 /* PMMU and HPMMU are the same except for the page size */
539 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
540 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
541
542 /* shifts and masks are the same in PMMU and DMMU */
543 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
544 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
545 prop->dmmu.end_addr = VA_HOST_SPACE_END;
546 prop->dmmu.page_size = PAGE_SIZE_2MB;
547
548 prop->cfg_size = CFG_SIZE;
549 prop->max_asid = MAX_ASID;
550 prop->num_of_events = GAUDI_EVENT_SIZE;
551 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
552
553 set_default_power_values(hdev);
554
555 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
556 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
557
558 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
559 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
560
561 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
562 CARD_NAME_MAX_LEN);
563
564 prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
565
566 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
567 prop->sync_stream_first_sob +
568 (num_sync_stream_queues * HL_RSVD_SOBS);
569 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
570 prop->sync_stream_first_mon +
571 (num_sync_stream_queues * HL_RSVD_MONS);
572
573 prop->first_available_user_msix_interrupt = USHRT_MAX;
574
575 for (i = 0 ; i < HL_MAX_DCORES ; i++)
576 prop->first_available_cq[i] = USHRT_MAX;
577
578 prop->fw_security_status_valid = false;
579 prop->hard_reset_done_by_fw = false;
580
581 return 0;
582 }
583
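/* Map the SRAM, CFG and HBM PCI BARs and set the register I/O base (rmmio) */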
584 static int gaudi_pci_bars_map(struct hl_device *hdev)
585 {
586 static const char * const name[] = {"SRAM", "CFG", "HBM"};
587 bool is_wc[3] = {false, false, true};
588 int rc;
589
590 rc = hl_pci_bars_map(hdev, name, is_wc);
591 if (rc)
592 return rc;
593
594 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
595 (CFG_BASE - SPI_FLASH_BASE_ADDR);
596
597 return 0;
598 }
599
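/* Re-target the HBM BAR window to @addr. Returns the previous window address,
 * or U64_MAX if reconfiguring the inbound PCI region fails.
 */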
600 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
601 {
602 struct gaudi_device *gaudi = hdev->asic_specific;
603 struct hl_inbound_pci_region pci_region;
604 u64 old_addr = addr;
605 int rc;
606
607 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
608 return old_addr;
609
610 /* Inbound Region 2 - Bar 4 - Point to HBM */
611 pci_region.mode = PCI_BAR_MATCH_MODE;
612 pci_region.bar = HBM_BAR_ID;
613 pci_region.addr = addr;
614 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
615 if (rc)
616 return U64_MAX;
617
618 if (gaudi) {
619 old_addr = gaudi->hbm_bar_cur_addr;
620 gaudi->hbm_bar_cur_addr = addr;
621 }
622
623 return old_addr;
624 }
625
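/* Configure the PCIe iATU inbound (SRAM/CFG, SPI flash, HBM) and outbound
 * (host) regions, unless the firmware has already configured them.
 */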
626 static int gaudi_init_iatu(struct hl_device *hdev)
627 {
628 struct hl_inbound_pci_region inbound_region;
629 struct hl_outbound_pci_region outbound_region;
630 int rc;
631
632 if (hdev->asic_prop.iatu_done_by_fw) {
633 hdev->asic_funcs->set_dma_mask_from_fw(hdev);
634 return 0;
635 }
636
637 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
638 inbound_region.mode = PCI_BAR_MATCH_MODE;
639 inbound_region.bar = SRAM_BAR_ID;
640 inbound_region.addr = SRAM_BASE_ADDR;
641 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
642 if (rc)
643 goto done;
644
645 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
646 inbound_region.mode = PCI_BAR_MATCH_MODE;
647 inbound_region.bar = CFG_BAR_ID;
648 inbound_region.addr = SPI_FLASH_BASE_ADDR;
649 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
650 if (rc)
651 goto done;
652
653 /* Inbound Region 2 - Bar 4 - Point to HBM */
654 inbound_region.mode = PCI_BAR_MATCH_MODE;
655 inbound_region.bar = HBM_BAR_ID;
656 inbound_region.addr = DRAM_PHYS_BASE;
657 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
658 if (rc)
659 goto done;
660
661 hdev->asic_funcs->set_dma_mask_from_fw(hdev);
662
663 /* Outbound Region 0 - Point to Host */
664 outbound_region.addr = HOST_PHYS_BASE;
665 outbound_region.size = HOST_PHYS_SIZE;
666 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
667
668 done:
669 return rc;
670 }
671
672 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
673 {
674 return RREG32(mmHW_STATE);
675 }
676
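/* Early init: get fixed properties, validate BAR sizes, bring up PCI and
 * read the preboot firmware status.
 */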
677 static int gaudi_early_init(struct hl_device *hdev)
678 {
679 struct asic_fixed_properties *prop = &hdev->asic_prop;
680 struct pci_dev *pdev = hdev->pdev;
681 u32 fw_boot_status;
682 int rc;
683
684 rc = gaudi_get_fixed_properties(hdev);
685 if (rc) {
686 dev_err(hdev->dev, "Failed to get fixed properties\n");
687 return rc;
688 }
689
690 /* Check BAR sizes */
691 if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
692 dev_err(hdev->dev,
693 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
694 SRAM_BAR_ID,
695 (unsigned long long) pci_resource_len(pdev,
696 SRAM_BAR_ID),
697 SRAM_BAR_SIZE);
698 rc = -ENODEV;
699 goto free_queue_props;
700 }
701
702 if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
703 dev_err(hdev->dev,
704 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
705 CFG_BAR_ID,
706 (unsigned long long) pci_resource_len(pdev,
707 CFG_BAR_ID),
708 CFG_BAR_SIZE);
709 rc = -ENODEV;
710 goto free_queue_props;
711 }
712
713 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
714
715 /* If FW security is enabled at this point it means no access to ELBI */
716 if (!hdev->asic_prop.fw_security_disabled) {
717 hdev->asic_prop.iatu_done_by_fw = true;
718 goto pci_init;
719 }
720
721 rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
722 &fw_boot_status);
723 if (rc)
724 goto free_queue_props;
725
726 /* Check whether FW is configuring iATU */
727 if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
728 (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
729 hdev->asic_prop.iatu_done_by_fw = true;
730
731 pci_init:
732 rc = hl_pci_init(hdev);
733 if (rc)
734 goto free_queue_props;
735
736 /* Before continuing with the initialization, we need to read the preboot
737 * version to determine whether we are running with security-enabled firmware
738 */
739 rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
740 mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
741 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
742 if (rc) {
743 if (hdev->reset_on_preboot_fail)
744 hdev->asic_funcs->hw_fini(hdev, true);
745 goto pci_fini;
746 }
747
748 if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
749 dev_info(hdev->dev,
750 "H/W state is dirty, must reset before initializing\n");
751 hdev->asic_funcs->hw_fini(hdev, true);
752 }
753
754 return 0;
755
756 pci_fini:
757 hl_pci_fini(hdev);
758 free_queue_props:
759 kfree(hdev->asic_prop.hw_queues_props);
760 return rc;
761 }
762
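/* Undo gaudi_early_init(): free the queue properties and tear down PCI */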
763 static int gaudi_early_fini(struct hl_device *hdev)
764 {
765 kfree(hdev->asic_prop.hw_queues_props);
766 hl_pci_fini(hdev);
767
768 return 0;
769 }
770
771 /**
772 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
773 *
774 * @hdev: pointer to hl_device structure
775 *
776 */
777 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
778 {
779 struct asic_fixed_properties *prop = &hdev->asic_prop;
780 u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
781 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
782 int rc;
783
784 if (hdev->asic_prop.fw_security_disabled) {
785 /* Backward compatibility */
786 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
787 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
788 nr = RREG32(mmPSOC_CPU_PLL_NR);
789 nf = RREG32(mmPSOC_CPU_PLL_NF);
790 od = RREG32(mmPSOC_CPU_PLL_OD);
791
792 if (div_sel == DIV_SEL_REF_CLK ||
793 div_sel == DIV_SEL_DIVIDED_REF) {
794 if (div_sel == DIV_SEL_REF_CLK)
795 freq = PLL_REF_CLK;
796 else
797 freq = PLL_REF_CLK / (div_fctr + 1);
798 } else if (div_sel == DIV_SEL_PLL_CLK ||
799 div_sel == DIV_SEL_DIVIDED_PLL) {
800 pll_clk = PLL_REF_CLK * (nf + 1) /
801 ((nr + 1) * (od + 1));
802 if (div_sel == DIV_SEL_PLL_CLK)
803 freq = pll_clk;
804 else
805 freq = pll_clk / (div_fctr + 1);
806 } else {
807 dev_warn(hdev->dev,
808 "Received invalid div select value: %d",
809 div_sel);
810 freq = 0;
811 }
812 } else {
813 rc = hl_fw_cpucp_pll_info_get(hdev, CPU_PLL, pll_freq_arr);
814
815 if (rc)
816 return rc;
817
818 freq = pll_freq_arr[2];
819 }
820
821 prop->psoc_timestamp_frequency = freq;
822 prop->psoc_pci_pll_nr = nr;
823 prop->psoc_pci_pll_nf = nf;
824 prop->psoc_pci_pll_od = od;
825 prop->psoc_pci_pll_div_factor = div_fctr;
826
827 return 0;
828 }
829
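/* Copy the TPC kernel from host memory to the SRAM user area using a LIN_DMA
 * packet on QMAN0, then run the kernel on every TPC engine.
 */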
830 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
831 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
832 {
833 struct asic_fixed_properties *prop = &hdev->asic_prop;
834 struct packet_lin_dma *init_tpc_mem_pkt;
835 struct hl_cs_job *job;
836 struct hl_cb *cb;
837 u64 dst_addr;
838 u32 cb_size, ctl;
839 u8 tpc_id;
840 int rc;
841
842 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
843 if (!cb)
844 return -EFAULT;
845
846 init_tpc_mem_pkt = cb->kernel_address;
847 cb_size = sizeof(*init_tpc_mem_pkt);
848 memset(init_tpc_mem_pkt, 0, cb_size);
849
850 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
851
852 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
853 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
854 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
855 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
856
857 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
858
859 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
860 dst_addr = (prop->sram_user_base_address &
861 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
862 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
863 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
864
865 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
866 if (!job) {
867 dev_err(hdev->dev, "Failed to allocate a new job\n");
868 rc = -ENOMEM;
869 goto release_cb;
870 }
871
872 job->id = 0;
873 job->user_cb = cb;
874 atomic_inc(&job->user_cb->cs_cnt);
875 job->user_cb_size = cb_size;
876 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
877 job->patched_cb = job->user_cb;
878 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
879
880 hl_debugfs_add_job(hdev, job);
881
882 rc = gaudi_send_job_on_qman0(hdev, job);
883
884 if (rc)
885 goto free_job;
886
887 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
888 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
889 if (rc)
890 break;
891 }
892
893 free_job:
894 hl_userptr_delete_list(hdev, &job->userptr_list);
895 hl_debugfs_remove_job(hdev, job);
896 kfree(job);
897 atomic_dec(&cb->cs_cnt);
898
899 release_cb:
900 hl_cb_put(cb);
901 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
902
903 return rc;
904 }
905
906 /*
907 * gaudi_init_tpc_mem() - Initialize TPC memories.
908 * @hdev: Pointer to hl_device structure.
909 *
910 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
911 *
912 * Return: 0 for success, negative value for error.
913 */
914 static int gaudi_init_tpc_mem(struct hl_device *hdev)
915 {
916 const struct firmware *fw;
917 size_t fw_size;
918 void *cpu_addr;
919 dma_addr_t dma_handle;
920 int rc, count = 5;
921
922 again:
923 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
924 if (rc == -EINTR && count-- > 0) {
925 msleep(50);
926 goto again;
927 }
928
929 if (rc) {
930 dev_err(hdev->dev, "Failed to load firmware file %s\n",
931 GAUDI_TPC_FW_FILE);
932 goto out;
933 }
934
935 fw_size = fw->size;
936 cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
937 &dma_handle, GFP_KERNEL | __GFP_ZERO);
938 if (!cpu_addr) {
939 dev_err(hdev->dev,
940 "Failed to allocate %zu of dma memory for TPC kernel\n",
941 fw_size);
942 rc = -ENOMEM;
943 goto out;
944 }
945
946 memcpy(cpu_addr, fw->data, fw_size);
947
948 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
949
950 hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
951 dma_handle);
952
953 out:
954 release_firmware(fw);
955 return rc;
956 }
957
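/* Assign SOBs from the stream's current SOB group to all collective slave
 * queues: the NIC queues plus the DMA5 and TPC7 reduction queues.
 */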
958 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
959 {
960 struct gaudi_device *gaudi = hdev->asic_specific;
961 struct gaudi_collective_properties *prop = &gaudi->collective_props;
962 struct hl_hw_queue *q;
963 u32 i, sob_id, sob_group_id, queue_id;
964
965 /* Iterate through SOB groups and assign a SOB for each slave queue */
966 sob_group_id =
967 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
968 sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
969
970 queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
971 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
972 q = &hdev->kernel_queues[queue_id + (4 * i)];
973 q->sync_stream_prop.collective_sob_id = sob_id + i;
974 }
975
976 /* Both DMA5 and TPC7 use the same resources since only a single
977 * engine needs to participate in the reduction process
978 */
979 queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
980 q = &hdev->kernel_queues[queue_id];
981 q->sync_stream_prop.collective_sob_id =
982 sob_id + NIC_NUMBER_OF_ENGINES;
983
984 queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
985 q = &hdev->kernel_queues[queue_id];
986 q->sync_stream_prop.collective_sob_id =
987 sob_id + NIC_NUMBER_OF_ENGINES;
988 }
989
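/* kref release callback: clear all SOBs in the group via a scheduled register
 * memset and re-arm the group's refcount.
 */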
990 static void gaudi_sob_group_hw_reset(struct kref *ref)
991 {
992 struct gaudi_hw_sob_group *hw_sob_group =
993 container_of(ref, struct gaudi_hw_sob_group, kref);
994 struct hl_device *hdev = hw_sob_group->hdev;
995 u64 base_addr;
996 int rc;
997
998 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
999 hw_sob_group->base_sob_id * 4;
1000 rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
1001 base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
1002 if (rc)
1003 dev_err(hdev->dev,
1004 "failed resetting sob group - sob base %u, count %u",
1005 hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);
1006
1007 kref_init(&hw_sob_group->kref);
1008 }
1009
1010 static void gaudi_sob_group_reset_error(struct kref *ref)
1011 {
1012 struct gaudi_hw_sob_group *hw_sob_group =
1013 container_of(ref, struct gaudi_hw_sob_group, kref);
1014 struct hl_device *hdev = hw_sob_group->hdev;
1015
1016 dev_crit(hdev->dev,
1017 "SOB release shouldn't be called here, base_sob_id: %d\n",
1018 hw_sob_group->base_sob_id);
1019 }
1020
1021 static int gaudi_collective_init(struct hl_device *hdev)
1022 {
1023 u32 i, master_monitor_sobs, sob_id, reserved_sobs_per_group;
1024 struct gaudi_collective_properties *prop;
1025 struct gaudi_device *gaudi;
1026
1027 gaudi = hdev->asic_specific;
1028 prop = &gaudi->collective_props;
1029 sob_id = hdev->asic_prop.collective_first_sob;
1030
1031 /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1032 reserved_sobs_per_group =
1033 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1034
1035 /* Init SOB groups */
1036 for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1037 prop->hw_sob_group[i].hdev = hdev;
1038 prop->hw_sob_group[i].base_sob_id = sob_id;
1039 sob_id += reserved_sobs_per_group;
1040 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1041 }
1042
1043 for (i = 0 ; i < QMAN_STREAMS; i++) {
1044 prop->next_sob_group_val[i] = 1;
1045 prop->curr_sob_group_idx[i] = 0;
1046 gaudi_collective_map_sobs(hdev, i);
1047 }
1048
1049 prop->mstr_sob_mask[0] = 0;
1050 master_monitor_sobs = HL_MAX_SOBS_PER_MONITOR;
1051 for (i = 0 ; i < master_monitor_sobs ; i++)
1052 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1053 prop->mstr_sob_mask[0] |= BIT(i);
1054
1055 prop->mstr_sob_mask[1] = 0;
1056 master_monitor_sobs =
1057 NIC_NUMBER_OF_ENGINES - HL_MAX_SOBS_PER_MONITOR;
1058 for (i = 0 ; i < master_monitor_sobs; i++) {
1059 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1060 prop->mstr_sob_mask[1] |= BIT(i);
1061 }
1062
1063 /* Set collective engine bit */
1064 prop->mstr_sob_mask[1] |= BIT(i);
1065
1066 return 0;
1067 }
1068
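/* Drop the SOB group refcount; the final put resets the group's SOBs in HW */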
1069 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1070 {
1071 struct gaudi_device *gaudi = hdev->asic_specific;
1072 struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1073
1074 kref_put(&cprop->hw_sob_group[sob_group].kref,
1075 gaudi_sob_group_hw_reset);
1076 }
1077
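/* Build the collective master wait CB: two monitor/fence pairs that wait for
 * the slave queues to signal the SOB group.
 */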
1078 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1079 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1080 {
1081 u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1082 struct gaudi_collective_properties *cprop;
1083 struct hl_gen_wait_properties wait_prop;
1084 struct hl_sync_stream_properties *prop;
1085 struct gaudi_device *gaudi;
1086
1087 gaudi = hdev->asic_specific;
1088 cprop = &gaudi->collective_props;
1089 queue_id = job->hw_queue_id;
1090 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1091
1092 master_sob_base =
1093 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1094 master_monitor = prop->collective_mstr_mon_id[0];
1095
1096 cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1097
1098 dev_dbg(hdev->dev,
1099 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1100 master_sob_base, cprop->mstr_sob_mask[0],
1101 cprop->next_sob_group_val[stream],
1102 master_monitor, queue_id);
1103
1104 wait_prop.data = (void *) job->patched_cb;
1105 wait_prop.sob_base = master_sob_base;
1106 wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1107 wait_prop.sob_val = cprop->next_sob_group_val[stream];
1108 wait_prop.mon_id = master_monitor;
1109 wait_prop.q_idx = queue_id;
1110 wait_prop.size = cb_size;
1111 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1112
1113 master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1114 master_monitor = prop->collective_mstr_mon_id[1];
1115
1116 dev_dbg(hdev->dev,
1117 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1118 master_sob_base, cprop->mstr_sob_mask[1],
1119 cprop->next_sob_group_val[stream],
1120 master_monitor, queue_id);
1121
1122 wait_prop.sob_base = master_sob_base;
1123 wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1124 wait_prop.mon_id = master_monitor;
1125 wait_prop.size = cb_size;
1126 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1127 }
1128
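/* Build a collective slave CB: wait on the signal CS SOB, then signal the
 * slave's collective SOB towards the master.
 */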
1129 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1130 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1131 {
1132 struct hl_gen_wait_properties wait_prop;
1133 struct hl_sync_stream_properties *prop;
1134 u32 queue_id, cb_size = 0;
1135
1136 queue_id = job->hw_queue_id;
1137 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1138
1139 /* Add to wait CBs using slave monitor */
1140 wait_prop.data = (void *) job->user_cb;
1141 wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1142 wait_prop.sob_mask = 0x1;
1143 wait_prop.sob_val = cs_cmpl->sob_val;
1144 wait_prop.mon_id = prop->collective_slave_mon_id;
1145 wait_prop.q_idx = queue_id;
1146 wait_prop.size = cb_size;
1147
1148 dev_dbg(hdev->dev,
1149 "Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
1150 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1151 prop->collective_slave_mon_id, queue_id);
1152
1153 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1154
1155 dev_dbg(hdev->dev,
1156 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1157 prop->collective_sob_id, queue_id);
1158
1159 cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1160 prop->collective_sob_id, cb_size, false);
1161 }
1162
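/* Initialize a collective wait CS: propagate the signal SOB to all jobs,
 * advance the SOB group value and switch SOB groups on wraparound.
 */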
1163 static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
1164 {
1165 struct hl_cs_compl *signal_cs_cmpl =
1166 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1167 struct hl_cs_compl *cs_cmpl =
1168 container_of(cs->fence, struct hl_cs_compl, base_fence);
1169 struct gaudi_collective_properties *cprop;
1170 u32 stream, queue_id, sob_group_offset;
1171 struct gaudi_device *gaudi;
1172 struct hl_device *hdev;
1173 struct hl_cs_job *job;
1174 struct hl_ctx *ctx;
1175
1176 ctx = cs->ctx;
1177 hdev = ctx->hdev;
1178 gaudi = hdev->asic_specific;
1179 cprop = &gaudi->collective_props;
1180
1181 /* copy the SOB id and value of the signal CS */
1182 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1183 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1184
1185 /* Calculate the stream from collective master queue (1st job) */
1186 job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1187 stream = job->hw_queue_id % 4;
1188 sob_group_offset =
1189 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1190
1191 list_for_each_entry(job, &cs->job_list, cs_node) {
1192 queue_id = job->hw_queue_id;
1193
1194 if (hdev->kernel_queues[queue_id].collective_mode ==
1195 HL_COLLECTIVE_MASTER)
1196 gaudi_collective_master_init_job(hdev, job, stream,
1197 sob_group_offset);
1198 else
1199 gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1200 }
1201
1202 cs_cmpl->sob_group = sob_group_offset;
1203
1204 /* Handle sob group kref and wraparound */
1205 kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1206 cprop->next_sob_group_val[stream]++;
1207
1208 if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1209 /*
1210 * Decrement as we reached the max value.
1211 * The release function won't be called here as we've
1212 * just incremented the refcount.
1213 */
1214 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1215 gaudi_sob_group_reset_error);
1216 cprop->next_sob_group_val[stream] = 1;
1217 /* only two SOBs are currently in use */
1218 cprop->curr_sob_group_idx[stream] =
1219 (cprop->curr_sob_group_idx[stream] + 1) &
1220 (HL_RSVD_SOBS - 1);
1221
1222 gaudi_collective_map_sobs(hdev, stream);
1223
1224 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1225 cprop->curr_sob_group_idx[stream], stream);
1226 }
1227
1228 /* Increment kref since all slave queues are now waiting on it */
1229 kref_get(&cs_cmpl->hw_sob->kref);
1230 /*
1231 * Must put the signal fence after the SOB refcnt increment so
1232 * the SOB refcnt won't turn 0 and reset the SOB before the
1233 * wait CS was submitted.
1234 */
1235 mb();
1236 hl_fence_put(cs->signal_fence);
1237 cs->signal_fence = NULL;
1238 }
1239
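/* Allocate a job and a kernel CB for a single collective master/slave queue
 * and add it to the CS job list.
 */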
1240 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1241 struct hl_ctx *ctx, struct hl_cs *cs,
1242 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
1243 {
1244 struct hw_queue_properties *hw_queue_prop;
1245 struct hl_cs_counters_atomic *cntr;
1246 struct hl_cs_job *job;
1247 struct hl_cb *cb;
1248 u32 cb_size;
1249 bool patched_cb;
1250
1251 cntr = &hdev->aggregated_cs_counters;
1252
1253 if (mode == HL_COLLECTIVE_MASTER) {
1254 /* CB size of collective master queue contains
1255 * 4 msg short packets for monitor 1 configuration
1256 * 1 fence packet
1257 * 4 msg short packets for monitor 2 configuration
1258 * 1 fence packet
1259 * 2 msg prot packets for completion and MSI-X
1260 */
1261 cb_size = sizeof(struct packet_msg_short) * 8 +
1262 sizeof(struct packet_fence) * 2 +
1263 sizeof(struct packet_msg_prot) * 2;
1264 patched_cb = true;
1265 } else {
1266 /* CB size of collective slave queues contains
1267 * 4 msg short packets for monitor configuration
1268 * 1 fence packet
1269 * 1 additional msg short packet for sob signal
1270 */
1271 cb_size = sizeof(struct packet_msg_short) * 5 +
1272 sizeof(struct packet_fence);
1273 patched_cb = false;
1274 }
1275
1276 hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1277 job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1278 if (!job) {
1279 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1280 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1281 dev_err(hdev->dev, "Failed to allocate a new job\n");
1282 return -ENOMEM;
1283 }
1284
1285 /* Allocate internal mapped CB for non patched CBs */
1286 cb = hl_cb_kernel_create(hdev, cb_size,
1287 hdev->mmu_enable && !patched_cb);
1288 if (!cb) {
1289 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1290 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1291 kfree(job);
1292 return -EFAULT;
1293 }
1294
1295 job->id = 0;
1296 job->cs = cs;
1297 job->user_cb = cb;
1298 atomic_inc(&job->user_cb->cs_cnt);
1299 job->user_cb_size = cb_size;
1300 job->hw_queue_id = queue_id;
1301
1302 /*
1303 * No need for parsing, the user CB is the patched CB.
1304 * We call hl_cb_destroy() for two reasons: we don't need
1305 * the CB in the CB idr anymore, and we must decrement its refcount as
1306 * it was incremented inside hl_cb_kernel_create().
1307 */
1308 if (patched_cb)
1309 job->patched_cb = job->user_cb;
1310 else
1311 job->patched_cb = NULL;
1312
1313 job->job_cb_size = job->user_cb_size;
1314 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1315
1316 /* increment refcount as for external queues we get completion */
1317 if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1318 cs_get(cs);
1319
1320 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1321
1322 list_add_tail(&job->cs_node, &cs->job_list);
1323
1324 hl_debugfs_add_job(hdev, job);
1325
1326 return 0;
1327 }
1328
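/* Create all collective wait jobs: one master job on the wait queue and slave
 * jobs on the enabled NIC queues plus the reduction engine (DMA5/TPC7).
 */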
1329 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1330 struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
1331 u32 collective_engine_id)
1332 {
1333 struct gaudi_device *gaudi = hdev->asic_specific;
1334 struct hw_queue_properties *hw_queue_prop;
1335 u32 queue_id, collective_queue, num_jobs;
1336 u32 stream, nic_queue, nic_idx = 0;
1337 bool skip;
1338 int i, rc = 0;
1339
1340 /* Verify wait queue id is configured as master */
1341 hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1342 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1343 dev_err(hdev->dev,
1344 "Queue %d is not configured as collective master\n",
1345 wait_queue_id);
1346 return -EINVAL;
1347 }
1348
1349 /* Verify engine id is supported */
1350 if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1351 collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1352 dev_err(hdev->dev,
1353 "Collective wait does not support engine %u\n",
1354 collective_engine_id);
1355 return -EINVAL;
1356 }
1357
1358 stream = wait_queue_id % 4;
1359
1360 if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1361 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1362 else
1363 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1364
1365 num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1366 nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1367
1368 /* The first job goes to the collective master queue; it will wait for
1369 * the collective slave queues to finish execution.
1370 * The synchronization is done using two monitors:
1371 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1372 * reduction engine (DMA5/TPC7).
1373 *
1374 * The rest of the jobs go to the collective slave queues, which will
1375 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1376 */
1377 for (i = 0 ; i < num_jobs ; i++) {
1378 if (i == 0) {
1379 queue_id = wait_queue_id;
1380 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1381 HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
1382 } else {
1383 if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1384 if (gaudi->hw_cap_initialized &
1385 BIT(HW_CAP_NIC_SHIFT + nic_idx))
1386 skip = false;
1387 else
1388 skip = true;
1389
1390 queue_id = nic_queue;
1391 nic_queue += 4;
1392 nic_idx++;
1393
1394 if (skip)
1395 continue;
1396 } else {
1397 queue_id = collective_queue;
1398 }
1399
1400 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1401 HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
1402 }
1403
1404 if (rc)
1405 return rc;
1406 }
1407
1408 return rc;
1409 }
1410
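/* Late init: get CPU-CP info, enable PCI access from the device CPU, fetch
 * the PSOC frequency, clear the MMU page tables, initialize TPC memories and
 * the collective properties.
 */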
1411 static int gaudi_late_init(struct hl_device *hdev)
1412 {
1413 struct gaudi_device *gaudi = hdev->asic_specific;
1414 int rc;
1415
1416 rc = gaudi->cpucp_info_get(hdev);
1417 if (rc) {
1418 dev_err(hdev->dev, "Failed to get cpucp info\n");
1419 return rc;
1420 }
1421
1422 if ((hdev->card_type == cpucp_card_type_pci) &&
1423 (hdev->nic_ports_mask & 0x3)) {
1424 dev_info(hdev->dev,
1425 "PCI card detected, only 8 ports are enabled\n");
1426 hdev->nic_ports_mask &= ~0x3;
1427
1428 /* Stop and disable unused NIC QMANs */
1429 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1430 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1431 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1432
1433 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1434 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1435 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1436
1437 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1438 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1439
1440 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1441 }
1442
1443 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1444 if (rc) {
1445 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1446 return rc;
1447 }
1448
1449 rc = gaudi_fetch_psoc_frequency(hdev);
1450 if (rc) {
1451 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1452 goto disable_pci_access;
1453 }
1454
1455 rc = gaudi_mmu_clear_pgt_range(hdev);
1456 if (rc) {
1457 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1458 goto disable_pci_access;
1459 }
1460
1461 rc = gaudi_init_tpc_mem(hdev);
1462 if (rc) {
1463 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1464 goto disable_pci_access;
1465 }
1466
1467 rc = gaudi_collective_init(hdev);
1468 if (rc) {
1469 dev_err(hdev->dev, "Failed to init collective\n");
1470 goto disable_pci_access;
1471 }
1472
1473 return 0;
1474
1475 disable_pci_access:
1476 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1477
1478 return rc;
1479 }
1480
1481 static void gaudi_late_fini(struct hl_device *hdev)
1482 {
1483 const struct hwmon_channel_info **channel_info_arr;
1484 int i = 0;
1485
1486 if (!hdev->hl_chip_info->info)
1487 return;
1488
1489 channel_info_arr = hdev->hl_chip_info->info;
1490
1491 while (channel_info_arr[i]) {
1492 kfree(channel_info_arr[i]->config);
1493 kfree(channel_info_arr[i]);
1494 i++;
1495 }
1496
1497 kfree(channel_info_arr);
1498
1499 hdev->hl_chip_info->info = NULL;
1500 }
1501
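/* Allocate the CPU-accessible DMA region, retrying until an allocation whose
 * PCI MSBs are identical at both ends of the range is obtained.
 */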
1502 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1503 {
1504 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1505 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1506 int i, j, rc = 0;
1507
1508 /*
1509 * The device CPU works with 40-bit addresses, and bit 39 must be set
1510 * to '1' when accessing the host.
1511 * Bits 49:39 of the full host address are saved for a later
1512 * configuration of the HW to perform extension to 50 bits.
1513 * Because there is a single HW register that holds the extension bits,
1514 * these bits must be identical across the entire allocated range.
1515 */
1516
1517 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1518 virt_addr_arr[i] =
1519 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1520 HL_CPU_ACCESSIBLE_MEM_SIZE,
1521 &dma_addr_arr[i],
1522 GFP_KERNEL | __GFP_ZERO);
1523 if (!virt_addr_arr[i]) {
1524 rc = -ENOMEM;
1525 goto free_dma_mem_arr;
1526 }
1527
1528 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1529 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1530 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1531 break;
1532 }
1533
1534 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1535 dev_err(hdev->dev,
1536 "MSB of CPU accessible DMA memory are not identical in all range\n");
1537 rc = -EFAULT;
1538 goto free_dma_mem_arr;
1539 }
1540
1541 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1542 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1543 hdev->cpu_pci_msb_addr =
1544 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1545
1546 if (hdev->asic_prop.fw_security_disabled)
1547 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1548
1549 free_dma_mem_arr:
1550 for (j = 0 ; j < i ; j++)
1551 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1552 HL_CPU_ACCESSIBLE_MEM_SIZE,
1553 virt_addr_arr[j],
1554 dma_addr_arr[j]);
1555
1556 return rc;
1557 }
1558
1559 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1560 {
1561 struct gaudi_device *gaudi = hdev->asic_specific;
1562 struct gaudi_internal_qman_info *q;
1563 u32 i;
1564
1565 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1566 q = &gaudi->internal_qmans[i];
1567 if (!q->pq_kernel_addr)
1568 continue;
1569 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1570 q->pq_kernel_addr,
1571 q->pq_dma_addr);
1572 }
1573 }
1574
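/* Allocate PQ buffers for all internal queues: HBM DMA, MME, TPC and NIC */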
1575 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1576 {
1577 struct gaudi_device *gaudi = hdev->asic_specific;
1578 struct gaudi_internal_qman_info *q;
1579 int rc, i;
1580
1581 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1582 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1583 continue;
1584
1585 q = &gaudi->internal_qmans[i];
1586
1587 switch (i) {
1588 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1589 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1590 break;
1591 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1592 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1593 break;
1594 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1595 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1596 break;
1597 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1598 q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1599 break;
1600 default:
1601 dev_err(hdev->dev, "Bad internal queue index %d", i);
1602 rc = -EINVAL;
1603 goto free_internal_qmans_pq_mem;
1604 }
1605
1606 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1607 hdev, q->pq_size,
1608 &q->pq_dma_addr,
1609 GFP_KERNEL | __GFP_ZERO);
1610 if (!q->pq_kernel_addr) {
1611 rc = -ENOMEM;
1612 goto free_internal_qmans_pq_mem;
1613 }
1614 }
1615
1616 return 0;
1617
1618 free_internal_qmans_pq_mem:
1619 gaudi_free_internal_qmans_pq_mem(hdev);
1620 return rc;
1621 }
1622
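/* S/W init: allocate the gaudi device structure, build the event table and
 * set up the DMA pools, CPU-accessible memory and internal queue PQs.
 */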
1623 static int gaudi_sw_init(struct hl_device *hdev)
1624 {
1625 struct gaudi_device *gaudi;
1626 u32 i, event_id = 0;
1627 int rc;
1628
1629 /* Allocate device structure */
1630 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1631 if (!gaudi)
1632 return -ENOMEM;
1633
1634 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1635 if (gaudi_irq_map_table[i].valid) {
1636 if (event_id == GAUDI_EVENT_SIZE) {
1637 dev_err(hdev->dev,
1638 "Event array exceeds the limit of %u events\n",
1639 GAUDI_EVENT_SIZE);
1640 rc = -EINVAL;
1641 goto free_gaudi_device;
1642 }
1643
1644 gaudi->events[event_id++] =
1645 gaudi_irq_map_table[i].fc_id;
1646 }
1647 }
1648
1649 gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1650
1651 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1652
1653 hdev->asic_specific = gaudi;
1654
1655 /* store legacy PLL map */
1656 hdev->legacy_pll_map = gaudi_pll_map;
1657
1658 /* Create DMA pool for small allocations */
1659 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1660 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1661 if (!hdev->dma_pool) {
1662 dev_err(hdev->dev, "failed to create DMA pool\n");
1663 rc = -ENOMEM;
1664 goto free_gaudi_device;
1665 }
1666
1667 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1668 if (rc)
1669 goto free_dma_pool;
1670
1671 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1672 if (!hdev->cpu_accessible_dma_pool) {
1673 dev_err(hdev->dev,
1674 "Failed to create CPU accessible DMA pool\n");
1675 rc = -ENOMEM;
1676 goto free_cpu_dma_mem;
1677 }
1678
1679 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1680 (uintptr_t) hdev->cpu_accessible_dma_mem,
1681 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1682 if (rc) {
1683 dev_err(hdev->dev,
1684 "Failed to add memory to CPU accessible DMA pool\n");
1685 rc = -EFAULT;
1686 goto free_cpu_accessible_dma_pool;
1687 }
1688
1689 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1690 if (rc)
1691 goto free_cpu_accessible_dma_pool;
1692
1693 spin_lock_init(&gaudi->hw_queues_lock);
1694 mutex_init(&gaudi->clk_gate_mutex);
1695
1696 hdev->supports_sync_stream = true;
1697 hdev->supports_coresight = true;
1698 hdev->supports_staged_submission = true;
1699
1700 return 0;
1701
1702 free_cpu_accessible_dma_pool:
1703 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1704 free_cpu_dma_mem:
1705 if (hdev->asic_prop.fw_security_disabled)
1706 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1707 hdev->cpu_pci_msb_addr);
1708 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1709 HL_CPU_ACCESSIBLE_MEM_SIZE,
1710 hdev->cpu_accessible_dma_mem,
1711 hdev->cpu_accessible_dma_address);
1712 free_dma_pool:
1713 dma_pool_destroy(hdev->dma_pool);
1714 free_gaudi_device:
1715 kfree(gaudi);
1716 return rc;
1717 }
1718
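/* Release, in reverse order, everything that was set up in gaudi_sw_init() */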
1719 static int gaudi_sw_fini(struct hl_device *hdev)
1720 {
1721 struct gaudi_device *gaudi = hdev->asic_specific;
1722
1723 gaudi_free_internal_qmans_pq_mem(hdev);
1724
1725 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1726
1727 if (hdev->asic_prop.fw_security_disabled)
1728 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1729 hdev->cpu_pci_msb_addr);
1730
1731 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1732 HL_CPU_ACCESSIBLE_MEM_SIZE,
1733 hdev->cpu_accessible_dma_mem,
1734 hdev->cpu_accessible_dma_address);
1735
1736 dma_pool_destroy(hdev->dma_pool);
1737
1738 mutex_destroy(&gaudi->clk_gate_mutex);
1739
1740 kfree(gaudi);
1741
1742 return 0;
1743 }
1744
1745 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1746 {
1747 struct hl_device *hdev = arg;
1748 int i;
1749
1750 if (hdev->disabled)
1751 return IRQ_HANDLED;
1752
1753 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1754 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1755
1756 hl_irq_handler_eq(irq, &hdev->event_queue);
1757
1758 return IRQ_HANDLED;
1759 }
1760
1761 /*
1762 * For backward compatibility, new MSI interrupts should be set after the
1763 * existing CPU and NIC interrupts.
1764 */
1765 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1766 bool cpu_eq)
1767 {
1768 int msi_vec;
1769
1770 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1771 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1772 GAUDI_EVENT_QUEUE_MSI_IDX);
1773
1774 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1775 (nr + NIC_NUMBER_OF_ENGINES + 1);
1776
1777 return pci_irq_vector(hdev->pdev, msi_vec);
1778 }
1779
1780 static int gaudi_enable_msi_single(struct hl_device *hdev)
1781 {
1782 int rc, irq;
1783
1784 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1785
1786 irq = gaudi_pci_irq_vector(hdev, 0, false);
1787 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1788 "gaudi single msi", hdev);
1789 if (rc)
1790 dev_err(hdev->dev,
1791 "Failed to request single MSI IRQ\n");
1792
1793 return rc;
1794 }
1795
1796 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1797 {
1798 int cq_cnt = hdev->asic_prop.completion_queues_count;
1799 int rc, i, irq_cnt_init, irq;
1800
1801 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1802 irq = gaudi_pci_irq_vector(hdev, i, false);
1803 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1804 &hdev->completion_queue[i]);
1805 if (rc) {
1806 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1807 goto free_irqs;
1808 }
1809 }
1810
1811 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1812 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1813 &hdev->event_queue);
1814 if (rc) {
1815 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1816 goto free_irqs;
1817 }
1818
1819 return 0;
1820
1821 free_irqs:
1822 for (i = 0 ; i < irq_cnt_init ; i++)
1823 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1824 &hdev->completion_queue[i]);
1825 return rc;
1826 }
1827
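/*
 * Allocate MSI vector(s) and register the IRQ handler(s). If fewer than
 * NUMBER_OF_INTERRUPTS vectors are available, a single shared handler
 * services all completion queues and the event queue.
 */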
1828 static int gaudi_enable_msi(struct hl_device *hdev)
1829 {
1830 struct gaudi_device *gaudi = hdev->asic_specific;
1831 int rc;
1832
1833 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1834 return 0;
1835
1836 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
1837 if (rc < 0) {
1838 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1839 return rc;
1840 }
1841
1842 if (rc < NUMBER_OF_INTERRUPTS) {
1843 gaudi->multi_msi_mode = false;
1844 rc = gaudi_enable_msi_single(hdev);
1845 } else {
1846 gaudi->multi_msi_mode = true;
1847 rc = gaudi_enable_msi_multi(hdev);
1848 }
1849
1850 if (rc)
1851 goto free_pci_irq_vectors;
1852
1853 gaudi->hw_cap_initialized |= HW_CAP_MSI;
1854
1855 return 0;
1856
1857 free_pci_irq_vectors:
1858 pci_free_irq_vectors(hdev->pdev);
1859 return rc;
1860 }
1861
1862 static void gaudi_sync_irqs(struct hl_device *hdev)
1863 {
1864 struct gaudi_device *gaudi = hdev->asic_specific;
1865 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1866
1867 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1868 return;
1869
1870 /* Wait for all pending IRQ handlers to finish */
1871 if (gaudi->multi_msi_mode) {
1872 for (i = 0 ; i < cq_cnt ; i++)
1873 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1874
1875 synchronize_irq(gaudi_pci_irq_vector(hdev,
1876 GAUDI_EVENT_QUEUE_MSI_IDX,
1877 true));
1878 } else {
1879 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1880 }
1881 }
1882
1883 static void gaudi_disable_msi(struct hl_device *hdev)
1884 {
1885 struct gaudi_device *gaudi = hdev->asic_specific;
1886 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1887
1888 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1889 return;
1890
1891 gaudi_sync_irqs(hdev);
1892
1893 if (gaudi->multi_msi_mode) {
1894 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1895 true);
1896 free_irq(irq, &hdev->event_queue);
1897
1898 for (i = 0 ; i < cq_cnt ; i++) {
1899 irq = gaudi_pci_irq_vector(hdev, i, false);
1900 free_irq(irq, &hdev->completion_queue[i]);
1901 }
1902 } else {
1903 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1904 }
1905
1906 pci_free_irq_vectors(hdev->pdev);
1907
1908 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1909 }
1910
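/*
 * Enable the SRAM scrambler in all NIF/SIF routers and DMA_IF down channels.
 * Skipped when the driver must not touch secured registers, when the F/W
 * reports it already enabled the scrambler, or when SRAM scrambling is
 * disabled via hdev->sram_scrambler_enable.
 */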
1911 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1912 {
1913 struct gaudi_device *gaudi = hdev->asic_specific;
1914
1915 if (!hdev->asic_prop.fw_security_disabled)
1916 return;
1917
1918 if (hdev->asic_prop.fw_security_status_valid &&
1919 (hdev->asic_prop.fw_app_security_map &
1920 CPU_BOOT_DEV_STS0_SRAM_SCR_EN))
1921 return;
1922
1923 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1924 return;
1925
1926 if (!hdev->sram_scrambler_enable)
1927 return;
1928
1929 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1930 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1931 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1932 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1933 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1934 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1935 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1936 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1937 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1938 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1939 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1940 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1941 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1942 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1943 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1944 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1945
1946 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1947 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1948 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1949 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1950 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1951 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1952 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1953 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1954 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1955 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1956 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1957 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1958 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1959 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1960 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1961 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1962
1963 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1964 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1965 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1966 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1967 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1968 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1969 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1970 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1971 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1972 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1973 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1974 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1975 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1976 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1977 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1978 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1979
1980 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1981 }
1982
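/* Same as gaudi_init_scrambler_sram(), but for the HBM scrambler */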
1983 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1984 {
1985 struct gaudi_device *gaudi = hdev->asic_specific;
1986
1987 if (!hdev->asic_prop.fw_security_disabled)
1988 return;
1989
1990 if (hdev->asic_prop.fw_security_status_valid &&
1991 (hdev->asic_prop.fw_boot_cpu_security_map &
1992 CPU_BOOT_DEV_STS0_DRAM_SCR_EN))
1993 return;
1994
1995 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1996 return;
1997
1998 if (!hdev->dram_scrambler_enable)
1999 return;
2000
2001 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2002 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2003 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2004 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2005 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2006 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2007 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2008 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2009 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2010 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2011 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2012 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2013 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2014 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2015 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2016 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2017
2018 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2019 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2020 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2021 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2022 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2023 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2024 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2025 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2026 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2027 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2028 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2029 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2030 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2031 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2032 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2033 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2034
2035 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2036 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2037 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2038 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2039 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2040 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2041 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2042 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2043 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2044 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2045 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2046 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2047 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2048 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2049 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2050 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2051
2052 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2053 }
2054
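/*
 * Configure the end-to-end credits (HBM and PCI read/write sizes) of all
 * SIF/NIF routers and DMA_IF down channels, then enable E2E on each of them.
 * Skipped when F/W security is enabled or when the F/W already configured
 * the credits.
 */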
2055 static void gaudi_init_e2e(struct hl_device *hdev)
2056 {
2057 if (!hdev->asic_prop.fw_security_disabled)
2058 return;
2059
2060 if (hdev->asic_prop.fw_security_status_valid &&
2061 (hdev->asic_prop.fw_boot_cpu_security_map &
2062 CPU_BOOT_DEV_STS0_E2E_CRED_EN))
2063 return;
2064
2065 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2066 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2067 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2068 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2069
2070 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2071 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2072 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2073 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2074
2075 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2076 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2077 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2078 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2079
2080 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2081 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2082 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2083 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2084
2085 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2086 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2087 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2088 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2089
2090 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2091 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2092 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2093 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2094
2095 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2096 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2097 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2098 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2099
2100 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2101 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2102 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2103 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2104
2105 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2106 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2107 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2108 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2109
2110 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2111 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2112 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2113 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2114
2115 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2116 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2117 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2118 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2119
2120 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2121 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2122 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2123 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2124
2125 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2126 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2127 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2128 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2129
2130 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2131 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2132 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2133 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2134
2135 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2136 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2137 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2138 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2139
2140 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2141 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2142 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2143 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2144
2145 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2146 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2147 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2148 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2149
2150 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2151 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2152 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2153 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2154
2155 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2156 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2157 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2158 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2159
2160 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2161 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2162 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2163 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2164
2165 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2166 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2167 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2168 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2169
2170 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2171 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2172 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2173 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2174
2175 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2176 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2177 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2178 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2179
2180 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2181 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2182 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2183 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2184
2185 if (!hdev->dram_scrambler_enable) {
2186 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2187 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2188 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2189 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2190
2191 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2192 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2193 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2194 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2195
2196 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2197 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2198 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2199 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2200
2201 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2202 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2203 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2204 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2205
2206 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2207 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2208 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2209 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2210
2211 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2212 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2213 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2214 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2215
2216 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2217 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2218 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2219 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2220
2221 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2222 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2223 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2224 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2225
2226 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2227 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2228 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2229 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2230
2231 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2232 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2233 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2234 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2235
2236 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2237 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2238 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2239 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2240
2241 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2242 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2243 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2244 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2245
2246 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2247 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2248 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2249 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2250
2251 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2252 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2253 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2254 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2255
2256 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2257 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2258 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2259 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2260
2261 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2262 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2263 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2264 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2265
2266 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2267 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2268 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2269 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2270
2271 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2272 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2273 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2274 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2275
2276 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2277 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2278 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2279 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2280
2281 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2282 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2283 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2284 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2285
2286 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2287 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2288 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2289 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2290
2291 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2292 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2293 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2294 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2295
2296 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2297 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2298 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2299 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2300
2301 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2302 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2303 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2304 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2305 }
2306
2307 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2308 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2309 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2310 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2311
2312 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2313 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2314 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2315 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2316
2317 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2318 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2319 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2320 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2321
2322 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2323 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2324 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2325 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2326
2327 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2328 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2329 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2330 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2331
2332 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2333 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2334 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2335 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2336
2337 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2338 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2339 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2340 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2341
2342 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2343 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2344 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2345 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2346
2347 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2348 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2349 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2350 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2351
2352 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2353 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2354 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2355 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2356
2357 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2358 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2359 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2360 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2361
2362 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2363 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2364 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2365 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2366
2367 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2368 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2369 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2370 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2371
2372 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2373 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2374 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2375 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2376
2377 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2378 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2379 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2380 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2381
2382 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2383 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2384 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2385 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2386
2387 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2388 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2389 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2390 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2391
2392 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2393 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2394 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2395 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2396
2397 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2398 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2399 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2400 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2401
2402 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2403 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2404 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2405 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2406
2407 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2408 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2409 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2410 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2411
2412 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2413 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2414 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2415 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2416
2417 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2418 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2419 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2420 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2421
2422 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2423 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2424 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2425 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2426 }
2427
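/* Configure and enable the HBM read/write credits of the four DMA_IF units */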
2428 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2429 {
2430 uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2431
2432 if (!hdev->asic_prop.fw_security_disabled)
2433 return;
2434
2435 if (hdev->asic_prop.fw_security_status_valid &&
2436 (hdev->asic_prop.fw_boot_cpu_security_map &
2437 CPU_BOOT_DEV_STS0_HBM_CRED_EN))
2438 return;
2439
2440 hbm0_wr = 0x33333333;
2441 hbm0_rd = 0x77777777;
2442 hbm1_wr = 0x55555555;
2443 hbm1_rd = 0xDDDDDDDD;
2444
2445 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2446 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2447 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2448 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2449
2450 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2451 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2452 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2453 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2454
2455 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2456 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2457 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2458 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2459
2460 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2461 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2462 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2463 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2464
2465 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2466 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2467 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2468 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2469 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2470 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2471 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2472 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2473 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2474 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2475 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2476 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2477
2478 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2479 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2480 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2481 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2482 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2483 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2484 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2485 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2486 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2487 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2488 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2489 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2490 }
2491
2492 static void gaudi_init_golden_registers(struct hl_device *hdev)
2493 {
2494 u32 tpc_offset;
2495 int tpc_id, i;
2496
2497 gaudi_init_e2e(hdev);
2498 gaudi_init_hbm_cred(hdev);
2499
2500 for (tpc_id = 0, tpc_offset = 0;
2501 tpc_id < TPC_NUMBER_OF_ENGINES;
2502 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2503 /* Mask all arithmetic interrupts from TPC */
2504 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2505 /* Set 16 cache lines */
2506 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2507 ICACHE_FETCH_LINE_NUM, 2);
2508 }
2509
2510 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2511 for (i = 0 ; i < 128 ; i += 8)
2512 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2513
2514 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2515 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2516 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2517 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2518 }
2519
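/*
 * Configure one stream of a PCI DMA QMAN: PQ base address and size, LDMA
 * offsets and sync manager message base addresses. Stream 0 additionally
 * configures the RAZWI error reporting and the arbitration watchdog, which
 * are shared by the whole QMAN.
 */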
2520 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2521 int qman_id, dma_addr_t qman_pq_addr)
2522 {
2523 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2524 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2525 u32 q_off, dma_qm_offset;
2526 u32 dma_qm_err_cfg;
2527
2528 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2529
2530 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2531 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2532 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2533 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2534 so_base_en_lo = lower_32_bits(CFG_BASE +
2535 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2536 so_base_en_hi = upper_32_bits(CFG_BASE +
2537 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2538 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2539 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2540 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2541 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2542 so_base_ws_lo = lower_32_bits(CFG_BASE +
2543 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2544 so_base_ws_hi = upper_32_bits(CFG_BASE +
2545 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2546
2547 q_off = dma_qm_offset + qman_id * 4;
2548
2549 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2550 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2551
2552 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2553 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2554 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2555
2556 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2557 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2558 QMAN_LDMA_SRC_OFFSET);
2559 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2560 QMAN_LDMA_DST_OFFSET);
2561
2562 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2563 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2564 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2565 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2566 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2567 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2568 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2569 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2570
2571 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2572
2573 /* The following configuration is needed only once per QMAN */
2574 if (qman_id == 0) {
2575 /* Configure RAZWI IRQ */
2576 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2577 if (hdev->stop_on_err) {
2578 dma_qm_err_cfg |=
2579 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2580 }
2581
2582 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2583 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2584 lower_32_bits(CFG_BASE +
2585 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2586 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2587 upper_32_bits(CFG_BASE +
2588 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2589 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2590 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2591 dma_id);
2592
2593 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2594 QM_ARB_ERR_MSG_EN_MASK);
2595
2596 /* Increase ARB WDT to support streams architecture */
2597 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2598 GAUDI_ARB_WDT_TIMEOUT);
2599
2600 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2601 QMAN_EXTERNAL_MAKE_TRUSTED);
2602
2603 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2604 }
2605 }
2606
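/*
 * Configure the DMA core itself: maximum outstanding reads, error reporting
 * towards the GIC, MMU bypass for the secured channel and, finally, enable
 * the core.
 */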
2607 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2608 {
2609 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2610 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2611
2612 /* Set to maximum possible according to physical size */
2613 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2614 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2615
2616 /* WA for H/W bug H3-2116 */
2617 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2618
2619 /* STOP_ON bit implies no completion of the operation in case of RAZWI */
2620 if (hdev->stop_on_err)
2621 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2622
2623 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2624 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2625 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2626 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2627 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2628 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2629 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2630 WREG32(mmDMA0_CORE_PROT + dma_offset,
2631 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2632 /* If the channel is secured, it should be in MMU bypass mode */
2633 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2634 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2635 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2636 }
2637
2638 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2639 u32 enable_mask)
2640 {
2641 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2642
2643 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2644 }
2645
2646 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2647 {
2648 struct gaudi_device *gaudi = hdev->asic_specific;
2649 struct hl_hw_queue *q;
2650 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2651
2652 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2653 return;
2654
2655 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2656 dma_id = gaudi_dma_assignment[i];
2657 /*
2658 * For queues after the CPU Q, we need to add 1 to get the correct
2659 * queue index. In addition, the CPU EQ and NIC IRQ vectors need to
2660 * be skipped over in order to get the correct MSI register.
2661 */
2662 if (dma_id > 1) {
2663 cpu_skip = 1;
2664 nic_skip = NIC_NUMBER_OF_ENGINES;
2665 } else {
2666 cpu_skip = 0;
2667 nic_skip = 0;
2668 }
2669
2670 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2671 q_idx = 4 * dma_id + j + cpu_skip;
2672 q = &hdev->kernel_queues[q_idx];
2673 q->cq_id = cq_id++;
2674 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2675 gaudi_init_pci_dma_qman(hdev, dma_id, j,
2676 q->bus_address);
2677 }
2678
2679 gaudi_init_dma_core(hdev, dma_id);
2680
2681 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2682 }
2683
2684 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2685 }
2686
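/*
 * Configure one stream of an HBM DMA QMAN. Streams 0-3 (upper CPs) get a PQ
 * that resides in host memory, while stream 4 (lower CP) has no PQ and
 * carries the RAZWI error reporting, arbitration and protection settings.
 */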
2687 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2688 int qman_id, u64 qman_base_addr)
2689 {
2690 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2691 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2692 u32 q_off, dma_qm_offset;
2693 u32 dma_qm_err_cfg;
2694
2695 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2696
2697 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2698 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2699 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2700 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2701 so_base_en_lo = lower_32_bits(CFG_BASE +
2702 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2703 so_base_en_hi = upper_32_bits(CFG_BASE +
2704 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2705 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2706 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2707 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2708 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2709 so_base_ws_lo = lower_32_bits(CFG_BASE +
2710 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2711 so_base_ws_hi = upper_32_bits(CFG_BASE +
2712 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2713
2714 q_off = dma_qm_offset + qman_id * 4;
2715
2716 if (qman_id < 4) {
2717 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2718 lower_32_bits(qman_base_addr));
2719 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2720 upper_32_bits(qman_base_addr));
2721
2722 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2723 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2724 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2725
2726 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2727 QMAN_CPDMA_SIZE_OFFSET);
2728 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2729 QMAN_CPDMA_SRC_OFFSET);
2730 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2731 QMAN_CPDMA_DST_OFFSET);
2732 } else {
2733 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2734 QMAN_LDMA_SIZE_OFFSET);
2735 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2736 QMAN_LDMA_SRC_OFFSET);
2737 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2738 QMAN_LDMA_DST_OFFSET);
2739
2740 /* Configure RAZWI IRQ */
2741 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2742 if (hdev->stop_on_err) {
2743 dma_qm_err_cfg |=
2744 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2745 }
2746 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2747
2748 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2749 lower_32_bits(CFG_BASE +
2750 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2751 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2752 upper_32_bits(CFG_BASE +
2753 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2754 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2755 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2756 dma_id);
2757
2758 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2759 QM_ARB_ERR_MSG_EN_MASK);
2760
2761 /* Increase ARB WDT to support streams architecture */
2762 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2763 GAUDI_ARB_WDT_TIMEOUT);
2764
2765 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2766 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2767 QMAN_INTERNAL_MAKE_TRUSTED);
2768 }
2769
2770 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2771 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2772 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2773 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2774
2775 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2776 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2777 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2778 mtr_base_ws_lo);
2779 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2780 mtr_base_ws_hi);
2781 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2782 so_base_ws_lo);
2783 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2784 so_base_ws_hi);
2785 }
2786 }
2787
2788 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2789 {
2790 struct gaudi_device *gaudi = hdev->asic_specific;
2791 struct gaudi_internal_qman_info *q;
2792 u64 qman_base_addr;
2793 int i, j, dma_id, internal_q_index;
2794
2795 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2796 return;
2797
2798 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2799 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2800
2801 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2802 /*
2803 * Add the CPU queue in order to get the correct queue
2804 * number, as all internal queues are placed after it
2805 */
2806 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2807
2808 q = &gaudi->internal_qmans[internal_q_index];
2809 qman_base_addr = (u64) q->pq_dma_addr;
2810 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2811 qman_base_addr);
2812 }
2813
2814 /* Initializing lower CP for HBM DMA QMAN */
2815 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2816
2817 gaudi_init_dma_core(hdev, dma_id);
2818
2819 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2820 }
2821
2822 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2823 }
2824
2825 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2826 int qman_id, u64 qman_base_addr)
2827 {
2828 u32 mtr_base_lo, mtr_base_hi;
2829 u32 so_base_lo, so_base_hi;
2830 u32 q_off, mme_id;
2831 u32 mme_qm_err_cfg;
2832
2833 mtr_base_lo = lower_32_bits(CFG_BASE +
2834 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2835 mtr_base_hi = upper_32_bits(CFG_BASE +
2836 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2837 so_base_lo = lower_32_bits(CFG_BASE +
2838 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2839 so_base_hi = upper_32_bits(CFG_BASE +
2840 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2841
2842 q_off = mme_offset + qman_id * 4;
2843
2844 if (qman_id < 4) {
2845 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2846 lower_32_bits(qman_base_addr));
2847 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2848 upper_32_bits(qman_base_addr));
2849
2850 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2851 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2852 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2853
2854 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2855 QMAN_CPDMA_SIZE_OFFSET);
2856 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2857 QMAN_CPDMA_SRC_OFFSET);
2858 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2859 QMAN_CPDMA_DST_OFFSET);
2860 } else {
2861 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2862 QMAN_LDMA_SIZE_OFFSET);
2863 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2864 QMAN_LDMA_SRC_OFFSET);
2865 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2866 QMAN_LDMA_DST_OFFSET);
2867
2868 /* Configure RAZWI IRQ */
2869 mme_id = mme_offset /
2870 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2871
2872 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2873 if (hdev->stop_on_err) {
2874 mme_qm_err_cfg |=
2875 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2876 }
2877 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2878 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2879 lower_32_bits(CFG_BASE +
2880 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2881 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2882 upper_32_bits(CFG_BASE +
2883 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2884 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2885 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2886 mme_id);
2887
2888 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2889 QM_ARB_ERR_MSG_EN_MASK);
2890
2891 /* Increase ARB WDT to support streams architecture */
2892 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2893 GAUDI_ARB_WDT_TIMEOUT);
2894
2895 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2896 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2897 QMAN_INTERNAL_MAKE_TRUSTED);
2898 }
2899
2900 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2901 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2902 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2903 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2904 }
2905
2906 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2907 {
2908 struct gaudi_device *gaudi = hdev->asic_specific;
2909 struct gaudi_internal_qman_info *q;
2910 u64 qman_base_addr;
2911 u32 mme_offset;
2912 int i, internal_q_index;
2913
2914 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2915 return;
2916
2917 /*
2918 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2919 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2920 */
2921
2922 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2923
2924 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2925 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2926 q = &gaudi->internal_qmans[internal_q_index];
2927 qman_base_addr = (u64) q->pq_dma_addr;
2928 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2929 qman_base_addr);
2930 if (i == 3)
2931 mme_offset = 0;
2932 }
2933
2934 /* Initializing lower CP for MME QMANs */
2935 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2936 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2937 gaudi_init_mme_qman(hdev, 0, 4, 0);
2938
2939 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2940 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2941
2942 gaudi->hw_cap_initialized |= HW_CAP_MME;
2943 }
2944
2945 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2946 int qman_id, u64 qman_base_addr)
2947 {
2948 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2949 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2950 u32 q_off, tpc_id;
2951 u32 tpc_qm_err_cfg;
2952
2953 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2954 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2955 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2956 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2957 so_base_en_lo = lower_32_bits(CFG_BASE +
2958 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2959 so_base_en_hi = upper_32_bits(CFG_BASE +
2960 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2961 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2962 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2963 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2964 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2965 so_base_ws_lo = lower_32_bits(CFG_BASE +
2966 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2967 so_base_ws_hi = upper_32_bits(CFG_BASE +
2968 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2969
2970 q_off = tpc_offset + qman_id * 4;
2971
2972 tpc_id = tpc_offset /
2973 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2974
2975 if (qman_id < 4) {
2976 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2977 lower_32_bits(qman_base_addr));
2978 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2979 upper_32_bits(qman_base_addr));
2980
2981 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2982 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2983 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2984
2985 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2986 QMAN_CPDMA_SIZE_OFFSET);
2987 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2988 QMAN_CPDMA_SRC_OFFSET);
2989 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2990 QMAN_CPDMA_DST_OFFSET);
2991 } else {
2992 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2993 QMAN_LDMA_SIZE_OFFSET);
2994 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2995 QMAN_LDMA_SRC_OFFSET);
2996 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2997 QMAN_LDMA_DST_OFFSET);
2998
2999 /* Configure RAZWI IRQ */
3000 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3001 if (hdev->stop_on_err) {
3002 tpc_qm_err_cfg |=
3003 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3004 }
3005
3006 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3007 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3008 lower_32_bits(CFG_BASE +
3009 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
3010 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3011 upper_32_bits(CFG_BASE +
3012 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
3013 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3014 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3015 tpc_id);
3016
3017 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3018 QM_ARB_ERR_MSG_EN_MASK);
3019
3020 /* Increase ARB WDT to support streams architecture */
3021 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3022 GAUDI_ARB_WDT_TIMEOUT);
3023
3024 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3025 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3026 QMAN_INTERNAL_MAKE_TRUSTED);
3027 }
3028
3029 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3030 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3031 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3032 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3033
3034 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3035 if (tpc_id == 6) {
3036 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3037 mtr_base_ws_lo);
3038 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3039 mtr_base_ws_hi);
3040 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3041 so_base_ws_lo);
3042 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3043 so_base_ws_hi);
3044 }
3045 }
3046
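/*
 * Initialize the four streams and the lower CP of every TPC QMAN, point each
 * TPC's sync manager base address at the east-north sync manager and mark
 * the engine as initialized in hw_cap_initialized.
 */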
3047 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3048 {
3049 struct gaudi_device *gaudi = hdev->asic_specific;
3050 struct gaudi_internal_qman_info *q;
3051 u64 qman_base_addr;
3052 u32 so_base_hi, tpc_offset = 0;
3053 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3054 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3055 int i, tpc_id, internal_q_index;
3056
3057 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3058 return;
3059
3060 so_base_hi = upper_32_bits(CFG_BASE +
3061 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3062
3063 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3064 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3065 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3066 tpc_id * QMAN_STREAMS + i;
3067 q = &gaudi->internal_qmans[internal_q_index];
3068 qman_base_addr = (u64) q->pq_dma_addr;
3069 gaudi_init_tpc_qman(hdev, tpc_offset, i,
3070 qman_base_addr);
3071
3072 if (i == 3) {
3073 /* Initializing lower CP for TPC QMAN */
3074 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3075
3076 /* Enable the QMAN and TPC channel */
3077 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3078 QMAN_TPC_ENABLE);
3079 }
3080 }
3081
3082 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3083 so_base_hi);
3084
3085 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3086
3087 gaudi->hw_cap_initialized |=
3088 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3089 }
3090 }
3091
3092 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3093 int qman_id, u64 qman_base_addr, int nic_id)
3094 {
3095 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3096 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3097 u32 q_off;
3098 u32 nic_qm_err_cfg;
3099
3100 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3101 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3102 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3103 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3104 so_base_en_lo = lower_32_bits(CFG_BASE +
3105 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3106 so_base_en_hi = upper_32_bits(CFG_BASE +
3107 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3108 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3109 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3110 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3111 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3112 so_base_ws_lo = lower_32_bits(CFG_BASE +
3113 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3114 so_base_ws_hi = upper_32_bits(CFG_BASE +
3115 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3116
3117 q_off = nic_offset + qman_id * 4;
3118
3119 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3120 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3121
3122 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3123 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3124 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3125
3126 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3127 QMAN_LDMA_SIZE_OFFSET);
3128 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3129 QMAN_LDMA_SRC_OFFSET);
3130 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3131 QMAN_LDMA_DST_OFFSET);
3132
3133 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3134 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3135 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3136 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3137
3138 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3139 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3140 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3141 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3142 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3143
3144 if (qman_id == 0) {
3145 /* Configure RAZWI IRQ */
3146 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3147 if (hdev->stop_on_err) {
3148 nic_qm_err_cfg |=
3149 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3150 }
3151
3152 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3153 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3154 lower_32_bits(CFG_BASE +
3155 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
3156 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3157 upper_32_bits(CFG_BASE +
3158 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
3159 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3160 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3161 nic_id);
3162
3163 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3164 QM_ARB_ERR_MSG_EN_MASK);
3165
3166 /* Increase ARB WDT to support streams architecture */
3167 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3168 GAUDI_ARB_WDT_TIMEOUT);
3169
3170 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3171 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3172 QMAN_INTERNAL_MAKE_TRUSTED);
3173 }
3174 }
3175
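/*
 * Initialize the QMANs of every NIC engine that is enabled in
 * hdev->nic_ports_mask. The register offset advances by one QMAN per port
 * and jumps to the next NIC macro after every odd port.
 */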
3176 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3177 {
3178 struct gaudi_device *gaudi = hdev->asic_specific;
3179 struct gaudi_internal_qman_info *q;
3180 u64 qman_base_addr;
3181 u32 nic_offset = 0;
3182 u32 nic_delta_between_qmans =
3183 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3184 u32 nic_delta_between_nics =
3185 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3186 int i, nic_id, internal_q_index;
3187
3188 if (!hdev->nic_ports_mask)
3189 return;
3190
3191 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3192 return;
3193
3194 dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3195
3196 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3197 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3198 nic_offset += nic_delta_between_qmans;
3199 if (nic_id & 1) {
3200 nic_offset -= (nic_delta_between_qmans * 2);
3201 nic_offset += nic_delta_between_nics;
3202 }
3203 continue;
3204 }
3205
3206 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3207 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3208 nic_id * QMAN_STREAMS + i;
3209 q = &gaudi->internal_qmans[internal_q_index];
3210 qman_base_addr = (u64) q->pq_dma_addr;
3211 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3212 qman_base_addr, nic_id);
3213 }
3214
3215 /* Enable the QMAN */
3216 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3217
3218 nic_offset += nic_delta_between_qmans;
3219 if (nic_id & 1) {
3220 nic_offset -= (nic_delta_between_qmans * 2);
3221 nic_offset += nic_delta_between_nics;
3222 }
3223
3224 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3225 }
3226 }
3227
3228 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3229 {
3230 struct gaudi_device *gaudi = hdev->asic_specific;
3231
3232 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3233 return;
3234
3235 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3236 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3237 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3238 }
3239
3240 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3241 {
3242 struct gaudi_device *gaudi = hdev->asic_specific;
3243
3244 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3245 return;
3246
3247 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3248 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3249 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3250 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3251 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3252 }
3253
3254 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3255 {
3256 struct gaudi_device *gaudi = hdev->asic_specific;
3257
3258 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3259 return;
3260
3261 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3262 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3263 }
3264
3265 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3266 {
3267 struct gaudi_device *gaudi = hdev->asic_specific;
3268 u32 tpc_offset = 0;
3269 int tpc_id;
3270
3271 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3272 return;
3273
3274 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3275 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3276 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3277 }
3278 }
3279
3280 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3281 {
3282 struct gaudi_device *gaudi = hdev->asic_specific;
3283 u32 nic_mask, nic_offset = 0;
3284 u32 nic_delta_between_qmans =
3285 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3286 u32 nic_delta_between_nics =
3287 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3288 int nic_id;
3289
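	/* NIC QMANs are disabled per port, according to the per-NIC capability
	 * bits, rather than behind a single early-exit check like the other
	 * engine types.
	 */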
3290 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3291 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3292
3293 if (gaudi->hw_cap_initialized & nic_mask)
3294 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3295
3296 nic_offset += nic_delta_between_qmans;
3297 if (nic_id & 1) {
3298 nic_offset -= (nic_delta_between_qmans * 2);
3299 nic_offset += nic_delta_between_nics;
3300 }
3301 }
3302 }
3303
3304 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3305 {
3306 struct gaudi_device *gaudi = hdev->asic_specific;
3307
3308 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3309 return;
3310
3311 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3312 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3313 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3314 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3315 }
3316
3317 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3318 {
3319 struct gaudi_device *gaudi = hdev->asic_specific;
3320
3321 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3322 return;
3323
3324 /* Stop CPs of HBM DMA QMANs */
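	/* Note that 0x1F also covers the lower CP, whereas the 0xF used for the
	 * PCI DMA QMANs above stops only the four upper, per-stream CPs.
	 */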
3325
3326 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3327 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3328 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3329 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3330 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3331 }
3332
3333 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3334 {
3335 struct gaudi_device *gaudi = hdev->asic_specific;
3336
3337 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3338 return;
3339
3340 /* Stop CPs of MME QMANs */
3341 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3342 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3343 }
3344
3345 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3346 {
3347 struct gaudi_device *gaudi = hdev->asic_specific;
3348
3349 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3350 return;
3351
3352 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3353 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3354 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3355 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3356 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3357 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3358 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3359 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3360 }
3361
3362 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3363 {
3364 struct gaudi_device *gaudi = hdev->asic_specific;
3365
3366 /* Stop upper CPs of QMANs */
3367
3368 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3369 WREG32(mmNIC0_QM0_GLBL_CFG1,
3370 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3371 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3372 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3373
3374 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3375 WREG32(mmNIC0_QM1_GLBL_CFG1,
3376 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3377 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3378 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3379
3380 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3381 WREG32(mmNIC1_QM0_GLBL_CFG1,
3382 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3383 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3384 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3385
3386 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3387 WREG32(mmNIC1_QM1_GLBL_CFG1,
3388 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3389 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3390 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3391
3392 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3393 WREG32(mmNIC2_QM0_GLBL_CFG1,
3394 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3395 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3396 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3397
3398 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3399 WREG32(mmNIC2_QM1_GLBL_CFG1,
3400 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3401 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3402 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3403
3404 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3405 WREG32(mmNIC3_QM0_GLBL_CFG1,
3406 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3407 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3408 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3409
3410 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3411 WREG32(mmNIC3_QM1_GLBL_CFG1,
3412 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3413 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3414 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3415
3416 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3417 WREG32(mmNIC4_QM0_GLBL_CFG1,
3418 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3419 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3420 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3421
3422 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3423 WREG32(mmNIC4_QM1_GLBL_CFG1,
3424 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3425 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3426 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3427 }
3428
3429 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3430 {
3431 struct gaudi_device *gaudi = hdev->asic_specific;
3432
3433 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3434 return;
3435
3436 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3437 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3438 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3439 }
3440
3441 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3442 {
3443 struct gaudi_device *gaudi = hdev->asic_specific;
3444
3445 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3446 return;
3447
3448 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3449 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3450 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3451 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3452 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3453 }
3454
3455 static void gaudi_mme_stall(struct hl_device *hdev)
3456 {
3457 struct gaudi_device *gaudi = hdev->asic_specific;
3458
3459 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3460 return;
3461
3462 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3463 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3464 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3465 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3466 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3467 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3468 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3469 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3470 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3471 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3472 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3473 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3474 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3475 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3476 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3477 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3478 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3479 }
3480
3481 static void gaudi_tpc_stall(struct hl_device *hdev)
3482 {
3483 struct gaudi_device *gaudi = hdev->asic_specific;
3484
3485 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3486 return;
3487
3488 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3489 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3490 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3491 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3492 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3493 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3494 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3495 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3496 }
3497
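/*
 * Enable clock gating per engine according to hdev->clock_gating_mask, where
 * each bit corresponds to an engine ID. This is skipped entirely during a
 * debug session or when the firmware is secured, and clock gating is never
 * enabled for DMA5 because GC submits work through its upper CP.
 */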
3498 static void gaudi_set_clock_gating(struct hl_device *hdev)
3499 {
3500 struct gaudi_device *gaudi = hdev->asic_specific;
3501 u32 qman_offset;
3502 bool enable;
3503 int i;
3504
3505 	/* If we are in a debug session, don't enable clock gating as it
3506 	 * may interfere
3507 	 */
3508 if (hdev->in_debug)
3509 return;
3510
3511 if (!hdev->asic_prop.fw_security_disabled)
3512 return;
3513
3514 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3515 enable = !!(hdev->clock_gating_mask &
3516 (BIT_ULL(gaudi_dma_assignment[i])));
3517
3518 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3519 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3520 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3521 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3522 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3523 }
3524
3525 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3526 enable = !!(hdev->clock_gating_mask &
3527 (BIT_ULL(gaudi_dma_assignment[i])));
3528
3529 		/* GC sends work to the DMA engine through the upper CP in
3530 		 * DMA5, so clock gating must not be enabled for that DMA
3531 		 */
3532 if (i == GAUDI_HBM_DMA_4)
3533 enable = 0;
3534
3535 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3536 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3537 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3538 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3539 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3540 }
3541
3542 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3543 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3544 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3545
3546 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3547 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3548 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3549
3550 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3551 enable = !!(hdev->clock_gating_mask &
3552 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3553
3554 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3555 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3556 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3557 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3558
3559 qman_offset += TPC_QMAN_OFFSET;
3560 }
3561
3562 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3563 }
3564
3565 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3566 {
3567 struct gaudi_device *gaudi = hdev->asic_specific;
3568 u32 qman_offset;
3569 int i;
3570
3571 if (!hdev->asic_prop.fw_security_disabled)
3572 return;
3573
3574 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3575 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3576 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3577
3578 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3579 }
3580
3581 WREG32(mmMME0_QM_CGM_CFG, 0);
3582 WREG32(mmMME0_QM_CGM_CFG1, 0);
3583 WREG32(mmMME2_QM_CGM_CFG, 0);
3584 WREG32(mmMME2_QM_CGM_CFG1, 0);
3585
3586 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3587 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3588 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3589
3590 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3591 }
3592
3593 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3594 }
3595
3596 static void gaudi_enable_timestamp(struct hl_device *hdev)
3597 {
3598 /* Disable the timestamp counter */
3599 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3600
3601 /* Zero the lower/upper parts of the 64-bit counter */
3602 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3603 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3604
3605 /* Enable the counter */
3606 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3607 }
3608
3609 static void gaudi_disable_timestamp(struct hl_device *hdev)
3610 {
3611 /* Disable the timestamp counter */
3612 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3613 }
3614
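/*
 * Halt order: first stop the QMAN command parsers, then disable clock gating,
 * stall the engine cores themselves, and finally disable the QMANs, the
 * timestamp counter and MSI. A wait is inserted between the stop and stall
 * phases to let in-flight work drain.
 */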
3615 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3616 {
3617 u32 wait_timeout_ms;
3618
3619 dev_info(hdev->dev,
3620 "Halting compute engines and disabling interrupts\n");
3621
3622 if (hdev->pldm)
3623 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3624 else
3625 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3626
3627 gaudi_stop_nic_qmans(hdev);
3628 gaudi_stop_mme_qmans(hdev);
3629 gaudi_stop_tpc_qmans(hdev);
3630 gaudi_stop_hbm_dma_qmans(hdev);
3631 gaudi_stop_pci_dma_qmans(hdev);
3632
3633 hdev->asic_funcs->disable_clock_gating(hdev);
3634
3635 msleep(wait_timeout_ms);
3636
3637 gaudi_pci_dma_stall(hdev);
3638 gaudi_hbm_dma_stall(hdev);
3639 gaudi_tpc_stall(hdev);
3640 gaudi_mme_stall(hdev);
3641
3642 msleep(wait_timeout_ms);
3643
3644 gaudi_disable_nic_qmans(hdev);
3645 gaudi_disable_mme_qmans(hdev);
3646 gaudi_disable_tpc_qmans(hdev);
3647 gaudi_disable_hbm_dma_qmans(hdev);
3648 gaudi_disable_pci_dma_qmans(hdev);
3649
3650 gaudi_disable_timestamp(hdev);
3651
3652 gaudi_disable_msi(hdev);
3653 }
3654
3655 static int gaudi_mmu_init(struct hl_device *hdev)
3656 {
3657 struct asic_fixed_properties *prop = &hdev->asic_prop;
3658 struct gaudi_device *gaudi = hdev->asic_specific;
3659 u64 hop0_addr;
3660 int rc, i;
3661
3662 if (!hdev->mmu_enable)
3663 return 0;
3664
3665 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3666 return 0;
3667
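	/* Each ASID has its own hop0 table; the tables are laid out
	 * consecutively starting at the page-tables base address.
	 */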
3668 for (i = 0 ; i < prop->max_asid ; i++) {
3669 hop0_addr = prop->mmu_pgt_addr +
3670 (i * prop->mmu_hop_table_size);
3671
3672 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3673 if (rc) {
3674 dev_err(hdev->dev,
3675 "failed to set hop0 addr for asid %d\n", i);
3676 goto err;
3677 }
3678 }
3679
3680 	/* Init the MMU cache management page */
3681 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3682 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3683
3684 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3685
3686 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3687 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3688
3689 WREG32(mmSTLB_HOP_CONFIGURATION,
3690 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3691
3692 /*
3693 * The H/W expects the first PI after init to be 1. After wraparound
3694 * we'll write 0.
3695 */
3696 gaudi->mmu_cache_inv_pi = 1;
3697
3698 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3699
3700 return 0;
3701
3702 err:
3703 return rc;
3704 }
3705
3706 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3707 {
3708 void __iomem *dst;
3709
3710 /* HBM scrambler must be initialized before pushing F/W to HBM */
3711 gaudi_init_scrambler_hbm(hdev);
3712
3713 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3714
3715 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3716 }
3717
3718 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3719 {
3720 void __iomem *dst;
3721
3722 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3723
3724 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3725 }
3726
3727 static int gaudi_read_device_fw_version(struct hl_device *hdev,
3728 enum hl_fw_component fwc)
3729 {
3730 const char *name;
3731 u32 ver_off;
3732 char *dest;
3733
3734 switch (fwc) {
3735 case FW_COMP_UBOOT:
3736 ver_off = RREG32(mmUBOOT_VER_OFFSET);
3737 dest = hdev->asic_prop.uboot_ver;
3738 name = "U-Boot";
3739 break;
3740 case FW_COMP_PREBOOT:
3741 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
3742 dest = hdev->asic_prop.preboot_ver;
3743 name = "Preboot";
3744 break;
3745 default:
3746 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
3747 return -EIO;
3748 }
3749
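	/* The version register holds an absolute SRAM address; strip the SRAM
	 * base so it can be used as an offset into the SRAM BAR.
	 */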
3750 ver_off &= ~((u32)SRAM_BASE_ADDR);
3751
3752 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
3753 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
3754 VERSION_MAX_LEN);
3755 } else {
3756 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
3757 name, ver_off);
3758 strcpy(dest, "unavailable");
3759 return -EIO;
3760 }
3761
3762 return 0;
3763 }
3764
3765 static int gaudi_init_cpu(struct hl_device *hdev)
3766 {
3767 struct gaudi_device *gaudi = hdev->asic_specific;
3768 int rc;
3769
3770 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3771 return 0;
3772
3773 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3774 return 0;
3775
3776 /*
3777 * The device CPU works with 40 bits addresses.
3778 * This register sets the extension to 50 bits.
3779 */
3780 if (hdev->asic_prop.fw_security_disabled)
3781 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3782
3783 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
3784 mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
3785 mmCPU_CMD_STATUS_TO_HOST,
3786 mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
3787 !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
3788 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
3789
3790 if (rc)
3791 return rc;
3792
3793 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3794
3795 return 0;
3796 }
3797
3798 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3799 {
3800 struct gaudi_device *gaudi = hdev->asic_specific;
3801 struct asic_fixed_properties *prop = &hdev->asic_prop;
3802 struct hl_eq *eq;
3803 u32 status;
3804 struct hl_hw_queue *cpu_pq =
3805 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3806 int err;
3807
3808 if (!hdev->cpu_queues_enable)
3809 return 0;
3810
3811 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3812 return 0;
3813
3814 eq = &hdev->event_queue;
3815
3816 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3817 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3818
3819 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3820 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3821
3822 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3823 lower_32_bits(hdev->cpu_accessible_dma_address));
3824 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3825 upper_32_bits(hdev->cpu_accessible_dma_address));
3826
3827 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3828 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3829 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3830
3831 /* Used for EQ CI */
3832 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3833
3834 WREG32(mmCPU_IF_PF_PQ_PI, 0);
3835
3836 if (gaudi->multi_msi_mode)
3837 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3838 else
3839 WREG32(mmCPU_IF_QUEUE_INIT,
3840 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3841
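	/* Notify the device CPU that the queues were configured; it is expected
	 * to acknowledge by updating the queue-init status polled below.
	 */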
3842 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
3843
3844 err = hl_poll_timeout(
3845 hdev,
3846 mmCPU_IF_QUEUE_INIT,
3847 status,
3848 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3849 1000,
3850 cpu_timeout);
3851
3852 if (err) {
3853 dev_err(hdev->dev,
3854 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3855 return -EIO;
3856 }
3857
3858 /* update FW application security bits */
3859 if (prop->fw_security_status_valid)
3860 prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0);
3861
3862 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3863 return 0;
3864 }
3865
3866 static void gaudi_pre_hw_init(struct hl_device *hdev)
3867 {
3868 /* Perform read from the device to make sure device is up */
3869 RREG32(mmHW_STATE);
3870
3871 if (hdev->asic_prop.fw_security_disabled) {
3872 /* Set the access through PCI bars (Linux driver only) as
3873 * secured
3874 */
3875 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3876 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3877 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3878
3879 /* Perform read to flush the waiting writes to ensure
3880 * configuration was set in the device
3881 */
3882 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3883 }
3884
3885 /*
3886 * Let's mark in the H/W that we have reached this point. We check
3887 * this value in the reset_before_init function to understand whether
3888 * we need to reset the chip before doing H/W init. This register is
3889 * cleared by the H/W upon H/W reset
3890 */
3891 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3892 }
3893
3894 static int gaudi_hw_init(struct hl_device *hdev)
3895 {
3896 int rc;
3897
3898 gaudi_pre_hw_init(hdev);
3899
3900 gaudi_init_pci_dma_qmans(hdev);
3901
3902 gaudi_init_hbm_dma_qmans(hdev);
3903
3904 rc = gaudi_init_cpu(hdev);
3905 if (rc) {
3906 dev_err(hdev->dev, "failed to initialize CPU\n");
3907 return rc;
3908 }
3909
3910 	/* If clock gating was enabled in preboot, we need to disable it here
3911 	 * before touching the MME/TPC registers.
3912 	 * There is no need to take the clock gating mutex because when this
3913 	 * function runs, no other relevant code can run
3914 	 */
3915 hdev->asic_funcs->disable_clock_gating(hdev);
3916
3917 /* SRAM scrambler must be initialized after CPU is running from HBM */
3918 gaudi_init_scrambler_sram(hdev);
3919
3920 /* This is here just in case we are working without CPU */
3921 gaudi_init_scrambler_hbm(hdev);
3922
3923 gaudi_init_golden_registers(hdev);
3924
3925 rc = gaudi_mmu_init(hdev);
3926 if (rc)
3927 return rc;
3928
3929 gaudi_init_security(hdev);
3930
3931 gaudi_init_mme_qmans(hdev);
3932
3933 gaudi_init_tpc_qmans(hdev);
3934
3935 gaudi_init_nic_qmans(hdev);
3936
3937 hdev->asic_funcs->set_clock_gating(hdev);
3938
3939 gaudi_enable_timestamp(hdev);
3940
3941 /* MSI must be enabled before CPU queues and NIC are initialized */
3942 rc = gaudi_enable_msi(hdev);
3943 if (rc)
3944 goto disable_queues;
3945
3946 /* must be called after MSI was enabled */
3947 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3948 if (rc) {
3949 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3950 rc);
3951 goto disable_msi;
3952 }
3953
3954 /* Perform read from the device to flush all configuration */
3955 RREG32(mmHW_STATE);
3956
3957 return 0;
3958
3959 disable_msi:
3960 gaudi_disable_msi(hdev);
3961 disable_queues:
3962 gaudi_disable_mme_qmans(hdev);
3963 gaudi_disable_pci_dma_qmans(hdev);
3964
3965 return rc;
3966 }
3967
3968 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3969 {
3970 struct gaudi_device *gaudi = hdev->asic_specific;
3971 u32 status, reset_timeout_ms, cpu_timeout_ms;
3972
3973 if (!hard_reset) {
3974 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3975 return;
3976 }
3977
3978 if (hdev->pldm) {
3979 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3980 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3981 } else {
3982 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3983 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3984 }
3985
3986 /* Set device to handle FLR by H/W as we will put the device CPU to
3987 * halt mode
3988 */
3989 if (hdev->asic_prop.fw_security_disabled &&
3990 !hdev->asic_prop.hard_reset_done_by_fw)
3991 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
3992 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3993
3994 	/* The state of the CPU is unknown at this point, so make sure it is
3995 	 * stopped by any means necessary
3996 	 */
3997 if (hdev->asic_prop.hard_reset_done_by_fw)
3998 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV);
3999 else
4000 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
4001
4002 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
4003
4004 if (hdev->asic_prop.fw_security_disabled &&
4005 !hdev->asic_prop.hard_reset_done_by_fw) {
4006
4007 /* Configure the reset registers. Must be done as early as
4008 * possible in case we fail during H/W initialization
4009 */
4010 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4011 (CFG_RST_H_DMA_MASK |
4012 CFG_RST_H_MME_MASK |
4013 CFG_RST_H_SM_MASK |
4014 CFG_RST_H_TPC_7_MASK));
4015
4016 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4017
4018 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4019 (CFG_RST_H_HBM_MASK |
4020 CFG_RST_H_TPC_7_MASK |
4021 CFG_RST_H_NIC_MASK |
4022 CFG_RST_H_SM_MASK |
4023 CFG_RST_H_DMA_MASK |
4024 CFG_RST_H_MME_MASK |
4025 CFG_RST_H_CPU_MASK |
4026 CFG_RST_H_MMU_MASK));
4027
4028 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4029 (CFG_RST_L_IF_MASK |
4030 CFG_RST_L_PSOC_MASK |
4031 CFG_RST_L_TPC_MASK));
4032
4033 msleep(cpu_timeout_ms);
4034
4035 /* Tell ASIC not to re-initialize PCIe */
4036 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4037
4038 /* Restart BTL/BLR upon hard-reset */
4039 if (hdev->asic_prop.fw_security_disabled)
4040 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4041
4042 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4043 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4044
4045 dev_info(hdev->dev,
4046 "Issued HARD reset command, going to wait %dms\n",
4047 reset_timeout_ms);
4048 } else {
4049 dev_info(hdev->dev,
4050 "Firmware performs HARD reset, going to wait %dms\n",
4051 reset_timeout_ms);
4052 }
4053
4054 /*
4055 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4056 * itself is in reset. Need to wait until the reset is deasserted
4057 */
4058 msleep(reset_timeout_ms);
4059
4060 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4061 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4062 dev_err(hdev->dev,
4063 "Timeout while waiting for device to reset 0x%x\n",
4064 status);
4065
4066 if (gaudi) {
4067 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4068 HW_CAP_HBM | HW_CAP_PCI_DMA |
4069 HW_CAP_MME | HW_CAP_TPC_MASK |
4070 HW_CAP_HBM_DMA | HW_CAP_PLL |
4071 HW_CAP_NIC_MASK | HW_CAP_MMU |
4072 HW_CAP_SRAM_SCRAMBLER |
4073 HW_CAP_HBM_SCRAMBLER |
4074 HW_CAP_CLK_GATE);
4075
4076 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4077 }
4078 }
4079
4080 static int gaudi_suspend(struct hl_device *hdev)
4081 {
4082 int rc;
4083
4084 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4085 if (rc)
4086 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4087
4088 return rc;
4089 }
4090
4091 static int gaudi_resume(struct hl_device *hdev)
4092 {
4093 return gaudi_init_iatu(hdev);
4094 }
4095
4096 static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4097 void *cpu_addr, dma_addr_t dma_addr, size_t size)
4098 {
4099 int rc;
4100
4101 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4102 VM_DONTCOPY | VM_NORESERVE;
4103
4104 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4105 (dma_addr - HOST_PHYS_BASE), size);
4106 if (rc)
4107 		dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);
4108
4109 return rc;
4110 }
4111
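/*
 * Ring the doorbell of a H/W queue by writing the new PI value to the PQ_PI
 * register that matches the queue ID. For the CPU PQ, an interrupt is also
 * raised towards the device CPU so it will notice the PI update.
 */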
4112 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4113 {
4114 struct gaudi_device *gaudi = hdev->asic_specific;
4115 u32 db_reg_offset, db_value, dma_qm_offset, q_off;
4116 int dma_id;
4117 bool invalid_queue = false;
4118
4119 switch (hw_queue_id) {
4120 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4121 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4122 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4123 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4124 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4125 break;
4126
4127 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4128 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4129 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4130 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4131 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4132 break;
4133
4134 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4135 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4136 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4137 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4138 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4139 break;
4140
4141 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4142 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4143 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4144 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4145 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4146 break;
4147
4148 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4149 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4150 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4151 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4152 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4153 break;
4154
4155 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4156 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4157 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4158 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4159 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4160 break;
4161
4162 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4163 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4164 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4165 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4166 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4167 break;
4168
4169 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4170 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4171 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4172 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4173 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4174 break;
4175
4176 case GAUDI_QUEUE_ID_CPU_PQ:
4177 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4178 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4179 else
4180 invalid_queue = true;
4181 break;
4182
4183 case GAUDI_QUEUE_ID_MME_0_0:
4184 db_reg_offset = mmMME2_QM_PQ_PI_0;
4185 break;
4186
4187 case GAUDI_QUEUE_ID_MME_0_1:
4188 db_reg_offset = mmMME2_QM_PQ_PI_1;
4189 break;
4190
4191 case GAUDI_QUEUE_ID_MME_0_2:
4192 db_reg_offset = mmMME2_QM_PQ_PI_2;
4193 break;
4194
4195 case GAUDI_QUEUE_ID_MME_0_3:
4196 db_reg_offset = mmMME2_QM_PQ_PI_3;
4197 break;
4198
4199 case GAUDI_QUEUE_ID_MME_1_0:
4200 db_reg_offset = mmMME0_QM_PQ_PI_0;
4201 break;
4202
4203 case GAUDI_QUEUE_ID_MME_1_1:
4204 db_reg_offset = mmMME0_QM_PQ_PI_1;
4205 break;
4206
4207 case GAUDI_QUEUE_ID_MME_1_2:
4208 db_reg_offset = mmMME0_QM_PQ_PI_2;
4209 break;
4210
4211 case GAUDI_QUEUE_ID_MME_1_3:
4212 db_reg_offset = mmMME0_QM_PQ_PI_3;
4213 break;
4214
4215 case GAUDI_QUEUE_ID_TPC_0_0:
4216 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4217 break;
4218
4219 case GAUDI_QUEUE_ID_TPC_0_1:
4220 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4221 break;
4222
4223 case GAUDI_QUEUE_ID_TPC_0_2:
4224 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4225 break;
4226
4227 case GAUDI_QUEUE_ID_TPC_0_3:
4228 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4229 break;
4230
4231 case GAUDI_QUEUE_ID_TPC_1_0:
4232 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4233 break;
4234
4235 case GAUDI_QUEUE_ID_TPC_1_1:
4236 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4237 break;
4238
4239 case GAUDI_QUEUE_ID_TPC_1_2:
4240 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4241 break;
4242
4243 case GAUDI_QUEUE_ID_TPC_1_3:
4244 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4245 break;
4246
4247 case GAUDI_QUEUE_ID_TPC_2_0:
4248 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4249 break;
4250
4251 case GAUDI_QUEUE_ID_TPC_2_1:
4252 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4253 break;
4254
4255 case GAUDI_QUEUE_ID_TPC_2_2:
4256 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4257 break;
4258
4259 case GAUDI_QUEUE_ID_TPC_2_3:
4260 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4261 break;
4262
4263 case GAUDI_QUEUE_ID_TPC_3_0:
4264 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4265 break;
4266
4267 case GAUDI_QUEUE_ID_TPC_3_1:
4268 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4269 break;
4270
4271 case GAUDI_QUEUE_ID_TPC_3_2:
4272 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4273 break;
4274
4275 case GAUDI_QUEUE_ID_TPC_3_3:
4276 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4277 break;
4278
4279 case GAUDI_QUEUE_ID_TPC_4_0:
4280 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4281 break;
4282
4283 case GAUDI_QUEUE_ID_TPC_4_1:
4284 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4285 break;
4286
4287 case GAUDI_QUEUE_ID_TPC_4_2:
4288 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4289 break;
4290
4291 case GAUDI_QUEUE_ID_TPC_4_3:
4292 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4293 break;
4294
4295 case GAUDI_QUEUE_ID_TPC_5_0:
4296 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4297 break;
4298
4299 case GAUDI_QUEUE_ID_TPC_5_1:
4300 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4301 break;
4302
4303 case GAUDI_QUEUE_ID_TPC_5_2:
4304 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4305 break;
4306
4307 case GAUDI_QUEUE_ID_TPC_5_3:
4308 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4309 break;
4310
4311 case GAUDI_QUEUE_ID_TPC_6_0:
4312 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4313 break;
4314
4315 case GAUDI_QUEUE_ID_TPC_6_1:
4316 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4317 break;
4318
4319 case GAUDI_QUEUE_ID_TPC_6_2:
4320 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4321 break;
4322
4323 case GAUDI_QUEUE_ID_TPC_6_3:
4324 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4325 break;
4326
4327 case GAUDI_QUEUE_ID_TPC_7_0:
4328 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4329 break;
4330
4331 case GAUDI_QUEUE_ID_TPC_7_1:
4332 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4333 break;
4334
4335 case GAUDI_QUEUE_ID_TPC_7_2:
4336 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4337 break;
4338
4339 case GAUDI_QUEUE_ID_TPC_7_3:
4340 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4341 break;
4342
4343 case GAUDI_QUEUE_ID_NIC_0_0:
4344 db_reg_offset = mmNIC0_QM0_PQ_PI_0;
4345 break;
4346
4347 case GAUDI_QUEUE_ID_NIC_0_1:
4348 db_reg_offset = mmNIC0_QM0_PQ_PI_1;
4349 break;
4350
4351 case GAUDI_QUEUE_ID_NIC_0_2:
4352 db_reg_offset = mmNIC0_QM0_PQ_PI_2;
4353 break;
4354
4355 case GAUDI_QUEUE_ID_NIC_0_3:
4356 db_reg_offset = mmNIC0_QM0_PQ_PI_3;
4357 break;
4358
4359 case GAUDI_QUEUE_ID_NIC_1_0:
4360 db_reg_offset = mmNIC0_QM1_PQ_PI_0;
4361 break;
4362
4363 case GAUDI_QUEUE_ID_NIC_1_1:
4364 db_reg_offset = mmNIC0_QM1_PQ_PI_1;
4365 break;
4366
4367 case GAUDI_QUEUE_ID_NIC_1_2:
4368 db_reg_offset = mmNIC0_QM1_PQ_PI_2;
4369 break;
4370
4371 case GAUDI_QUEUE_ID_NIC_1_3:
4372 db_reg_offset = mmNIC0_QM1_PQ_PI_3;
4373 break;
4374
4375 case GAUDI_QUEUE_ID_NIC_2_0:
4376 db_reg_offset = mmNIC1_QM0_PQ_PI_0;
4377 break;
4378
4379 case GAUDI_QUEUE_ID_NIC_2_1:
4380 db_reg_offset = mmNIC1_QM0_PQ_PI_1;
4381 break;
4382
4383 case GAUDI_QUEUE_ID_NIC_2_2:
4384 db_reg_offset = mmNIC1_QM0_PQ_PI_2;
4385 break;
4386
4387 case GAUDI_QUEUE_ID_NIC_2_3:
4388 db_reg_offset = mmNIC1_QM0_PQ_PI_3;
4389 break;
4390
4391 case GAUDI_QUEUE_ID_NIC_3_0:
4392 db_reg_offset = mmNIC1_QM1_PQ_PI_0;
4393 break;
4394
4395 case GAUDI_QUEUE_ID_NIC_3_1:
4396 db_reg_offset = mmNIC1_QM1_PQ_PI_1;
4397 break;
4398
4399 case GAUDI_QUEUE_ID_NIC_3_2:
4400 db_reg_offset = mmNIC1_QM1_PQ_PI_2;
4401 break;
4402
4403 case GAUDI_QUEUE_ID_NIC_3_3:
4404 db_reg_offset = mmNIC1_QM1_PQ_PI_3;
4405 break;
4406
4407 case GAUDI_QUEUE_ID_NIC_4_0:
4408 db_reg_offset = mmNIC2_QM0_PQ_PI_0;
4409 break;
4410
4411 case GAUDI_QUEUE_ID_NIC_4_1:
4412 db_reg_offset = mmNIC2_QM0_PQ_PI_1;
4413 break;
4414
4415 case GAUDI_QUEUE_ID_NIC_4_2:
4416 db_reg_offset = mmNIC2_QM0_PQ_PI_2;
4417 break;
4418
4419 case GAUDI_QUEUE_ID_NIC_4_3:
4420 db_reg_offset = mmNIC2_QM0_PQ_PI_3;
4421 break;
4422
4423 case GAUDI_QUEUE_ID_NIC_5_0:
4424 db_reg_offset = mmNIC2_QM1_PQ_PI_0;
4425 break;
4426
4427 case GAUDI_QUEUE_ID_NIC_5_1:
4428 db_reg_offset = mmNIC2_QM1_PQ_PI_1;
4429 break;
4430
4431 case GAUDI_QUEUE_ID_NIC_5_2:
4432 db_reg_offset = mmNIC2_QM1_PQ_PI_2;
4433 break;
4434
4435 case GAUDI_QUEUE_ID_NIC_5_3:
4436 db_reg_offset = mmNIC2_QM1_PQ_PI_3;
4437 break;
4438
4439 case GAUDI_QUEUE_ID_NIC_6_0:
4440 db_reg_offset = mmNIC3_QM0_PQ_PI_0;
4441 break;
4442
4443 case GAUDI_QUEUE_ID_NIC_6_1:
4444 db_reg_offset = mmNIC3_QM0_PQ_PI_1;
4445 break;
4446
4447 case GAUDI_QUEUE_ID_NIC_6_2:
4448 db_reg_offset = mmNIC3_QM0_PQ_PI_2;
4449 break;
4450
4451 case GAUDI_QUEUE_ID_NIC_6_3:
4452 db_reg_offset = mmNIC3_QM0_PQ_PI_3;
4453 break;
4454
4455 case GAUDI_QUEUE_ID_NIC_7_0:
4456 db_reg_offset = mmNIC3_QM1_PQ_PI_0;
4457 break;
4458
4459 case GAUDI_QUEUE_ID_NIC_7_1:
4460 db_reg_offset = mmNIC3_QM1_PQ_PI_1;
4461 break;
4462
4463 case GAUDI_QUEUE_ID_NIC_7_2:
4464 db_reg_offset = mmNIC3_QM1_PQ_PI_2;
4465 break;
4466
4467 case GAUDI_QUEUE_ID_NIC_7_3:
4468 db_reg_offset = mmNIC3_QM1_PQ_PI_3;
4469 break;
4470
4471 case GAUDI_QUEUE_ID_NIC_8_0:
4472 db_reg_offset = mmNIC4_QM0_PQ_PI_0;
4473 break;
4474
4475 case GAUDI_QUEUE_ID_NIC_8_1:
4476 db_reg_offset = mmNIC4_QM0_PQ_PI_1;
4477 break;
4478
4479 case GAUDI_QUEUE_ID_NIC_8_2:
4480 db_reg_offset = mmNIC4_QM0_PQ_PI_2;
4481 break;
4482
4483 case GAUDI_QUEUE_ID_NIC_8_3:
4484 db_reg_offset = mmNIC4_QM0_PQ_PI_3;
4485 break;
4486
4487 case GAUDI_QUEUE_ID_NIC_9_0:
4488 db_reg_offset = mmNIC4_QM1_PQ_PI_0;
4489 break;
4490
4491 case GAUDI_QUEUE_ID_NIC_9_1:
4492 db_reg_offset = mmNIC4_QM1_PQ_PI_1;
4493 break;
4494
4495 case GAUDI_QUEUE_ID_NIC_9_2:
4496 db_reg_offset = mmNIC4_QM1_PQ_PI_2;
4497 break;
4498
4499 case GAUDI_QUEUE_ID_NIC_9_3:
4500 db_reg_offset = mmNIC4_QM1_PQ_PI_3;
4501 break;
4502
4503 default:
4504 invalid_queue = true;
4505 }
4506
4507 if (invalid_queue) {
4508 /* Should never get here */
4509 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4510 hw_queue_id);
4511 return;
4512 }
4513
4514 db_value = pi;
4515
4516 /* ring the doorbell */
4517 WREG32(db_reg_offset, db_value);
4518
4519 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4520 /* make sure device CPU will read latest data from host */
4521 mb();
4522 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
4523 GAUDI_EVENT_PI_UPDATE);
4524 }
4525 }
4526
4527 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4528 struct hl_bd *bd)
4529 {
4530 __le64 *pbd = (__le64 *) bd;
4531
4532 	/* The QMANs are on the host memory so a simple copy suffices */
4533 pqe[0] = pbd[0];
4534 pqe[1] = pbd[1];
4535 }
4536
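/*
 * Host memory is seen by the device at an offset of HOST_PHYS_BASE, so DMA
 * addresses handed to the device are shifted by that base on allocation and
 * shifted back before being returned to the kernel DMA API on free.
 */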
4537 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4538 dma_addr_t *dma_handle, gfp_t flags)
4539 {
4540 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4541 dma_handle, flags);
4542
4543 /* Shift to the device's base physical address of host memory */
4544 if (kernel_addr)
4545 *dma_handle += HOST_PHYS_BASE;
4546
4547 return kernel_addr;
4548 }
4549
4550 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4551 void *cpu_addr, dma_addr_t dma_handle)
4552 {
4553 /* Cancel the device's base physical address of host memory */
4554 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4555
4556 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4557 }
4558
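/*
 * Scrub the HBM by splitting the DRAM user range into chunks of up to 2GB,
 * submitting a memset transfer on each DMA core in turn, and then polling all
 * the cores until they are no longer busy.
 */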
4559 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4560 {
4561 struct asic_fixed_properties *prop = &hdev->asic_prop;
4562 u64 cur_addr = DRAM_BASE_ADDR_USER;
4563 u32 val;
4564 u32 chunk_size;
4565 int rc, dma_id;
4566
4567 while (cur_addr < prop->dram_end_address) {
4568 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4569 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4570
4571 chunk_size =
4572 min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4573
4574 dev_dbg(hdev->dev,
4575 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4576 cur_addr, cur_addr + chunk_size);
4577
4578 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
4579 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
4580 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4581 lower_32_bits(cur_addr));
4582 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4583 upper_32_bits(cur_addr));
4584 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4585 chunk_size);
4586 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4587 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4588 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4589
4590 cur_addr += chunk_size;
4591
4592 if (cur_addr == prop->dram_end_address)
4593 break;
4594 }
4595
4596 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4597 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4598
4599 rc = hl_poll_timeout(
4600 hdev,
4601 mmDMA0_CORE_STS0 + dma_offset,
4602 val,
4603 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4604 1000,
4605 HBM_SCRUBBING_TIMEOUT_US);
4606
4607 if (rc) {
4608 dev_err(hdev->dev,
4609 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4610 dma_id);
4611 return -EIO;
4612 }
4613 }
4614 }
4615
4616 return 0;
4617 }
4618
4619 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4620 {
4621 struct asic_fixed_properties *prop = &hdev->asic_prop;
4622 struct gaudi_device *gaudi = hdev->asic_specific;
4623 int rc = 0;
4624 u64 val = 0;
4625
4626 if (!hdev->memory_scrub)
4627 return 0;
4628
4629 if (!addr && !size) {
4630 /* Wait till device is idle */
4631 rc = hl_poll_timeout(
4632 hdev,
4633 mmDMA0_CORE_STS0/* dummy */,
4634 val/* dummy */,
4635 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4636 0, NULL)),
4637 1000,
4638 HBM_SCRUBBING_TIMEOUT_US);
4639 if (rc) {
4640 			dev_err(hdev->dev, "Timeout while waiting for device to become idle\n");
4641 return -EIO;
4642 }
4643
4644 /* Scrub SRAM */
4645 addr = prop->sram_user_base_address;
4646 size = hdev->pldm ? 0x10000 :
4647 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4648 val = 0x7777777777777777ull;
4649
4650 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4651 if (rc) {
4652 dev_err(hdev->dev,
4653 "Failed to clear SRAM in mem scrub all\n");
4654 return rc;
4655 }
4656
4657 mutex_lock(&gaudi->clk_gate_mutex);
4658 hdev->asic_funcs->disable_clock_gating(hdev);
4659
4660 /* Scrub HBM using all DMA channels in parallel */
4661 rc = gaudi_hbm_scrubbing(hdev);
4662 if (rc)
4663 dev_err(hdev->dev,
4664 "Failed to clear HBM in mem scrub all\n");
4665
4666 hdev->asic_funcs->set_clock_gating(hdev);
4667 mutex_unlock(&gaudi->clk_gate_mutex);
4668 }
4669
4670 return rc;
4671 }
4672
4673 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4674 u32 queue_id, dma_addr_t *dma_handle,
4675 u16 *queue_len)
4676 {
4677 struct gaudi_device *gaudi = hdev->asic_specific;
4678 struct gaudi_internal_qman_info *q;
4679
4680 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4681 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4682 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4683 return NULL;
4684 }
4685
4686 q = &gaudi->internal_qmans[queue_id];
4687 *dma_handle = q->pq_dma_addr;
4688 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4689
4690 return q->pq_kernel_addr;
4691 }
4692
4693 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4694 u16 len, u32 timeout, u64 *result)
4695 {
4696 struct gaudi_device *gaudi = hdev->asic_specific;
4697
4698 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4699 if (result)
4700 *result = 0;
4701 return 0;
4702 }
4703
4704 if (!timeout)
4705 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4706
4707 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4708 timeout, result);
4709 }
4710
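/*
 * Test an external H/W queue by allocating a scratch DWORD in host memory,
 * sending a MSG_PROT packet that writes a known fence value to it, and then
 * polling that memory until the value shows up or the timeout expires.
 */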
4711 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4712 {
4713 struct packet_msg_prot *fence_pkt;
4714 dma_addr_t pkt_dma_addr;
4715 u32 fence_val, tmp, timeout_usec;
4716 dma_addr_t fence_dma_addr;
4717 u32 *fence_ptr;
4718 int rc;
4719
4720 if (hdev->pldm)
4721 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4722 else
4723 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4724
4725 fence_val = GAUDI_QMAN0_FENCE_VAL;
4726
4727 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4728 &fence_dma_addr);
4729 if (!fence_ptr) {
4730 dev_err(hdev->dev,
4731 "Failed to allocate memory for H/W queue %d testing\n",
4732 hw_queue_id);
4733 return -ENOMEM;
4734 }
4735
4736 *fence_ptr = 0;
4737
4738 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4739 sizeof(struct packet_msg_prot),
4740 GFP_KERNEL, &pkt_dma_addr);
4741 if (!fence_pkt) {
4742 dev_err(hdev->dev,
4743 "Failed to allocate packet for H/W queue %d testing\n",
4744 hw_queue_id);
4745 rc = -ENOMEM;
4746 goto free_fence_ptr;
4747 }
4748
4749 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4750 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4751 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4752
4753 fence_pkt->ctl = cpu_to_le32(tmp);
4754 fence_pkt->value = cpu_to_le32(fence_val);
4755 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4756
4757 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4758 sizeof(struct packet_msg_prot),
4759 pkt_dma_addr);
4760 if (rc) {
4761 dev_err(hdev->dev,
4762 "Failed to send fence packet to H/W queue %d\n",
4763 hw_queue_id);
4764 goto free_pkt;
4765 }
4766
4767 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4768 1000, timeout_usec, true);
4769
4770 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4771
4772 if (rc == -ETIMEDOUT) {
4773 dev_err(hdev->dev,
4774 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4775 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4776 rc = -EIO;
4777 }
4778
4779 free_pkt:
4780 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4781 pkt_dma_addr);
4782 free_fence_ptr:
4783 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4784 fence_dma_addr);
4785 return rc;
4786 }
4787
4788 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4789 {
4790 struct gaudi_device *gaudi = hdev->asic_specific;
4791
4792 	/*
4793 	 * Check the capability here because send_cpu_message() won't update
4794 	 * the result value if the capability is not set
4795 	 */
4796 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4797 return 0;
4798
4799 return hl_fw_test_cpu_queue(hdev);
4800 }
4801
4802 static int gaudi_test_queues(struct hl_device *hdev)
4803 {
4804 int i, rc, ret_val = 0;
4805
4806 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4807 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4808 rc = gaudi_test_queue(hdev, i);
4809 if (rc)
4810 ret_val = -EINVAL;
4811 }
4812 }
4813
4814 rc = gaudi_test_cpu_queue(hdev);
4815 if (rc)
4816 ret_val = -EINVAL;
4817
4818 return ret_val;
4819 }
4820
4821 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4822 gfp_t mem_flags, dma_addr_t *dma_handle)
4823 {
4824 void *kernel_addr;
4825
4826 if (size > GAUDI_DMA_POOL_BLK_SIZE)
4827 return NULL;
4828
4829 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4830
4831 /* Shift to the device's base physical address of host memory */
4832 if (kernel_addr)
4833 *dma_handle += HOST_PHYS_BASE;
4834
4835 return kernel_addr;
4836 }
4837
4838 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4839 dma_addr_t dma_addr)
4840 {
4841 /* Cancel the device's base physical address of host memory */
4842 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4843
4844 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4845 }
4846
4847 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4848 size_t size, dma_addr_t *dma_handle)
4849 {
4850 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4851 }
4852
4853 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4854 size_t size, void *vaddr)
4855 {
4856 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4857 }
4858
4859 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
4860 int nents, enum dma_data_direction dir)
4861 {
4862 struct scatterlist *sg;
4863 int i;
4864
4865 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
4866 return -ENOMEM;
4867
4868 /* Shift to the device's base physical address of host memory */
4869 for_each_sg(sgl, sg, nents, i)
4870 sg->dma_address += HOST_PHYS_BASE;
4871
4872 return 0;
4873 }
4874
4875 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
4876 int nents, enum dma_data_direction dir)
4877 {
4878 struct scatterlist *sg;
4879 int i;
4880
4881 /* Cancel the device's base physical address of host memory */
4882 for_each_sg(sgl, sg, nents, i)
4883 sg->dma_address -= HOST_PHYS_BASE;
4884
4885 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
4886 }
4887
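/*
 * Return the number of bytes of LIN_DMA packets needed to cover a scatter
 * list, merging entries that are physically contiguous as long as the merged
 * size does not exceed the maximum DMA transfer size.
 */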
4888 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
4889 struct sg_table *sgt)
4890 {
4891 struct scatterlist *sg, *sg_next_iter;
4892 u32 count, dma_desc_cnt;
4893 u64 len, len_next;
4894 dma_addr_t addr, addr_next;
4895
4896 dma_desc_cnt = 0;
4897
4898 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
4899
4900 len = sg_dma_len(sg);
4901 addr = sg_dma_address(sg);
4902
4903 if (len == 0)
4904 break;
4905
4906 while ((count + 1) < sgt->nents) {
4907 sg_next_iter = sg_next(sg);
4908 len_next = sg_dma_len(sg_next_iter);
4909 addr_next = sg_dma_address(sg_next_iter);
4910
4911 if (len_next == 0)
4912 break;
4913
4914 if ((addr + len == addr_next) &&
4915 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4916 len += len_next;
4917 count++;
4918 sg = sg_next_iter;
4919 } else {
4920 break;
4921 }
4922 }
4923
4924 dma_desc_cnt++;
4925 }
4926
4927 return dma_desc_cnt * sizeof(struct packet_lin_dma);
4928 }
4929
4930 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4931 struct hl_cs_parser *parser,
4932 struct packet_lin_dma *user_dma_pkt,
4933 u64 addr, enum dma_data_direction dir)
4934 {
4935 struct hl_userptr *userptr;
4936 int rc;
4937
4938 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4939 parser->job_userptr_list, &userptr))
4940 goto already_pinned;
4941
4942 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4943 if (!userptr)
4944 return -ENOMEM;
4945
4946 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4947 userptr);
4948 if (rc)
4949 goto free_userptr;
4950
4951 list_add_tail(&userptr->job_node, parser->job_userptr_list);
4952
4953 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
4954 userptr->sgt->nents, dir);
4955 if (rc) {
4956 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4957 goto unpin_memory;
4958 }
4959
4960 userptr->dma_mapped = true;
4961 userptr->dir = dir;
4962
4963 already_pinned:
4964 parser->patched_cb_size +=
4965 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4966
4967 return 0;
4968
4969 unpin_memory:
4970 hl_unpin_host_memory(hdev, userptr);
4971 free_userptr:
4972 kfree(userptr);
4973 return rc;
4974 }
4975
4976 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4977 struct hl_cs_parser *parser,
4978 struct packet_lin_dma *user_dma_pkt,
4979 bool src_in_host)
4980 {
4981 enum dma_data_direction dir;
4982 bool skip_host_mem_pin = false, user_memset;
4983 u64 addr;
4984 int rc = 0;
4985
4986 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4987 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4988 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4989
4990 if (src_in_host) {
4991 if (user_memset)
4992 skip_host_mem_pin = true;
4993
4994 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4995 dir = DMA_TO_DEVICE;
4996 addr = le64_to_cpu(user_dma_pkt->src_addr);
4997 } else {
4998 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4999 dir = DMA_FROM_DEVICE;
5000 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5001 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5002 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5003 }
5004
5005 if (skip_host_mem_pin)
5006 parser->patched_cb_size += sizeof(*user_dma_pkt);
5007 else
5008 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5009 addr, dir);
5010
5011 return rc;
5012 }
5013
5014 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5015 struct hl_cs_parser *parser,
5016 struct packet_lin_dma *user_dma_pkt)
5017 {
5018 bool src_in_host = false;
5019 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5020 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5021 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5022
5023 dev_dbg(hdev->dev, "DMA packet details:\n");
5024 dev_dbg(hdev->dev, "source == 0x%llx\n",
5025 le64_to_cpu(user_dma_pkt->src_addr));
5026 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5027 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5028
5029 /*
5030 * Special handling for DMA with size 0. Bypass all validations
5031 * because no transactions will be done except for WR_COMP, which
5032 * is not a security issue
5033 */
5034 if (!le32_to_cpu(user_dma_pkt->tsize)) {
5035 parser->patched_cb_size += sizeof(*user_dma_pkt);
5036 return 0;
5037 }
5038
5039 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5040 src_in_host = true;
5041
5042 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5043 src_in_host);
5044 }
5045
5046 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5047 struct hl_cs_parser *parser,
5048 struct packet_load_and_exe *user_pkt)
5049 {
5050 u32 cfg;
5051
5052 cfg = le32_to_cpu(user_pkt->cfg);
5053
5054 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5055 dev_err(hdev->dev,
5056 "User not allowed to use Load and Execute\n");
5057 return -EPERM;
5058 }
5059
5060 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5061
5062 return 0;
5063 }
5064
5065 static int gaudi_validate_cb(struct hl_device *hdev,
5066 struct hl_cs_parser *parser, bool is_mmu)
5067 {
5068 u32 cb_parsed_length = 0;
5069 int rc = 0;
5070
5071 parser->patched_cb_size = 0;
5072
5073 /* user_cb_size is greater than 0, so the loop always executes at least once */
5074 while (cb_parsed_length < parser->user_cb_size) {
5075 enum packet_id pkt_id;
5076 u16 pkt_size;
5077 struct gaudi_packet *user_pkt;
5078
5079 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5080
5081 pkt_id = (enum packet_id) (
5082 (le64_to_cpu(user_pkt->header) &
5083 PACKET_HEADER_PACKET_ID_MASK) >>
5084 PACKET_HEADER_PACKET_ID_SHIFT);
5085
5086 if (!validate_packet_id(pkt_id)) {
5087 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5088 rc = -EINVAL;
5089 break;
5090 }
5091
5092 pkt_size = gaudi_packet_sizes[pkt_id];
5093 cb_parsed_length += pkt_size;
5094 if (cb_parsed_length > parser->user_cb_size) {
5095 dev_err(hdev->dev,
5096 "packet 0x%x is out of CB boundary\n", pkt_id);
5097 rc = -EINVAL;
5098 break;
5099 }
5100
5101 switch (pkt_id) {
5102 case PACKET_MSG_PROT:
5103 dev_err(hdev->dev,
5104 "User not allowed to use MSG_PROT\n");
5105 rc = -EPERM;
5106 break;
5107
5108 case PACKET_CP_DMA:
5109 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5110 rc = -EPERM;
5111 break;
5112
5113 case PACKET_STOP:
5114 dev_err(hdev->dev, "User not allowed to use STOP\n");
5115 rc = -EPERM;
5116 break;
5117
5118 case PACKET_WREG_BULK:
5119 dev_err(hdev->dev,
5120 "User not allowed to use WREG_BULK\n");
5121 rc = -EPERM;
5122 break;
5123
5124 case PACKET_LOAD_AND_EXE:
5125 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5126 (struct packet_load_and_exe *) user_pkt);
5127 break;
5128
5129 case PACKET_LIN_DMA:
5130 parser->contains_dma_pkt = true;
5131 if (is_mmu)
5132 parser->patched_cb_size += pkt_size;
5133 else
5134 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5135 (struct packet_lin_dma *) user_pkt);
5136 break;
5137
5138 case PACKET_WREG_32:
5139 case PACKET_MSG_LONG:
5140 case PACKET_MSG_SHORT:
5141 case PACKET_REPEAT:
5142 case PACKET_FENCE:
5143 case PACKET_NOP:
5144 case PACKET_ARB_POINT:
5145 parser->patched_cb_size += pkt_size;
5146 break;
5147
5148 default:
5149 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5150 pkt_id);
5151 rc = -EINVAL;
5152 break;
5153 }
5154
5155 if (rc)
5156 break;
5157 }
5158
5159 /*
5160 * The new CB should have space at the end for two MSG_PROT packets:
5161 * 1. A packet that will act as a completion packet
5162 * 2. A packet that will generate MSI-X interrupt
5163 */
5164 if (parser->completion)
5165 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5166
5167 return rc;
5168 }
5169
5170 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5171 struct hl_cs_parser *parser,
5172 struct packet_lin_dma *user_dma_pkt,
5173 struct packet_lin_dma *new_dma_pkt,
5174 u32 *new_dma_pkt_size)
5175 {
5176 struct hl_userptr *userptr;
5177 struct scatterlist *sg, *sg_next_iter;
5178 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5179 u64 len, len_next;
5180 dma_addr_t dma_addr, dma_addr_next;
5181 u64 device_memory_addr, addr;
5182 enum dma_data_direction dir;
5183 struct sg_table *sgt;
5184 bool src_in_host = false;
5185 bool skip_host_mem_pin = false;
5186 bool user_memset;
5187
5188 ctl = le32_to_cpu(user_dma_pkt->ctl);
5189
5190 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5191 src_in_host = true;
5192
5193 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5194 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5195
5196 if (src_in_host) {
5197 addr = le64_to_cpu(user_dma_pkt->src_addr);
5198 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5199 dir = DMA_TO_DEVICE;
5200 if (user_memset)
5201 skip_host_mem_pin = true;
5202 } else {
5203 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5204 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5205 dir = DMA_FROM_DEVICE;
5206 }
5207
5208 if ((!skip_host_mem_pin) &&
5209 (!hl_userptr_is_pinned(hdev, addr,
5210 le32_to_cpu(user_dma_pkt->tsize),
5211 parser->job_userptr_list, &userptr))) {
5212 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5213 addr, le32_to_cpu(user_dma_pkt->tsize));
5214 return -EFAULT;
5215 }
5216
5217 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5218 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5219 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5220 return 0;
5221 }
5222
5223 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5224
5225 sgt = userptr->sgt;
5226 dma_desc_cnt = 0;
5227
5228 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5229 len = sg_dma_len(sg);
5230 dma_addr = sg_dma_address(sg);
5231
5232 if (len == 0)
5233 break;
5234
5235 while ((count + 1) < sgt->nents) {
5236 sg_next_iter = sg_next(sg);
5237 len_next = sg_dma_len(sg_next_iter);
5238 dma_addr_next = sg_dma_address(sg_next_iter);
5239
5240 if (len_next == 0)
5241 break;
5242
5243 if ((dma_addr + len == dma_addr_next) &&
5244 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5245 len += len_next;
5246 count++;
5247 sg = sg_next_iter;
5248 } else {
5249 break;
5250 }
5251 }
5252
5253 ctl = le32_to_cpu(user_dma_pkt->ctl);
5254 if (likely(dma_desc_cnt))
5255 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5256 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5257 new_dma_pkt->ctl = cpu_to_le32(ctl);
5258 new_dma_pkt->tsize = cpu_to_le32(len);
5259
5260 if (dir == DMA_TO_DEVICE) {
5261 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5262 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5263 } else {
5264 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5265 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5266 }
5267
5268 if (!user_memset)
5269 device_memory_addr += len;
5270 dma_desc_cnt++;
5271 new_dma_pkt++;
5272 }
5273
5274 if (!dma_desc_cnt) {
5275 dev_err(hdev->dev,
5276 "No SG entries found when patching DMA packet\n");
5277 return -EFAULT;
5278 }
5279
5280 /* Fix the last dma packet - wrcomp must be as user set it */
5281 new_dma_pkt--;
5282 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5283
5284 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5285
5286 return 0;
5287 }
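
/*
 * Worked example of the coalescing loop above (a sketch, assuming the DMA
 * mapping produced these entries):
 *
 *	sg[0] = {dma_addr = 0x1000,  len = 0x1000}
 *	sg[1] = {dma_addr = 0x2000,  len = 0x1000}  -> contiguous, merged into
 *	                                               one descriptor of 0x2000
 *	sg[2] = {dma_addr = 0x10000, len = 0x1000}  -> gap, starts a new
 *	                                               descriptor
 *
 * Entries are merged only while the combined length stays within
 * DMA_MAX_TRANSFER_SIZE. WR_COMP is cleared on every generated descriptor and
 * the user's original WR_COMP setting is restored only on the last one, so at
 * most a single completion is raised for the whole transfer.
 */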
5288
5289 static int gaudi_patch_cb(struct hl_device *hdev,
5290 struct hl_cs_parser *parser)
5291 {
5292 u32 cb_parsed_length = 0;
5293 u32 cb_patched_cur_length = 0;
5294 int rc = 0;
5295
5296 /* user_cb_size is greater than 0, so the loop always executes at least once */
5297 while (cb_parsed_length < parser->user_cb_size) {
5298 enum packet_id pkt_id;
5299 u16 pkt_size;
5300 u32 new_pkt_size = 0;
5301 struct gaudi_packet *user_pkt, *kernel_pkt;
5302
5303 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5304 kernel_pkt = parser->patched_cb->kernel_address +
5305 cb_patched_cur_length;
5306
5307 pkt_id = (enum packet_id) (
5308 (le64_to_cpu(user_pkt->header) &
5309 PACKET_HEADER_PACKET_ID_MASK) >>
5310 PACKET_HEADER_PACKET_ID_SHIFT);
5311
5312 if (!validate_packet_id(pkt_id)) {
5313 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5314 rc = -EINVAL;
5315 break;
5316 }
5317
5318 pkt_size = gaudi_packet_sizes[pkt_id];
5319 cb_parsed_length += pkt_size;
5320 if (cb_parsed_length > parser->user_cb_size) {
5321 dev_err(hdev->dev,
5322 "packet 0x%x is out of CB boundary\n", pkt_id);
5323 rc = -EINVAL;
5324 break;
5325 }
5326
5327 switch (pkt_id) {
5328 case PACKET_LIN_DMA:
5329 rc = gaudi_patch_dma_packet(hdev, parser,
5330 (struct packet_lin_dma *) user_pkt,
5331 (struct packet_lin_dma *) kernel_pkt,
5332 &new_pkt_size);
5333 cb_patched_cur_length += new_pkt_size;
5334 break;
5335
5336 case PACKET_MSG_PROT:
5337 dev_err(hdev->dev,
5338 "User not allowed to use MSG_PROT\n");
5339 rc = -EPERM;
5340 break;
5341
5342 case PACKET_CP_DMA:
5343 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5344 rc = -EPERM;
5345 break;
5346
5347 case PACKET_STOP:
5348 dev_err(hdev->dev, "User not allowed to use STOP\n");
5349 rc = -EPERM;
5350 break;
5351
5352 case PACKET_WREG_32:
5353 case PACKET_WREG_BULK:
5354 case PACKET_MSG_LONG:
5355 case PACKET_MSG_SHORT:
5356 case PACKET_REPEAT:
5357 case PACKET_FENCE:
5358 case PACKET_NOP:
5359 case PACKET_ARB_POINT:
5360 case PACKET_LOAD_AND_EXE:
5361 memcpy(kernel_pkt, user_pkt, pkt_size);
5362 cb_patched_cur_length += pkt_size;
5363 break;
5364
5365 default:
5366 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5367 pkt_id);
5368 rc = -EINVAL;
5369 break;
5370 }
5371
5372 if (rc)
5373 break;
5374 }
5375
5376 return rc;
5377 }
5378
5379 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5380 struct hl_cs_parser *parser)
5381 {
5382 u64 patched_cb_handle;
5383 u32 patched_cb_size;
5384 struct hl_cb *user_cb;
5385 int rc;
5386
5387 /*
5388 * The new CB should have space at the end for two MSG_PROT packets:
5389 * 1. A packet that will act as a completion packet
5390 * 2. A packet that will generate MSI interrupt
5391 */
5392 if (parser->completion)
5393 parser->patched_cb_size = parser->user_cb_size +
5394 sizeof(struct packet_msg_prot) * 2;
5395 else
5396 parser->patched_cb_size = parser->user_cb_size;
5397
5398 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5399 parser->patched_cb_size, false, false,
5400 &patched_cb_handle);
5401
5402 if (rc) {
5403 dev_err(hdev->dev,
5404 "Failed to allocate patched CB for DMA CS %d\n",
5405 rc);
5406 return rc;
5407 }
5408
5409 patched_cb_handle >>= PAGE_SHIFT;
5410 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5411 (u32) patched_cb_handle);
5412 /* hl_cb_get should never fail */
5413 if (!parser->patched_cb) {
5414 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5415 (u32) patched_cb_handle);
5416 rc = -EFAULT;
5417 goto out;
5418 }
5419
5420 /*
5421 * The check that parser->user_cb_size <= parser->user_cb->size was done
5422 * in validate_queue_index().
5423 */
5424 memcpy(parser->patched_cb->kernel_address,
5425 parser->user_cb->kernel_address,
5426 parser->user_cb_size);
5427
5428 patched_cb_size = parser->patched_cb_size;
5429
5430 /* Validate patched CB instead of user CB */
5431 user_cb = parser->user_cb;
5432 parser->user_cb = parser->patched_cb;
5433 rc = gaudi_validate_cb(hdev, parser, true);
5434 parser->user_cb = user_cb;
5435
5436 if (rc) {
5437 hl_cb_put(parser->patched_cb);
5438 goto out;
5439 }
5440
5441 if (patched_cb_size != parser->patched_cb_size) {
5442 dev_err(hdev->dev, "user CB size mismatch\n");
5443 hl_cb_put(parser->patched_cb);
5444 rc = -EINVAL;
5445 goto out;
5446 }
5447
5448 out:
5449 /*
5450 * Always call cb destroy here because we still hold one reference
5451 * to it from the earlier cb_get call. After the job completes,
5452 * cb_put will release it, but here we want to remove it from the
5453 * idr
5454 */
5455 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5456 patched_cb_handle << PAGE_SHIFT);
5457
5458 return rc;
5459 }
5460
5461 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5462 struct hl_cs_parser *parser)
5463 {
5464 u64 patched_cb_handle;
5465 int rc;
5466
5467 rc = gaudi_validate_cb(hdev, parser, false);
5468
5469 if (rc)
5470 goto free_userptr;
5471
5472 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5473 parser->patched_cb_size, false, false,
5474 &patched_cb_handle);
5475 if (rc) {
5476 dev_err(hdev->dev,
5477 "Failed to allocate patched CB for DMA CS %d\n", rc);
5478 goto free_userptr;
5479 }
5480
5481 patched_cb_handle >>= PAGE_SHIFT;
5482 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5483 (u32) patched_cb_handle);
5484 /* hl_cb_get should never fail here */
5485 if (!parser->patched_cb) {
5486 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5487 (u32) patched_cb_handle);
5488 rc = -EFAULT;
5489 goto out;
5490 }
5491
5492 rc = gaudi_patch_cb(hdev, parser);
5493
5494 if (rc)
5495 hl_cb_put(parser->patched_cb);
5496
5497 out:
5498 /*
5499 * Always call cb destroy here because we still hold one reference
5500 * to it from the earlier cb_get call. After the job completes,
5501 * cb_put will release it, but here we want to remove it from the
5502 * idr
5503 */
5504 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5505 patched_cb_handle << PAGE_SHIFT);
5506
5507 free_userptr:
5508 if (rc)
5509 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5510 return rc;
5511 }
5512
5513 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5514 struct hl_cs_parser *parser)
5515 {
5516 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5517 struct gaudi_device *gaudi = hdev->asic_specific;
5518 u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5519 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5520
5521 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5522 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5523 (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5524 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5525 parser->hw_queue_id);
5526 return -EINVAL;
5527 }
5528
5529 /* For internal queue jobs just check if CB address is valid */
5530 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5531 parser->user_cb_size,
5532 asic_prop->sram_user_base_address,
5533 asic_prop->sram_end_address))
5534 return 0;
5535
5536 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5537 parser->user_cb_size,
5538 asic_prop->dram_user_base_address,
5539 asic_prop->dram_end_address))
5540 return 0;
5541
5542 /* PMMU and HPMMU addresses are equal, check only one of them */
5543 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5544 parser->user_cb_size,
5545 asic_prop->pmmu.start_addr,
5546 asic_prop->pmmu.end_addr))
5547 return 0;
5548
5549 dev_err(hdev->dev,
5550 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5551 parser->user_cb, parser->user_cb_size);
5552
5553 return -EFAULT;
5554 }
5555
5556 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5557 {
5558 struct gaudi_device *gaudi = hdev->asic_specific;
5559
5560 if (parser->queue_type == QUEUE_TYPE_INT)
5561 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5562
5563 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5564 return gaudi_parse_cb_mmu(hdev, parser);
5565 else
5566 return gaudi_parse_cb_no_mmu(hdev, parser);
5567 }
5568
5569 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5570 void *kernel_address, u32 len,
5571 u64 cq_addr, u32 cq_val, u32 msi_vec,
5572 bool eb)
5573 {
5574 struct gaudi_device *gaudi = hdev->asic_specific;
5575 struct packet_msg_prot *cq_pkt;
5576 u32 tmp;
5577
5578 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5579
5580 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5581 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5582
5583 if (eb)
5584 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5585
5586 cq_pkt->ctl = cpu_to_le32(tmp);
5587 cq_pkt->value = cpu_to_le32(cq_val);
5588 cq_pkt->addr = cpu_to_le64(cq_addr);
5589
5590 cq_pkt++;
5591
5592 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5593 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5594 cq_pkt->ctl = cpu_to_le32(tmp);
5595 cq_pkt->value = cpu_to_le32(1);
5596
5597 if (!gaudi->multi_msi_mode)
5598 msi_vec = 0;
5599
5600 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5601 }
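
/*
 * Tail layout written by the helper above (a sketch; offsets are relative to
 * the start of the CB of length len):
 *
 *	[len - 2 * sizeof(struct packet_msg_prot)]  MSG_PROT: writes cq_val to
 *	                                            cq_addr (completion entry)
 *	[len - 1 * sizeof(struct packet_msg_prot)]  MSG_PROT: writes 1 to
 *	                                            CFG_BASE + mmPCIE_MSI_INTR_0
 *	                                            + msi_vec * 4 (raises the
 *	                                            MSI; vector 0 is forced when
 *	                                            multi-MSI mode is off)
 */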
5602
5603 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5604 {
5605 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5606 }
5607
5608 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5609 u32 size, u64 val)
5610 {
5611 struct packet_lin_dma *lin_dma_pkt;
5612 struct hl_cs_job *job;
5613 u32 cb_size, ctl, err_cause;
5614 struct hl_cb *cb;
5615 int rc;
5616
5617 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5618 if (!cb)
5619 return -EFAULT;
5620
5621 lin_dma_pkt = cb->kernel_address;
5622 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5623 cb_size = sizeof(*lin_dma_pkt);
5624
5625 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5626 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5627 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5628 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5629 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5630
5631 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5632 lin_dma_pkt->src_addr = cpu_to_le64(val);
5633 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5634 lin_dma_pkt->tsize = cpu_to_le32(size);
5635
5636 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5637 if (!job) {
5638 dev_err(hdev->dev, "Failed to allocate a new job\n");
5639 rc = -ENOMEM;
5640 goto release_cb;
5641 }
5642
5643 /* Verify DMA is OK */
5644 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5645 if (err_cause && !hdev->init_done) {
5646 dev_dbg(hdev->dev,
5647 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5648 err_cause);
5649 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5650 }
5651
5652 job->id = 0;
5653 job->user_cb = cb;
5654 atomic_inc(&job->user_cb->cs_cnt);
5655 job->user_cb_size = cb_size;
5656 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5657 job->patched_cb = job->user_cb;
5658 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5659
5660 hl_debugfs_add_job(hdev, job);
5661
5662 rc = gaudi_send_job_on_qman0(hdev, job);
5663 hl_debugfs_remove_job(hdev, job);
5664 kfree(job);
5665 atomic_dec(&cb->cs_cnt);
5666
5667 /* Verify DMA is OK */
5668 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5669 if (err_cause) {
5670 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5671 rc = -EIO;
5672 if (!hdev->init_done) {
5673 dev_dbg(hdev->dev,
5674 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5675 err_cause);
5676 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5677 }
5678 }
5679
5680 release_cb:
5681 hl_cb_put(cb);
5682 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5683
5684 return rc;
5685 }
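
/*
 * Example in-driver user of the helper above: gaudi_mmu_clear_pgt_range()
 * further down scrubs the MMU page-table region with a single call, e.g.
 *
 *	gaudi_memset_device_memory(hdev, prop->mmu_pgt_addr,
 *			prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE, 0);
 */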
5686
5687 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5688 u32 num_regs, u32 val)
5689 {
5690 struct packet_msg_long *pkt;
5691 struct hl_cs_job *job;
5692 u32 cb_size, ctl;
5693 struct hl_cb *cb;
5694 int i, rc;
5695
5696 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5697
5698 if (cb_size > SZ_2M) {
5699 dev_err(hdev->dev, "CB size must be smaller than %u bytes\n", SZ_2M);
5700 return -ENOMEM;
5701 }
5702
5703 cb = hl_cb_kernel_create(hdev, cb_size, false);
5704 if (!cb)
5705 return -EFAULT;
5706
5707 pkt = cb->kernel_address;
5708
5709 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5710 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5711 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5712 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5713 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5714
5715 for (i = 0; i < num_regs ; i++, pkt++) {
5716 pkt->ctl = cpu_to_le32(ctl);
5717 pkt->value = cpu_to_le32(val);
5718 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5719 }
5720
5721 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5722 if (!job) {
5723 dev_err(hdev->dev, "Failed to allocate a new job\n");
5724 rc = -ENOMEM;
5725 goto release_cb;
5726 }
5727
5728 job->id = 0;
5729 job->user_cb = cb;
5730 atomic_inc(&job->user_cb->cs_cnt);
5731 job->user_cb_size = cb_size;
5732 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5733 job->patched_cb = job->user_cb;
5734 job->job_cb_size = cb_size;
5735
5736 hl_debugfs_add_job(hdev, job);
5737
5738 rc = gaudi_send_job_on_qman0(hdev, job);
5739 hl_debugfs_remove_job(hdev, job);
5740 kfree(job);
5741 atomic_dec(&cb->cs_cnt);
5742
5743 release_cb:
5744 hl_cb_put(cb);
5745 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5746
5747 return rc;
5748 }
5749
5750 static int gaudi_schedule_register_memset(struct hl_device *hdev,
5751 u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
5752 {
5753 struct hl_ctx *ctx;
5754 struct hl_pending_cb *pending_cb;
5755 struct packet_msg_long *pkt;
5756 u32 cb_size, ctl;
5757 struct hl_cb *cb;
5758 int i, rc;
5759
5760 mutex_lock(&hdev->fpriv_list_lock);
5761 ctx = hdev->compute_ctx;
5762
5763 /* If no compute context is available or the context is going down,
5764 * memset the registers directly
5765 */
5766 if (!ctx || kref_read(&ctx->refcount) == 0) {
5767 rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
5768 mutex_unlock(&hdev->fpriv_list_lock);
5769 return rc;
5770 }
5771
5772 mutex_unlock(&hdev->fpriv_list_lock);
5773
5774 cb_size = (sizeof(*pkt) * num_regs) +
5775 sizeof(struct packet_msg_prot) * 2;
5776
5777 if (cb_size > SZ_2M) {
5778 dev_err(hdev->dev, "CB size must be smaller than %u bytes\n", SZ_2M);
5779 return -ENOMEM;
5780 }
5781
5782 pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
5783 if (!pending_cb)
5784 return -ENOMEM;
5785
5786 cb = hl_cb_kernel_create(hdev, cb_size, false);
5787 if (!cb) {
5788 kfree(pending_cb);
5789 return -EFAULT;
5790 }
5791
5792 pkt = cb->kernel_address;
5793
5794 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5795 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5796 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5797 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5798 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5799
5800 for (i = 0; i < num_regs ; i++, pkt++) {
5801 pkt->ctl = cpu_to_le32(ctl);
5802 pkt->value = cpu_to_le32(val);
5803 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5804 }
5805
5806 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5807
5808 pending_cb->cb = cb;
5809 pending_cb->cb_size = cb_size;
5810 /* The queue ID MUST be an external queue ID. Otherwise, we will
5811 * have undefined behavior
5812 */
5813 pending_cb->hw_queue_id = hw_queue_id;
5814
5815 spin_lock(&ctx->pending_cb_lock);
5816 list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
5817 spin_unlock(&ctx->pending_cb_lock);
5818
5819 return 0;
5820 }
5821
5822 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5823 {
5824 u64 base_addr;
5825 u32 num_regs;
5826 int rc;
5827
5828 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5829 num_regs = NUM_OF_SOB_IN_BLOCK;
5830 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5831 if (rc) {
5832 dev_err(hdev->dev, "failed resetting SM registers\n");
5833 return -ENOMEM;
5834 }
5835
5836 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5837 num_regs = NUM_OF_SOB_IN_BLOCK;
5838 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5839 if (rc) {
5840 dev_err(hdev->dev, "failed resetting SM registers\n");
5841 return -ENOMEM;
5842 }
5843
5844 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5845 num_regs = NUM_OF_SOB_IN_BLOCK;
5846 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5847 if (rc) {
5848 dev_err(hdev->dev, "failed resetting SM registers\n");
5849 return -ENOMEM;
5850 }
5851
5852 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5853 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5854 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5855 if (rc) {
5856 dev_err(hdev->dev, "failed resetting SM registers\n");
5857 return -ENOMEM;
5858 }
5859
5860 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5861 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5862 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5863 if (rc) {
5864 dev_err(hdev->dev, "failed resetting SM registers\n");
5865 return -ENOMEM;
5866 }
5867
5868 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5869 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5870 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5871 if (rc) {
5872 dev_err(hdev->dev, "failed resetting SM registers\n");
5873 return -ENOMEM;
5874 }
5875
5876 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5877 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5878 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5879 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5880 if (rc) {
5881 dev_err(hdev->dev, "failed resetting SM registers\n");
5882 return -ENOMEM;
5883 }
5884
5885 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5886 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5887 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5888 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5889 if (rc) {
5890 dev_err(hdev->dev, "failed resetting SM registers\n");
5891 return -ENOMEM;
5892 }
5893
5894 return 0;
5895 }
5896
5897 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5898 {
5899 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5900 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5901 int i;
5902
5903 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5904 u64 sob_addr = CFG_BASE +
5905 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5906 (i * sob_delta);
5907 u32 dma_offset = i * DMA_CORE_OFFSET;
5908
5909 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5910 lower_32_bits(sob_addr));
5911 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5912 upper_32_bits(sob_addr));
5913 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5914
5915 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5916 * modified by the user for SRAM reduction
5917 */
5918 if (i > 1)
5919 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5920 0x00000001);
5921 }
5922 }
5923
5924 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5925 {
5926 u32 qman_offset;
5927 int i;
5928
5929 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5930 qman_offset = i * DMA_QMAN_OFFSET;
5931 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5932 }
5933
5934 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5935 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5936 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5937 }
5938
5939 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5940 qman_offset = i * TPC_QMAN_OFFSET;
5941 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5942 }
5943
5944 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5945 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5946 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5947 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5948 }
5949 }
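
/*
 * Worked example of the NIC index math above: each NIC macro hosts two QMANs,
 * so for i = 5 the offset is (5 >> 1) * NIC_MACRO_QMAN_OFFSET +
 * (5 & 0x1) * NIC_ENGINE_QMAN_OFFSET, i.e. the ARB_CFG_0 register of the
 * second QMAN inside the third NIC macro (NIC2_QM1).
 */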
5950
5951 static int gaudi_restore_user_registers(struct hl_device *hdev)
5952 {
5953 int rc;
5954
5955 rc = gaudi_restore_sm_registers(hdev);
5956 if (rc)
5957 return rc;
5958
5959 gaudi_restore_dma_registers(hdev);
5960 gaudi_restore_qm_registers(hdev);
5961
5962 return 0;
5963 }
5964
5965 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5966 {
5967 return gaudi_restore_user_registers(hdev);
5968 }
5969
5970 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5971 {
5972 struct asic_fixed_properties *prop = &hdev->asic_prop;
5973 struct gaudi_device *gaudi = hdev->asic_specific;
5974 u64 addr = prop->mmu_pgt_addr;
5975 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
5976
5977 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5978 return 0;
5979
5980 return gaudi_memset_device_memory(hdev, addr, size, 0);
5981 }
5982
5983 static void gaudi_restore_phase_topology(struct hl_device *hdev)
5984 {
5985
5986 }
5987
5988 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
5989 bool user_address, u32 *val)
5990 {
5991 struct asic_fixed_properties *prop = &hdev->asic_prop;
5992 struct gaudi_device *gaudi = hdev->asic_specific;
5993 u64 hbm_bar_addr, host_phys_end;
5994 int rc = 0;
5995
5996 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
5997
5998 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
5999
6000 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6001 (hdev->clock_gating_mask &
6002 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6003
6004 dev_err_ratelimited(hdev->dev,
6005 "Can't read register - clock gating is enabled!\n");
6006 rc = -EFAULT;
6007 } else {
6008 *val = RREG32(addr - CFG_BASE);
6009 }
6010
6011 } else if ((addr >= SRAM_BASE_ADDR) &&
6012 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6013 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6014 (addr - SRAM_BASE_ADDR));
6015 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6016 u64 bar_base_addr = DRAM_PHYS_BASE +
6017 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6018
6019 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6020 if (hbm_bar_addr != U64_MAX) {
6021 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6022 (addr - bar_base_addr));
6023
6024 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6025 hbm_bar_addr);
6026 }
6027 if (hbm_bar_addr == U64_MAX)
6028 rc = -EIO;
6029 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6030 user_address && !iommu_present(&pci_bus_type)) {
6031 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6032 } else {
6033 rc = -EFAULT;
6034 }
6035
6036 return rc;
6037 }
6038
6039 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6040 bool user_address, u32 val)
6041 {
6042 struct asic_fixed_properties *prop = &hdev->asic_prop;
6043 struct gaudi_device *gaudi = hdev->asic_specific;
6044 u64 hbm_bar_addr, host_phys_end;
6045 int rc = 0;
6046
6047 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6048
6049 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6050
6051 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6052 (hdev->clock_gating_mask &
6053 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6054
6055 dev_err_ratelimited(hdev->dev,
6056 "Can't write register - clock gating is enabled!\n");
6057 rc = -EFAULT;
6058 } else {
6059 WREG32(addr - CFG_BASE, val);
6060 }
6061
6062 } else if ((addr >= SRAM_BASE_ADDR) &&
6063 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6064 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6065 (addr - SRAM_BASE_ADDR));
6066 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6067 u64 bar_base_addr = DRAM_PHYS_BASE +
6068 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6069
6070 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6071 if (hbm_bar_addr != U64_MAX) {
6072 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6073 (addr - bar_base_addr));
6074
6075 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6076 hbm_bar_addr);
6077 }
6078 if (hbm_bar_addr == U64_MAX)
6079 rc = -EIO;
6080 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6081 user_address && !iommu_present(&pci_bus_type)) {
6082 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6083 } else {
6084 rc = -EFAULT;
6085 }
6086
6087 return rc;
6088 }
6089
6090 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6091 bool user_address, u64 *val)
6092 {
6093 struct asic_fixed_properties *prop = &hdev->asic_prop;
6094 struct gaudi_device *gaudi = hdev->asic_specific;
6095 u64 hbm_bar_addr, host_phys_end;
6096 int rc = 0;
6097
6098 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6099
6100 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6101
6102 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6103 (hdev->clock_gating_mask &
6104 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6105
6106 dev_err_ratelimited(hdev->dev,
6107 "Can't read register - clock gating is enabled!\n");
6108 rc = -EFAULT;
6109 } else {
6110 u32 val_l = RREG32(addr - CFG_BASE);
6111 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6112
6113 *val = (((u64) val_h) << 32) | val_l;
6114 }
6115
6116 } else if ((addr >= SRAM_BASE_ADDR) &&
6117 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6118 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6119 (addr - SRAM_BASE_ADDR));
6120 } else if (addr <=
6121 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6122 u64 bar_base_addr = DRAM_PHYS_BASE +
6123 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6124
6125 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6126 if (hbm_bar_addr != U64_MAX) {
6127 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6128 (addr - bar_base_addr));
6129
6130 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6131 hbm_bar_addr);
6132 }
6133 if (hbm_bar_addr == U64_MAX)
6134 rc = -EIO;
6135 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6136 user_address && !iommu_present(&pci_bus_type)) {
6137 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6138 } else {
6139 rc = -EFAULT;
6140 }
6141
6142 return rc;
6143 }
6144
6145 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6146 bool user_address, u64 val)
6147 {
6148 struct asic_fixed_properties *prop = &hdev->asic_prop;
6149 struct gaudi_device *gaudi = hdev->asic_specific;
6150 u64 hbm_bar_addr, host_phys_end;
6151 int rc = 0;
6152
6153 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6154
6155 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6156
6157 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6158 (hdev->clock_gating_mask &
6159 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6160
6161 dev_err_ratelimited(hdev->dev,
6162 "Can't write register - clock gating is enabled!\n");
6163 rc = -EFAULT;
6164 } else {
6165 WREG32(addr - CFG_BASE, lower_32_bits(val));
6166 WREG32(addr + sizeof(u32) - CFG_BASE,
6167 upper_32_bits(val));
6168 }
6169
6170 } else if ((addr >= SRAM_BASE_ADDR) &&
6171 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6172 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6173 (addr - SRAM_BASE_ADDR));
6174 } else if (addr <=
6175 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6176 u64 bar_base_addr = DRAM_PHYS_BASE +
6177 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6178
6179 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6180 if (hbm_bar_addr != U64_MAX) {
6181 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6182 (addr - bar_base_addr));
6183
6184 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6185 hbm_bar_addr);
6186 }
6187 if (hbm_bar_addr == U64_MAX)
6188 rc = -EIO;
6189 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6190 user_address && !iommu_present(&pci_bus_type)) {
6191 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6192 } else {
6193 rc = -EFAULT;
6194 }
6195
6196 return rc;
6197 }
6198
6199 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6200 u32 size_to_dma, dma_addr_t dma_addr)
6201 {
6202 u32 err_cause, val;
6203 u64 dma_offset;
6204 int rc;
6205
6206 dma_offset = dma_id * DMA_CORE_OFFSET;
6207
6208 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6209 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6210 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6211 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6212 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6213 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6214 (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6215
6216 rc = hl_poll_timeout(
6217 hdev,
6218 mmDMA0_CORE_STS0 + dma_offset,
6219 val,
6220 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6221 0,
6222 1000000);
6223
6224 if (rc) {
6225 dev_err(hdev->dev,
6226 "DMA %d timed out while reading from 0x%llx\n",
6227 dma_id, addr);
6228 return -EIO;
6229 }
6230
6231 /* Verify DMA is OK */
6232 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6233 if (err_cause) {
6234 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6235 dev_dbg(hdev->dev,
6236 "Clearing DMA0 engine from errors (cause 0x%x)\n",
6237 err_cause);
6238 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6239
6240 return -EIO;
6241 }
6242
6243 return 0;
6244 }
6245
6246 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6247 void *blob_addr)
6248 {
6249 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6250 struct gaudi_device *gaudi = hdev->asic_specific;
6251 u64 dma_offset, qm_offset;
6252 dma_addr_t dma_addr;
6253 void *kernel_addr;
6254 bool is_eng_idle;
6255 int rc = 0, dma_id;
6256
6257 kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6258 hdev, SZ_2M,
6259 &dma_addr,
6260 GFP_KERNEL | __GFP_ZERO);
6261
6262 if (!kernel_addr)
6263 return -ENOMEM;
6264
6265 mutex_lock(&gaudi->clk_gate_mutex);
6266
6267 hdev->asic_funcs->disable_clock_gating(hdev);
6268
6269 hdev->asic_funcs->hw_queues_lock(hdev);
6270
6271 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6272 dma_offset = dma_id * DMA_CORE_OFFSET;
6273 qm_offset = dma_id * DMA_QMAN_OFFSET;
6274 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6275 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6276
6277 if (!is_eng_idle) {
6278 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6279 dma_offset = dma_id * DMA_CORE_OFFSET;
6280 qm_offset = dma_id * DMA_QMAN_OFFSET;
6281 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6282 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6283
6284 if (!is_eng_idle) {
6285 dev_err_ratelimited(hdev->dev,
6286 "Can't read via DMA because it is BUSY\n");
6287 rc = -EAGAIN;
6288 goto out;
6289 }
6290 }
6291
6292 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6293 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6294 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6295
6296 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6297 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6298 * ASID
6299 */
6300 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6301
6302 /* Verify DMA is OK */
6303 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6304 if (err_cause) {
6305 dev_dbg(hdev->dev,
6306 "Clearing DMA0 engine from errors (cause 0x%x)\n",
6307 err_cause);
6308 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6309 }
6310
6311 pos = 0;
6312 size_left = size;
6313 size_to_dma = SZ_2M;
6314
6315 while (size_left > 0) {
6316
6317 if (size_left < SZ_2M)
6318 size_to_dma = size_left;
6319
6320 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6321 dma_addr);
6322 if (rc)
6323 break;
6324
6325 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6326
6327 if (size_left <= SZ_2M)
6328 break;
6329
6330 pos += SZ_2M;
6331 addr += SZ_2M;
6332 size_left -= SZ_2M;
6333 }
6334
6335 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6336 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6337 * ASID
6338 */
6339 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6340 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6341
6342 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6343
6344 out:
6345 hdev->asic_funcs->hw_queues_unlock(hdev);
6346
6347 hdev->asic_funcs->set_clock_gating(hdev);
6348
6349 mutex_unlock(&gaudi->clk_gate_mutex);
6350
6351 hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6352 dma_addr);
6353
6354 return rc;
6355 }
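
/*
 * Chunking example for the read loop above (a sketch): for size = 5MB the
 * transfers are 2MB, 2MB and then 1MB, i.e.
 *
 *	size_left: 0x500000 -> 0x300000 -> 0x100000 (last chunk, loop breaks)
 *
 * Each chunk lands in the same 2MB coherent bounce buffer and is then
 * memcpy()-ed into blob_addr at the running offset.
 */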
6356
6357 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6358 {
6359 struct gaudi_device *gaudi = hdev->asic_specific;
6360
6361 if (hdev->hard_reset_pending)
6362 return U64_MAX;
6363
6364 return readq(hdev->pcie_bar[HBM_BAR_ID] +
6365 (addr - gaudi->hbm_bar_cur_addr));
6366 }
6367
6368 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6369 {
6370 struct gaudi_device *gaudi = hdev->asic_specific;
6371
6372 if (hdev->hard_reset_pending)
6373 return;
6374
6375 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6376 (addr - gaudi->hbm_bar_cur_addr));
6377 }
6378
6379 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6380 {
6381 /* mask to zero the MMBP and ASID bits */
6382 WREG32_AND(reg, ~0x7FF);
6383 WREG32_OR(reg, asid);
6384 }
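
/*
 * The helper above is two read-modify-write accesses on the low bits of the
 * register. Assuming the ASID field sits in the low bits covered by the 0x7FF
 * mask together with the MMU-bypass (MMBP) bit, the combined effect is
 * equivalent to
 *
 *	val = RREG32(reg);
 *	val = (val & ~0x7FF) | asid;
 *	WREG32(reg, val);
 *
 * i.e. MMU bypass is cleared and the new ASID is installed.
 */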
6385
6386 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6387 {
6388 struct gaudi_device *gaudi = hdev->asic_specific;
6389
6390 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6391 return;
6392
6393 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6394 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6395 return;
6396 }
6397
6398 mutex_lock(&gaudi->clk_gate_mutex);
6399
6400 hdev->asic_funcs->disable_clock_gating(hdev);
6401
6402 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6403 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6404 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6405 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6406 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6407
6408 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6409 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6410 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6411 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6412 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6413
6414 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6415 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6416 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6417 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6418 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6419
6420 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6421 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6422 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6423 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6424 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6425
6426 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6427 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6428 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6429 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6430 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6431
6432 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6433 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6434 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6435 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6436 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6437
6438 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6439 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6440 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6441 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6442 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6443
6444 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6445 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6446 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6447 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6448 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6449
6450 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6451 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6452 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6453 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6454 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6455 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6456 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6457 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6458
6459 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6460 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6461 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6462 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6463 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6464 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6465 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6466
6467 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6468 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6469 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6470 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6471 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6472 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6473 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6474
6475 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6476 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6477 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6478 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6479 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6480 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6481 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6482
6483 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6484 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6485 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6486 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6487 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6488 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6489 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6490
6491 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6492 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6493 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6494 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6495 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6496 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6497 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6498
6499 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6500 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6501 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6502 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6503 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6504 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6505 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6506
6507 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6508 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6509 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6510 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6511 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6512 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6513 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6514
6515 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6516 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6517 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6518 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6519 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6520 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6521 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6522
6523 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6524 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6525 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6526 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6527 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6528 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6529 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6530 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6531 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6532 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6533
6534 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6535 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6536 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6537 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6538 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6539 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6540 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6541 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6542 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6543 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6544 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6545 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6546
6547 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC0) {
6548 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6549 asid);
6550 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6551 asid);
6552 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6553 asid);
6554 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6555 asid);
6556 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6557 asid);
6558 }
6559
6560 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC1) {
6561 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6562 asid);
6563 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6564 asid);
6565 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6566 asid);
6567 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6568 asid);
6569 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6570 asid);
6571 }
6572
6573 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC2) {
6574 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6575 asid);
6576 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6577 asid);
6578 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6579 asid);
6580 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6581 asid);
6582 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6583 asid);
6584 }
6585
6586 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC3) {
6587 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6588 asid);
6589 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6590 asid);
6591 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6592 asid);
6593 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6594 asid);
6595 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6596 asid);
6597 }
6598
6599 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC4) {
6600 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6601 asid);
6602 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6603 asid);
6604 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6605 asid);
6606 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6607 asid);
6608 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6609 asid);
6610 }
6611
6612 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC5) {
6613 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6614 asid);
6615 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6616 asid);
6617 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6618 asid);
6619 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6620 asid);
6621 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6622 asid);
6623 }
6624
6625 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC6) {
6626 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6627 asid);
6628 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6629 asid);
6630 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6631 asid);
6632 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6633 asid);
6634 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6635 asid);
6636 }
6637
6638 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC7) {
6639 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6640 asid);
6641 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6642 asid);
6643 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6644 asid);
6645 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6646 asid);
6647 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6648 asid);
6649 }
6650
6651 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC8) {
6652 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6653 asid);
6654 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6655 asid);
6656 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6657 asid);
6658 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6659 asid);
6660 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6661 asid);
6662 }
6663
6664 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC9) {
6665 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6666 asid);
6667 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6668 asid);
6669 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6670 asid);
6671 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6672 asid);
6673 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6674 asid);
6675 }
6676
6677 hdev->asic_funcs->set_clock_gating(hdev);
6678
6679 mutex_unlock(&gaudi->clk_gate_mutex);
6680 }
6681
6682 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6683 struct hl_cs_job *job)
6684 {
6685 struct packet_msg_prot *fence_pkt;
6686 u32 *fence_ptr;
6687 dma_addr_t fence_dma_addr;
6688 struct hl_cb *cb;
6689 u32 tmp, timeout, dma_offset;
6690 int rc;
6691
6692 if (hdev->pldm)
6693 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6694 else
6695 timeout = HL_DEVICE_TIMEOUT_USEC;
6696
6697 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6698 dev_err_ratelimited(hdev->dev,
6699 "Can't send driver job on QMAN0 because the device is not idle\n");
6700 return -EBUSY;
6701 }
6702
6703 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6704 &fence_dma_addr);
6705 if (!fence_ptr) {
6706 dev_err(hdev->dev,
6707 "Failed to allocate fence memory for QMAN0\n");
6708 return -ENOMEM;
6709 }
6710
6711 cb = job->patched_cb;
6712
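	/*
	 * Append a MSG_PROT fence packet at the end of the patched CB so the
	 * driver can poll fence_ptr for GAUDI_QMAN0_FENCE_VAL and detect
	 * completion of the job.
	 */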
6713 fence_pkt = cb->kernel_address +
6714 job->job_cb_size - sizeof(struct packet_msg_prot);
6715
6716 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6717 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6718 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6719
6720 fence_pkt->ctl = cpu_to_le32(tmp);
6721 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6722 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6723
6724 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6725
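	/*
	 * Temporarily set the protection bit of the PCI DMA core so the driver
	 * job can run on QMAN0; the bit is cleared again at free_fence_ptr
	 * below.
	 */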
6726 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6727
6728 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6729 job->job_cb_size, cb->bus_address);
6730 if (rc) {
6731 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6732 goto free_fence_ptr;
6733 }
6734
6735 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6736 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6737 timeout, true);
6738
6739 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6740
6741 if (rc == -ETIMEDOUT) {
6742 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6743 goto free_fence_ptr;
6744 }
6745
6746 free_fence_ptr:
6747 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6748 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6749
6750 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6751 fence_dma_addr);
6752 return rc;
6753 }
6754
6755 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6756 {
6757 if (event_type >= GAUDI_EVENT_SIZE)
6758 goto event_not_supported;
6759
6760 if (!gaudi_irq_map_table[event_type].valid)
6761 goto event_not_supported;
6762
6763 	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6764
6765 return;
6766
6767 event_not_supported:
6768 snprintf(desc, size, "N/A");
6769 }
6770
6771 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6772 u32 x_y, bool is_write)
6773 {
6774 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6775
6776 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6777 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6778
6779 switch (x_y) {
6780 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6781 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6782 dma_id[0] = 0;
6783 dma_id[1] = 2;
6784 break;
6785 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6786 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6787 dma_id[0] = 1;
6788 dma_id[1] = 3;
6789 break;
6790 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6791 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6792 dma_id[0] = 4;
6793 dma_id[1] = 6;
6794 break;
6795 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6796 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6797 dma_id[0] = 5;
6798 dma_id[1] = 7;
6799 break;
6800 default:
6801 goto unknown_initiator;
6802 }
6803
6804 for (i = 0 ; i < 2 ; i++) {
6805 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6806 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6807 }
6808
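	/*
	 * Both DMA cores behind this DMA_IF are candidates - check which one
	 * recorded the matching HBW read/write error in its ERR_CAUSE register.
	 */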
6809 switch (x_y) {
6810 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6811 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6812 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6813 return "DMA0";
6814 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6815 return "DMA2";
6816 else
6817 return "DMA0 or DMA2";
6818 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6819 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6820 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6821 return "DMA1";
6822 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6823 return "DMA3";
6824 else
6825 return "DMA1 or DMA3";
6826 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6827 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6828 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6829 return "DMA4";
6830 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6831 return "DMA6";
6832 else
6833 return "DMA4 or DMA6";
6834 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6835 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6836 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6837 return "DMA5";
6838 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6839 return "DMA7";
6840 else
6841 return "DMA5 or DMA7";
6842 }
6843
6844 unknown_initiator:
6845 return "unknown initiator";
6846 }
6847
6848 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
6849 bool is_write)
6850 {
6851 u32 val, x_y, axi_id;
6852
6853 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6854 RREG32(mmMMU_UP_RAZWI_READ_ID);
6855 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6856 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6857 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6858 RAZWI_INITIATOR_AXI_ID_SHIFT);
6859
6860 switch (x_y) {
6861 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6862 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6863 return "TPC0";
6864 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6865 return "NIC0";
6866 break;
6867 case RAZWI_INITIATOR_ID_X_Y_TPC1:
6868 return "TPC1";
6869 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6870 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6871 return "MME0";
6872 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6873 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6874 return "MME1";
6875 case RAZWI_INITIATOR_ID_X_Y_TPC2:
6876 return "TPC2";
6877 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6878 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6879 return "TPC3";
6880 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6881 return "PCI";
6882 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6883 return "CPU";
6884 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6885 return "PSOC";
6886 break;
6887 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6888 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6889 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6890 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6891 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6892 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6893 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6894 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6895 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
6896 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6897 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6898 return "TPC4";
6899 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6900 return "NIC1";
6901 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6902 return "NIC2";
6903 break;
6904 case RAZWI_INITIATOR_ID_X_Y_TPC5:
6905 return "TPC5";
6906 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6907 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6908 return "MME2";
6909 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6910 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6911 return "MME3";
6912 case RAZWI_INITIATOR_ID_X_Y_TPC6:
6913 return "TPC6";
6914 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6915 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6916 return "TPC7";
6917 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6918 return "NIC4";
6919 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6920 return "NIC5";
6921 break;
6922 default:
6923 break;
6924 }
6925
6926 dev_err(hdev->dev,
6927 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6928 val,
6929 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6930 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6931 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6932 RAZWI_INITIATOR_AXI_ID_MASK);
6933
6934 return "unknown initiator";
6935 }
6936
6937 static void gaudi_print_razwi_info(struct hl_device *hdev)
6938 {
6939 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6940 dev_err_ratelimited(hdev->dev,
6941 "RAZWI event caused by illegal write of %s\n",
6942 gaudi_get_razwi_initiator_name(hdev, true));
6943 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6944 }
6945
6946 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6947 dev_err_ratelimited(hdev->dev,
6948 "RAZWI event caused by illegal read of %s\n",
6949 gaudi_get_razwi_initiator_name(hdev, false));
6950 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6951 }
6952 }
6953
6954 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
6955 {
6956 struct gaudi_device *gaudi = hdev->asic_specific;
6957 u64 addr;
6958 u32 val;
6959
6960 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6961 return;
6962
6963 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6964 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6965 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6966 addr <<= 32;
6967 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6968
6969 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
6970 addr);
6971
6972 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6973 }
6974
6975 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6976 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6977 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6978 addr <<= 32;
6979 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6980
6981 dev_err_ratelimited(hdev->dev,
6982 "MMU access error on va 0x%llx\n", addr);
6983
6984 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6985 }
6986 }
6987
6988 /*
6989 * +-------------------+------------------------------------------------------+
6990 * | Configuration Reg | Description |
6991 * | Address | |
6992 * +-------------------+------------------------------------------------------+
6993 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
6994 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
6995 * | |0xF34 memory wrappers 63:32 |
6996 * | |0xF38 memory wrappers 95:64 |
6997 * | |0xF3C memory wrappers 127:96 |
6998 * +-------------------+------------------------------------------------------+
6999 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
7000 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
7001 * | |0xF44 memory wrappers 63:32 |
7002 * | |0xF48 memory wrappers 95:64 |
7003 * | |0xF4C memory wrappers 127:96 |
7004 * +-------------------+------------------------------------------------------+
7005 */
7006 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7007 struct ecc_info_extract_params *params, u64 *ecc_address,
7008 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7009 {
7010 struct gaudi_device *gaudi = hdev->asic_specific;
7011 u32 i, num_mem_regs, reg, err_bit;
7012 u64 err_addr, err_word = 0;
7013 int rc = 0;
7014
7015 num_mem_regs = params->num_memories / 32 +
7016 ((params->num_memories % 32) ? 1 : 0);
7017
7018 if (params->block_address >= CFG_BASE)
7019 params->block_address -= CFG_BASE;
7020
7021 if (params->derr)
7022 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7023 else
7024 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7025
7026 if (params->disable_clock_gating) {
7027 mutex_lock(&gaudi->clk_gate_mutex);
7028 hdev->asic_funcs->disable_clock_gating(hdev);
7029 }
7030
7031 /* Set invalid wrapper index */
7032 *memory_wrapper_idx = 0xFF;
7033
7034 /* Iterate through memory wrappers, a single bit must be set */
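	/* Each indication register covers 32 wrappers, registers are 4 bytes apart */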
7035 for (i = 0 ; i < num_mem_regs ; i++) {
7036 		err_word = RREG32(err_addr + i * 4);
7038 if (err_word) {
7039 err_bit = __ffs(err_word);
7040 *memory_wrapper_idx = err_bit + (32 * i);
7041 break;
7042 }
7043 }
7044
7045 if (*memory_wrapper_idx == 0xFF) {
7046 dev_err(hdev->dev, "ECC error information cannot be found\n");
7047 rc = -EINVAL;
7048 goto enable_clk_gate;
7049 }
7050
7051 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7052 *memory_wrapper_idx);
7053
7054 *ecc_address =
7055 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7056 *ecc_syndrom =
7057 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7058
7059 /* Clear error indication */
7060 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7061 if (params->derr)
7062 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7063 else
7064 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7065
7066 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7067
7068 enable_clk_gate:
7069 if (params->disable_clock_gating) {
7070 hdev->asic_funcs->set_clock_gating(hdev);
7071
7072 mutex_unlock(&gaudi->clk_gate_mutex);
7073 }
7074
7075 return rc;
7076 }
7077
7078 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7079 const char *qm_name,
7080 u64 glbl_sts_addr,
7081 u64 arb_err_addr)
7082 {
7083 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7084 char reg_desc[32];
7085
7086 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7087 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7088 glbl_sts_clr_val = 0;
7089 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7090
7091 if (!glbl_sts_val)
7092 continue;
7093
7094 if (i == QMAN_STREAMS)
7095 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7096 else
7097 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7098
7099 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7100 if (glbl_sts_val & BIT(j)) {
7101 dev_err_ratelimited(hdev->dev,
7102 "%s %s. err cause: %s\n",
7103 qm_name, reg_desc,
7104 gaudi_qman_error_cause[j]);
7105 glbl_sts_clr_val |= BIT(j);
7106 }
7107 }
7108
7109 		/* Write 1 to clear errors */
7110 if (!hdev->stop_on_err)
7111 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7112 }
7113
7114 arb_err_val = RREG32(arb_err_addr);
7115
7116 if (!arb_err_val)
7117 return;
7118
7119 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7120 if (arb_err_val & BIT(j)) {
7121 dev_err_ratelimited(hdev->dev,
7122 "%s ARB_ERR. err cause: %s\n",
7123 qm_name,
7124 gaudi_qman_arb_error_cause[j]);
7125 }
7126 }
7127 }
7128
7129 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7130 struct hl_eq_sm_sei_data *sei_data)
7131 {
7132 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7133
7134 switch (sei_data->sei_cause) {
7135 case SM_SEI_SO_OVERFLOW:
7136 dev_err(hdev->dev,
7137 "SM %u SEI Error: SO %u overflow/underflow",
7138 index, le32_to_cpu(sei_data->sei_log));
7139 break;
7140 case SM_SEI_LBW_4B_UNALIGNED:
7141 dev_err(hdev->dev,
7142 "SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7143 index, le32_to_cpu(sei_data->sei_log));
7144 break;
7145 case SM_SEI_AXI_RESPONSE_ERR:
7146 dev_err(hdev->dev,
7147 "SM %u SEI Error: AXI ID %u response error",
7148 index, le32_to_cpu(sei_data->sei_log));
7149 break;
7150 default:
7151 dev_err(hdev->dev, "Unknown SM SEI cause %u",
7152 le32_to_cpu(sei_data->sei_log));
7153 break;
7154 }
7155 }
7156
7157 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7158 struct hl_eq_ecc_data *ecc_data)
7159 {
7160 struct ecc_info_extract_params params;
7161 u64 ecc_address = 0, ecc_syndrom = 0;
7162 u8 index, memory_wrapper_idx = 0;
7163 bool extract_info_from_fw;
7164 int rc;
7165
7166 switch (event_type) {
7167 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7168 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7169 extract_info_from_fw = true;
7170 break;
7171 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7172 index = event_type - GAUDI_EVENT_TPC0_SERR;
7173 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7174 params.num_memories = 90;
7175 params.derr = false;
7176 params.disable_clock_gating = true;
7177 extract_info_from_fw = false;
7178 break;
7179 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7180 index = event_type - GAUDI_EVENT_TPC0_DERR;
7181 params.block_address =
7182 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7183 params.num_memories = 90;
7184 params.derr = true;
7185 params.disable_clock_gating = true;
7186 extract_info_from_fw = false;
7187 break;
7188 case GAUDI_EVENT_MME0_ACC_SERR:
7189 case GAUDI_EVENT_MME1_ACC_SERR:
7190 case GAUDI_EVENT_MME2_ACC_SERR:
7191 case GAUDI_EVENT_MME3_ACC_SERR:
7192 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7193 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7194 params.num_memories = 128;
7195 params.derr = false;
7196 params.disable_clock_gating = true;
7197 extract_info_from_fw = false;
7198 break;
7199 case GAUDI_EVENT_MME0_ACC_DERR:
7200 case GAUDI_EVENT_MME1_ACC_DERR:
7201 case GAUDI_EVENT_MME2_ACC_DERR:
7202 case GAUDI_EVENT_MME3_ACC_DERR:
7203 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7204 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7205 params.num_memories = 128;
7206 params.derr = true;
7207 params.disable_clock_gating = true;
7208 extract_info_from_fw = false;
7209 break;
7210 case GAUDI_EVENT_MME0_SBAB_SERR:
7211 case GAUDI_EVENT_MME1_SBAB_SERR:
7212 case GAUDI_EVENT_MME2_SBAB_SERR:
7213 case GAUDI_EVENT_MME3_SBAB_SERR:
7214 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7215 params.block_address =
7216 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7217 params.num_memories = 33;
7218 params.derr = false;
7219 params.disable_clock_gating = true;
7220 extract_info_from_fw = false;
7221 break;
7222 case GAUDI_EVENT_MME0_SBAB_DERR:
7223 case GAUDI_EVENT_MME1_SBAB_DERR:
7224 case GAUDI_EVENT_MME2_SBAB_DERR:
7225 case GAUDI_EVENT_MME3_SBAB_DERR:
7226 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7227 params.block_address =
7228 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7229 params.num_memories = 33;
7230 params.derr = true;
7231 params.disable_clock_gating = true;
7232 extract_info_from_fw = false;
7233 break;
7234 default:
7235 return;
7236 }
7237
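	/*
	 * The ECC information either arrives in the FW event payload or is
	 * read by the driver from the block's ECC capture registers.
	 */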
7238 if (extract_info_from_fw) {
7239 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7240 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7241 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7242 } else {
7243 		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7244 &ecc_syndrom, &memory_wrapper_idx);
7245 if (rc)
7246 return;
7247 }
7248
7249 dev_err(hdev->dev,
7250 		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7251 ecc_address, ecc_syndrom, memory_wrapper_idx);
7252 }
7253
7254 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7255 {
7256 u64 glbl_sts_addr, arb_err_addr;
7257 u8 index;
7258 char desc[32];
7259
7260 switch (event_type) {
7261 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7262 index = event_type - GAUDI_EVENT_TPC0_QM;
7263 glbl_sts_addr =
7264 mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
7265 arb_err_addr =
7266 mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
7267 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7268 break;
7269 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7270 index = event_type - GAUDI_EVENT_MME0_QM;
7271 glbl_sts_addr =
7272 mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
7273 arb_err_addr =
7274 mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
7275 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7276 break;
7277 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7278 index = event_type - GAUDI_EVENT_DMA0_QM;
7279 glbl_sts_addr =
7280 mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
7281 arb_err_addr =
7282 mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
7283 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7284 break;
7285 case GAUDI_EVENT_NIC0_QM0:
7286 glbl_sts_addr = mmNIC0_QM0_GLBL_STS1_0;
7287 arb_err_addr = mmNIC0_QM0_ARB_ERR_CAUSE;
7288 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7289 break;
7290 case GAUDI_EVENT_NIC0_QM1:
7291 glbl_sts_addr = mmNIC0_QM1_GLBL_STS1_0;
7292 arb_err_addr = mmNIC0_QM1_ARB_ERR_CAUSE;
7293 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7294 break;
7295 case GAUDI_EVENT_NIC1_QM0:
7296 glbl_sts_addr = mmNIC1_QM0_GLBL_STS1_0;
7297 arb_err_addr = mmNIC1_QM0_ARB_ERR_CAUSE;
7298 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7299 break;
7300 case GAUDI_EVENT_NIC1_QM1:
7301 glbl_sts_addr = mmNIC1_QM1_GLBL_STS1_0;
7302 arb_err_addr = mmNIC1_QM1_ARB_ERR_CAUSE;
7303 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7304 break;
7305 case GAUDI_EVENT_NIC2_QM0:
7306 glbl_sts_addr = mmNIC2_QM0_GLBL_STS1_0;
7307 arb_err_addr = mmNIC2_QM0_ARB_ERR_CAUSE;
7308 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7309 break;
7310 case GAUDI_EVENT_NIC2_QM1:
7311 glbl_sts_addr = mmNIC2_QM1_GLBL_STS1_0;
7312 arb_err_addr = mmNIC2_QM1_ARB_ERR_CAUSE;
7313 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7314 break;
7315 case GAUDI_EVENT_NIC3_QM0:
7316 glbl_sts_addr = mmNIC3_QM0_GLBL_STS1_0;
7317 arb_err_addr = mmNIC3_QM0_ARB_ERR_CAUSE;
7318 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7319 break;
7320 case GAUDI_EVENT_NIC3_QM1:
7321 glbl_sts_addr = mmNIC3_QM1_GLBL_STS1_0;
7322 arb_err_addr = mmNIC3_QM1_ARB_ERR_CAUSE;
7323 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7324 break;
7325 case GAUDI_EVENT_NIC4_QM0:
7326 glbl_sts_addr = mmNIC4_QM0_GLBL_STS1_0;
7327 arb_err_addr = mmNIC4_QM0_ARB_ERR_CAUSE;
7328 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7329 break;
7330 case GAUDI_EVENT_NIC4_QM1:
7331 glbl_sts_addr = mmNIC4_QM1_GLBL_STS1_0;
7332 arb_err_addr = mmNIC4_QM1_ARB_ERR_CAUSE;
7333 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7334 break;
7335 default:
7336 return;
7337 }
7338
7339 gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
7340 }
7341
7342 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7343 bool razwi)
7344 {
7345 char desc[64] = "";
7346
7347 gaudi_get_event_desc(event_type, desc, sizeof(desc));
7348 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7349 event_type, desc);
7350
7351 if (razwi) {
7352 gaudi_print_razwi_info(hdev);
7353 gaudi_print_mmu_error_info(hdev);
7354 }
7355 }
7356
7357 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7358 struct cpucp_pkt_sync_err *sync_err)
7359 {
7360 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7361
7362 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7363 sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7364 }
7365
7366 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7367 {
7368 struct gaudi_device *gaudi = hdev->asic_specific;
7369
7370 /* Unmask all IRQs since some could have been received
7371 * during the soft reset
7372 */
7373 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7374 }
7375
7376 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7377 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7378 {
7379 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7380 int err = 0;
7381
7382 if (hdev->asic_prop.fw_security_status_valid &&
7383 (hdev->asic_prop.fw_app_security_map &
7384 CPU_BOOT_DEV_STS0_HBM_ECC_EN)) {
7385 if (!hbm_ecc_data) {
7386 dev_err(hdev->dev, "No FW ECC data");
7387 return 0;
7388 }
7389
7390 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7391 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7392 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7393 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7394 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7395 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7396 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7397 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7398 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7399 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7400 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7401 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7402 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7403 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7404
7405 dev_err(hdev->dev,
7406 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7407 device, ch, wr_par, rd_par, ca_par, serr, derr);
7408 dev_err(hdev->dev,
7409 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7410 device, ch, hbm_ecc_data->first_addr, type,
7411 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7412 hbm_ecc_data->dec_cnt);
7413
7414 err = 1;
7415
7416 return 0;
7417 }
7418
7419 if (!hdev->asic_prop.fw_security_disabled) {
7420 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7421 return 0;
7422 }
7423
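	/*
	 * Security is disabled - read the HBM MC registers directly. Each
	 * channel's register block is 0x1000 apart and the even/odd
	 * pseudo-channels are reported separately.
	 */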
7424 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7425 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7426 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7427 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7428 if (val) {
7429 err = 1;
7430 dev_err(hdev->dev,
7431 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7432 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7433 (val >> 2) & 0x1, (val >> 3) & 0x1,
7434 (val >> 4) & 0x1);
7435
7436 val2 = RREG32(base + ch * 0x1000 + 0x060);
7437 dev_err(hdev->dev,
7438 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7439 device, ch * 2,
7440 RREG32(base + ch * 0x1000 + 0x064),
7441 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7442 (val2 & 0xFF0000) >> 16,
7443 (val2 & 0xFF000000) >> 24);
7444 }
7445
7446 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7447 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7448 if (val) {
7449 err = 1;
7450 dev_err(hdev->dev,
7451 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7452 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7453 (val >> 2) & 0x1, (val >> 3) & 0x1,
7454 (val >> 4) & 0x1);
7455
7456 val2 = RREG32(base + ch * 0x1000 + 0x070);
7457 dev_err(hdev->dev,
7458 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7459 device, ch * 2 + 1,
7460 RREG32(base + ch * 0x1000 + 0x074),
7461 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7462 (val2 & 0xFF0000) >> 16,
7463 (val2 & 0xFF000000) >> 24);
7464 }
7465
7466 /* Clear interrupts */
7467 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7468 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7469 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7470 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7471 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7472 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7473 }
7474
7475 val = RREG32(base + 0x8F30);
7476 val2 = RREG32(base + 0x8F34);
7477 if (val | val2) {
7478 err = 1;
7479 dev_err(hdev->dev,
7480 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7481 device, val, val2);
7482 }
7483 val = RREG32(base + 0x8F40);
7484 val2 = RREG32(base + 0x8F44);
7485 if (val | val2) {
7486 err = 1;
7487 dev_err(hdev->dev,
7488 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7489 device, val, val2);
7490 }
7491
7492 return err;
7493 }
7494
7495 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7496 {
7497 switch (hbm_event_type) {
7498 case GAUDI_EVENT_HBM0_SPI_0:
7499 case GAUDI_EVENT_HBM0_SPI_1:
7500 return 0;
7501 case GAUDI_EVENT_HBM1_SPI_0:
7502 case GAUDI_EVENT_HBM1_SPI_1:
7503 return 1;
7504 case GAUDI_EVENT_HBM2_SPI_0:
7505 case GAUDI_EVENT_HBM2_SPI_1:
7506 return 2;
7507 case GAUDI_EVENT_HBM3_SPI_0:
7508 case GAUDI_EVENT_HBM3_SPI_1:
7509 return 3;
7510 default:
7511 break;
7512 }
7513
7514 /* Should never happen */
7515 return 0;
7516 }
7517
7518 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7519 char *interrupt_name)
7520 {
7521 struct gaudi_device *gaudi = hdev->asic_specific;
7522 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7523 bool soft_reset_required = false;
7524
7525 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
7526 * gating, and thus cannot be done in CPU-CP and should be done instead
7527 * by the driver.
7528 */
7529
7530 mutex_lock(&gaudi->clk_gate_mutex);
7531
7532 hdev->asic_funcs->disable_clock_gating(hdev);
7533
7534 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7535 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7536
7537 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7538 if (tpc_interrupts_cause & BIT(i)) {
7539 dev_err_ratelimited(hdev->dev,
7540 "TPC%d_%s interrupt cause: %s\n",
7541 tpc_id, interrupt_name,
7542 gaudi_tpc_interrupts_cause[i]);
7543 			/* If this is a QM error, we need to soft-reset */
7544 if (i == 15)
7545 soft_reset_required = true;
7546 }
7547
7548 /* Clear interrupts */
7549 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7550
7551 hdev->asic_funcs->set_clock_gating(hdev);
7552
7553 mutex_unlock(&gaudi->clk_gate_mutex);
7554
7555 return soft_reset_required;
7556 }
7557
7558 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7559 {
7560 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7561 }
7562
7563 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7564 {
7565 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7566 }
7567
7568 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7569 u16 event_type)
7570 {
7571 switch (event_type) {
7572 case GAUDI_EVENT_FIX_POWER_ENV_S:
7573 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7574 dev_info_ratelimited(hdev->dev,
7575 "Clock throttling due to power consumption\n");
7576 break;
7577
7578 case GAUDI_EVENT_FIX_POWER_ENV_E:
7579 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7580 dev_info_ratelimited(hdev->dev,
7581 			"Power envelope is safe, back to optimal clock\n");
7582 break;
7583
7584 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7585 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7586 dev_info_ratelimited(hdev->dev,
7587 "Clock throttling due to overheating\n");
7588 break;
7589
7590 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7591 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7592 dev_info_ratelimited(hdev->dev,
7593 			"Thermal envelope is safe, back to optimal clock\n");
7594 break;
7595
7596 default:
7597 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7598 event_type);
7599 break;
7600 }
7601 }
7602
7603 static void gaudi_handle_eqe(struct hl_device *hdev,
7604 struct hl_eq_entry *eq_entry)
7605 {
7606 struct gaudi_device *gaudi = hdev->asic_specific;
7607 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7608 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7609 >> EQ_CTL_EVENT_TYPE_SHIFT);
7610 u8 cause;
7611 bool reset_required;
7612
7613 gaudi->events_stat[event_type]++;
7614 gaudi->events_stat_aggregate[event_type]++;
7615
7616 switch (event_type) {
7617 case GAUDI_EVENT_PCIE_CORE_DERR:
7618 case GAUDI_EVENT_PCIE_IF_DERR:
7619 case GAUDI_EVENT_PCIE_PHY_DERR:
7620 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7621 case GAUDI_EVENT_MME0_ACC_DERR:
7622 case GAUDI_EVENT_MME0_SBAB_DERR:
7623 case GAUDI_EVENT_MME1_ACC_DERR:
7624 case GAUDI_EVENT_MME1_SBAB_DERR:
7625 case GAUDI_EVENT_MME2_ACC_DERR:
7626 case GAUDI_EVENT_MME2_SBAB_DERR:
7627 case GAUDI_EVENT_MME3_ACC_DERR:
7628 case GAUDI_EVENT_MME3_SBAB_DERR:
7629 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7630 fallthrough;
7631 case GAUDI_EVENT_CPU_IF_ECC_DERR:
7632 case GAUDI_EVENT_PSOC_MEM_DERR:
7633 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7634 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7635 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7636 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7637 case GAUDI_EVENT_MMU_DERR:
7638 gaudi_print_irq_info(hdev, event_type, true);
7639 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7640 goto reset_device;
7641
7642 case GAUDI_EVENT_GIC500:
7643 case GAUDI_EVENT_AXI_ECC:
7644 case GAUDI_EVENT_L2_RAM_ECC:
7645 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7646 gaudi_print_irq_info(hdev, event_type, false);
7647 goto reset_device;
7648
7649 case GAUDI_EVENT_HBM0_SPI_0:
7650 case GAUDI_EVENT_HBM1_SPI_0:
7651 case GAUDI_EVENT_HBM2_SPI_0:
7652 case GAUDI_EVENT_HBM3_SPI_0:
7653 gaudi_print_irq_info(hdev, event_type, false);
7654 gaudi_hbm_read_interrupts(hdev,
7655 gaudi_hbm_event_to_dev(event_type),
7656 &eq_entry->hbm_ecc_data);
7657 goto reset_device;
7658
7659 case GAUDI_EVENT_HBM0_SPI_1:
7660 case GAUDI_EVENT_HBM1_SPI_1:
7661 case GAUDI_EVENT_HBM2_SPI_1:
7662 case GAUDI_EVENT_HBM3_SPI_1:
7663 gaudi_print_irq_info(hdev, event_type, false);
7664 gaudi_hbm_read_interrupts(hdev,
7665 gaudi_hbm_event_to_dev(event_type),
7666 &eq_entry->hbm_ecc_data);
7667 hl_fw_unmask_irq(hdev, event_type);
7668 break;
7669
7670 case GAUDI_EVENT_TPC0_DEC:
7671 case GAUDI_EVENT_TPC1_DEC:
7672 case GAUDI_EVENT_TPC2_DEC:
7673 case GAUDI_EVENT_TPC3_DEC:
7674 case GAUDI_EVENT_TPC4_DEC:
7675 case GAUDI_EVENT_TPC5_DEC:
7676 case GAUDI_EVENT_TPC6_DEC:
7677 case GAUDI_EVENT_TPC7_DEC:
7678 gaudi_print_irq_info(hdev, event_type, true);
7679 reset_required = gaudi_tpc_read_interrupts(hdev,
7680 tpc_dec_event_to_tpc_id(event_type),
7681 "AXI_SLV_DEC_Error");
7682 if (reset_required) {
7683 dev_err(hdev->dev, "hard reset required due to %s\n",
7684 gaudi_irq_map_table[event_type].name);
7685
7686 goto reset_device;
7687 } else {
7688 hl_fw_unmask_irq(hdev, event_type);
7689 }
7690 break;
7691
7692 case GAUDI_EVENT_TPC0_KRN_ERR:
7693 case GAUDI_EVENT_TPC1_KRN_ERR:
7694 case GAUDI_EVENT_TPC2_KRN_ERR:
7695 case GAUDI_EVENT_TPC3_KRN_ERR:
7696 case GAUDI_EVENT_TPC4_KRN_ERR:
7697 case GAUDI_EVENT_TPC5_KRN_ERR:
7698 case GAUDI_EVENT_TPC6_KRN_ERR:
7699 case GAUDI_EVENT_TPC7_KRN_ERR:
7700 gaudi_print_irq_info(hdev, event_type, true);
7701 reset_required = gaudi_tpc_read_interrupts(hdev,
7702 tpc_krn_event_to_tpc_id(event_type),
7703 "KRN_ERR");
7704 if (reset_required) {
7705 dev_err(hdev->dev, "hard reset required due to %s\n",
7706 gaudi_irq_map_table[event_type].name);
7707
7708 goto reset_device;
7709 } else {
7710 hl_fw_unmask_irq(hdev, event_type);
7711 }
7712 break;
7713
7714 case GAUDI_EVENT_PCIE_CORE_SERR:
7715 case GAUDI_EVENT_PCIE_IF_SERR:
7716 case GAUDI_EVENT_PCIE_PHY_SERR:
7717 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7718 case GAUDI_EVENT_MME0_ACC_SERR:
7719 case GAUDI_EVENT_MME0_SBAB_SERR:
7720 case GAUDI_EVENT_MME1_ACC_SERR:
7721 case GAUDI_EVENT_MME1_SBAB_SERR:
7722 case GAUDI_EVENT_MME2_ACC_SERR:
7723 case GAUDI_EVENT_MME2_SBAB_SERR:
7724 case GAUDI_EVENT_MME3_ACC_SERR:
7725 case GAUDI_EVENT_MME3_SBAB_SERR:
7726 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7727 case GAUDI_EVENT_CPU_IF_ECC_SERR:
7728 case GAUDI_EVENT_PSOC_MEM_SERR:
7729 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7730 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7731 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7732 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7733 fallthrough;
7734 case GAUDI_EVENT_MMU_SERR:
7735 gaudi_print_irq_info(hdev, event_type, true);
7736 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7737 hl_fw_unmask_irq(hdev, event_type);
7738 break;
7739
7740 case GAUDI_EVENT_PCIE_DEC:
7741 case GAUDI_EVENT_MME0_WBC_RSP:
7742 case GAUDI_EVENT_MME0_SBAB0_RSP:
7743 case GAUDI_EVENT_MME1_WBC_RSP:
7744 case GAUDI_EVENT_MME1_SBAB0_RSP:
7745 case GAUDI_EVENT_MME2_WBC_RSP:
7746 case GAUDI_EVENT_MME2_SBAB0_RSP:
7747 case GAUDI_EVENT_MME3_WBC_RSP:
7748 case GAUDI_EVENT_MME3_SBAB0_RSP:
7749 case GAUDI_EVENT_CPU_AXI_SPLITTER:
7750 case GAUDI_EVENT_PSOC_AXI_DEC:
7751 case GAUDI_EVENT_PSOC_PRSTN_FALL:
7752 case GAUDI_EVENT_MMU_PAGE_FAULT:
7753 case GAUDI_EVENT_MMU_WR_PERM:
7754 case GAUDI_EVENT_RAZWI_OR_ADC:
7755 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7756 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7757 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7758 fallthrough;
7759 case GAUDI_EVENT_NIC0_QM0:
7760 case GAUDI_EVENT_NIC0_QM1:
7761 case GAUDI_EVENT_NIC1_QM0:
7762 case GAUDI_EVENT_NIC1_QM1:
7763 case GAUDI_EVENT_NIC2_QM0:
7764 case GAUDI_EVENT_NIC2_QM1:
7765 case GAUDI_EVENT_NIC3_QM0:
7766 case GAUDI_EVENT_NIC3_QM1:
7767 case GAUDI_EVENT_NIC4_QM0:
7768 case GAUDI_EVENT_NIC4_QM1:
7769 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7770 gaudi_print_irq_info(hdev, event_type, true);
7771 gaudi_handle_qman_err(hdev, event_type);
7772 hl_fw_unmask_irq(hdev, event_type);
7773 break;
7774
7775 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7776 gaudi_print_irq_info(hdev, event_type, true);
7777 goto reset_device;
7778
7779 case GAUDI_EVENT_TPC0_BMON_SPMU:
7780 case GAUDI_EVENT_TPC1_BMON_SPMU:
7781 case GAUDI_EVENT_TPC2_BMON_SPMU:
7782 case GAUDI_EVENT_TPC3_BMON_SPMU:
7783 case GAUDI_EVENT_TPC4_BMON_SPMU:
7784 case GAUDI_EVENT_TPC5_BMON_SPMU:
7785 case GAUDI_EVENT_TPC6_BMON_SPMU:
7786 case GAUDI_EVENT_TPC7_BMON_SPMU:
7787 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7788 gaudi_print_irq_info(hdev, event_type, false);
7789 hl_fw_unmask_irq(hdev, event_type);
7790 break;
7791
7792 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7793 gaudi_print_irq_info(hdev, event_type, false);
7794 gaudi_print_sm_sei_info(hdev, event_type,
7795 &eq_entry->sm_sei_data);
7796 hl_fw_unmask_irq(hdev, event_type);
7797 break;
7798
7799 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7800 gaudi_print_clk_change_info(hdev, event_type);
7801 hl_fw_unmask_irq(hdev, event_type);
7802 break;
7803
7804 case GAUDI_EVENT_PSOC_GPIO_U16_0:
7805 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7806 dev_err(hdev->dev,
7807 "Received high temp H/W interrupt %d (cause %d)\n",
7808 event_type, cause);
7809 break;
7810
7811 case GAUDI_EVENT_DEV_RESET_REQ:
7812 gaudi_print_irq_info(hdev, event_type, false);
7813 goto reset_device;
7814
7815 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7816 gaudi_print_irq_info(hdev, event_type, false);
7817 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7818 goto reset_device;
7819
7820 default:
7821 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7822 event_type);
7823 break;
7824 }
7825
7826 return;
7827
7828 reset_device:
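	/*
	 * Hard-reset the device only if FW events are configured to trigger a
	 * reset; otherwise just re-arm (unmask) the interrupt in the FW.
	 */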
7829 if (hdev->hard_reset_on_fw_events)
7830 hl_device_reset(hdev, HL_RESET_HARD);
7831 else
7832 hl_fw_unmask_irq(hdev, event_type);
7833 }
7834
7835 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
7836 u32 *size)
7837 {
7838 struct gaudi_device *gaudi = hdev->asic_specific;
7839
7840 if (aggregate) {
7841 *size = (u32) sizeof(gaudi->events_stat_aggregate);
7842 return gaudi->events_stat_aggregate;
7843 }
7844
7845 *size = (u32) sizeof(gaudi->events_stat);
7846 return gaudi->events_stat;
7847 }
7848
7849 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
7850 u32 flags)
7851 {
7852 struct gaudi_device *gaudi = hdev->asic_specific;
7853 u32 status, timeout_usec;
7854 int rc;
7855
7856 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7857 hdev->hard_reset_pending)
7858 return 0;
7859
7860 if (hdev->pldm)
7861 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7862 else
7863 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7864
7865 /* L0 & L1 invalidation */
7866 WREG32(mmSTLB_INV_PS, 3);
7867 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7868 WREG32(mmSTLB_INV_PS, 2);
7869
7870 rc = hl_poll_timeout(
7871 hdev,
7872 mmSTLB_INV_PS,
7873 status,
7874 !status,
7875 1000,
7876 timeout_usec);
7877
7878 WREG32(mmSTLB_INV_SET, 0);
7879
7880 if (rc) {
7881 dev_err_ratelimited(hdev->dev,
7882 "MMU cache invalidation timeout\n");
7883 hl_device_reset(hdev, HL_RESET_HARD);
7884 }
7885
7886 return rc;
7887 }
7888
7889 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7890 bool is_hard, u32 asid, u64 va, u64 size)
7891 {
7892 struct gaudi_device *gaudi = hdev->asic_specific;
7893 u32 status, timeout_usec;
7894 u32 inv_data;
7895 u32 pi;
7896 int rc;
7897
7898 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7899 hdev->hard_reset_pending)
7900 return 0;
7901
7902 if (hdev->pldm)
7903 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7904 else
7905 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7906
7907 /*
7908 * TODO: currently invalidate entire L0 & L1 as in regular hard
7909 * invalidation. Need to apply invalidation of specific cache
7910 * lines with mask of ASID & VA & size.
7911 	 * Note that L1 will be flushed entirely in any case.
7912 */
7913
7914 /* L0 & L1 invalidation */
7915 inv_data = RREG32(mmSTLB_CACHE_INV);
7916 /* PI is 8 bit */
7917 pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
7918 WREG32(mmSTLB_CACHE_INV,
7919 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
7920
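	/* Wait for the MMU to consume the new producer index */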
7921 rc = hl_poll_timeout(
7922 hdev,
7923 mmSTLB_INV_CONSUMER_INDEX,
7924 status,
7925 status == pi,
7926 1000,
7927 timeout_usec);
7928
7929 if (rc) {
7930 dev_err_ratelimited(hdev->dev,
7931 "MMU cache invalidation timeout\n");
7932 hl_device_reset(hdev, HL_RESET_HARD);
7933 }
7934
7935 return rc;
7936 }
7937
7938 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
7939 u32 asid, u64 phys_addr)
7940 {
7941 u32 status, timeout_usec;
7942 int rc;
7943
7944 if (hdev->pldm)
7945 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7946 else
7947 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7948
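	/*
	 * Program the hop0 page-table base for this ASID, kick the MMU by
	 * setting the busy bit and poll until the MMU clears it.
	 */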
7949 WREG32(MMU_ASID, asid);
7950 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7951 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
7952 WREG32(MMU_BUSY, 0x80000000);
7953
7954 rc = hl_poll_timeout(
7955 hdev,
7956 MMU_BUSY,
7957 status,
7958 !(status & 0x80000000),
7959 1000,
7960 timeout_usec);
7961
7962 if (rc) {
7963 dev_err(hdev->dev,
7964 "Timeout during MMU hop0 config of asid %d\n", asid);
7965 return rc;
7966 }
7967
7968 return 0;
7969 }
7970
7971 static int gaudi_send_heartbeat(struct hl_device *hdev)
7972 {
7973 struct gaudi_device *gaudi = hdev->asic_specific;
7974
7975 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7976 return 0;
7977
7978 return hl_fw_send_heartbeat(hdev);
7979 }
7980
7981 static int gaudi_cpucp_info_get(struct hl_device *hdev)
7982 {
7983 struct gaudi_device *gaudi = hdev->asic_specific;
7984 struct asic_fixed_properties *prop = &hdev->asic_prop;
7985 int rc;
7986
7987 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7988 return 0;
7989
7990 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
7991 if (rc)
7992 return rc;
7993
7994 if (!strlen(prop->cpucp_info.card_name))
7995 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
7996 CARD_NAME_MAX_LEN);
7997
7998 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
7999
8000 set_default_power_values(hdev);
8001
8002 hdev->max_power = prop->max_power_default;
8003
8004 return 0;
8005 }
8006
8007 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8008 u8 mask_len, struct seq_file *s)
8009 {
8010 struct gaudi_device *gaudi = hdev->asic_specific;
8011 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8012 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8013 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8014 unsigned long *mask = (unsigned long *)mask_arr;
8015 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8016 bool is_idle = true, is_eng_idle, is_slave;
8017 u64 offset;
8018 int i, dma_id, port;
8019
8020 mutex_lock(&gaudi->clk_gate_mutex);
8021
8022 hdev->asic_funcs->disable_clock_gating(hdev);
8023
8024 if (s)
8025 seq_puts(s,
8026 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
8027 "--- ------- ------------ ---------- -------------\n");
8028
8029 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8030 dma_id = gaudi_dma_assignment[i];
8031 offset = dma_id * DMA_QMAN_OFFSET;
8032
8033 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8034 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8035 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8036 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8037 IS_DMA_IDLE(dma_core_sts0);
8038 is_idle &= is_eng_idle;
8039
8040 if (mask && !is_eng_idle)
8041 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8042 if (s)
8043 seq_printf(s, fmt, dma_id,
8044 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8045 qm_cgm_sts, dma_core_sts0);
8046 }
8047
8048 if (s)
8049 seq_puts(s,
8050 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
8051 "--- ------- ------------ ---------- ----------\n");
8052
8053 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8054 offset = i * TPC_QMAN_OFFSET;
8055 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8056 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8057 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8058 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8059 IS_TPC_IDLE(tpc_cfg_sts);
8060 is_idle &= is_eng_idle;
8061
8062 if (mask && !is_eng_idle)
8063 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8064 if (s)
8065 seq_printf(s, fmt, i,
8066 is_eng_idle ? "Y" : "N",
8067 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8068 }
8069
8070 if (s)
8071 seq_puts(s,
8072 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
8073 "--- ------- ------------ ---------- -----------\n");
8074
8075 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8076 offset = i * MME_QMAN_OFFSET;
8077 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8078 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8079
8080 /* MME 1 & 3 are slaves, no need to check their QMANs */
8081 is_slave = i % 2;
8082 if (!is_slave) {
8083 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8084 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8085 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8086 }
8087
8088 is_idle &= is_eng_idle;
8089
8090 if (mask && !is_eng_idle)
8091 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8092 if (s) {
8093 if (!is_slave)
8094 seq_printf(s, fmt, i,
8095 is_eng_idle ? "Y" : "N",
8096 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8097 else
8098 seq_printf(s, mme_slave_fmt, i,
8099 is_eng_idle ? "Y" : "N", "-",
8100 "-", mme_arch_sts);
8101 }
8102 }
8103
8104 if (s)
8105 seq_puts(s, "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
8106 "--- ------- ------------ ----------\n");
8107
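	/* Each NIC macro hosts two QMANs, one per port - check both if enabled */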
8108 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8109 offset = i * NIC_MACRO_QMAN_OFFSET;
8110 port = 2 * i;
8111 if (hdev->nic_ports_mask & BIT(port)) {
8112 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8113 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8114 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8115 is_idle &= is_eng_idle;
8116
8117 if (mask && !is_eng_idle)
8118 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8119 if (s)
8120 seq_printf(s, nic_fmt, port,
8121 is_eng_idle ? "Y" : "N",
8122 qm_glbl_sts0, qm_cgm_sts);
8123 }
8124
8125 port = 2 * i + 1;
8126 if (hdev->nic_ports_mask & BIT(port)) {
8127 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8128 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8129 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8130 is_idle &= is_eng_idle;
8131
8132 if (mask && !is_eng_idle)
8133 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8134 if (s)
8135 seq_printf(s, nic_fmt, port,
8136 is_eng_idle ? "Y" : "N",
8137 qm_glbl_sts0, qm_cgm_sts);
8138 }
8139 }
8140
8141 if (s)
8142 seq_puts(s, "\n");
8143
8144 hdev->asic_funcs->set_clock_gating(hdev);
8145
8146 mutex_unlock(&gaudi->clk_gate_mutex);
8147
8148 return is_idle;
8149 }
8150
8151 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8152 __acquires(&gaudi->hw_queues_lock)
8153 {
8154 struct gaudi_device *gaudi = hdev->asic_specific;
8155
8156 spin_lock(&gaudi->hw_queues_lock);
8157 }
8158
8159 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8160 __releases(&gaudi->hw_queues_lock)
8161 {
8162 struct gaudi_device *gaudi = hdev->asic_specific;
8163
8164 spin_unlock(&gaudi->hw_queues_lock);
8165 }
8166
8167 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8168 {
8169 return hdev->pdev->device;
8170 }
8171
8172 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8173 size_t max_size)
8174 {
8175 struct gaudi_device *gaudi = hdev->asic_specific;
8176
8177 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8178 return 0;
8179
8180 return hl_fw_get_eeprom_data(hdev, data, max_size);
8181 }
8182
8183 /*
8184 * this function should be used only during initialization and/or after reset,
8185 * when there are no active users.
8186 */
8187 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8188 u32 tpc_id)
8189 {
8190 struct gaudi_device *gaudi = hdev->asic_specific;
8191 u64 kernel_timeout;
8192 u32 status, offset;
8193 int rc;
8194
8195 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8196
8197 if (hdev->pldm)
8198 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8199 else
8200 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8201
8202 mutex_lock(&gaudi->clk_gate_mutex);
8203
8204 hdev->asic_funcs->disable_clock_gating(hdev);
8205
8206 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8207 lower_32_bits(tpc_kernel));
8208 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8209 upper_32_bits(tpc_kernel));
8210
8211 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8212 lower_32_bits(tpc_kernel));
8213 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8214 upper_32_bits(tpc_kernel));
8215 /* set a valid LUT pointer, content is of no significance */
8216 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8217 lower_32_bits(tpc_kernel));
8218 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8219 upper_32_bits(tpc_kernel));
8220
8221 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8222 lower_32_bits(CFG_BASE +
8223 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8224
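	/* Invalidate the TPC i-cache and prefetch the first 64KB of the kernel */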
8225 WREG32(mmTPC0_CFG_TPC_CMD + offset,
8226 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8227 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8228 /* wait a bit for the engine to start executing */
8229 usleep_range(1000, 1500);
8230
8231 /* wait until engine has finished executing */
8232 rc = hl_poll_timeout(
8233 hdev,
8234 mmTPC0_CFG_STATUS + offset,
8235 status,
8236 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8237 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8238 1000,
8239 kernel_timeout);
8240
8241 if (rc) {
8242 dev_err(hdev->dev,
8243 "Timeout while waiting for TPC%d icache prefetch\n",
8244 tpc_id);
8245 hdev->asic_funcs->set_clock_gating(hdev);
8246 mutex_unlock(&gaudi->clk_gate_mutex);
8247 return -EIO;
8248 }
8249
8250 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8251 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8252
8253 /* wait a bit for the engine to start executing */
8254 usleep_range(1000, 1500);
8255
8256 /* wait until engine has finished executing */
8257 rc = hl_poll_timeout(
8258 hdev,
8259 mmTPC0_CFG_STATUS + offset,
8260 status,
8261 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8262 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8263 1000,
8264 kernel_timeout);
8265
8266 if (rc) {
8267 dev_err(hdev->dev,
8268 "Timeout while waiting for TPC%d vector pipe\n",
8269 tpc_id);
8270 hdev->asic_funcs->set_clock_gating(hdev);
8271 mutex_unlock(&gaudi->clk_gate_mutex);
8272 return -EIO;
8273 }
8274
8275 rc = hl_poll_timeout(
8276 hdev,
8277 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8278 status,
8279 (status == 0),
8280 1000,
8281 kernel_timeout);
8282
8283 hdev->asic_funcs->set_clock_gating(hdev);
8284 mutex_unlock(&gaudi->clk_gate_mutex);
8285
8286 if (rc) {
8287 dev_err(hdev->dev,
8288 "Timeout while waiting for TPC%d kernel to execute\n",
8289 tpc_id);
8290 return -EIO;
8291 }
8292
8293 return 0;
8294 }
8295
8296 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8297 struct hl_ctx *ctx)
8298 {
8299 struct gaudi_device *gaudi = hdev->asic_specific;
8300 int min_alloc_order, rc, collective_cb_size;
8301
8302 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8303 return 0;
8304
8305 hdev->internal_cb_pool_virt_addr =
8306 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8307 HOST_SPACE_INTERNAL_CB_SZ,
8308 &hdev->internal_cb_pool_dma_addr,
8309 GFP_KERNEL | __GFP_ZERO);
8310
8311 if (!hdev->internal_cb_pool_virt_addr)
8312 return -ENOMEM;
8313
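	/*
	 * Each pool allocation must be able to hold a full collective CB:
	 * five MSG_SHORT packets plus a FENCE packet.
	 */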
8314 collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8315 sizeof(struct packet_fence);
8316 min_alloc_order = ilog2(collective_cb_size);
8317
8318 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8319 if (!hdev->internal_cb_pool) {
8320 dev_err(hdev->dev,
8321 "Failed to create internal CB pool\n");
8322 rc = -ENOMEM;
8323 goto free_internal_cb_pool;
8324 }
8325
8326 rc = gen_pool_add(hdev->internal_cb_pool,
8327 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8328 HOST_SPACE_INTERNAL_CB_SZ, -1);
8329 if (rc) {
8330 dev_err(hdev->dev,
8331 "Failed to add memory to internal CB pool\n");
8332 rc = -EFAULT;
8333 goto destroy_internal_cb_pool;
8334 }
8335
8336 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8337 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8338 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8339
8340 if (!hdev->internal_cb_va_base)
8341 goto destroy_internal_cb_pool;
8342
8343 mutex_lock(&ctx->mmu_lock);
8344 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8345 hdev->internal_cb_pool_dma_addr,
8346 HOST_SPACE_INTERNAL_CB_SZ);
8347
8348 hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
8349 mutex_unlock(&ctx->mmu_lock);
8350
8351 if (rc)
8352 goto unreserve_internal_cb_pool;
8353
8354 return 0;
8355
8356 unreserve_internal_cb_pool:
8357 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8358 HOST_SPACE_INTERNAL_CB_SZ);
8359 destroy_internal_cb_pool:
8360 gen_pool_destroy(hdev->internal_cb_pool);
8361 free_internal_cb_pool:
8362 hdev->asic_funcs->asic_dma_free_coherent(hdev,
8363 HOST_SPACE_INTERNAL_CB_SZ,
8364 hdev->internal_cb_pool_virt_addr,
8365 hdev->internal_cb_pool_dma_addr);
8366
8367 return rc;
8368 }
8369
8370 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8371 struct hl_ctx *ctx)
8372 {
8373 struct gaudi_device *gaudi = hdev->asic_specific;
8374
8375 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8376 return;
8377
8378 mutex_lock(&ctx->mmu_lock);
8379 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8380 HOST_SPACE_INTERNAL_CB_SZ);
8381 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8382 HOST_SPACE_INTERNAL_CB_SZ);
8383 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
8384 mutex_unlock(&ctx->mmu_lock);
8385
8386 gen_pool_destroy(hdev->internal_cb_pool);
8387
8388 hdev->asic_funcs->asic_dma_free_coherent(hdev,
8389 HOST_SPACE_INTERNAL_CB_SZ,
8390 hdev->internal_cb_pool_virt_addr,
8391 hdev->internal_cb_pool_dma_addr);
8392 }
8393
8394 static int gaudi_ctx_init(struct hl_ctx *ctx)
8395 {
8396 if (ctx->asid == HL_KERNEL_ASID_ID)
8397 return 0;
8398
8399 gaudi_mmu_prepare(ctx->hdev, ctx->asid);
8400 return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8401 }
8402
8403 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8404 {
8405 if (ctx->asid == HL_KERNEL_ASID_ID)
8406 return;
8407
8408 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8409 }
8410
gaudi_get_queue_id_for_cq(struct hl_device * hdev,u32 cq_idx)8411 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8412 {
8413 return gaudi_cq_assignment[cq_idx];
8414 }
8415
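/*
 * Sizes of the driver-generated signal/wait CBs, matching the packets emitted
 * by gaudi_gen_signal_cb()/gaudi_gen_wait_cb() below plus two MSG_PROT
 * packets (presumably appended by the common code for completion signaling).
 */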
static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) +
			sizeof(struct packet_msg_prot) * 2;
}

static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) * 4 +
			sizeof(struct packet_fence) +
			sizeof(struct packet_msg_prot) * 2;
}

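/*
 * Build a signal CB at offset 'size' inside the given CB: a single MSG_SHORT
 * that adds 1 to sync object 'sob_id' (W_S SOB base, ADD mode). Returns the
 * new used size of the CB.
 */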
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = cb->kernel_address + size;
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}

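/*
 * Emit one MSG_SHORT that writes 'value' to the monitor register at offset
 * 'addr' from the W_S monitor base. Returns the packet size.
 */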
static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
				u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

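/*
 * Arm monitor 'mon_id': one MSG_SHORT to the monitor's ARM register binding
 * it to the sync object group of 'sob_base' (group id = sob_base / 8) with
 * the given mask, triggering when the masked SOBs reach at least 'sob_val'
 * (mode 0 = GREATER OR EQUAL). Returns 0 if the SOB mask is invalid.
 */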
static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
		u16 sob_val, u16 mon_id)
{
	u64 monitor_base;
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u16 msg_addr_offset;
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev,
			"sob_base %u (mask %#x) is not valid\n",
			sob_base, sob_mask);
		return 0;
	}

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;

	memset(pkt, 0, pkt_size);

	/* Monitor config packet: bind the monitor to a sync object */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL */
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

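/*
 * Emit a FENCE packet on fence counter ID 2 (target value 1, decrement by 1).
 * The payload written by the armed monitor (see gaudi_add_mon_pkts()) targets
 * the corresponding CP_FENCE2_RDATA register, releasing this fence.
 */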
static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

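/*
 * Translate a queue ID to the CFG-space address of its CP_FENCE2_RDATA
 * register, i.e. the address the wait monitor's payload is pointed at. For
 * NIC queues, each NIC macro hosts two QMANs, so (as the offset names
 * suggest) nic_index >> 1 selects the macro and nic_index & 1 the engine
 * within it; e.g. GAUDI_QUEUE_ID_NIC_3_1 gives nic_index = 3, i.e. macro 1,
 * engine 1.
 */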
static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}

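/*
 * Program monitor 'mon_id' payload: three MSG_SHORT packets writing the low
 * and high halves of 'fence_addr' to MON_PAY_ADDRL/ADDRH and the value 1 to
 * MON_PAY_DATA. Returns the total size of the emitted packets.
 */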
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}

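/*
 * Build a wait CB at offset prop->size: three monitor-payload MSG_SHORTs, one
 * ARM MSG_SHORT and one FENCE packet, i.e. the four MSG_SHORT + FENCE
 * accounted for in gaudi_get_wait_cb_size(). Returns the new used size, or 0
 * on an invalid queue ID.
 */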
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
		struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}

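/*
 * Reset a sync object back to 0 by scheduling a register memset on the SOB's
 * queue, then re-initialize its refcount.
 */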
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
	int rc;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
		hw_sob->sob_id);

	rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
			CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 1, 0);
	if (rc)
		dev_err(hdev->dev, "failed resetting sob %u\n", hw_sob->sob_id);

	kref_init(&hw_sob->kref);
}

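/*
 * The FW reports, via a magic value in a sticky (non-reset) scratchpad
 * register, whether the host is a POWER9 machine that supports full 64-bit
 * DMA addressing; otherwise the driver restricts itself to a 48-bit DMA mask.
 */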
static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
{
	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
					HL_POWER9_HOST_MAGIC) {
		hdev->power9_64bit_dma_enable = 1;
		hdev->dma_mask = 64;
	} else {
		hdev->power9_64bit_dma_enable = 0;
		hdev->dma_mask = 48;
	}
}

static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

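/* HW-block ID lookup and mmap are not supported on Gaudi, hence -EPERM. */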
static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
			u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}

static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
}

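/*
 * ASIC function table exposed to the common habanalabs core via
 * gaudi_set_asic_funcs().
 */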
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.cb_mmap = gaudi_cb_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sg = gaudi_dma_map_sg,
	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read32 = gaudi_debugfs_read32,
	.debugfs_write32 = gaudi_debugfs_write32,
	.debugfs_read64 = gaudi_debugfs_read64,
	.debugfs_write64 = gaudi_debugfs_write64,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.set_pll_profile = gaudi_set_pll_profile,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.send_heartbeat = gaudi_send_heartbeat,
	.set_clock_gating = gaudi_set_clock_gating,
	.disable_clock_gating = gaudi_disable_clock_gating,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.soft_reset_late_init = gaudi_soft_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.get_clk_rate = gaudi_get_clk_rate,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.read_device_fw_version = gaudi_read_device_fw_version,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
	.get_device_time = gaudi_get_device_time,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}
