1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2020 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15 
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22 
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse the CB, but WREG should be
43  *                      allowed because of TDMA (tensor DMA). Hence, WREG is
44  *                      never secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61 
62 #define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
65 
66 #define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
67 
68 #define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
72 
73 #define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
81 
82 #define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9
83 
84 #define GAUDI_MAX_STRING_LEN		20
85 
86 #define GAUDI_CB_POOL_CB_CNT		512
87 #define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */
88 
89 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3
90 
91 #define GAUDI_NUM_OF_TPC_INTR_CAUSE	20
92 
93 #define GAUDI_NUM_OF_QM_ERR_CAUSE	16
94 
95 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3
96 
97 #define GAUDI_ARB_WDT_TIMEOUT		0x1000000
98 
99 #define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
100 		BIT(GAUDI_ENGINE_ID_MME_0) |\
101 		BIT(GAUDI_ENGINE_ID_MME_2) |\
102 		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
103 
104 #define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */
105 
106 #define GAUDI_PLL_MAX 10
107 
108 /*
109  * This enum is kept here for compatibility with old FW (in which each ASIC
110  * has a unique PLL numbering)
111  */
112 enum gaudi_pll_index {
113 	GAUDI_CPU_PLL = 0,
114 	GAUDI_PCI_PLL,
115 	GAUDI_SRAM_PLL,
116 	GAUDI_HBM_PLL,
117 	GAUDI_NIC_PLL,
118 	GAUDI_DMA_PLL,
119 	GAUDI_MESH_PLL,
120 	GAUDI_MME_PLL,
121 	GAUDI_TPC_PLL,
122 	GAUDI_IF_PLL,
123 };
124 
125 static enum pll_index gaudi_pll_map[PLL_MAX] = {
126 	[CPU_PLL] = GAUDI_CPU_PLL,
127 	[PCI_PLL] = GAUDI_PCI_PLL,
128 	[SRAM_PLL] = GAUDI_SRAM_PLL,
129 	[HBM_PLL] = GAUDI_HBM_PLL,
130 	[NIC_PLL] = GAUDI_NIC_PLL,
131 	[DMA_PLL] = GAUDI_DMA_PLL,
132 	[MESH_PLL] = GAUDI_MESH_PLL,
133 	[MME_PLL] = GAUDI_MME_PLL,
134 	[TPC_PLL] = GAUDI_TPC_PLL,
135 	[IF_PLL] = GAUDI_IF_PLL,
136 };
137 
138 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
139 		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
140 		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
141 		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
142 		"gaudi cpu eq"
143 };
144 
145 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
146 	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
147 	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
148 	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
149 	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
150 	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
151 	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
152 	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
153 	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
154 };
155 
156 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
157 	[0] = GAUDI_QUEUE_ID_DMA_0_0,
158 	[1] = GAUDI_QUEUE_ID_DMA_0_1,
159 	[2] = GAUDI_QUEUE_ID_DMA_0_2,
160 	[3] = GAUDI_QUEUE_ID_DMA_0_3,
161 	[4] = GAUDI_QUEUE_ID_DMA_1_0,
162 	[5] = GAUDI_QUEUE_ID_DMA_1_1,
163 	[6] = GAUDI_QUEUE_ID_DMA_1_2,
164 	[7] = GAUDI_QUEUE_ID_DMA_1_3,
165 };
166 
167 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
168 	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
169 	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
170 	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
171 	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
172 	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
173 	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
174 	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
175 	[PACKET_FENCE]		= sizeof(struct packet_fence),
176 	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
177 	[PACKET_NOP]		= sizeof(struct packet_nop),
178 	[PACKET_STOP]		= sizeof(struct packet_stop),
179 	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
180 	[PACKET_WAIT]		= sizeof(struct packet_wait),
181 	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
182 };
183 
184 static inline bool validate_packet_id(enum packet_id id)
185 {
186 	switch (id) {
187 	case PACKET_WREG_32:
188 	case PACKET_WREG_BULK:
189 	case PACKET_MSG_LONG:
190 	case PACKET_MSG_SHORT:
191 	case PACKET_CP_DMA:
192 	case PACKET_REPEAT:
193 	case PACKET_MSG_PROT:
194 	case PACKET_FENCE:
195 	case PACKET_LIN_DMA:
196 	case PACKET_NOP:
197 	case PACKET_STOP:
198 	case PACKET_ARB_POINT:
199 	case PACKET_WAIT:
200 	case PACKET_LOAD_AND_EXE:
201 		return true;
202 	default:
203 		return false;
204 	}
205 }
206 
207 static const char * const
208 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
209 	"tpc_address_exceed_slm",
210 	"tpc_div_by_0",
211 	"tpc_spu_mac_overflow",
212 	"tpc_spu_addsub_overflow",
213 	"tpc_spu_abs_overflow",
214 	"tpc_spu_fp_dst_nan_inf",
215 	"tpc_spu_fp_dst_denorm",
216 	"tpc_vpu_mac_overflow",
217 	"tpc_vpu_addsub_overflow",
218 	"tpc_vpu_abs_overflow",
219 	"tpc_vpu_fp_dst_nan_inf",
220 	"tpc_vpu_fp_dst_denorm",
221 	"tpc_assertions",
222 	"tpc_illegal_instruction",
223 	"tpc_pc_wrap_around",
224 	"tpc_qm_sw_err",
225 	"tpc_hbw_rresp_err",
226 	"tpc_hbw_bresp_err",
227 	"tpc_lbw_rresp_err",
228 	"tpc_lbw_bresp_err"
229 };
230 
231 static const char * const
232 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
233 	"PQ AXI HBW error",
234 	"CQ AXI HBW error",
235 	"CP AXI HBW error",
236 	"CP error due to undefined OPCODE",
237 	"CP encountered STOP OPCODE",
238 	"CP AXI LBW error",
239 	"CP WRREG32 or WRBULK returned error",
240 	"N/A",
241 	"FENCE 0 inc over max value and clipped",
242 	"FENCE 1 inc over max value and clipped",
243 	"FENCE 2 inc over max value and clipped",
244 	"FENCE 3 inc over max value and clipped",
245 	"FENCE 0 dec under min value and clipped",
246 	"FENCE 1 dec under min value and clipped",
247 	"FENCE 2 dec under min value and clipped",
248 	"FENCE 3 dec under min value and clipped"
249 };
250 
251 static const char * const
252 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
253 	"Choice push while full error",
254 	"Choice Q watchdog error",
255 	"MSG AXI LBW returned with error"
256 };
257 
258 enum gaudi_sm_sei_cause {
259 	GAUDI_SM_SEI_SO_OVERFLOW,
260 	GAUDI_SM_SEI_LBW_4B_UNALIGNED,
261 	GAUDI_SM_SEI_AXI_RESPONSE_ERR
262 };
263 
264 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
265 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
266 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
267 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
268 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
269 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
270 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
271 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
272 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
273 	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
274 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
275 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
276 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
277 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
278 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
279 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
280 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
281 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
282 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
283 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
284 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
285 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
286 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
287 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
288 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
289 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
290 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
291 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
292 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
293 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
294 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
295 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
296 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
297 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
298 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
299 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
300 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
301 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
302 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
303 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
304 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
305 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
306 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
307 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
308 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
309 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
310 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
311 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
312 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
313 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
314 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
315 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
316 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
317 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
318 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
319 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
320 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
321 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
322 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
323 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
324 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
325 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
326 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
327 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
328 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
329 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
330 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
331 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
332 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
333 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
334 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
335 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
336 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
337 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
338 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
339 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
340 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
341 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
342 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
343 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
344 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
345 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
346 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
347 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
348 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
349 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
350 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
351 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
352 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
353 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
354 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
355 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
356 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
357 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
358 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
359 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
360 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
361 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
362 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
363 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
364 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
365 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
366 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
367 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
368 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
369 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
370 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
371 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
372 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
373 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
374 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
375 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
376 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
377 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
378 };
379 
380 struct ecc_info_extract_params {
381 	u64 block_address;
382 	u32 num_memories;
383 	bool derr;
384 	bool disable_clock_gating;
385 };
386 
387 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
388 								u64 phys_addr);
389 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
390 					struct hl_cs_job *job);
391 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
392 					u32 size, u64 val);
393 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
394 					u32 num_regs, u32 val);
395 static int gaudi_schedule_register_memset(struct hl_device *hdev,
396 		u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
397 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
398 				u32 tpc_id);
399 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
400 static int gaudi_cpucp_info_get(struct hl_device *hdev);
401 static void gaudi_disable_clock_gating(struct hl_device *hdev);
402 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
403 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
404 				u32 size, bool eb);
405 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
406 				struct hl_gen_wait_properties *prop);
407 
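/*
 * External queues act as collective masters, while the DMA5, TPC7 and NIC
 * queues act as collective slaves. All other queues do not take part in
 * collective operations.
 */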
408 static inline enum hl_collective_mode
409 get_collective_mode(struct hl_device *hdev, u32 queue_id)
410 {
411 	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
412 		return HL_COLLECTIVE_MASTER;
413 
414 	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
415 			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
416 		return HL_COLLECTIVE_SLAVE;
417 
418 	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
419 			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
420 		return HL_COLLECTIVE_SLAVE;
421 
422 	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
423 			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
424 		return HL_COLLECTIVE_SLAVE;
425 
426 	return HL_COLLECTIVE_NOT_SUPPORTED;
427 }
428 
429 static inline void set_default_power_values(struct hl_device *hdev)
430 {
431 	struct asic_fixed_properties *prop = &hdev->asic_prop;
432 
433 	if (hdev->card_type == cpucp_card_type_pmc) {
434 		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
435 		prop->dc_power_default = DC_POWER_DEFAULT_PMC;
436 	} else {
437 		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
438 		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
439 	}
440 }
441 
442 static int gaudi_get_fixed_properties(struct hl_device *hdev)
443 {
444 	struct asic_fixed_properties *prop = &hdev->asic_prop;
445 	u32 num_sync_stream_queues = 0;
446 	int i;
447 
448 	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
449 	prop->hw_queues_props = kcalloc(prop->max_queues,
450 			sizeof(struct hw_queue_properties),
451 			GFP_KERNEL);
452 
453 	if (!prop->hw_queues_props)
454 		return -ENOMEM;
455 
456 	for (i = 0 ; i < prop->max_queues ; i++) {
457 		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
458 			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
459 			prop->hw_queues_props[i].driver_only = 0;
460 			prop->hw_queues_props[i].supports_sync_stream = 1;
461 			prop->hw_queues_props[i].cb_alloc_flags =
462 				CB_ALLOC_KERNEL;
463 			num_sync_stream_queues++;
464 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
465 			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
466 			prop->hw_queues_props[i].driver_only = 1;
467 			prop->hw_queues_props[i].supports_sync_stream = 0;
468 			prop->hw_queues_props[i].cb_alloc_flags =
469 				CB_ALLOC_KERNEL;
470 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
471 			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
472 			prop->hw_queues_props[i].driver_only = 0;
473 			prop->hw_queues_props[i].supports_sync_stream = 0;
474 			prop->hw_queues_props[i].cb_alloc_flags =
475 				CB_ALLOC_USER;
476 
477 		}
478 		prop->hw_queues_props[i].collective_mode =
479 						get_collective_mode(hdev, i);
480 	}
481 
482 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
483 	prop->collective_first_sob = 0;
484 	prop->collective_first_mon = 0;
485 
486 	/* 2 SOBs per internal queue stream are reserved for collective */
487 	prop->sync_stream_first_sob =
488 			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
489 			* QMAN_STREAMS * HL_RSVD_SOBS;
490 
491 	/* 1 monitor per internal queue stream is reserved for collective
492 	 * 2 monitors per external queue stream are reserved for collective
493 	 */
494 	prop->sync_stream_first_mon =
495 			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
496 			(NUMBER_OF_EXT_HW_QUEUES * 2);
497 
498 	prop->dram_base_address = DRAM_PHYS_BASE;
499 	prop->dram_size = GAUDI_HBM_SIZE_32GB;
500 	prop->dram_end_address = prop->dram_base_address +
501 					prop->dram_size;
502 	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
503 
504 	prop->sram_base_address = SRAM_BASE_ADDR;
505 	prop->sram_size = SRAM_SIZE;
506 	prop->sram_end_address = prop->sram_base_address +
507 					prop->sram_size;
508 	prop->sram_user_base_address = prop->sram_base_address +
509 					SRAM_USER_BASE_OFFSET;
510 
511 	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
512 	if (hdev->pldm)
513 		prop->mmu_pgt_size = 0x800000; /* 8MB */
514 	else
515 		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
516 	prop->mmu_pte_size = HL_PTE_SIZE;
517 	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
518 	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
519 	prop->dram_page_size = PAGE_SIZE_2MB;
520 	prop->dram_supports_virtual_memory = false;
521 
522 	prop->pmmu.hop0_shift = HOP0_SHIFT;
523 	prop->pmmu.hop1_shift = HOP1_SHIFT;
524 	prop->pmmu.hop2_shift = HOP2_SHIFT;
525 	prop->pmmu.hop3_shift = HOP3_SHIFT;
526 	prop->pmmu.hop4_shift = HOP4_SHIFT;
527 	prop->pmmu.hop0_mask = HOP0_MASK;
528 	prop->pmmu.hop1_mask = HOP1_MASK;
529 	prop->pmmu.hop2_mask = HOP2_MASK;
530 	prop->pmmu.hop3_mask = HOP3_MASK;
531 	prop->pmmu.hop4_mask = HOP4_MASK;
532 	prop->pmmu.start_addr = VA_HOST_SPACE_START;
533 	prop->pmmu.end_addr =
534 			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
535 	prop->pmmu.page_size = PAGE_SIZE_4KB;
536 	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
537 
538 	/* PMMU and HPMMU are the same except for the page size */
539 	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
540 	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
541 
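	/* The host VA range is split in two halves: the PMMU maps the lower
	 * half and the DMMU maps the upper half (see the start_addr/end_addr
	 * assignments)
	 */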
542 	/* shifts and masks are the same in PMMU and DMMU */
543 	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
544 	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
545 	prop->dmmu.end_addr = VA_HOST_SPACE_END;
546 	prop->dmmu.page_size = PAGE_SIZE_2MB;
547 
548 	prop->cfg_size = CFG_SIZE;
549 	prop->max_asid = MAX_ASID;
550 	prop->num_of_events = GAUDI_EVENT_SIZE;
551 	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
552 
553 	set_default_power_values(hdev);
554 
555 	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
556 	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
557 
558 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
559 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
560 
561 	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
562 					CARD_NAME_MAX_LEN);
563 
564 	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
565 
566 	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
567 			prop->sync_stream_first_sob +
568 			(num_sync_stream_queues * HL_RSVD_SOBS);
569 	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
570 			prop->sync_stream_first_mon +
571 			(num_sync_stream_queues * HL_RSVD_MONS);
572 
573 	prop->first_available_user_msix_interrupt = USHRT_MAX;
574 
575 	for (i = 0 ; i < HL_MAX_DCORES ; i++)
576 		prop->first_available_cq[i] = USHRT_MAX;
577 
578 	prop->fw_security_status_valid = false;
579 	prop->hard_reset_done_by_fw = false;
580 
581 	return 0;
582 }
583 
584 static int gaudi_pci_bars_map(struct hl_device *hdev)
585 {
586 	static const char * const name[] = {"SRAM", "CFG", "HBM"};
587 	bool is_wc[3] = {false, false, true};
588 	int rc;
589 
590 	rc = hl_pci_bars_map(hdev, name, is_wc);
591 	if (rc)
592 		return rc;
593 
594 	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
595 			(CFG_BASE - SPI_FLASH_BASE_ADDR);
596 
597 	return 0;
598 }
599 
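/*
 * Re-program inbound PCI region 2 (the HBM BAR) to point at a new HBM
 * address. Returns the previous BAR base address, or U64_MAX on failure.
 */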
600 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
601 {
602 	struct gaudi_device *gaudi = hdev->asic_specific;
603 	struct hl_inbound_pci_region pci_region;
604 	u64 old_addr = addr;
605 	int rc;
606 
607 	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
608 		return old_addr;
609 
610 	/* Inbound Region 2 - Bar 4 - Point to HBM */
611 	pci_region.mode = PCI_BAR_MATCH_MODE;
612 	pci_region.bar = HBM_BAR_ID;
613 	pci_region.addr = addr;
614 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
615 	if (rc)
616 		return U64_MAX;
617 
618 	if (gaudi) {
619 		old_addr = gaudi->hbm_bar_cur_addr;
620 		gaudi->hbm_bar_cur_addr = addr;
621 	}
622 
623 	return old_addr;
624 }
625 
626 static int gaudi_init_iatu(struct hl_device *hdev)
627 {
628 	struct hl_inbound_pci_region inbound_region;
629 	struct hl_outbound_pci_region outbound_region;
630 	int rc;
631 
632 	if (hdev->asic_prop.iatu_done_by_fw) {
633 		hdev->asic_funcs->set_dma_mask_from_fw(hdev);
634 		return 0;
635 	}
636 
637 	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
638 	inbound_region.mode = PCI_BAR_MATCH_MODE;
639 	inbound_region.bar = SRAM_BAR_ID;
640 	inbound_region.addr = SRAM_BASE_ADDR;
641 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
642 	if (rc)
643 		goto done;
644 
645 	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
646 	inbound_region.mode = PCI_BAR_MATCH_MODE;
647 	inbound_region.bar = CFG_BAR_ID;
648 	inbound_region.addr = SPI_FLASH_BASE_ADDR;
649 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
650 	if (rc)
651 		goto done;
652 
653 	/* Inbound Region 2 - Bar 4 - Point to HBM */
654 	inbound_region.mode = PCI_BAR_MATCH_MODE;
655 	inbound_region.bar = HBM_BAR_ID;
656 	inbound_region.addr = DRAM_PHYS_BASE;
657 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
658 	if (rc)
659 		goto done;
660 
661 	hdev->asic_funcs->set_dma_mask_from_fw(hdev);
662 
663 	/* Outbound Region 0 - Point to Host */
664 	outbound_region.addr = HOST_PHYS_BASE;
665 	outbound_region.size = HOST_PHYS_SIZE;
666 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
667 
668 done:
669 	return rc;
670 }
671 
672 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
673 {
674 	return RREG32(mmHW_STATE);
675 }
676 
677 static int gaudi_early_init(struct hl_device *hdev)
678 {
679 	struct asic_fixed_properties *prop = &hdev->asic_prop;
680 	struct pci_dev *pdev = hdev->pdev;
681 	u32 fw_boot_status;
682 	int rc;
683 
684 	rc = gaudi_get_fixed_properties(hdev);
685 	if (rc) {
686 		dev_err(hdev->dev, "Failed to get fixed properties\n");
687 		return rc;
688 	}
689 
690 	/* Check BAR sizes */
691 	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
692 		dev_err(hdev->dev,
693 			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
694 			SRAM_BAR_ID,
695 			(unsigned long long) pci_resource_len(pdev,
696 							SRAM_BAR_ID),
697 			SRAM_BAR_SIZE);
698 		rc = -ENODEV;
699 		goto free_queue_props;
700 	}
701 
702 	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
703 		dev_err(hdev->dev,
704 			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
705 			CFG_BAR_ID,
706 			(unsigned long long) pci_resource_len(pdev,
707 								CFG_BAR_ID),
708 			CFG_BAR_SIZE);
709 		rc = -ENODEV;
710 		goto free_queue_props;
711 	}
712 
713 	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
714 
715 	/* If FW security is enabled at this point it means no access to ELBI */
716 	if (!hdev->asic_prop.fw_security_disabled) {
717 		hdev->asic_prop.iatu_done_by_fw = true;
718 		goto pci_init;
719 	}
720 
721 	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
722 				&fw_boot_status);
723 	if (rc)
724 		goto free_queue_props;
725 
726 	/* Check whether FW is configuring iATU */
727 	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
728 			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
729 		hdev->asic_prop.iatu_done_by_fw = true;
730 
731 pci_init:
732 	rc = hl_pci_init(hdev);
733 	if (rc)
734 		goto free_queue_props;
735 
736 	/* Before continuing with the initialization, we need to read the preboot
737 	 * version to determine whether we run with security-enabled firmware
738 	 */
739 	rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
740 			mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
741 			GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
742 	if (rc) {
743 		if (hdev->reset_on_preboot_fail)
744 			hdev->asic_funcs->hw_fini(hdev, true);
745 		goto pci_fini;
746 	}
747 
748 	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
749 		dev_info(hdev->dev,
750 			"H/W state is dirty, must reset before initializing\n");
751 		hdev->asic_funcs->hw_fini(hdev, true);
752 	}
753 
754 	return 0;
755 
756 pci_fini:
757 	hl_pci_fini(hdev);
758 free_queue_props:
759 	kfree(hdev->asic_prop.hw_queues_props);
760 	return rc;
761 }
762 
763 static int gaudi_early_fini(struct hl_device *hdev)
764 {
765 	kfree(hdev->asic_prop.hw_queues_props);
766 	hl_pci_fini(hdev);
767 
768 	return 0;
769 }
770 
771 /**
772  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
773  *
774  * @hdev: pointer to hl_device structure
775  *
776  */
777 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
778 {
779 	struct asic_fixed_properties *prop = &hdev->asic_prop;
780 	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
781 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
782 	int rc;
783 
784 	if (hdev->asic_prop.fw_security_disabled) {
785 		/* Backward compatibility */
786 		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
787 		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
788 		nr = RREG32(mmPSOC_CPU_PLL_NR);
789 		nf = RREG32(mmPSOC_CPU_PLL_NF);
790 		od = RREG32(mmPSOC_CPU_PLL_OD);
791 
792 		if (div_sel == DIV_SEL_REF_CLK ||
793 				div_sel == DIV_SEL_DIVIDED_REF) {
794 			if (div_sel == DIV_SEL_REF_CLK)
795 				freq = PLL_REF_CLK;
796 			else
797 				freq = PLL_REF_CLK / (div_fctr + 1);
798 		} else if (div_sel == DIV_SEL_PLL_CLK ||
799 			div_sel == DIV_SEL_DIVIDED_PLL) {
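			/* PLL output = ref_clk * (NF + 1) / ((NR + 1) * (OD + 1)) */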
800 			pll_clk = PLL_REF_CLK * (nf + 1) /
801 					((nr + 1) * (od + 1));
802 			if (div_sel == DIV_SEL_PLL_CLK)
803 				freq = pll_clk;
804 			else
805 				freq = pll_clk / (div_fctr + 1);
806 		} else {
807 			dev_warn(hdev->dev,
808 				"Received invalid div select value: %d",
809 				div_sel);
810 			freq = 0;
811 		}
812 	} else {
813 		rc = hl_fw_cpucp_pll_info_get(hdev, CPU_PLL, pll_freq_arr);
814 
815 		if (rc)
816 			return rc;
817 
818 		freq = pll_freq_arr[2];
819 	}
820 
821 	prop->psoc_timestamp_frequency = freq;
822 	prop->psoc_pci_pll_nr = nr;
823 	prop->psoc_pci_pll_nf = nf;
824 	prop->psoc_pci_pll_od = od;
825 	prop->psoc_pci_pll_div_factor = div_fctr;
826 
827 	return 0;
828 }
829 
830 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
831 		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
832 {
833 	struct asic_fixed_properties *prop = &hdev->asic_prop;
834 	struct packet_lin_dma *init_tpc_mem_pkt;
835 	struct hl_cs_job *job;
836 	struct hl_cb *cb;
837 	u64 dst_addr;
838 	u32 cb_size, ctl;
839 	u8 tpc_id;
840 	int rc;
841 
842 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
843 	if (!cb)
844 		return -EFAULT;
845 
846 	init_tpc_mem_pkt = cb->kernel_address;
847 	cb_size = sizeof(*init_tpc_mem_pkt);
848 	memset(init_tpc_mem_pkt, 0, cb_size);
849 
850 	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
851 
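	/* Build a LIN_DMA packet that copies the TPC kernel image from host
	 * memory to the SRAM user area
	 */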
852 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
853 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
854 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
855 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
856 
857 	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
858 
859 	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
860 	dst_addr = (prop->sram_user_base_address &
861 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
862 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
863 	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
864 
865 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
866 	if (!job) {
867 		dev_err(hdev->dev, "Failed to allocate a new job\n");
868 		rc = -ENOMEM;
869 		goto release_cb;
870 	}
871 
872 	job->id = 0;
873 	job->user_cb = cb;
874 	atomic_inc(&job->user_cb->cs_cnt);
875 	job->user_cb_size = cb_size;
876 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
877 	job->patched_cb = job->user_cb;
878 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
879 
880 	hl_debugfs_add_job(hdev, job);
881 
882 	rc = gaudi_send_job_on_qman0(hdev, job);
883 
884 	if (rc)
885 		goto free_job;
886 
887 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
888 		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
889 		if (rc)
890 			break;
891 	}
892 
893 free_job:
894 	hl_userptr_delete_list(hdev, &job->userptr_list);
895 	hl_debugfs_remove_job(hdev, job);
896 	kfree(job);
897 	atomic_dec(&cb->cs_cnt);
898 
899 release_cb:
900 	hl_cb_put(cb);
901 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
902 
903 	return rc;
904 }
905 
906 /*
907  * gaudi_init_tpc_mem() - Initialize TPC memories.
908  * @hdev: Pointer to hl_device structure.
909  *
910  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
911  *
912  * Return: 0 for success, negative value for error.
913  */
914 static int gaudi_init_tpc_mem(struct hl_device *hdev)
915 {
916 	const struct firmware *fw;
917 	size_t fw_size;
918 	void *cpu_addr;
919 	dma_addr_t dma_handle;
920 	int rc, count = 5;
921 
922 again:
923 	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
924 	if (rc == -EINTR && count-- > 0) {
925 		msleep(50);
926 		goto again;
927 	}
928 
929 	if (rc) {
930 		dev_err(hdev->dev, "Failed to load firmware file %s\n",
931 				GAUDI_TPC_FW_FILE);
932 		goto out;
933 	}
934 
935 	fw_size = fw->size;
936 	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
937 			&dma_handle, GFP_KERNEL | __GFP_ZERO);
938 	if (!cpu_addr) {
939 		dev_err(hdev->dev,
940 			"Failed to allocate %zu of dma memory for TPC kernel\n",
941 			fw_size);
942 		rc = -ENOMEM;
943 		goto out;
944 	}
945 
946 	memcpy(cpu_addr, fw->data, fw_size);
947 
948 	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
949 
950 	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
951 			dma_handle);
952 
953 out:
954 	release_firmware(fw);
955 	return rc;
956 }
957 
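/*
 * Map the stream's current SOB group: each NIC slave queue gets its own SOB,
 * while the DMA5 and TPC7 reduction queues share the SOB that follows the
 * NIC SOBs.
 */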
958 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
959 {
960 	struct gaudi_device *gaudi = hdev->asic_specific;
961 	struct gaudi_collective_properties *prop = &gaudi->collective_props;
962 	struct hl_hw_queue *q;
963 	u32 i, sob_id, sob_group_id, queue_id;
964 
965 	/* Iterate through SOB groups and assign a SOB for each slave queue */
966 	sob_group_id =
967 		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
968 	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
969 
970 	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
971 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
972 		q = &hdev->kernel_queues[queue_id + (4 * i)];
973 		q->sync_stream_prop.collective_sob_id = sob_id + i;
974 	}
975 
976 	/* Both DMA5 and TPC7 use the same resources since only a single
977 	 * engine needs to participate in the reduction process
978 	 */
979 	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
980 	q = &hdev->kernel_queues[queue_id];
981 	q->sync_stream_prop.collective_sob_id =
982 			sob_id + NIC_NUMBER_OF_ENGINES;
983 
984 	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
985 	q = &hdev->kernel_queues[queue_id];
986 	q->sync_stream_prop.collective_sob_id =
987 			sob_id + NIC_NUMBER_OF_ENGINES;
988 }
989 
990 static void gaudi_sob_group_hw_reset(struct kref *ref)
991 {
992 	struct gaudi_hw_sob_group *hw_sob_group =
993 		container_of(ref, struct gaudi_hw_sob_group, kref);
994 	struct hl_device *hdev = hw_sob_group->hdev;
995 	u64 base_addr;
996 	int rc;
997 
998 	base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
999 			hw_sob_group->base_sob_id * 4;
1000 	rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
1001 			base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
1002 	if (rc)
1003 		dev_err(hdev->dev,
1004 			"failed resetting sob group - sob base %u, count %u",
1005 			hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);
1006 
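	/* Re-arm the refcount so the SOB group can be reused */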
1007 	kref_init(&hw_sob_group->kref);
1008 }
1009 
1010 static void gaudi_sob_group_reset_error(struct kref *ref)
1011 {
1012 	struct gaudi_hw_sob_group *hw_sob_group =
1013 		container_of(ref, struct gaudi_hw_sob_group, kref);
1014 	struct hl_device *hdev = hw_sob_group->hdev;
1015 
1016 	dev_crit(hdev->dev,
1017 		"SOB release shouldn't be called here, base_sob_id: %d\n",
1018 		hw_sob_group->base_sob_id);
1019 }
1020 
1021 static int gaudi_collective_init(struct hl_device *hdev)
1022 {
1023 	u32 i, master_monitor_sobs, sob_id, reserved_sobs_per_group;
1024 	struct gaudi_collective_properties *prop;
1025 	struct gaudi_device *gaudi;
1026 
1027 	gaudi = hdev->asic_specific;
1028 	prop = &gaudi->collective_props;
1029 	sob_id = hdev->asic_prop.collective_first_sob;
1030 
1031 	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1032 	reserved_sobs_per_group =
1033 		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1034 
1035 	/* Init SOB groups */
1036 	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1037 		prop->hw_sob_group[i].hdev = hdev;
1038 		prop->hw_sob_group[i].base_sob_id = sob_id;
1039 		sob_id += reserved_sobs_per_group;
1040 		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1041 	}
1042 
1043 	for (i = 0 ; i < QMAN_STREAMS; i++) {
1044 		prop->next_sob_group_val[i] = 1;
1045 		prop->curr_sob_group_idx[i] = 0;
1046 		gaudi_collective_map_sobs(hdev, i);
1047 	}
1048 
1049 	prop->mstr_sob_mask[0] = 0;
1050 	master_monitor_sobs = HL_MAX_SOBS_PER_MONITOR;
1051 	for (i = 0 ; i < master_monitor_sobs ; i++)
1052 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1053 			prop->mstr_sob_mask[0] |= BIT(i);
1054 
1055 	prop->mstr_sob_mask[1] = 0;
1056 	master_monitor_sobs =
1057 		NIC_NUMBER_OF_ENGINES - HL_MAX_SOBS_PER_MONITOR;
1058 	for (i = 0 ; i < master_monitor_sobs; i++) {
1059 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1060 			prop->mstr_sob_mask[1] |= BIT(i);
1061 	}
1062 
1063 	/* Set collective engine bit */
1064 	prop->mstr_sob_mask[1] |= BIT(i);
1065 
1066 	return 0;
1067 }
1068 
1069 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1070 {
1071 	struct gaudi_device *gaudi = hdev->asic_specific;
1072 	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1073 
1074 	kref_put(&cprop->hw_sob_group[sob_group].kref,
1075 					gaudi_sob_group_hw_reset);
1076 }
1077 
1078 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1079 		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1080 {
1081 	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1082 	struct gaudi_collective_properties *cprop;
1083 	struct hl_gen_wait_properties wait_prop;
1084 	struct hl_sync_stream_properties *prop;
1085 	struct gaudi_device *gaudi;
1086 
1087 	gaudi = hdev->asic_specific;
1088 	cprop = &gaudi->collective_props;
1089 	queue_id = job->hw_queue_id;
1090 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1091 
1092 	master_sob_base =
1093 		cprop->hw_sob_group[sob_group_offset].base_sob_id;
1094 	master_monitor = prop->collective_mstr_mon_id[0];
1095 
1096 	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1097 
1098 	dev_dbg(hdev->dev,
1099 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1100 		master_sob_base, cprop->mstr_sob_mask[0],
1101 		cprop->next_sob_group_val[stream],
1102 		master_monitor, queue_id);
1103 
1104 	wait_prop.data = (void *) job->patched_cb;
1105 	wait_prop.sob_base = master_sob_base;
1106 	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1107 	wait_prop.sob_val = cprop->next_sob_group_val[stream];
1108 	wait_prop.mon_id = master_monitor;
1109 	wait_prop.q_idx = queue_id;
1110 	wait_prop.size = cb_size;
1111 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1112 
1113 	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1114 	master_monitor = prop->collective_mstr_mon_id[1];
1115 
1116 	dev_dbg(hdev->dev,
1117 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1118 		master_sob_base, cprop->mstr_sob_mask[1],
1119 		cprop->next_sob_group_val[stream],
1120 		master_monitor, queue_id);
1121 
1122 	wait_prop.sob_base = master_sob_base;
1123 	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1124 	wait_prop.mon_id = master_monitor;
1125 	wait_prop.size = cb_size;
1126 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1127 }
1128 
1129 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1130 		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1131 {
1132 	struct hl_gen_wait_properties wait_prop;
1133 	struct hl_sync_stream_properties *prop;
1134 	u32 queue_id, cb_size = 0;
1135 
1136 	queue_id = job->hw_queue_id;
1137 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1138 
1139 	/* Add to wait CBs using slave monitor */
1140 	wait_prop.data = (void *) job->user_cb;
1141 	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1142 	wait_prop.sob_mask = 0x1;
1143 	wait_prop.sob_val = cs_cmpl->sob_val;
1144 	wait_prop.mon_id = prop->collective_slave_mon_id;
1145 	wait_prop.q_idx = queue_id;
1146 	wait_prop.size = cb_size;
1147 
1148 	dev_dbg(hdev->dev,
1149 		"Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
1150 		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1151 		prop->collective_slave_mon_id, queue_id);
1152 
1153 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1154 
1155 	dev_dbg(hdev->dev,
1156 		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1157 		prop->collective_sob_id, queue_id);
1158 
1159 	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1160 			prop->collective_sob_id, cb_size, false);
1161 }
1162 
1163 static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
1164 {
1165 	struct hl_cs_compl *signal_cs_cmpl =
1166 		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1167 	struct hl_cs_compl *cs_cmpl =
1168 		container_of(cs->fence, struct hl_cs_compl, base_fence);
1169 	struct gaudi_collective_properties *cprop;
1170 	u32 stream, queue_id, sob_group_offset;
1171 	struct gaudi_device *gaudi;
1172 	struct hl_device *hdev;
1173 	struct hl_cs_job *job;
1174 	struct hl_ctx *ctx;
1175 
1176 	ctx = cs->ctx;
1177 	hdev = ctx->hdev;
1178 	gaudi = hdev->asic_specific;
1179 	cprop = &gaudi->collective_props;
1180 
1181 	/* copy the SOB id and value of the signal CS */
1182 	cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1183 	cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1184 
1185 	/* Calculate the stream from collective master queue (1st job) */
1186 	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1187 	stream = job->hw_queue_id % 4;
1188 	sob_group_offset =
1189 		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1190 
1191 	list_for_each_entry(job, &cs->job_list, cs_node) {
1192 		queue_id = job->hw_queue_id;
1193 
1194 		if (hdev->kernel_queues[queue_id].collective_mode ==
1195 				HL_COLLECTIVE_MASTER)
1196 			gaudi_collective_master_init_job(hdev, job, stream,
1197 						sob_group_offset);
1198 		else
1199 			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1200 	}
1201 
1202 	cs_cmpl->sob_group = sob_group_offset;
1203 
1204 	/* Handle sob group kref and wraparound */
1205 	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1206 	cprop->next_sob_group_val[stream]++;
1207 
1208 	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1209 		/*
1210 		 * Decrement as we reached the max value.
1211 		 * The release function won't be called here as we've
1212 		 * just incremented the refcount.
1213 		 */
1214 		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1215 				gaudi_sob_group_reset_error);
1216 		cprop->next_sob_group_val[stream] = 1;
1217 		/* only two SOBs are currently in use */
1218 		cprop->curr_sob_group_idx[stream] =
1219 			(cprop->curr_sob_group_idx[stream] + 1) &
1220 							(HL_RSVD_SOBS - 1);
1221 
1222 		gaudi_collective_map_sobs(hdev, stream);
1223 
1224 		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1225 				cprop->curr_sob_group_idx[stream], stream);
1226 	}
1227 
1228 	/* Increment kref since all slave queues are now waiting on it */
1229 	kref_get(&cs_cmpl->hw_sob->kref);
1230 	/*
1231 	 * Must put the signal fence after the SOB refcnt increment so
1232 	 * the SOB refcnt won't turn 0 and reset the SOB before the
1233 	 * wait CS was submitted.
1234 	 */
1235 	mb();
1236 	hl_fence_put(cs->signal_fence);
1237 	cs->signal_fence = NULL;
1238 }
1239 
1240 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1241 		struct hl_ctx *ctx, struct hl_cs *cs,
1242 		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
1243 {
1244 	struct hw_queue_properties *hw_queue_prop;
1245 	struct hl_cs_counters_atomic *cntr;
1246 	struct hl_cs_job *job;
1247 	struct hl_cb *cb;
1248 	u32 cb_size;
1249 	bool patched_cb;
1250 
1251 	cntr = &hdev->aggregated_cs_counters;
1252 
1253 	if (mode == HL_COLLECTIVE_MASTER) {
1254 		/* CB size of collective master queue contains
1255 		 * 4 msg short packets for monitor 1 configuration
1256 		 * 1 fence packet
1257 		 * 4 msg short packets for monitor 2 configuration
1258 		 * 1 fence packet
1259 		 * 2 msg prot packets for completion and MSI-X
1260 		 */
1261 		cb_size = sizeof(struct packet_msg_short) * 8 +
1262 				sizeof(struct packet_fence) * 2 +
1263 				sizeof(struct packet_msg_prot) * 2;
1264 		patched_cb = true;
1265 	} else {
1266 		/* CB size of collective slave queues contains
1267 		 * 4 msg short packets for monitor configuration
1268 		 * 1 fence packet
1269 		 * 1 additional msg short packet for sob signal
1270 		 */
1271 		cb_size = sizeof(struct packet_msg_short) * 5 +
1272 				sizeof(struct packet_fence);
1273 		patched_cb = false;
1274 	}
1275 
1276 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1277 	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1278 	if (!job) {
1279 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1280 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1281 		dev_err(hdev->dev, "Failed to allocate a new job\n");
1282 		return -ENOMEM;
1283 	}
1284 
1285 	/* Allocate internal mapped CB for non patched CBs */
1286 	cb = hl_cb_kernel_create(hdev, cb_size,
1287 			hdev->mmu_enable && !patched_cb);
1288 	if (!cb) {
1289 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1290 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1291 		kfree(job);
1292 		return -EFAULT;
1293 	}
1294 
1295 	job->id = 0;
1296 	job->cs = cs;
1297 	job->user_cb = cb;
1298 	atomic_inc(&job->user_cb->cs_cnt);
1299 	job->user_cb_size = cb_size;
1300 	job->hw_queue_id = queue_id;
1301 
1302 	/*
1303 	 * No need for parsing - the user CB is already the patched CB.
1304 	 * We call hl_cb_destroy() for two reasons: we don't need the CB
1305 	 * in the CB IDR anymore, and we need to decrement its refcount,
1306 	 * which was incremented inside hl_cb_kernel_create().
1307 	 */
1308 	if (patched_cb)
1309 		job->patched_cb = job->user_cb;
1310 	else
1311 		job->patched_cb = NULL;
1312 
1313 	job->job_cb_size = job->user_cb_size;
1314 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1315 
1316 	/* increment refcount as for external queues we get completion */
1317 	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1318 		cs_get(cs);
1319 
1320 	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1321 
1322 	list_add_tail(&job->cs_node, &cs->job_list);
1323 
1324 	hl_debugfs_add_job(hdev, job);
1325 
1326 	return 0;
1327 }
1328 
1329 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1330 		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
1331 		u32 collective_engine_id)
1332 {
1333 	struct gaudi_device *gaudi = hdev->asic_specific;
1334 	struct hw_queue_properties *hw_queue_prop;
1335 	u32 queue_id, collective_queue, num_jobs;
1336 	u32 stream, nic_queue, nic_idx = 0;
1337 	bool skip;
1338 	int i, rc = 0;
1339 
1340 	/* Verify wait queue id is configured as master */
1341 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1342 	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1343 		dev_err(hdev->dev,
1344 			"Queue %d is not configured as collective master\n",
1345 			wait_queue_id);
1346 		return -EINVAL;
1347 	}
1348 
1349 	/* Verify engine id is supported */
1350 	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1351 			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1352 		dev_err(hdev->dev,
1353 			"Collective wait does not support engine %u\n",
1354 			collective_engine_id);
1355 		return -EINVAL;
1356 	}
1357 
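	/* Queue IDs are laid out as 4 streams per engine, so the stream index
	 * is the queue ID modulo 4
	 */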
1358 	stream = wait_queue_id % 4;
1359 
1360 	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1361 		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1362 	else
1363 		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1364 
1365 	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1366 	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1367 
1368 	/* The first job goes to the collective master queue; it will wait for
1369 	 * the collective slave queues to finish execution.
1370 	 * The synchronization is done using two monitors:
1371 	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1372 	 * reduction engine (DMA5/TPC7).
1373 	 *
1374 	 * The rest of the jobs go to the collective slave queues, which will
1375 	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1376 	 */
1377 	for (i = 0 ; i < num_jobs ; i++) {
1378 		if (i == 0) {
1379 			queue_id = wait_queue_id;
1380 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1381 				HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
1382 		} else {
1383 			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1384 				if (gaudi->hw_cap_initialized &
1385 					BIT(HW_CAP_NIC_SHIFT + nic_idx))
1386 					skip = false;
1387 				else
1388 					skip = true;
1389 
1390 				queue_id = nic_queue;
1391 				nic_queue += 4;
1392 				nic_idx++;
1393 
1394 				if (skip)
1395 					continue;
1396 			} else {
1397 				queue_id = collective_queue;
1398 			}
1399 
1400 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1401 				HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
1402 		}
1403 
1404 		if (rc)
1405 			return rc;
1406 	}
1407 
1408 	return rc;
1409 }
1410 
1411 static int gaudi_late_init(struct hl_device *hdev)
1412 {
1413 	struct gaudi_device *gaudi = hdev->asic_specific;
1414 	int rc;
1415 
1416 	rc = gaudi->cpucp_info_get(hdev);
1417 	if (rc) {
1418 		dev_err(hdev->dev, "Failed to get cpucp info\n");
1419 		return rc;
1420 	}
1421 
1422 	if ((hdev->card_type == cpucp_card_type_pci) &&
1423 			(hdev->nic_ports_mask & 0x3)) {
1424 		dev_info(hdev->dev,
1425 			"PCI card detected, only 8 ports are enabled\n");
1426 		hdev->nic_ports_mask &= ~0x3;
1427 
1428 		/* Stop and disable unused NIC QMANs */
1429 		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1430 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1431 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1432 
1433 		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1434 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1435 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1436 
1437 		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1438 		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1439 
1440 		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1441 	}
1442 
1443 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1444 	if (rc) {
1445 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1446 		return rc;
1447 	}
1448 
1449 	rc = gaudi_fetch_psoc_frequency(hdev);
1450 	if (rc) {
1451 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1452 		goto disable_pci_access;
1453 	}
1454 
1455 	rc = gaudi_mmu_clear_pgt_range(hdev);
1456 	if (rc) {
1457 		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1458 		goto disable_pci_access;
1459 	}
1460 
1461 	rc = gaudi_init_tpc_mem(hdev);
1462 	if (rc) {
1463 		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1464 		goto disable_pci_access;
1465 	}
1466 
1467 	rc = gaudi_collective_init(hdev);
1468 	if (rc) {
1469 		dev_err(hdev->dev, "Failed to init collective\n");
1470 		goto disable_pci_access;
1471 	}
1472 
1473 	return 0;
1474 
1475 disable_pci_access:
1476 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1477 
1478 	return rc;
1479 }
1480 
1481 static void gaudi_late_fini(struct hl_device *hdev)
1482 {
1483 	const struct hwmon_channel_info **channel_info_arr;
1484 	int i = 0;
1485 
1486 	if (!hdev->hl_chip_info->info)
1487 		return;
1488 
1489 	channel_info_arr = hdev->hl_chip_info->info;
1490 
1491 	while (channel_info_arr[i]) {
1492 		kfree(channel_info_arr[i]->config);
1493 		kfree(channel_info_arr[i]);
1494 		i++;
1495 	}
1496 
1497 	kfree(channel_info_arr);
1498 
1499 	hdev->hl_chip_info->info = NULL;
1500 }
1501 
1502 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1503 {
1504 	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1505 	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1506 	int i, j, rc = 0;
1507 
1508 	/*
1509 	 * The device CPU works with 40-bit addresses, and bit 39 must be set
1510 	 * to '1' when accessing the host.
1511 	 * Bits 49:39 of the full host address are saved for a later
1512 	 * configuration of the HW to extend the address to 50 bits.
1513 	 * Because there is a single HW register that holds the extension bits,
1514 	 * these bits must be identical across the entire allocated range.
1515 	 */
1516 
1517 	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1518 		virt_addr_arr[i] =
1519 			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1520 						HL_CPU_ACCESSIBLE_MEM_SIZE,
1521 						&dma_addr_arr[i],
1522 						GFP_KERNEL | __GFP_ZERO);
1523 		if (!virt_addr_arr[i]) {
1524 			rc = -ENOMEM;
1525 			goto free_dma_mem_arr;
1526 		}
1527 
1528 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1529 		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1530 				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1531 			break;
1532 	}
1533 
1534 	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1535 		dev_err(hdev->dev,
1536 			"MSB of CPU accessible DMA memory is not the same across the allocated range\n");
1537 		rc = -EFAULT;
1538 		goto free_dma_mem_arr;
1539 	}
1540 
1541 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1542 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1543 	hdev->cpu_pci_msb_addr =
1544 		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1545 
1546 	if (hdev->asic_prop.fw_security_disabled)
1547 		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1548 
1549 free_dma_mem_arr:
1550 	for (j = 0 ; j < i ; j++)
1551 		hdev->asic_funcs->asic_dma_free_coherent(hdev,
1552 						HL_CPU_ACCESSIBLE_MEM_SIZE,
1553 						virt_addr_arr[j],
1554 						dma_addr_arr[j]);
1555 
1556 	return rc;
1557 }
1558 
1559 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1560 {
1561 	struct gaudi_device *gaudi = hdev->asic_specific;
1562 	struct gaudi_internal_qman_info *q;
1563 	u32 i;
1564 
1565 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1566 		q = &gaudi->internal_qmans[i];
1567 		if (!q->pq_kernel_addr)
1568 			continue;
1569 		hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1570 							q->pq_kernel_addr,
1571 							q->pq_dma_addr);
1572 	}
1573 }
1574 
1575 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1576 {
1577 	struct gaudi_device *gaudi = hdev->asic_specific;
1578 	struct gaudi_internal_qman_info *q;
1579 	int rc, i;
1580 
1581 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1582 		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1583 			continue;
1584 
1585 		q = &gaudi->internal_qmans[i];
1586 
1587 		switch (i) {
1588 		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1589 			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1590 			break;
1591 		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1592 			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1593 			break;
1594 		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1595 			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1596 			break;
1597 		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1598 			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1599 			break;
1600 		default:
1601 			dev_err(hdev->dev, "Bad internal queue index %d", i);
1602 			rc = -EINVAL;
1603 			goto free_internal_qmans_pq_mem;
1604 		}
1605 
1606 		q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1607 						hdev, q->pq_size,
1608 						&q->pq_dma_addr,
1609 						GFP_KERNEL | __GFP_ZERO);
1610 		if (!q->pq_kernel_addr) {
1611 			rc = -ENOMEM;
1612 			goto free_internal_qmans_pq_mem;
1613 		}
1614 	}
1615 
1616 	return 0;
1617 
1618 free_internal_qmans_pq_mem:
1619 	gaudi_free_internal_qmans_pq_mem(hdev);
1620 	return rc;
1621 }
1622 
1623 static int gaudi_sw_init(struct hl_device *hdev)
1624 {
1625 	struct gaudi_device *gaudi;
1626 	u32 i, event_id = 0;
1627 	int rc;
1628 
1629 	/* Allocate device structure */
1630 	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1631 	if (!gaudi)
1632 		return -ENOMEM;
1633 
1634 	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1635 		if (gaudi_irq_map_table[i].valid) {
1636 			if (event_id == GAUDI_EVENT_SIZE) {
1637 				dev_err(hdev->dev,
1638 					"Event array exceeds the limit of %u events\n",
1639 					GAUDI_EVENT_SIZE);
1640 				rc = -EINVAL;
1641 				goto free_gaudi_device;
1642 			}
1643 
1644 			gaudi->events[event_id++] =
1645 					gaudi_irq_map_table[i].fc_id;
1646 		}
1647 	}
1648 
1649 	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1650 
1651 	gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1652 
1653 	hdev->asic_specific = gaudi;
1654 
1655 	/* store legacy PLL map */
1656 	hdev->legacy_pll_map = gaudi_pll_map;
1657 
1658 	/* Create DMA pool for small allocations */
1659 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1660 			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1661 	if (!hdev->dma_pool) {
1662 		dev_err(hdev->dev, "failed to create DMA pool\n");
1663 		rc = -ENOMEM;
1664 		goto free_gaudi_device;
1665 	}
1666 
1667 	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1668 	if (rc)
1669 		goto free_dma_pool;
1670 
1671 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1672 	if (!hdev->cpu_accessible_dma_pool) {
1673 		dev_err(hdev->dev,
1674 			"Failed to create CPU accessible DMA pool\n");
1675 		rc = -ENOMEM;
1676 		goto free_cpu_dma_mem;
1677 	}
1678 
1679 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1680 				(uintptr_t) hdev->cpu_accessible_dma_mem,
1681 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1682 	if (rc) {
1683 		dev_err(hdev->dev,
1684 			"Failed to add memory to CPU accessible DMA pool\n");
1685 		rc = -EFAULT;
1686 		goto free_cpu_accessible_dma_pool;
1687 	}
1688 
1689 	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1690 	if (rc)
1691 		goto free_cpu_accessible_dma_pool;
1692 
1693 	spin_lock_init(&gaudi->hw_queues_lock);
1694 	mutex_init(&gaudi->clk_gate_mutex);
1695 
1696 	hdev->supports_sync_stream = true;
1697 	hdev->supports_coresight = true;
1698 	hdev->supports_staged_submission = true;
1699 
1700 	return 0;
1701 
1702 free_cpu_accessible_dma_pool:
1703 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1704 free_cpu_dma_mem:
1705 	if (hdev->asic_prop.fw_security_disabled)
1706 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1707 					hdev->cpu_pci_msb_addr);
1708 	hdev->asic_funcs->asic_dma_free_coherent(hdev,
1709 			HL_CPU_ACCESSIBLE_MEM_SIZE,
1710 			hdev->cpu_accessible_dma_mem,
1711 			hdev->cpu_accessible_dma_address);
1712 free_dma_pool:
1713 	dma_pool_destroy(hdev->dma_pool);
1714 free_gaudi_device:
1715 	kfree(gaudi);
1716 	return rc;
1717 }
1718 
1719 static int gaudi_sw_fini(struct hl_device *hdev)
1720 {
1721 	struct gaudi_device *gaudi = hdev->asic_specific;
1722 
1723 	gaudi_free_internal_qmans_pq_mem(hdev);
1724 
1725 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1726 
1727 	if (hdev->asic_prop.fw_security_disabled)
1728 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1729 					hdev->cpu_pci_msb_addr);
1730 
1731 	hdev->asic_funcs->asic_dma_free_coherent(hdev,
1732 			HL_CPU_ACCESSIBLE_MEM_SIZE,
1733 			hdev->cpu_accessible_dma_mem,
1734 			hdev->cpu_accessible_dma_address);
1735 
1736 	dma_pool_destroy(hdev->dma_pool);
1737 
1738 	mutex_destroy(&gaudi->clk_gate_mutex);
1739 
1740 	kfree(gaudi);
1741 
1742 	return 0;
1743 }
1744 
1745 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1746 {
1747 	struct hl_device *hdev = arg;
1748 	int i;
1749 
1750 	if (hdev->disabled)
1751 		return IRQ_HANDLED;
1752 
1753 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1754 		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1755 
1756 	hl_irq_handler_eq(irq, &hdev->event_queue);
1757 
1758 	return IRQ_HANDLED;
1759 }
1760 
1761 /*
1762  * For backward compatibility, new MSI interrupts should be set after the
1763  * existing CPU and NIC interrupts.
1764  */
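/*
 * Example mapping (assuming GAUDI_EVENT_QUEUE_MSI_IDX is 8 and
 * NIC_NUMBER_OF_ENGINES is 10): completion queues 0-7 use MSI vectors 0-7,
 * the CPU EQ uses vector 8, and any other interrupt index nr at or above 8
 * is pushed to vector nr + 10 + 1, past the CPU EQ and NIC vector slots.
 */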
1765 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1766 				bool cpu_eq)
1767 {
1768 	int msi_vec;
1769 
1770 	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1771 		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1772 				GAUDI_EVENT_QUEUE_MSI_IDX);
1773 
1774 	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1775 			(nr + NIC_NUMBER_OF_ENGINES + 1);
1776 
1777 	return pci_irq_vector(hdev->pdev, msi_vec);
1778 }
1779 
1780 static int gaudi_enable_msi_single(struct hl_device *hdev)
1781 {
1782 	int rc, irq;
1783 
1784 	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1785 
1786 	irq = gaudi_pci_irq_vector(hdev, 0, false);
1787 	rc = request_irq(irq, gaudi_irq_handler_single, 0,
1788 			"gaudi single msi", hdev);
1789 	if (rc)
1790 		dev_err(hdev->dev,
1791 			"Failed to request single MSI IRQ\n");
1792 
1793 	return rc;
1794 }
1795 
1796 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1797 {
1798 	int cq_cnt = hdev->asic_prop.completion_queues_count;
1799 	int rc, i, irq_cnt_init, irq;
1800 
1801 	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1802 		irq = gaudi_pci_irq_vector(hdev, i, false);
1803 		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1804 				&hdev->completion_queue[i]);
1805 		if (rc) {
1806 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1807 			goto free_irqs;
1808 		}
1809 	}
1810 
1811 	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1812 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1813 				&hdev->event_queue);
1814 	if (rc) {
1815 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1816 		goto free_irqs;
1817 	}
1818 
1819 	return 0;
1820 
1821 free_irqs:
1822 	for (i = 0 ; i < irq_cnt_init ; i++)
1823 		free_irq(gaudi_pci_irq_vector(hdev, i, false),
1824 				&hdev->completion_queue[i]);
1825 	return rc;
1826 }
1827 
1828 static int gaudi_enable_msi(struct hl_device *hdev)
1829 {
1830 	struct gaudi_device *gaudi = hdev->asic_specific;
1831 	int rc;
1832 
1833 	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1834 		return 0;
1835 
1836 	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
1837 	if (rc < 0) {
1838 		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1839 		return rc;
1840 	}
1841 
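	/*
	 * Note: only a single vector is requested above, so if
	 * NUMBER_OF_INTERRUPTS is greater than 1 the check below always
	 * selects single MSI mode; the multi-MSI path is kept for setups
	 * that allocate more vectors.
	 */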
1842 	if (rc < NUMBER_OF_INTERRUPTS) {
1843 		gaudi->multi_msi_mode = false;
1844 		rc = gaudi_enable_msi_single(hdev);
1845 	} else {
1846 		gaudi->multi_msi_mode = true;
1847 		rc = gaudi_enable_msi_multi(hdev);
1848 	}
1849 
1850 	if (rc)
1851 		goto free_pci_irq_vectors;
1852 
1853 	gaudi->hw_cap_initialized |= HW_CAP_MSI;
1854 
1855 	return 0;
1856 
1857 free_pci_irq_vectors:
1858 	pci_free_irq_vectors(hdev->pdev);
1859 	return rc;
1860 }
1861 
1862 static void gaudi_sync_irqs(struct hl_device *hdev)
1863 {
1864 	struct gaudi_device *gaudi = hdev->asic_specific;
1865 	int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1866 
1867 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1868 		return;
1869 
1870 	/* Wait for all pending IRQ handlers to finish */
1871 	if (gaudi->multi_msi_mode) {
1872 		for (i = 0 ; i < cq_cnt ; i++)
1873 			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1874 
1875 		synchronize_irq(gaudi_pci_irq_vector(hdev,
1876 						GAUDI_EVENT_QUEUE_MSI_IDX,
1877 						true));
1878 	} else {
1879 		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1880 	}
1881 }
1882 
1883 static void gaudi_disable_msi(struct hl_device *hdev)
1884 {
1885 	struct gaudi_device *gaudi = hdev->asic_specific;
1886 	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1887 
1888 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1889 		return;
1890 
1891 	gaudi_sync_irqs(hdev);
1892 
1893 	if (gaudi->multi_msi_mode) {
1894 		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1895 						true);
1896 		free_irq(irq, &hdev->event_queue);
1897 
1898 		for (i = 0 ; i < cq_cnt ; i++) {
1899 			irq = gaudi_pci_irq_vector(hdev, i, false);
1900 			free_irq(irq, &hdev->completion_queue[i]);
1901 		}
1902 	} else {
1903 		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1904 	}
1905 
1906 	pci_free_irq_vectors(hdev->pdev);
1907 
1908 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1909 }
1910 
1911 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1912 {
1913 	struct gaudi_device *gaudi = hdev->asic_specific;
1914 
1915 	if (!hdev->asic_prop.fw_security_disabled)
1916 		return;
1917 
1918 	if (hdev->asic_prop.fw_security_status_valid &&
1919 			(hdev->asic_prop.fw_app_security_map &
1920 					CPU_BOOT_DEV_STS0_SRAM_SCR_EN))
1921 		return;
1922 
1923 	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1924 		return;
1925 
1926 	if (!hdev->sram_scrambler_enable)
1927 		return;
1928 
1929 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1930 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1931 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1932 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1933 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1934 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1935 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1936 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1937 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1938 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1939 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1940 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1941 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1942 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1943 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1944 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1945 
1946 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1947 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1948 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1949 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1950 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1951 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1952 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1953 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1954 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1955 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1956 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1957 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1958 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1959 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1960 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1961 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1962 
1963 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1964 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1965 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1966 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1967 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1968 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1969 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1970 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1971 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1972 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1973 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1974 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1975 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1976 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1977 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1978 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1979 
1980 	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1981 }
1982 
1983 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1984 {
1985 	struct gaudi_device *gaudi = hdev->asic_specific;
1986 
1987 	if (!hdev->asic_prop.fw_security_disabled)
1988 		return;
1989 
1990 	if (hdev->asic_prop.fw_security_status_valid &&
1991 			(hdev->asic_prop.fw_boot_cpu_security_map &
1992 					CPU_BOOT_DEV_STS0_DRAM_SCR_EN))
1993 		return;
1994 
1995 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1996 		return;
1997 
1998 	if (!hdev->dram_scrambler_enable)
1999 		return;
2000 
2001 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2002 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2003 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2004 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2005 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2006 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2007 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2008 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2009 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2010 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2011 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2012 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2013 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2014 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2015 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2016 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2017 
2018 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2019 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2020 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2021 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2022 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2023 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2024 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2025 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2026 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2027 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2028 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2029 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2030 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2031 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2032 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2033 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2034 
2035 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2036 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2037 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2038 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2039 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2040 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2041 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2042 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2043 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2044 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2045 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2046 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2047 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2048 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2049 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2050 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2051 
2052 	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2053 }
2054 
2055 static void gaudi_init_e2e(struct hl_device *hdev)
2056 {
2057 	if (!hdev->asic_prop.fw_security_disabled)
2058 		return;
2059 
2060 	if (hdev->asic_prop.fw_security_status_valid &&
2061 			(hdev->asic_prop.fw_boot_cpu_security_map &
2062 					CPU_BOOT_DEV_STS0_E2E_CRED_EN))
2063 		return;
2064 
2065 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2066 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2067 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2068 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2069 
2070 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2071 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2072 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2073 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2074 
2075 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2076 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2077 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2078 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2079 
2080 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2081 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2082 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2083 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2084 
2085 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2086 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2087 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2088 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2089 
2090 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2091 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2092 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2093 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2094 
2095 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2096 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2097 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2098 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2099 
2100 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2101 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2102 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2103 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2104 
2105 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2106 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2107 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2108 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2109 
2110 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2111 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2112 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2113 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2114 
2115 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2116 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2117 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2118 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2119 
2120 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2121 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2122 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2123 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2124 
2125 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2126 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2127 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2128 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2129 
2130 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2131 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2132 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2133 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2134 
2135 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2136 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2137 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2138 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2139 
2140 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2141 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2142 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2143 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2144 
2145 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2146 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2147 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2148 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2149 
2150 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2151 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2152 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2153 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2154 
2155 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2156 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2157 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2158 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2159 
2160 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2161 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2162 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2163 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2164 
2165 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2166 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2167 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2168 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2169 
2170 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2171 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2172 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2173 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2174 
2175 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2176 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2177 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2178 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2179 
2180 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2181 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2182 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2183 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2184 
2185 	if (!hdev->dram_scrambler_enable) {
2186 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2187 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2188 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2189 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2190 
2191 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2192 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2193 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2194 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2195 
2196 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2197 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2198 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2199 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2200 
2201 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2202 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2203 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2204 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2205 
2206 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2207 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2208 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2209 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2210 
2211 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2212 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2213 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2214 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2215 
2216 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2217 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2218 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2219 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2220 
2221 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2222 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2223 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2224 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2225 
2226 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2227 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2228 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2229 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2230 
2231 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2232 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2233 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2234 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2235 
2236 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2237 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2238 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2239 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2240 
2241 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2242 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2243 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2244 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2245 
2246 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2247 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2248 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2249 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2250 
2251 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2252 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2253 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2254 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2255 
2256 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2257 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2258 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2259 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2260 
2261 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2262 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2263 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2264 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2265 
2266 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2267 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2268 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2269 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2270 
2271 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2272 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2273 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2274 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2275 
2276 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2277 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2278 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2279 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2280 
2281 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2282 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2283 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2284 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2285 
2286 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2287 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2288 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2289 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2290 
2291 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2292 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2293 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2294 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2295 
2296 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2297 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2298 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2299 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2300 
2301 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2302 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2303 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2304 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2305 	}
2306 
2307 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2308 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2309 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2310 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2311 
2312 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2313 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2314 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2315 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2316 
2317 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2318 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2319 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2320 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2321 
2322 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2323 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2324 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2325 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2326 
2327 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2328 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2329 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2330 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2331 
2332 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2333 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2334 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2335 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2336 
2337 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2338 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2339 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2340 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2341 
2342 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2343 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2344 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2345 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2346 
2347 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2348 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2349 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2350 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2351 
2352 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2353 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2354 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2355 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2356 
2357 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2358 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2359 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2360 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2361 
2362 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2363 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2364 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2365 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2366 
2367 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2368 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2369 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2370 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2371 
2372 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2373 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2374 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2375 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2376 
2377 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2378 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2379 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2380 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2381 
2382 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2383 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2384 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2385 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2386 
2387 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2388 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2389 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2390 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2391 
2392 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2393 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2394 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2395 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2396 
2397 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2398 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2399 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2400 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2401 
2402 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2403 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2404 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2405 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2406 
2407 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2408 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2409 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2410 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2411 
2412 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2413 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2414 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2415 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2416 
2417 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2418 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2419 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2420 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2421 
2422 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2423 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2424 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2425 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2426 }
2427 
2428 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2429 {
2430 	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2431 
2432 	if (!hdev->asic_prop.fw_security_disabled)
2433 		return;
2434 
2435 	if (hdev->asic_prop.fw_security_status_valid &&
2436 			(hdev->asic_prop.fw_boot_cpu_security_map &
2437 					CPU_BOOT_DEV_STS0_HBM_CRED_EN))
2438 		return;
2439 
2440 	hbm0_wr = 0x33333333;
2441 	hbm0_rd = 0x77777777;
2442 	hbm1_wr = 0x55555555;
2443 	hbm1_rd = 0xDDDDDDDD;
2444 
2445 	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2446 	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2447 	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2448 	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2449 
2450 	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2451 	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2452 	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2453 	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2454 
2455 	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2456 	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2457 	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2458 	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2459 
2460 	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2461 	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2462 	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2463 	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2464 
2465 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2466 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2467 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2468 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2469 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2470 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2471 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2472 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2473 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2474 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2475 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2476 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2477 
2478 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2479 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2480 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2481 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2482 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2483 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2484 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2485 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2486 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2487 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2488 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2489 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2490 }
2491 
2492 static void gaudi_init_golden_registers(struct hl_device *hdev)
2493 {
2494 	u32 tpc_offset;
2495 	int tpc_id, i;
2496 
2497 	gaudi_init_e2e(hdev);
2498 	gaudi_init_hbm_cred(hdev);
2499 
2500 	for (tpc_id = 0, tpc_offset = 0;
2501 				tpc_id < TPC_NUMBER_OF_ENGINES;
2502 				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2503 		/* Mask all arithmetic interrupts from TPC */
2504 		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2505 		/* Set 16 cache lines */
2506 		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2507 				ICACHE_FETCH_LINE_NUM, 2);
2508 	}
2509 
2510 	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2511 	for (i = 0 ; i < 128 ; i += 8)
2512 		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2513 
2514 	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2515 	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2516 	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2517 	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2518 }
2519 
2520 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2521 					int qman_id, dma_addr_t qman_pq_addr)
2522 {
2523 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2524 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2525 	u32 q_off, dma_qm_offset;
2526 	u32 dma_qm_err_cfg;
2527 
2528 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2529 
2530 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2531 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2532 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2533 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2534 	so_base_en_lo = lower_32_bits(CFG_BASE +
2535 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2536 	so_base_en_hi = upper_32_bits(CFG_BASE +
2537 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2538 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2539 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2540 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2541 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2542 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2543 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2544 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2545 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2546 
2547 	q_off = dma_qm_offset + qman_id * 4;
2548 
2549 	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2550 	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2551 
2552 	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2553 	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2554 	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2555 
2556 	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2557 	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2558 							QMAN_LDMA_SRC_OFFSET);
2559 	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2560 							QMAN_LDMA_DST_OFFSET);
2561 
2562 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2563 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2564 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2565 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2566 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2567 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2568 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2569 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2570 
2571 	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2572 
2573 	/* The following configuration is needed only once per QMAN */
2574 	if (qman_id == 0) {
2575 		/* Configure RAZWI IRQ */
2576 		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2577 		if (hdev->stop_on_err) {
2578 			dma_qm_err_cfg |=
2579 				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2580 		}
2581 
2582 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2583 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2584 			lower_32_bits(CFG_BASE +
2585 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2586 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2587 			upper_32_bits(CFG_BASE +
2588 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2589 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2590 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2591 									dma_id);
2592 
2593 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2594 				QM_ARB_ERR_MSG_EN_MASK);
2595 
2596 		/* Increase ARB WDT to support streams architecture */
2597 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2598 				GAUDI_ARB_WDT_TIMEOUT);
2599 
2600 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2601 				QMAN_EXTERNAL_MAKE_TRUSTED);
2602 
2603 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2604 	}
2605 }
2606 
2607 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2608 {
2609 	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2610 	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2611 
2612 	/* Set to maximum possible according to physical size */
2613 	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2614 	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2615 
2616 	/* WA for H/W bug H3-2116 */
2617 	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2618 
2619 	/* The STOP_ON bit means the operation gets no completion on a RAZWI error */
2620 	if (hdev->stop_on_err)
2621 		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2622 
2623 	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2624 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2625 		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2626 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2627 		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2628 	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2629 		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2630 	WREG32(mmDMA0_CORE_PROT + dma_offset,
2631 			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2632 	/* If the channel is secured, it should be in MMU bypass mode */
2633 	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2634 			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2635 	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2636 }
2637 
2638 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2639 				u32 enable_mask)
2640 {
2641 	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2642 
2643 	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2644 }
2645 
2646 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2647 {
2648 	struct gaudi_device *gaudi = hdev->asic_specific;
2649 	struct hl_hw_queue *q;
2650 	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2651 
2652 	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2653 		return;
2654 
2655 	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2656 		dma_id = gaudi_dma_assignment[i];
2657 		/*
2658 		 * For queues after the CPU Q, we need to add 1 to get the
2659 		 * correct queue index. In addition, we need to add the CPU EQ
2660 		 * and NIC IRQs in order to get the correct MSI register.
2661 		 */
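		/*
		 * e.g. (assuming QMAN_STREAMS is 4) a channel with dma_id
		 * greater than 1 maps stream j to kernel queue
		 * 4 * dma_id + j + 1, skipping the CPU queue slot.
		 */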
2662 		if (dma_id > 1) {
2663 			cpu_skip = 1;
2664 			nic_skip = NIC_NUMBER_OF_ENGINES;
2665 		} else {
2666 			cpu_skip = 0;
2667 			nic_skip = 0;
2668 		}
2669 
2670 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2671 			q_idx = 4 * dma_id + j + cpu_skip;
2672 			q = &hdev->kernel_queues[q_idx];
2673 			q->cq_id = cq_id++;
2674 			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2675 			gaudi_init_pci_dma_qman(hdev, dma_id, j,
2676 						q->bus_address);
2677 		}
2678 
2679 		gaudi_init_dma_core(hdev, dma_id);
2680 
2681 		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2682 	}
2683 
2684 	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2685 }
2686 
2687 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2688 					int qman_id, u64 qman_base_addr)
2689 {
2690 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2691 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2692 	u32 q_off, dma_qm_offset;
2693 	u32 dma_qm_err_cfg;
2694 
2695 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2696 
2697 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2698 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2699 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2700 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2701 	so_base_en_lo = lower_32_bits(CFG_BASE +
2702 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2703 	so_base_en_hi = upper_32_bits(CFG_BASE +
2704 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2705 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2706 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2707 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2708 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2709 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2710 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2711 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2712 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2713 
2714 	q_off = dma_qm_offset + qman_id * 4;
2715 
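	/*
	 * qman_id 0-3 are the upper CP streams and each gets its own PQ;
	 * qman_id 4 is the lower CP (see the callers), which has no PQ, so
	 * the else branch skips the PQ registers and configures the LDMA
	 * offsets, error reporting, arbiter and protection instead.
	 */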
2716 	if (qman_id < 4) {
2717 		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2718 					lower_32_bits(qman_base_addr));
2719 		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2720 					upper_32_bits(qman_base_addr));
2721 
2722 		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2723 		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2724 		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2725 
2726 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2727 							QMAN_CPDMA_SIZE_OFFSET);
2728 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2729 							QMAN_CPDMA_SRC_OFFSET);
2730 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2731 							QMAN_CPDMA_DST_OFFSET);
2732 	} else {
2733 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2734 							QMAN_LDMA_SIZE_OFFSET);
2735 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2736 							QMAN_LDMA_SRC_OFFSET);
2737 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2738 							QMAN_LDMA_DST_OFFSET);
2739 
2740 		/* Configure RAZWI IRQ */
2741 		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2742 		if (hdev->stop_on_err) {
2743 			dma_qm_err_cfg |=
2744 				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2745 		}
2746 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2747 
2748 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2749 			lower_32_bits(CFG_BASE +
2750 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2751 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2752 			upper_32_bits(CFG_BASE +
2753 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2754 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2755 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2756 									dma_id);
2757 
2758 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2759 				QM_ARB_ERR_MSG_EN_MASK);
2760 
2761 		/* Increase ARB WDT to support streams architecture */
2762 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2763 				GAUDI_ARB_WDT_TIMEOUT);
2764 
2765 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2766 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2767 				QMAN_INTERNAL_MAKE_TRUSTED);
2768 	}
2769 
2770 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2771 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2772 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2773 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2774 
2775 	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2776 	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2777 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2778 				mtr_base_ws_lo);
2779 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2780 				mtr_base_ws_hi);
2781 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2782 				so_base_ws_lo);
2783 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2784 				so_base_ws_hi);
2785 	}
2786 }
2787 
2788 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2789 {
2790 	struct gaudi_device *gaudi = hdev->asic_specific;
2791 	struct gaudi_internal_qman_info *q;
2792 	u64 qman_base_addr;
2793 	int i, j, dma_id, internal_q_index;
2794 
2795 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2796 		return;
2797 
2798 	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2799 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2800 
2801 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2802 			 /*
2803 			  * Add the CPU queue in order to get the correct queue
2804 			  * number, as all internal queues are placed after it
2805 			  */
2806 			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2807 
2808 			q = &gaudi->internal_qmans[internal_q_index];
2809 			qman_base_addr = (u64) q->pq_dma_addr;
2810 			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2811 						qman_base_addr);
2812 		}
2813 
2814 		/* Initializing lower CP for HBM DMA QMAN */
2815 		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2816 
2817 		gaudi_init_dma_core(hdev, dma_id);
2818 
2819 		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2820 	}
2821 
2822 	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2823 }
2824 
2825 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2826 					int qman_id, u64 qman_base_addr)
2827 {
2828 	u32 mtr_base_lo, mtr_base_hi;
2829 	u32 so_base_lo, so_base_hi;
2830 	u32 q_off, mme_id;
2831 	u32 mme_qm_err_cfg;
2832 
2833 	mtr_base_lo = lower_32_bits(CFG_BASE +
2834 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2835 	mtr_base_hi = upper_32_bits(CFG_BASE +
2836 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2837 	so_base_lo = lower_32_bits(CFG_BASE +
2838 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2839 	so_base_hi = upper_32_bits(CFG_BASE +
2840 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2841 
2842 	q_off = mme_offset + qman_id * 4;
2843 
2844 	if (qman_id < 4) {
2845 		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2846 					lower_32_bits(qman_base_addr));
2847 		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2848 					upper_32_bits(qman_base_addr));
2849 
2850 		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2851 		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2852 		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2853 
2854 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2855 							QMAN_CPDMA_SIZE_OFFSET);
2856 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2857 							QMAN_CPDMA_SRC_OFFSET);
2858 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2859 							QMAN_CPDMA_DST_OFFSET);
2860 	} else {
2861 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2862 							QMAN_LDMA_SIZE_OFFSET);
2863 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2864 							QMAN_LDMA_SRC_OFFSET);
2865 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2866 							QMAN_LDMA_DST_OFFSET);
2867 
2868 		/* Configure RAZWI IRQ */
2869 		mme_id = mme_offset /
2870 				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2871 
2872 		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2873 		if (hdev->stop_on_err) {
2874 			mme_qm_err_cfg |=
2875 				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2876 		}
2877 		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2878 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2879 			lower_32_bits(CFG_BASE +
2880 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2881 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2882 			upper_32_bits(CFG_BASE +
2883 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2884 		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2885 			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2886 									mme_id);
2887 
2888 		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2889 				QM_ARB_ERR_MSG_EN_MASK);
2890 
2891 		/* Increase ARB WDT to support streams architecture */
2892 		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2893 				GAUDI_ARB_WDT_TIMEOUT);
2894 
2895 		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2896 		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2897 				QMAN_INTERNAL_MAKE_TRUSTED);
2898 	}
2899 
2900 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2901 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2902 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2903 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2904 }
2905 
2906 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2907 {
2908 	struct gaudi_device *gaudi = hdev->asic_specific;
2909 	struct gaudi_internal_qman_info *q;
2910 	u64 qman_base_addr;
2911 	u32 mme_offset;
2912 	int i, internal_q_index;
2913 
2914 	if (gaudi->hw_cap_initialized & HW_CAP_MME)
2915 		return;
2916 
2917 	/*
2918 	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2919 	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2920 	 */
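	/*
	 * With this mapping, iterations 0-3 of the loop below program the
	 * QMAN at mmMME2_QM_BASE and iterations 4-7 program the one at
	 * mmMME0_QM_BASE (mme_offset is reset to 0 once i reaches 3); the
	 * remaining two MMEs act as slaves and have no QMAN enabled here.
	 */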
2921 
2922 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2923 
2924 	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2925 		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2926 		q = &gaudi->internal_qmans[internal_q_index];
2927 		qman_base_addr = (u64) q->pq_dma_addr;
2928 		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2929 					qman_base_addr);
2930 		if (i == 3)
2931 			mme_offset = 0;
2932 	}
2933 
2934 	/* Initializing lower CP for MME QMANs */
2935 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2936 	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2937 	gaudi_init_mme_qman(hdev, 0, 4, 0);
2938 
2939 	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2940 	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2941 
2942 	gaudi->hw_cap_initialized |= HW_CAP_MME;
2943 }
2944 
2945 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2946 				int qman_id, u64 qman_base_addr)
2947 {
2948 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2949 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2950 	u32 q_off, tpc_id;
2951 	u32 tpc_qm_err_cfg;
2952 
2953 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2954 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2955 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2956 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2957 	so_base_en_lo = lower_32_bits(CFG_BASE +
2958 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2959 	so_base_en_hi = upper_32_bits(CFG_BASE +
2960 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2961 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2962 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2963 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2964 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2965 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2966 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2967 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2968 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2969 
2970 	q_off = tpc_offset + qman_id * 4;
2971 
2972 	tpc_id = tpc_offset /
2973 			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2974 
2975 	if (qman_id < 4) {
2976 		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2977 					lower_32_bits(qman_base_addr));
2978 		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2979 					upper_32_bits(qman_base_addr));
2980 
2981 		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2982 		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2983 		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2984 
2985 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2986 							QMAN_CPDMA_SIZE_OFFSET);
2987 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2988 							QMAN_CPDMA_SRC_OFFSET);
2989 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2990 							QMAN_CPDMA_DST_OFFSET);
2991 	} else {
2992 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2993 							QMAN_LDMA_SIZE_OFFSET);
2994 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2995 							QMAN_LDMA_SRC_OFFSET);
2996 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2997 							QMAN_LDMA_DST_OFFSET);
2998 
2999 		/* Configure RAZWI IRQ */
3000 		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3001 		if (hdev->stop_on_err) {
3002 			tpc_qm_err_cfg |=
3003 				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3004 		}
3005 
3006 		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3007 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3008 			lower_32_bits(CFG_BASE +
3009 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
3010 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3011 			upper_32_bits(CFG_BASE +
3012 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
3013 		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3014 			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3015 									tpc_id);
3016 
3017 		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3018 				QM_ARB_ERR_MSG_EN_MASK);
3019 
3020 		/* Increase ARB WDT to support streams architecture */
3021 		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3022 				GAUDI_ARB_WDT_TIMEOUT);
3023 
3024 		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3025 		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3026 				QMAN_INTERNAL_MAKE_TRUSTED);
3027 	}
3028 
3029 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3030 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3031 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3032 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3033 
3034 	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3035 	if (tpc_id == 6) {
3036 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3037 				mtr_base_ws_lo);
3038 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3039 				mtr_base_ws_hi);
3040 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3041 				so_base_ws_lo);
3042 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3043 				so_base_ws_hi);
3044 	}
3045 }
3046 
3047 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3048 {
3049 	struct gaudi_device *gaudi = hdev->asic_specific;
3050 	struct gaudi_internal_qman_info *q;
3051 	u64 qman_base_addr;
3052 	u32 so_base_hi, tpc_offset = 0;
3053 	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3054 			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3055 	int i, tpc_id, internal_q_index;
3056 
3057 	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3058 		return;
3059 
3060 	so_base_hi = upper_32_bits(CFG_BASE +
3061 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3062 
3063 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3064 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3065 			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3066 						tpc_id * QMAN_STREAMS + i;
3067 			q = &gaudi->internal_qmans[internal_q_index];
3068 			qman_base_addr = (u64) q->pq_dma_addr;
3069 			gaudi_init_tpc_qman(hdev, tpc_offset, i,
3070 						qman_base_addr);
3071 
3072 			if (i == 3) {
3073 				/* Initializing lower CP for TPC QMAN */
3074 				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3075 
3076 				/* Enable the QMAN and TPC channel */
3077 				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3078 						QMAN_TPC_ENABLE);
3079 			}
3080 		}
3081 
3082 		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3083 				so_base_hi);
3084 
3085 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3086 
3087 		gaudi->hw_cap_initialized |=
3088 				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3089 	}
3090 }
3091 
3092 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3093 				int qman_id, u64 qman_base_addr, int nic_id)
3094 {
3095 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3096 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3097 	u32 q_off;
3098 	u32 nic_qm_err_cfg;
3099 
3100 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
3101 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3102 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3103 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3104 	so_base_en_lo = lower_32_bits(CFG_BASE +
3105 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3106 	so_base_en_hi = upper_32_bits(CFG_BASE +
3107 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3108 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3109 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3110 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3111 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3112 	so_base_ws_lo = lower_32_bits(CFG_BASE +
3113 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3114 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3115 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3116 
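	/*
	 * The per-stream queue registers (PQ_*, CP_*) are consecutive 32-bit
	 * registers, so stream qman_id is reached at an offset of
	 * qman_id * 4 bytes from the stream-0 register.
	 */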
3117 	q_off = nic_offset + qman_id * 4;
3118 
3119 	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3120 	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3121 
3122 	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3123 	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3124 	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3125 
3126 	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3127 							QMAN_LDMA_SIZE_OFFSET);
3128 	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3129 							QMAN_LDMA_SRC_OFFSET);
3130 	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3131 							QMAN_LDMA_DST_OFFSET);
3132 
3133 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3134 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3135 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3136 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3137 
3138 	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3139 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3140 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3141 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3142 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3143 
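	/*
	 * The GLBL_* and ARB_* registers below are per-QMAN rather than
	 * per-stream, so configure them only once, while handling stream 0.
	 */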
3144 	if (qman_id == 0) {
3145 		/* Configure RAZWI IRQ */
3146 		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3147 		if (hdev->stop_on_err) {
3148 			nic_qm_err_cfg |=
3149 				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3150 		}
3151 
3152 		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3153 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3154 			lower_32_bits(CFG_BASE +
3155 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
3156 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3157 			upper_32_bits(CFG_BASE +
3158 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
3159 		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3160 			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3161 									nic_id);
3162 
3163 		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3164 				QM_ARB_ERR_MSG_EN_MASK);
3165 
3166 		/* Increase ARB WDT to support streams architecture */
3167 		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3168 				GAUDI_ARB_WDT_TIMEOUT);
3169 
3170 		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3171 		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3172 				QMAN_INTERNAL_MAKE_TRUSTED);
3173 	}
3174 }
3175 
3176 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3177 {
3178 	struct gaudi_device *gaudi = hdev->asic_specific;
3179 	struct gaudi_internal_qman_info *q;
3180 	u64 qman_base_addr;
3181 	u32 nic_offset = 0;
3182 	u32 nic_delta_between_qmans =
3183 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3184 	u32 nic_delta_between_nics =
3185 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3186 	int i, nic_id, internal_q_index;
3187 
3188 	if (!hdev->nic_ports_mask)
3189 		return;
3190 
3191 	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3192 		return;
3193 
3194 	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3195 
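	/*
	 * Each NIC macro hosts two QMANs. The offset therefore advances by
	 * the QMAN delta per port and, after every odd-numbered port, rewinds
	 * past both QMANs and jumps to the next NIC block.
	 */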
3196 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3197 		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3198 			nic_offset += nic_delta_between_qmans;
3199 			if (nic_id & 1) {
3200 				nic_offset -= (nic_delta_between_qmans * 2);
3201 				nic_offset += nic_delta_between_nics;
3202 			}
3203 			continue;
3204 		}
3205 
3206 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3207 			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3208 						nic_id * QMAN_STREAMS + i;
3209 			q = &gaudi->internal_qmans[internal_q_index];
3210 			qman_base_addr = (u64) q->pq_dma_addr;
3211 			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3212 						qman_base_addr, nic_id);
3213 		}
3214 
3215 		/* Enable the QMAN */
3216 		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3217 
3218 		nic_offset += nic_delta_between_qmans;
3219 		if (nic_id & 1) {
3220 			nic_offset -= (nic_delta_between_qmans * 2);
3221 			nic_offset += nic_delta_between_nics;
3222 		}
3223 
3224 		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3225 	}
3226 }
3227 
3228 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3229 {
3230 	struct gaudi_device *gaudi = hdev->asic_specific;
3231 
3232 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3233 		return;
3234 
3235 	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3236 	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3237 	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3238 }
3239 
3240 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3241 {
3242 	struct gaudi_device *gaudi = hdev->asic_specific;
3243 
3244 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3245 		return;
3246 
3247 	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3248 	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3249 	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3250 	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3251 	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3252 }
3253 
3254 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3255 {
3256 	struct gaudi_device *gaudi = hdev->asic_specific;
3257 
3258 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3259 		return;
3260 
3261 	WREG32(mmMME2_QM_GLBL_CFG0, 0);
3262 	WREG32(mmMME0_QM_GLBL_CFG0, 0);
3263 }
3264 
3265 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3266 {
3267 	struct gaudi_device *gaudi = hdev->asic_specific;
3268 	u32 tpc_offset = 0;
3269 	int tpc_id;
3270 
3271 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3272 		return;
3273 
3274 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3275 		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3276 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3277 	}
3278 }
3279 
3280 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3281 {
3282 	struct gaudi_device *gaudi = hdev->asic_specific;
3283 	u32 nic_mask, nic_offset = 0;
3284 	u32 nic_delta_between_qmans =
3285 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3286 	u32 nic_delta_between_nics =
3287 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3288 	int nic_id;
3289 
3290 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3291 		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3292 
3293 		if (gaudi->hw_cap_initialized & nic_mask)
3294 			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3295 
3296 		nic_offset += nic_delta_between_qmans;
3297 		if (nic_id & 1) {
3298 			nic_offset -= (nic_delta_between_qmans * 2);
3299 			nic_offset += nic_delta_between_nics;
3300 		}
3301 	}
3302 }
3303 
3304 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3305 {
3306 	struct gaudi_device *gaudi = hdev->asic_specific;
3307 
3308 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3309 		return;
3310 
3311 	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
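	/* A value of 0xF raises CP_STOP only for the four upper CPs */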
3312 	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3313 	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3314 	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3315 }
3316 
3317 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3318 {
3319 	struct gaudi_device *gaudi = hdev->asic_specific;
3320 
3321 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3322 		return;
3323 
3324 	/* Stop CPs of HBM DMA QMANs */
3325 
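	/* A value of 0x1F raises CP_STOP for all five CPs (four upper + lower) */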
3326 	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3327 	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3328 	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3329 	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3330 	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3331 }
3332 
3333 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3334 {
3335 	struct gaudi_device *gaudi = hdev->asic_specific;
3336 
3337 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3338 		return;
3339 
3340 	/* Stop CPs of MME QMANs */
3341 	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3342 	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3343 }
3344 
3345 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3346 {
3347 	struct gaudi_device *gaudi = hdev->asic_specific;
3348 
3349 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3350 		return;
3351 
3352 	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3353 	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3354 	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3355 	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3356 	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3357 	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3358 	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3359 	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3360 }
3361 
3362 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3363 {
3364 	struct gaudi_device *gaudi = hdev->asic_specific;
3365 
3366 	/* Stop upper CPs of QMANs */
3367 
3368 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3369 		WREG32(mmNIC0_QM0_GLBL_CFG1,
3370 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3371 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3372 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3373 
3374 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3375 		WREG32(mmNIC0_QM1_GLBL_CFG1,
3376 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3377 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3378 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3379 
3380 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3381 		WREG32(mmNIC1_QM0_GLBL_CFG1,
3382 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3383 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3384 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3385 
3386 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3387 		WREG32(mmNIC1_QM1_GLBL_CFG1,
3388 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3389 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3390 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3391 
3392 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3393 		WREG32(mmNIC2_QM0_GLBL_CFG1,
3394 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3395 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3396 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3397 
3398 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3399 		WREG32(mmNIC2_QM1_GLBL_CFG1,
3400 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3401 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3402 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3403 
3404 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3405 		WREG32(mmNIC3_QM0_GLBL_CFG1,
3406 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3407 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3408 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3409 
3410 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3411 		WREG32(mmNIC3_QM1_GLBL_CFG1,
3412 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3413 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3414 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3415 
3416 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3417 		WREG32(mmNIC4_QM0_GLBL_CFG1,
3418 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3419 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3420 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3421 
3422 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3423 		WREG32(mmNIC4_QM1_GLBL_CFG1,
3424 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3425 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3426 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3427 }
3428 
3429 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3430 {
3431 	struct gaudi_device *gaudi = hdev->asic_specific;
3432 
3433 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3434 		return;
3435 
3436 	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3437 	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3438 	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3439 }
3440 
3441 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3442 {
3443 	struct gaudi_device *gaudi = hdev->asic_specific;
3444 
3445 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3446 		return;
3447 
3448 	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3449 	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3450 	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3451 	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3452 	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3453 }
3454 
3455 static void gaudi_mme_stall(struct hl_device *hdev)
3456 {
3457 	struct gaudi_device *gaudi = hdev->asic_specific;
3458 
3459 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3460 		return;
3461 
3462 	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
3463 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3464 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3465 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3466 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3467 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3468 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3469 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3470 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3471 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3472 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3473 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3474 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3475 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3476 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3477 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3478 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3479 }
3480 
3481 static void gaudi_tpc_stall(struct hl_device *hdev)
3482 {
3483 	struct gaudi_device *gaudi = hdev->asic_specific;
3484 
3485 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3486 		return;
3487 
3488 	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3489 	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3490 	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3491 	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3492 	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3493 	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3494 	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3495 	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3496 }
3497 
3498 static void gaudi_set_clock_gating(struct hl_device *hdev)
3499 {
3500 	struct gaudi_device *gaudi = hdev->asic_specific;
3501 	u32 qman_offset;
3502 	bool enable;
3503 	int i;
3504 
3505 	/* If we are in the middle of a debug session, don't enable clock
3506 	 * gating as it may interfere with the debugger
3507 	 */
3508 	if (hdev->in_debug)
3509 		return;
3510 
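	/*
	 * The clock gating registers are privileged; when firmware security
	 * is enabled they are expected to be managed by the firmware, so the
	 * driver leaves them alone.
	 */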
3511 	if (!hdev->asic_prop.fw_security_disabled)
3512 		return;
3513 
3514 	for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3515 		enable = !!(hdev->clock_gating_mask &
3516 				(BIT_ULL(gaudi_dma_assignment[i])));
3517 
3518 		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3519 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3520 				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3521 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3522 				enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3523 	}
3524 
3525 	for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3526 		enable = !!(hdev->clock_gating_mask &
3527 				(BIT_ULL(gaudi_dma_assignment[i])));
3528 
3529 		/* GC sends work to the DMA engine through the upper CP in
3530 		 * DMA5, so clock gating must not be enabled in that DMA
3531 		 */
3532 		if (i == GAUDI_HBM_DMA_4)
3533 			enable = 0;
3534 
3535 		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3536 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3537 				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3538 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3539 				enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3540 	}
3541 
3542 	enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3543 	WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3544 	WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3545 
3546 	enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3547 	WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3548 	WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3549 
3550 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3551 		enable = !!(hdev->clock_gating_mask &
3552 				(BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3553 
3554 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3555 				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3556 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3557 				enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3558 
3559 		qman_offset += TPC_QMAN_OFFSET;
3560 	}
3561 
3562 	gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3563 }
3564 
3565 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3566 {
3567 	struct gaudi_device *gaudi = hdev->asic_specific;
3568 	u32 qman_offset;
3569 	int i;
3570 
3571 	if (!hdev->asic_prop.fw_security_disabled)
3572 		return;
3573 
3574 	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3575 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3576 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3577 
3578 		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3579 	}
3580 
3581 	WREG32(mmMME0_QM_CGM_CFG, 0);
3582 	WREG32(mmMME0_QM_CGM_CFG1, 0);
3583 	WREG32(mmMME2_QM_CGM_CFG, 0);
3584 	WREG32(mmMME2_QM_CGM_CFG1, 0);
3585 
3586 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3587 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3588 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3589 
3590 		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3591 	}
3592 
3593 	gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3594 }
3595 
3596 static void gaudi_enable_timestamp(struct hl_device *hdev)
3597 {
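	/*
	 * Note: mmPSOC_TIMESTAMP_BASE is an absolute device address (unlike
	 * most mm* offsets used with WREG32), hence the CFG_BASE subtraction
	 * in the accesses below.
	 */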
3598 	/* Disable the timestamp counter */
3599 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3600 
3601 	/* Zero the lower/upper parts of the 64-bit counter */
3602 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3603 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3604 
3605 	/* Enable the counter */
3606 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3607 }
3608 
3609 static void gaudi_disable_timestamp(struct hl_device *hdev)
3610 {
3611 	/* Disable the timestamp counter */
3612 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3613 }
3614 
3615 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3616 {
3617 	u32 wait_timeout_ms;
3618 
3619 	dev_info(hdev->dev,
3620 		"Halting compute engines and disabling interrupts\n");
3621 
3622 	if (hdev->pldm)
3623 		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3624 	else
3625 		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3626 
3627 	gaudi_stop_nic_qmans(hdev);
3628 	gaudi_stop_mme_qmans(hdev);
3629 	gaudi_stop_tpc_qmans(hdev);
3630 	gaudi_stop_hbm_dma_qmans(hdev);
3631 	gaudi_stop_pci_dma_qmans(hdev);
3632 
3633 	hdev->asic_funcs->disable_clock_gating(hdev);
3634 
3635 	msleep(wait_timeout_ms);
3636 
3637 	gaudi_pci_dma_stall(hdev);
3638 	gaudi_hbm_dma_stall(hdev);
3639 	gaudi_tpc_stall(hdev);
3640 	gaudi_mme_stall(hdev);
3641 
3642 	msleep(wait_timeout_ms);
3643 
3644 	gaudi_disable_nic_qmans(hdev);
3645 	gaudi_disable_mme_qmans(hdev);
3646 	gaudi_disable_tpc_qmans(hdev);
3647 	gaudi_disable_hbm_dma_qmans(hdev);
3648 	gaudi_disable_pci_dma_qmans(hdev);
3649 
3650 	gaudi_disable_timestamp(hdev);
3651 
3652 	gaudi_disable_msi(hdev);
3653 }
3654 
3655 static int gaudi_mmu_init(struct hl_device *hdev)
3656 {
3657 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3658 	struct gaudi_device *gaudi = hdev->asic_specific;
3659 	u64 hop0_addr;
3660 	int rc, i;
3661 
3662 	if (!hdev->mmu_enable)
3663 		return 0;
3664 
3665 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3666 		return 0;
3667 
3668 	for (i = 0 ; i < prop->max_asid ; i++) {
3669 		hop0_addr = prop->mmu_pgt_addr +
3670 				(i * prop->mmu_hop_table_size);
3671 
3672 		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3673 		if (rc) {
3674 			dev_err(hdev->dev,
3675 				"failed to set hop0 addr for asid %d\n", i);
3676 			goto err;
3677 		}
3678 	}
3679 
3680 	/* init MMU cache manage page */
3681 	WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3682 	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3683 
3684 	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3685 
3686 	WREG32(mmMMU_UP_MMU_ENABLE, 1);
3687 	WREG32(mmMMU_UP_SPI_MASK, 0xF);
3688 
3689 	WREG32(mmSTLB_HOP_CONFIGURATION,
3690 			hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3691 
3692 	/*
3693 	 * The H/W expects the first PI after init to be 1. After wraparound
3694 	 * we'll write 0.
3695 	 */
3696 	gaudi->mmu_cache_inv_pi = 1;
3697 
3698 	gaudi->hw_cap_initialized |= HW_CAP_MMU;
3699 
3700 	return 0;
3701 
3702 err:
3703 	return rc;
3704 }
3705 
3706 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3707 {
3708 	void __iomem *dst;
3709 
3710 	/* HBM scrambler must be initialized before pushing F/W to HBM */
3711 	gaudi_init_scrambler_hbm(hdev);
3712 
3713 	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3714 
3715 	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3716 }
3717 
3718 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3719 {
3720 	void __iomem *dst;
3721 
3722 	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3723 
3724 	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3725 }
3726 
3727 static int gaudi_read_device_fw_version(struct hl_device *hdev,
3728 					enum hl_fw_component fwc)
3729 {
3730 	const char *name;
3731 	u32 ver_off;
3732 	char *dest;
3733 
3734 	switch (fwc) {
3735 	case FW_COMP_UBOOT:
3736 		ver_off = RREG32(mmUBOOT_VER_OFFSET);
3737 		dest = hdev->asic_prop.uboot_ver;
3738 		name = "U-Boot";
3739 		break;
3740 	case FW_COMP_PREBOOT:
3741 		ver_off = RREG32(mmPREBOOT_VER_OFFSET);
3742 		dest = hdev->asic_prop.preboot_ver;
3743 		name = "Preboot";
3744 		break;
3745 	default:
3746 		dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
3747 		return -EIO;
3748 	}
3749 
3750 	ver_off &= ~((u32)SRAM_BASE_ADDR);
3751 
3752 	if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
3753 		memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
3754 							VERSION_MAX_LEN);
3755 	} else {
3756 		dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
3757 								name, ver_off);
3758 		strcpy(dest, "unavailable");
3759 		return -EIO;
3760 	}
3761 
3762 	return 0;
3763 }
3764 
3765 static int gaudi_init_cpu(struct hl_device *hdev)
3766 {
3767 	struct gaudi_device *gaudi = hdev->asic_specific;
3768 	int rc;
3769 
3770 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3771 		return 0;
3772 
3773 	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3774 		return 0;
3775 
3776 	/*
3777 	 * The device CPU works with 40-bit addresses.
3778 	 * This register sets the extension to 50 bits.
3779 	 */
3780 	if (hdev->asic_prop.fw_security_disabled)
3781 		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3782 
3783 	rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
3784 			mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
3785 			mmCPU_CMD_STATUS_TO_HOST,
3786 			mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
3787 			!hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
3788 			GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
3789 
3790 	if (rc)
3791 		return rc;
3792 
3793 	gaudi->hw_cap_initialized |= HW_CAP_CPU;
3794 
3795 	return 0;
3796 }
3797 
3798 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3799 {
3800 	struct gaudi_device *gaudi = hdev->asic_specific;
3801 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3802 	struct hl_eq *eq;
3803 	u32 status;
3804 	struct hl_hw_queue *cpu_pq =
3805 			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3806 	int err;
3807 
3808 	if (!hdev->cpu_queues_enable)
3809 		return 0;
3810 
3811 	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3812 		return 0;
3813 
3814 	eq = &hdev->event_queue;
3815 
3816 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3817 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3818 
3819 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3820 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3821 
3822 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3823 			lower_32_bits(hdev->cpu_accessible_dma_address));
3824 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3825 			upper_32_bits(hdev->cpu_accessible_dma_address));
3826 
3827 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3828 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3829 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3830 
3831 	/* Used for EQ CI */
3832 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3833 
3834 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
3835 
3836 	if (gaudi->multi_msi_mode)
3837 		WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3838 	else
3839 		WREG32(mmCPU_IF_QUEUE_INIT,
3840 			PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3841 
3842 	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
3843 
3844 	err = hl_poll_timeout(
3845 		hdev,
3846 		mmCPU_IF_QUEUE_INIT,
3847 		status,
3848 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
3849 		1000,
3850 		cpu_timeout);
3851 
3852 	if (err) {
3853 		dev_err(hdev->dev,
3854 			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
3855 		return -EIO;
3856 	}
3857 
3858 	/* update FW application security bits */
3859 	if (prop->fw_security_status_valid)
3860 		prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0);
3861 
3862 	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3863 	return 0;
3864 }
3865 
3866 static void gaudi_pre_hw_init(struct hl_device *hdev)
3867 {
3868 	/* Perform read from the device to make sure device is up */
3869 	RREG32(mmHW_STATE);
3870 
3871 	if (hdev->asic_prop.fw_security_disabled) {
3872 		/* Set the access through PCI bars (Linux driver only) as
3873 		 * secured
3874 		 */
3875 		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3876 				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3877 				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3878 
3879 		/* Perform a read to flush the pending writes and ensure the
3880 		 * configuration was set in the device
3881 		 */
3882 		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3883 	}
3884 
3885 	/*
3886 	 * Let's mark in the H/W that we have reached this point. We check
3887 	 * this value in the reset_before_init function to understand whether
3888 	 * we need to reset the chip before doing H/W init. This register is
3889 	 * cleared by the H/W upon H/W reset
3890 	 */
3891 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3892 }
3893 
3894 static int gaudi_hw_init(struct hl_device *hdev)
3895 {
3896 	int rc;
3897 
3898 	gaudi_pre_hw_init(hdev);
3899 
3900 	gaudi_init_pci_dma_qmans(hdev);
3901 
3902 	gaudi_init_hbm_dma_qmans(hdev);
3903 
3904 	rc = gaudi_init_cpu(hdev);
3905 	if (rc) {
3906 		dev_err(hdev->dev, "failed to initialize CPU\n");
3907 		return rc;
3908 	}
3909 
3910 	/* If clock gating was enabled in preboot, we need to disable it here
3911 	 * before touching the MME/TPC registers.
3912 	 * There is no need to take clk gating mutex because when this function
3913 	 * runs, no other relevant code can run
3914 	 */
3915 	hdev->asic_funcs->disable_clock_gating(hdev);
3916 
3917 	/* SRAM scrambler must be initialized after CPU is running from HBM */
3918 	gaudi_init_scrambler_sram(hdev);
3919 
3920 	/* This is here just in case we are working without CPU */
3921 	gaudi_init_scrambler_hbm(hdev);
3922 
3923 	gaudi_init_golden_registers(hdev);
3924 
3925 	rc = gaudi_mmu_init(hdev);
3926 	if (rc)
3927 		return rc;
3928 
3929 	gaudi_init_security(hdev);
3930 
3931 	gaudi_init_mme_qmans(hdev);
3932 
3933 	gaudi_init_tpc_qmans(hdev);
3934 
3935 	gaudi_init_nic_qmans(hdev);
3936 
3937 	hdev->asic_funcs->set_clock_gating(hdev);
3938 
3939 	gaudi_enable_timestamp(hdev);
3940 
3941 	/* MSI must be enabled before CPU queues and NIC are initialized */
3942 	rc = gaudi_enable_msi(hdev);
3943 	if (rc)
3944 		goto disable_queues;
3945 
3946 	/* must be called after MSI was enabled */
3947 	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3948 	if (rc) {
3949 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3950 			rc);
3951 		goto disable_msi;
3952 	}
3953 
3954 	/* Perform read from the device to flush all configuration */
3955 	RREG32(mmHW_STATE);
3956 
3957 	return 0;
3958 
3959 disable_msi:
3960 	gaudi_disable_msi(hdev);
3961 disable_queues:
3962 	gaudi_disable_mme_qmans(hdev);
3963 	gaudi_disable_pci_dma_qmans(hdev);
3964 
3965 	return rc;
3966 }
3967 
3968 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3969 {
3970 	struct gaudi_device *gaudi = hdev->asic_specific;
3971 	u32 status, reset_timeout_ms, cpu_timeout_ms;
3972 
3973 	if (!hard_reset) {
3974 		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3975 		return;
3976 	}
3977 
3978 	if (hdev->pldm) {
3979 		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3980 		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3981 	} else {
3982 		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3983 		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3984 	}
3985 
3986 	/* Set device to handle FLR by H/W as we will put the device CPU to
3987 	 * halt mode
3988 	 */
3989 	if (hdev->asic_prop.fw_security_disabled &&
3990 				!hdev->asic_prop.hard_reset_done_by_fw)
3991 		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
3992 					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3993 
3994 	/* The state of the CPU is unknown at this point, so make sure it is
3995 	 * stopped by any means necessary
3996 	 */
3997 	if (hdev->asic_prop.hard_reset_done_by_fw)
3998 		WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV);
3999 	else
4000 		WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
4001 
4002 	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
4003 
4004 	if (hdev->asic_prop.fw_security_disabled &&
4005 				!hdev->asic_prop.hard_reset_done_by_fw) {
4006 
4007 		/* Configure the reset registers. Must be done as early as
4008 		 * possible in case we fail during H/W initialization
4009 		 */
4010 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4011 						(CFG_RST_H_DMA_MASK |
4012 						CFG_RST_H_MME_MASK |
4013 						CFG_RST_H_SM_MASK |
4014 						CFG_RST_H_TPC_7_MASK));
4015 
4016 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4017 
4018 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4019 						(CFG_RST_H_HBM_MASK |
4020 						CFG_RST_H_TPC_7_MASK |
4021 						CFG_RST_H_NIC_MASK |
4022 						CFG_RST_H_SM_MASK |
4023 						CFG_RST_H_DMA_MASK |
4024 						CFG_RST_H_MME_MASK |
4025 						CFG_RST_H_CPU_MASK |
4026 						CFG_RST_H_MMU_MASK));
4027 
4028 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4029 						(CFG_RST_L_IF_MASK |
4030 						CFG_RST_L_PSOC_MASK |
4031 						CFG_RST_L_TPC_MASK));
4032 
4033 		msleep(cpu_timeout_ms);
4034 
4035 		/* Tell ASIC not to re-initialize PCIe */
4036 		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4037 
4038 		/* Restart BTL/BLR upon hard-reset */
4039 		if (hdev->asic_prop.fw_security_disabled)
4040 			WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4041 
4042 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4043 			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4044 
4045 		dev_info(hdev->dev,
4046 			"Issued HARD reset command, going to wait %dms\n",
4047 			reset_timeout_ms);
4048 	} else {
4049 		dev_info(hdev->dev,
4050 			"Firmware performs HARD reset, going to wait %dms\n",
4051 			reset_timeout_ms);
4052 	}
4053 
4054 	/*
4055 	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4056 	 * itself is in reset. Need to wait until the reset is deasserted
4057 	 */
4058 	msleep(reset_timeout_ms);
4059 
4060 	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4061 	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4062 		dev_err(hdev->dev,
4063 			"Timeout while waiting for device to reset 0x%x\n",
4064 			status);
4065 
4066 	if (gaudi) {
4067 		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4068 				HW_CAP_HBM | HW_CAP_PCI_DMA |
4069 				HW_CAP_MME | HW_CAP_TPC_MASK |
4070 				HW_CAP_HBM_DMA | HW_CAP_PLL |
4071 				HW_CAP_NIC_MASK | HW_CAP_MMU |
4072 				HW_CAP_SRAM_SCRAMBLER |
4073 				HW_CAP_HBM_SCRAMBLER |
4074 				HW_CAP_CLK_GATE);
4075 
4076 		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4077 	}
4078 }
4079 
4080 static int gaudi_suspend(struct hl_device *hdev)
4081 {
4082 	int rc;
4083 
4084 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4085 	if (rc)
4086 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4087 
4088 	return rc;
4089 }
4090 
4091 static int gaudi_resume(struct hl_device *hdev)
4092 {
4093 	return gaudi_init_iatu(hdev);
4094 }
4095 
4096 static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4097 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
4098 {
4099 	int rc;
4100 
4101 	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4102 			VM_DONTCOPY | VM_NORESERVE;
4103 
4104 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4105 				(dma_addr - HOST_PHYS_BASE), size);
4106 	if (rc)
4107 		dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);
4108 
4109 	return rc;
4110 }
4111 
4112 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4113 {
4114 	struct gaudi_device *gaudi = hdev->asic_specific;
4115 	u32 db_reg_offset, db_value, dma_qm_offset, q_off;
4116 	int dma_id;
4117 	bool invalid_queue = false;
4118 
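	/*
	 * The external DMA queues are grouped four streams per engine: the
	 * engine selects the QMAN block and the two low bits of the queue ID
	 * select the stream's PQ_PI register. The CPU PQ ID sits between the
	 * DMA_1 and DMA_2 IDs in the queue enum, which is why the
	 * DMA_2..DMA_7 cases below subtract one before masking.
	 */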
4119 	switch (hw_queue_id) {
4120 	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4121 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4122 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4123 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4124 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4125 		break;
4126 
4127 	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4128 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4129 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4130 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4131 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4132 		break;
4133 
4134 	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4135 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4136 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4137 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4138 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4139 		break;
4140 
4141 	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4142 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4143 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4144 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4145 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4146 		break;
4147 
4148 	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4149 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4150 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4151 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4152 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4153 		break;
4154 
4155 	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4156 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4157 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4158 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4159 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4160 		break;
4161 
4162 	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4163 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4164 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4165 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4166 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4167 		break;
4168 
4169 	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4170 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4171 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4172 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4173 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4174 		break;
4175 
4176 	case GAUDI_QUEUE_ID_CPU_PQ:
4177 		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4178 			db_reg_offset = mmCPU_IF_PF_PQ_PI;
4179 		else
4180 			invalid_queue = true;
4181 		break;
4182 
4183 	case GAUDI_QUEUE_ID_MME_0_0:
4184 		db_reg_offset = mmMME2_QM_PQ_PI_0;
4185 		break;
4186 
4187 	case GAUDI_QUEUE_ID_MME_0_1:
4188 		db_reg_offset = mmMME2_QM_PQ_PI_1;
4189 		break;
4190 
4191 	case GAUDI_QUEUE_ID_MME_0_2:
4192 		db_reg_offset = mmMME2_QM_PQ_PI_2;
4193 		break;
4194 
4195 	case GAUDI_QUEUE_ID_MME_0_3:
4196 		db_reg_offset = mmMME2_QM_PQ_PI_3;
4197 		break;
4198 
4199 	case GAUDI_QUEUE_ID_MME_1_0:
4200 		db_reg_offset = mmMME0_QM_PQ_PI_0;
4201 		break;
4202 
4203 	case GAUDI_QUEUE_ID_MME_1_1:
4204 		db_reg_offset = mmMME0_QM_PQ_PI_1;
4205 		break;
4206 
4207 	case GAUDI_QUEUE_ID_MME_1_2:
4208 		db_reg_offset = mmMME0_QM_PQ_PI_2;
4209 		break;
4210 
4211 	case GAUDI_QUEUE_ID_MME_1_3:
4212 		db_reg_offset = mmMME0_QM_PQ_PI_3;
4213 		break;
4214 
4215 	case GAUDI_QUEUE_ID_TPC_0_0:
4216 		db_reg_offset = mmTPC0_QM_PQ_PI_0;
4217 		break;
4218 
4219 	case GAUDI_QUEUE_ID_TPC_0_1:
4220 		db_reg_offset = mmTPC0_QM_PQ_PI_1;
4221 		break;
4222 
4223 	case GAUDI_QUEUE_ID_TPC_0_2:
4224 		db_reg_offset = mmTPC0_QM_PQ_PI_2;
4225 		break;
4226 
4227 	case GAUDI_QUEUE_ID_TPC_0_3:
4228 		db_reg_offset = mmTPC0_QM_PQ_PI_3;
4229 		break;
4230 
4231 	case GAUDI_QUEUE_ID_TPC_1_0:
4232 		db_reg_offset = mmTPC1_QM_PQ_PI_0;
4233 		break;
4234 
4235 	case GAUDI_QUEUE_ID_TPC_1_1:
4236 		db_reg_offset = mmTPC1_QM_PQ_PI_1;
4237 		break;
4238 
4239 	case GAUDI_QUEUE_ID_TPC_1_2:
4240 		db_reg_offset = mmTPC1_QM_PQ_PI_2;
4241 		break;
4242 
4243 	case GAUDI_QUEUE_ID_TPC_1_3:
4244 		db_reg_offset = mmTPC1_QM_PQ_PI_3;
4245 		break;
4246 
4247 	case GAUDI_QUEUE_ID_TPC_2_0:
4248 		db_reg_offset = mmTPC2_QM_PQ_PI_0;
4249 		break;
4250 
4251 	case GAUDI_QUEUE_ID_TPC_2_1:
4252 		db_reg_offset = mmTPC2_QM_PQ_PI_1;
4253 		break;
4254 
4255 	case GAUDI_QUEUE_ID_TPC_2_2:
4256 		db_reg_offset = mmTPC2_QM_PQ_PI_2;
4257 		break;
4258 
4259 	case GAUDI_QUEUE_ID_TPC_2_3:
4260 		db_reg_offset = mmTPC2_QM_PQ_PI_3;
4261 		break;
4262 
4263 	case GAUDI_QUEUE_ID_TPC_3_0:
4264 		db_reg_offset = mmTPC3_QM_PQ_PI_0;
4265 		break;
4266 
4267 	case GAUDI_QUEUE_ID_TPC_3_1:
4268 		db_reg_offset = mmTPC3_QM_PQ_PI_1;
4269 		break;
4270 
4271 	case GAUDI_QUEUE_ID_TPC_3_2:
4272 		db_reg_offset = mmTPC3_QM_PQ_PI_2;
4273 		break;
4274 
4275 	case GAUDI_QUEUE_ID_TPC_3_3:
4276 		db_reg_offset = mmTPC3_QM_PQ_PI_3;
4277 		break;
4278 
4279 	case GAUDI_QUEUE_ID_TPC_4_0:
4280 		db_reg_offset = mmTPC4_QM_PQ_PI_0;
4281 		break;
4282 
4283 	case GAUDI_QUEUE_ID_TPC_4_1:
4284 		db_reg_offset = mmTPC4_QM_PQ_PI_1;
4285 		break;
4286 
4287 	case GAUDI_QUEUE_ID_TPC_4_2:
4288 		db_reg_offset = mmTPC4_QM_PQ_PI_2;
4289 		break;
4290 
4291 	case GAUDI_QUEUE_ID_TPC_4_3:
4292 		db_reg_offset = mmTPC4_QM_PQ_PI_3;
4293 		break;
4294 
4295 	case GAUDI_QUEUE_ID_TPC_5_0:
4296 		db_reg_offset = mmTPC5_QM_PQ_PI_0;
4297 		break;
4298 
4299 	case GAUDI_QUEUE_ID_TPC_5_1:
4300 		db_reg_offset = mmTPC5_QM_PQ_PI_1;
4301 		break;
4302 
4303 	case GAUDI_QUEUE_ID_TPC_5_2:
4304 		db_reg_offset = mmTPC5_QM_PQ_PI_2;
4305 		break;
4306 
4307 	case GAUDI_QUEUE_ID_TPC_5_3:
4308 		db_reg_offset = mmTPC5_QM_PQ_PI_3;
4309 		break;
4310 
4311 	case GAUDI_QUEUE_ID_TPC_6_0:
4312 		db_reg_offset = mmTPC6_QM_PQ_PI_0;
4313 		break;
4314 
4315 	case GAUDI_QUEUE_ID_TPC_6_1:
4316 		db_reg_offset = mmTPC6_QM_PQ_PI_1;
4317 		break;
4318 
4319 	case GAUDI_QUEUE_ID_TPC_6_2:
4320 		db_reg_offset = mmTPC6_QM_PQ_PI_2;
4321 		break;
4322 
4323 	case GAUDI_QUEUE_ID_TPC_6_3:
4324 		db_reg_offset = mmTPC6_QM_PQ_PI_3;
4325 		break;
4326 
4327 	case GAUDI_QUEUE_ID_TPC_7_0:
4328 		db_reg_offset = mmTPC7_QM_PQ_PI_0;
4329 		break;
4330 
4331 	case GAUDI_QUEUE_ID_TPC_7_1:
4332 		db_reg_offset = mmTPC7_QM_PQ_PI_1;
4333 		break;
4334 
4335 	case GAUDI_QUEUE_ID_TPC_7_2:
4336 		db_reg_offset = mmTPC7_QM_PQ_PI_2;
4337 		break;
4338 
4339 	case GAUDI_QUEUE_ID_TPC_7_3:
4340 		db_reg_offset = mmTPC7_QM_PQ_PI_3;
4341 		break;
4342 
4343 	case GAUDI_QUEUE_ID_NIC_0_0:
4344 		db_reg_offset = mmNIC0_QM0_PQ_PI_0;
4345 		break;
4346 
4347 	case GAUDI_QUEUE_ID_NIC_0_1:
4348 		db_reg_offset = mmNIC0_QM0_PQ_PI_1;
4349 		break;
4350 
4351 	case GAUDI_QUEUE_ID_NIC_0_2:
4352 		db_reg_offset = mmNIC0_QM0_PQ_PI_2;
4353 		break;
4354 
4355 	case GAUDI_QUEUE_ID_NIC_0_3:
4356 		db_reg_offset = mmNIC0_QM0_PQ_PI_3;
4357 		break;
4358 
4359 	case GAUDI_QUEUE_ID_NIC_1_0:
4360 		db_reg_offset = mmNIC0_QM1_PQ_PI_0;
4361 		break;
4362 
4363 	case GAUDI_QUEUE_ID_NIC_1_1:
4364 		db_reg_offset = mmNIC0_QM1_PQ_PI_1;
4365 		break;
4366 
4367 	case GAUDI_QUEUE_ID_NIC_1_2:
4368 		db_reg_offset = mmNIC0_QM1_PQ_PI_2;
4369 		break;
4370 
4371 	case GAUDI_QUEUE_ID_NIC_1_3:
4372 		db_reg_offset = mmNIC0_QM1_PQ_PI_3;
4373 		break;
4374 
4375 	case GAUDI_QUEUE_ID_NIC_2_0:
4376 		db_reg_offset = mmNIC1_QM0_PQ_PI_0;
4377 		break;
4378 
4379 	case GAUDI_QUEUE_ID_NIC_2_1:
4380 		db_reg_offset = mmNIC1_QM0_PQ_PI_1;
4381 		break;
4382 
4383 	case GAUDI_QUEUE_ID_NIC_2_2:
4384 		db_reg_offset = mmNIC1_QM0_PQ_PI_2;
4385 		break;
4386 
4387 	case GAUDI_QUEUE_ID_NIC_2_3:
4388 		db_reg_offset = mmNIC1_QM0_PQ_PI_3;
4389 		break;
4390 
4391 	case GAUDI_QUEUE_ID_NIC_3_0:
4392 		db_reg_offset = mmNIC1_QM1_PQ_PI_0;
4393 		break;
4394 
4395 	case GAUDI_QUEUE_ID_NIC_3_1:
4396 		db_reg_offset = mmNIC1_QM1_PQ_PI_1;
4397 		break;
4398 
4399 	case GAUDI_QUEUE_ID_NIC_3_2:
4400 		db_reg_offset = mmNIC1_QM1_PQ_PI_2;
4401 		break;
4402 
4403 	case GAUDI_QUEUE_ID_NIC_3_3:
4404 		db_reg_offset = mmNIC1_QM1_PQ_PI_3;
4405 		break;
4406 
4407 	case GAUDI_QUEUE_ID_NIC_4_0:
4408 		db_reg_offset = mmNIC2_QM0_PQ_PI_0;
4409 		break;
4410 
4411 	case GAUDI_QUEUE_ID_NIC_4_1:
4412 		db_reg_offset = mmNIC2_QM0_PQ_PI_1;
4413 		break;
4414 
4415 	case GAUDI_QUEUE_ID_NIC_4_2:
4416 		db_reg_offset = mmNIC2_QM0_PQ_PI_2;
4417 		break;
4418 
4419 	case GAUDI_QUEUE_ID_NIC_4_3:
4420 		db_reg_offset = mmNIC2_QM0_PQ_PI_3;
4421 		break;
4422 
4423 	case GAUDI_QUEUE_ID_NIC_5_0:
4424 		db_reg_offset = mmNIC2_QM1_PQ_PI_0;
4425 		break;
4426 
4427 	case GAUDI_QUEUE_ID_NIC_5_1:
4428 		db_reg_offset = mmNIC2_QM1_PQ_PI_1;
4429 		break;
4430 
4431 	case GAUDI_QUEUE_ID_NIC_5_2:
4432 		db_reg_offset = mmNIC2_QM1_PQ_PI_2;
4433 		break;
4434 
4435 	case GAUDI_QUEUE_ID_NIC_5_3:
4436 		db_reg_offset = mmNIC2_QM1_PQ_PI_3;
4437 		break;
4438 
4439 	case GAUDI_QUEUE_ID_NIC_6_0:
4440 		db_reg_offset = mmNIC3_QM0_PQ_PI_0;
4441 		break;
4442 
4443 	case GAUDI_QUEUE_ID_NIC_6_1:
4444 		db_reg_offset = mmNIC3_QM0_PQ_PI_1;
4445 		break;
4446 
4447 	case GAUDI_QUEUE_ID_NIC_6_2:
4448 		db_reg_offset = mmNIC3_QM0_PQ_PI_2;
4449 		break;
4450 
4451 	case GAUDI_QUEUE_ID_NIC_6_3:
4452 		db_reg_offset = mmNIC3_QM0_PQ_PI_3;
4453 		break;
4454 
4455 	case GAUDI_QUEUE_ID_NIC_7_0:
4456 		db_reg_offset = mmNIC3_QM1_PQ_PI_0;
4457 		break;
4458 
4459 	case GAUDI_QUEUE_ID_NIC_7_1:
4460 		db_reg_offset = mmNIC3_QM1_PQ_PI_1;
4461 		break;
4462 
4463 	case GAUDI_QUEUE_ID_NIC_7_2:
4464 		db_reg_offset = mmNIC3_QM1_PQ_PI_2;
4465 		break;
4466 
4467 	case GAUDI_QUEUE_ID_NIC_7_3:
4468 		db_reg_offset = mmNIC3_QM1_PQ_PI_3;
4469 		break;
4470 
4471 	case GAUDI_QUEUE_ID_NIC_8_0:
4472 		db_reg_offset = mmNIC4_QM0_PQ_PI_0;
4473 		break;
4474 
4475 	case GAUDI_QUEUE_ID_NIC_8_1:
4476 		db_reg_offset = mmNIC4_QM0_PQ_PI_1;
4477 		break;
4478 
4479 	case GAUDI_QUEUE_ID_NIC_8_2:
4480 		db_reg_offset = mmNIC4_QM0_PQ_PI_2;
4481 		break;
4482 
4483 	case GAUDI_QUEUE_ID_NIC_8_3:
4484 		db_reg_offset = mmNIC4_QM0_PQ_PI_3;
4485 		break;
4486 
4487 	case GAUDI_QUEUE_ID_NIC_9_0:
4488 		db_reg_offset = mmNIC4_QM1_PQ_PI_0;
4489 		break;
4490 
4491 	case GAUDI_QUEUE_ID_NIC_9_1:
4492 		db_reg_offset = mmNIC4_QM1_PQ_PI_1;
4493 		break;
4494 
4495 	case GAUDI_QUEUE_ID_NIC_9_2:
4496 		db_reg_offset = mmNIC4_QM1_PQ_PI_2;
4497 		break;
4498 
4499 	case GAUDI_QUEUE_ID_NIC_9_3:
4500 		db_reg_offset = mmNIC4_QM1_PQ_PI_3;
4501 		break;
4502 
4503 	default:
4504 		invalid_queue = true;
4505 	}
4506 
4507 	if (invalid_queue) {
4508 		/* Should never get here */
4509 		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4510 			hw_queue_id);
4511 		return;
4512 	}
4513 
4514 	db_value = pi;
4515 
4516 	/* ring the doorbell */
4517 	WREG32(db_reg_offset, db_value);
4518 
4519 	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4520 		/* make sure device CPU will read latest data from host */
4521 		mb();
4522 		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
4523 				GAUDI_EVENT_PI_UPDATE);
4524 	}
4525 }
4526 
4527 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4528 				struct hl_bd *bd)
4529 {
4530 	__le64 *pbd = (__le64 *) bd;
4531 
4532 	/* The QMANs are in host memory so a simple copy suffices */
4533 	pqe[0] = pbd[0];
4534 	pqe[1] = pbd[1];
4535 }
4536 
4537 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4538 					dma_addr_t *dma_handle, gfp_t flags)
4539 {
4540 	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4541 						dma_handle, flags);
4542 
4543 	/* Shift to the device's base physical address of host memory */
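	/*
	 * The ASIC sees host memory behind HOST_PHYS_BASE, so every DMA
	 * address handed back to callers carries that offset; the matching
	 * free/unmap helpers subtract it again.
	 */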
4544 	if (kernel_addr)
4545 		*dma_handle += HOST_PHYS_BASE;
4546 
4547 	return kernel_addr;
4548 }
4549 
4550 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4551 		void *cpu_addr, dma_addr_t dma_handle)
4552 {
4553 	/* Subtract the device's base physical address of host memory */
4554 	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4555 
4556 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4557 }
4558 
4559 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4560 {
4561 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4562 	u64  cur_addr = DRAM_BASE_ADDR_USER;
4563 	u32 val;
4564 	u32 chunk_size;
4565 	int rc, dma_id;
4566 
4567 	while (cur_addr < prop->dram_end_address) {
4568 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4569 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4570 
4571 			chunk_size =
4572 			min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4573 
4574 			dev_dbg(hdev->dev,
4575 				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4576 				cur_addr, cur_addr + chunk_size);
4577 
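			/*
			 * Program a zero source value and commit in MEM_SET
			 * mode so the DMA core fills the chunk with zeros
			 * instead of copying from memory.
			 */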
4578 			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
4579 			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
4580 			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4581 						lower_32_bits(cur_addr));
4582 			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4583 						upper_32_bits(cur_addr));
4584 			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4585 					chunk_size);
4586 			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4587 					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4588 					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4589 
4590 			cur_addr += chunk_size;
4591 
4592 			if (cur_addr == prop->dram_end_address)
4593 				break;
4594 		}
4595 
4596 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4597 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4598 
4599 			rc = hl_poll_timeout(
4600 				hdev,
4601 				mmDMA0_CORE_STS0 + dma_offset,
4602 				val,
4603 				((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4604 				1000,
4605 				HBM_SCRUBBING_TIMEOUT_US);
4606 
4607 			if (rc) {
4608 				dev_err(hdev->dev,
4609 					"DMA Timeout during HBM scrubbing of DMA #%d\n",
4610 					dma_id);
4611 				return -EIO;
4612 			}
4613 		}
4614 	}
4615 
4616 	return 0;
4617 }
4618 
4619 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4620 {
4621 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4622 	struct gaudi_device *gaudi = hdev->asic_specific;
4623 	int rc = 0;
4624 	u64 val = 0;
4625 
4626 	if (!hdev->memory_scrub)
4627 		return 0;
4628 
4629 	if (!addr && !size) {
4630 		/* Wait till device is idle */
4631 		rc = hl_poll_timeout(
4632 				hdev,
4633 				mmDMA0_CORE_STS0/* dummy */,
4634 				val/* dummy */,
4635 				(hdev->asic_funcs->is_device_idle(hdev, NULL,
4636 						0, NULL)),
4637 						1000,
4638 						HBM_SCRUBBING_TIMEOUT_US);
4639 		if (rc) {
4640 			dev_err(hdev->dev, "Timed out while waiting for device to become idle\n");
4641 			return -EIO;
4642 		}
4643 
4644 		/* Scrub SRAM */
4645 		addr = prop->sram_user_base_address;
4646 		size = hdev->pldm ? 0x10000 :
4647 				(prop->sram_size - SRAM_USER_BASE_OFFSET);
4648 		val = 0x7777777777777777ull;
4649 
4650 		rc = gaudi_memset_device_memory(hdev, addr, size, val);
4651 		if (rc) {
4652 			dev_err(hdev->dev,
4653 				"Failed to clear SRAM in mem scrub all\n");
4654 			return rc;
4655 		}
4656 
4657 		mutex_lock(&gaudi->clk_gate_mutex);
4658 		hdev->asic_funcs->disable_clock_gating(hdev);
4659 
4660 		/* Scrub HBM using all DMA channels in parallel */
4661 		rc = gaudi_hbm_scrubbing(hdev);
4662 		if (rc)
4663 			dev_err(hdev->dev,
4664 				"Failed to clear HBM in mem scrub all\n");
4665 
4666 		hdev->asic_funcs->set_clock_gating(hdev);
4667 		mutex_unlock(&gaudi->clk_gate_mutex);
4668 	}
4669 
4670 	return rc;
4671 }
4672 
4673 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4674 				u32 queue_id, dma_addr_t *dma_handle,
4675 				u16 *queue_len)
4676 {
4677 	struct gaudi_device *gaudi = hdev->asic_specific;
4678 	struct gaudi_internal_qman_info *q;
4679 
4680 	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4681 			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4682 		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4683 		return NULL;
4684 	}
4685 
4686 	q = &gaudi->internal_qmans[queue_id];
4687 	*dma_handle = q->pq_dma_addr;
4688 	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4689 
4690 	return q->pq_kernel_addr;
4691 }
4692 
4693 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4694 				u16 len, u32 timeout, u64 *result)
4695 {
4696 	struct gaudi_device *gaudi = hdev->asic_specific;
4697 
4698 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4699 		if (result)
4700 			*result = 0;
4701 		return 0;
4702 	}
4703 
4704 	if (!timeout)
4705 		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4706 
4707 	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4708 						timeout, result);
4709 }
4710 
4711 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4712 {
4713 	struct packet_msg_prot *fence_pkt;
4714 	dma_addr_t pkt_dma_addr;
4715 	u32 fence_val, tmp, timeout_usec;
4716 	dma_addr_t fence_dma_addr;
4717 	u32 *fence_ptr;
4718 	int rc;
4719 
4720 	if (hdev->pldm)
4721 		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4722 	else
4723 		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4724 
4725 	fence_val = GAUDI_QMAN0_FENCE_VAL;
4726 
4727 	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4728 							&fence_dma_addr);
4729 	if (!fence_ptr) {
4730 		dev_err(hdev->dev,
4731 			"Failed to allocate memory for H/W queue %d testing\n",
4732 			hw_queue_id);
4733 		return -ENOMEM;
4734 	}
4735 
4736 	*fence_ptr = 0;
4737 
4738 	fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4739 					sizeof(struct packet_msg_prot),
4740 					GFP_KERNEL, &pkt_dma_addr);
4741 	if (!fence_pkt) {
4742 		dev_err(hdev->dev,
4743 			"Failed to allocate packet for H/W queue %d testing\n",
4744 			hw_queue_id);
4745 		rc = -ENOMEM;
4746 		goto free_fence_ptr;
4747 	}
4748 
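	/*
	 * Build a MSG_PROT packet that writes fence_val to fence_dma_addr;
	 * the EB/MB bits are the engine/message barriers that order the write
	 * behind any preceding work on the queue.
	 */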
4749 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4750 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4751 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4752 
4753 	fence_pkt->ctl = cpu_to_le32(tmp);
4754 	fence_pkt->value = cpu_to_le32(fence_val);
4755 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4756 
4757 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4758 					sizeof(struct packet_msg_prot),
4759 					pkt_dma_addr);
4760 	if (rc) {
4761 		dev_err(hdev->dev,
4762 			"Failed to send fence packet to H/W queue %d\n",
4763 			hw_queue_id);
4764 		goto free_pkt;
4765 	}
4766 
4767 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4768 					1000, timeout_usec, true);
4769 
4770 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4771 
4772 	if (rc == -ETIMEDOUT) {
4773 		dev_err(hdev->dev,
4774 			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4775 			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4776 		rc = -EIO;
4777 	}
4778 
4779 free_pkt:
4780 	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4781 					pkt_dma_addr);
4782 free_fence_ptr:
4783 	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4784 					fence_dma_addr);
4785 	return rc;
4786 }
4787 
4788 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4789 {
4790 	struct gaudi_device *gaudi = hdev->asic_specific;
4791 
4792 	/*
4793 	 * check the capability here, as send_cpu_message() won't update the
4794 	 * result value when the capability isn't set
4795 	 */
4796 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4797 		return 0;
4798 
4799 	return hl_fw_test_cpu_queue(hdev);
4800 }
4801 
4802 static int gaudi_test_queues(struct hl_device *hdev)
4803 {
4804 	int i, rc, ret_val = 0;
4805 
4806 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4807 		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4808 			rc = gaudi_test_queue(hdev, i);
4809 			if (rc)
4810 				ret_val = -EINVAL;
4811 		}
4812 	}
4813 
4814 	rc = gaudi_test_cpu_queue(hdev);
4815 	if (rc)
4816 		ret_val = -EINVAL;
4817 
4818 	return ret_val;
4819 }
4820 
4821 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4822 		gfp_t mem_flags, dma_addr_t *dma_handle)
4823 {
4824 	void *kernel_addr;
4825 
4826 	if (size > GAUDI_DMA_POOL_BLK_SIZE)
4827 		return NULL;
4828 
4829 	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4830 
4831 	/* Shift to the device's base physical address of host memory */
4832 	if (kernel_addr)
4833 		*dma_handle += HOST_PHYS_BASE;
4834 
4835 	return kernel_addr;
4836 }
4837 
4838 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4839 			dma_addr_t dma_addr)
4840 {
4841 	/* Cancel the device's base physical address of host memory */
4842 	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4843 
4844 	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4845 }
4846 
4847 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4848 					size_t size, dma_addr_t *dma_handle)
4849 {
4850 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4851 }
4852 
4853 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4854 						size_t size, void *vaddr)
4855 {
4856 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4857 }
4858 
4859 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
4860 			int nents, enum dma_data_direction dir)
4861 {
4862 	struct scatterlist *sg;
4863 	int i;
4864 
4865 	if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
4866 		return -ENOMEM;
4867 
4868 	/* Shift to the device's base physical address of host memory */
4869 	for_each_sg(sgl, sg, nents, i)
4870 		sg->dma_address += HOST_PHYS_BASE;
4871 
4872 	return 0;
4873 }
4874 
4875 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
4876 			int nents, enum dma_data_direction dir)
4877 {
4878 	struct scatterlist *sg;
4879 	int i;
4880 
4881 	/* Cancel the device's base physical address of host memory */
4882 	for_each_sg(sgl, sg, nents, i)
4883 		sg->dma_address -= HOST_PHYS_BASE;
4884 
4885 	dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
4886 }
4887 
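/*
 * Walk the scatter-gather table and count how many LIN_DMA descriptors are
 * needed, merging physically contiguous entries up to DMA_MAX_TRANSFER_SIZE.
 * Returns the total size in bytes of the resulting packets.
 */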
4888 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
4889 					struct sg_table *sgt)
4890 {
4891 	struct scatterlist *sg, *sg_next_iter;
4892 	u32 count, dma_desc_cnt;
4893 	u64 len, len_next;
4894 	dma_addr_t addr, addr_next;
4895 
4896 	dma_desc_cnt = 0;
4897 
4898 	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
4899 
4900 		len = sg_dma_len(sg);
4901 		addr = sg_dma_address(sg);
4902 
4903 		if (len == 0)
4904 			break;
4905 
4906 		while ((count + 1) < sgt->nents) {
4907 			sg_next_iter = sg_next(sg);
4908 			len_next = sg_dma_len(sg_next_iter);
4909 			addr_next = sg_dma_address(sg_next_iter);
4910 
4911 			if (len_next == 0)
4912 				break;
4913 
4914 			if ((addr + len == addr_next) &&
4915 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4916 				len += len_next;
4917 				count++;
4918 				sg = sg_next_iter;
4919 			} else {
4920 				break;
4921 			}
4922 		}
4923 
4924 		dma_desc_cnt++;
4925 	}
4926 
4927 	return dma_desc_cnt * sizeof(struct packet_lin_dma);
4928 }
4929 
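/*
 * Pin the host memory referenced by a user LIN_DMA packet (unless it is
 * already pinned for this job), DMA-map it and add the resulting descriptor
 * list size to the patched CB size.
 */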
4930 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4931 				struct hl_cs_parser *parser,
4932 				struct packet_lin_dma *user_dma_pkt,
4933 				u64 addr, enum dma_data_direction dir)
4934 {
4935 	struct hl_userptr *userptr;
4936 	int rc;
4937 
4938 	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4939 			parser->job_userptr_list, &userptr))
4940 		goto already_pinned;
4941 
4942 	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4943 	if (!userptr)
4944 		return -ENOMEM;
4945 
4946 	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4947 				userptr);
4948 	if (rc)
4949 		goto free_userptr;
4950 
4951 	list_add_tail(&userptr->job_node, parser->job_userptr_list);
4952 
4953 	rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
4954 					userptr->sgt->nents, dir);
4955 	if (rc) {
4956 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4957 		goto unpin_memory;
4958 	}
4959 
4960 	userptr->dma_mapped = true;
4961 	userptr->dir = dir;
4962 
4963 already_pinned:
4964 	parser->patched_cb_size +=
4965 			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4966 
4967 	return 0;
4968 
4969 unpin_memory:
4970 	hl_unpin_host_memory(hdev, userptr);
4971 free_userptr:
4972 	kfree(userptr);
4973 	return rc;
4974 }
4975 
4976 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4977 				struct hl_cs_parser *parser,
4978 				struct packet_lin_dma *user_dma_pkt,
4979 				bool src_in_host)
4980 {
4981 	enum dma_data_direction dir;
4982 	bool skip_host_mem_pin = false, user_memset;
4983 	u64 addr;
4984 	int rc = 0;
4985 
4986 	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4987 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4988 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4989 
4990 	if (src_in_host) {
4991 		if (user_memset)
4992 			skip_host_mem_pin = true;
4993 
4994 		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4995 		dir = DMA_TO_DEVICE;
4996 		addr = le64_to_cpu(user_dma_pkt->src_addr);
4997 	} else {
4998 		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4999 		dir = DMA_FROM_DEVICE;
5000 		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5001 				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5002 				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5003 	}
5004 
5005 	if (skip_host_mem_pin)
5006 		parser->patched_cb_size += sizeof(*user_dma_pkt);
5007 	else
5008 		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5009 						addr, dir);
5010 
5011 	return rc;
5012 }
5013 
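/*
 * Validate a user LIN_DMA packet for the non-MMU flow and determine whether
 * the host side of the transfer must be pinned before submission.
 */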
5014 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5015 				struct hl_cs_parser *parser,
5016 				struct packet_lin_dma *user_dma_pkt)
5017 {
5018 	bool src_in_host = false;
5019 	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5020 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5021 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5022 
5023 	dev_dbg(hdev->dev, "DMA packet details:\n");
5024 	dev_dbg(hdev->dev, "source == 0x%llx\n",
5025 				le64_to_cpu(user_dma_pkt->src_addr));
5026 	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5027 	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5028 
5029 	/*
5030 	 * Special handling for DMA with size 0. Bypass all validations
5031 	 * because no transactions will be done except for WR_COMP, which
5032 	 * is not a security issue
5033 	 */
5034 	if (!le32_to_cpu(user_dma_pkt->tsize)) {
5035 		parser->patched_cb_size += sizeof(*user_dma_pkt);
5036 		return 0;
5037 	}
5038 
5039 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5040 		src_in_host = true;
5041 
5042 	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5043 						src_in_host);
5044 }
5045 
5046 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5047 					struct hl_cs_parser *parser,
5048 					struct packet_load_and_exe *user_pkt)
5049 {
5050 	u32 cfg;
5051 
5052 	cfg = le32_to_cpu(user_pkt->cfg);
5053 
5054 	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5055 		dev_err(hdev->dev,
5056 			"User not allowed to use Load and Execute\n");
5057 		return -EPERM;
5058 	}
5059 
5060 	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5061 
5062 	return 0;
5063 }
5064 
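/*
 * Walk the user CB packet by packet, reject packets the user is not allowed
 * to submit and compute the size of the patched CB.
 */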
5065 static int gaudi_validate_cb(struct hl_device *hdev,
5066 			struct hl_cs_parser *parser, bool is_mmu)
5067 {
5068 	u32 cb_parsed_length = 0;
5069 	int rc = 0;
5070 
5071 	parser->patched_cb_size = 0;
5072 
5073 	/* parser->user_cb_size is more than 0 so the loop will always be executed */
5074 	while (cb_parsed_length < parser->user_cb_size) {
5075 		enum packet_id pkt_id;
5076 		u16 pkt_size;
5077 		struct gaudi_packet *user_pkt;
5078 
5079 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5080 
5081 		pkt_id = (enum packet_id) (
5082 				(le64_to_cpu(user_pkt->header) &
5083 				PACKET_HEADER_PACKET_ID_MASK) >>
5084 					PACKET_HEADER_PACKET_ID_SHIFT);
5085 
5086 		if (!validate_packet_id(pkt_id)) {
5087 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5088 			rc = -EINVAL;
5089 			break;
5090 		}
5091 
5092 		pkt_size = gaudi_packet_sizes[pkt_id];
5093 		cb_parsed_length += pkt_size;
5094 		if (cb_parsed_length > parser->user_cb_size) {
5095 			dev_err(hdev->dev,
5096 				"packet 0x%x is out of CB boundary\n", pkt_id);
5097 			rc = -EINVAL;
5098 			break;
5099 		}
5100 
5101 		switch (pkt_id) {
5102 		case PACKET_MSG_PROT:
5103 			dev_err(hdev->dev,
5104 				"User not allowed to use MSG_PROT\n");
5105 			rc = -EPERM;
5106 			break;
5107 
5108 		case PACKET_CP_DMA:
5109 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5110 			rc = -EPERM;
5111 			break;
5112 
5113 		case PACKET_STOP:
5114 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5115 			rc = -EPERM;
5116 			break;
5117 
5118 		case PACKET_WREG_BULK:
5119 			dev_err(hdev->dev,
5120 				"User not allowed to use WREG_BULK\n");
5121 			rc = -EPERM;
5122 			break;
5123 
5124 		case PACKET_LOAD_AND_EXE:
5125 			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5126 				(struct packet_load_and_exe *) user_pkt);
5127 			break;
5128 
5129 		case PACKET_LIN_DMA:
5130 			parser->contains_dma_pkt = true;
5131 			if (is_mmu)
5132 				parser->patched_cb_size += pkt_size;
5133 			else
5134 				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5135 					(struct packet_lin_dma *) user_pkt);
5136 			break;
5137 
5138 		case PACKET_WREG_32:
5139 		case PACKET_MSG_LONG:
5140 		case PACKET_MSG_SHORT:
5141 		case PACKET_REPEAT:
5142 		case PACKET_FENCE:
5143 		case PACKET_NOP:
5144 		case PACKET_ARB_POINT:
5145 			parser->patched_cb_size += pkt_size;
5146 			break;
5147 
5148 		default:
5149 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5150 				pkt_id);
5151 			rc = -EINVAL;
5152 			break;
5153 		}
5154 
5155 		if (rc)
5156 			break;
5157 	}
5158 
5159 	/*
5160 	 * The new CB should have space at the end for two MSG_PROT packets:
5161 	 * 1. A packet that will act as a completion packet
5162 	 * 2. A packet that will generate MSI-X interrupt
5163 	 */
5164 	if (parser->completion)
5165 		parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5166 
5167 	return rc;
5168 }
5169 
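/*
 * Expand a user LIN_DMA packet into one packet per (merged) scatter-gather
 * entry of the pinned host memory. EB is cleared on all but the first
 * generated packet and the user's WR_COMP setting is restored only on the
 * last one.
 */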
5170 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5171 				struct hl_cs_parser *parser,
5172 				struct packet_lin_dma *user_dma_pkt,
5173 				struct packet_lin_dma *new_dma_pkt,
5174 				u32 *new_dma_pkt_size)
5175 {
5176 	struct hl_userptr *userptr;
5177 	struct scatterlist *sg, *sg_next_iter;
5178 	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5179 	u64 len, len_next;
5180 	dma_addr_t dma_addr, dma_addr_next;
5181 	u64 device_memory_addr, addr;
5182 	enum dma_data_direction dir;
5183 	struct sg_table *sgt;
5184 	bool src_in_host = false;
5185 	bool skip_host_mem_pin = false;
5186 	bool user_memset;
5187 
5188 	ctl = le32_to_cpu(user_dma_pkt->ctl);
5189 
5190 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5191 		src_in_host = true;
5192 
5193 	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5194 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5195 
5196 	if (src_in_host) {
5197 		addr = le64_to_cpu(user_dma_pkt->src_addr);
5198 		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5199 		dir = DMA_TO_DEVICE;
5200 		if (user_memset)
5201 			skip_host_mem_pin = true;
5202 	} else {
5203 		addr = le64_to_cpu(user_dma_pkt->dst_addr);
5204 		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5205 		dir = DMA_FROM_DEVICE;
5206 	}
5207 
5208 	if ((!skip_host_mem_pin) &&
5209 		(!hl_userptr_is_pinned(hdev, addr,
5210 					le32_to_cpu(user_dma_pkt->tsize),
5211 					parser->job_userptr_list, &userptr))) {
5212 		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5213 				addr, user_dma_pkt->tsize);
5214 		return -EFAULT;
5215 	}
5216 
5217 	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5218 		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5219 		*new_dma_pkt_size = sizeof(*user_dma_pkt);
5220 		return 0;
5221 	}
5222 
5223 	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5224 
5225 	sgt = userptr->sgt;
5226 	dma_desc_cnt = 0;
5227 
5228 	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5229 		len = sg_dma_len(sg);
5230 		dma_addr = sg_dma_address(sg);
5231 
5232 		if (len == 0)
5233 			break;
5234 
5235 		while ((count + 1) < sgt->nents) {
5236 			sg_next_iter = sg_next(sg);
5237 			len_next = sg_dma_len(sg_next_iter);
5238 			dma_addr_next = sg_dma_address(sg_next_iter);
5239 
5240 			if (len_next == 0)
5241 				break;
5242 
5243 			if ((dma_addr + len == dma_addr_next) &&
5244 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5245 				len += len_next;
5246 				count++;
5247 				sg = sg_next_iter;
5248 			} else {
5249 				break;
5250 			}
5251 		}
5252 
5253 		ctl = le32_to_cpu(user_dma_pkt->ctl);
5254 		if (likely(dma_desc_cnt))
5255 			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5256 		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5257 		new_dma_pkt->ctl = cpu_to_le32(ctl);
5258 		new_dma_pkt->tsize = cpu_to_le32(len);
5259 
5260 		if (dir == DMA_TO_DEVICE) {
5261 			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5262 			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5263 		} else {
5264 			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5265 			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5266 		}
5267 
5268 		if (!user_memset)
5269 			device_memory_addr += len;
5270 		dma_desc_cnt++;
5271 		new_dma_pkt++;
5272 	}
5273 
5274 	if (!dma_desc_cnt) {
5275 		dev_err(hdev->dev,
5276 			"Error of 0 SG entries when patching DMA packet\n");
5277 		return -EFAULT;
5278 	}
5279 
5280 	/* Fix the last dma packet - wrcomp must be as user set it */
5281 	new_dma_pkt--;
5282 	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5283 
5284 	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5285 
5286 	return 0;
5287 }
5288 
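/*
 * Copy the user CB into the patched CB, replacing each LIN_DMA packet with
 * its patched equivalent and rejecting privileged packets.
 */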
5289 static int gaudi_patch_cb(struct hl_device *hdev,
5290 				struct hl_cs_parser *parser)
5291 {
5292 	u32 cb_parsed_length = 0;
5293 	u32 cb_patched_cur_length = 0;
5294 	int rc = 0;
5295 
5296 	/* parser->user_cb_size is more than 0 so the loop will always be executed */
5297 	while (cb_parsed_length < parser->user_cb_size) {
5298 		enum packet_id pkt_id;
5299 		u16 pkt_size;
5300 		u32 new_pkt_size = 0;
5301 		struct gaudi_packet *user_pkt, *kernel_pkt;
5302 
5303 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5304 		kernel_pkt = parser->patched_cb->kernel_address +
5305 					cb_patched_cur_length;
5306 
5307 		pkt_id = (enum packet_id) (
5308 				(le64_to_cpu(user_pkt->header) &
5309 				PACKET_HEADER_PACKET_ID_MASK) >>
5310 					PACKET_HEADER_PACKET_ID_SHIFT);
5311 
5312 		if (!validate_packet_id(pkt_id)) {
5313 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5314 			rc = -EINVAL;
5315 			break;
5316 		}
5317 
5318 		pkt_size = gaudi_packet_sizes[pkt_id];
5319 		cb_parsed_length += pkt_size;
5320 		if (cb_parsed_length > parser->user_cb_size) {
5321 			dev_err(hdev->dev,
5322 				"packet 0x%x is out of CB boundary\n", pkt_id);
5323 			rc = -EINVAL;
5324 			break;
5325 		}
5326 
5327 		switch (pkt_id) {
5328 		case PACKET_LIN_DMA:
5329 			rc = gaudi_patch_dma_packet(hdev, parser,
5330 					(struct packet_lin_dma *) user_pkt,
5331 					(struct packet_lin_dma *) kernel_pkt,
5332 					&new_pkt_size);
5333 			cb_patched_cur_length += new_pkt_size;
5334 			break;
5335 
5336 		case PACKET_MSG_PROT:
5337 			dev_err(hdev->dev,
5338 				"User not allowed to use MSG_PROT\n");
5339 			rc = -EPERM;
5340 			break;
5341 
5342 		case PACKET_CP_DMA:
5343 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5344 			rc = -EPERM;
5345 			break;
5346 
5347 		case PACKET_STOP:
5348 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5349 			rc = -EPERM;
5350 			break;
5351 
5352 		case PACKET_WREG_32:
5353 		case PACKET_WREG_BULK:
5354 		case PACKET_MSG_LONG:
5355 		case PACKET_MSG_SHORT:
5356 		case PACKET_REPEAT:
5357 		case PACKET_FENCE:
5358 		case PACKET_NOP:
5359 		case PACKET_ARB_POINT:
5360 		case PACKET_LOAD_AND_EXE:
5361 			memcpy(kernel_pkt, user_pkt, pkt_size);
5362 			cb_patched_cur_length += pkt_size;
5363 			break;
5364 
5365 		default:
5366 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5367 				pkt_id);
5368 			rc = -EINVAL;
5369 			break;
5370 		}
5371 
5372 		if (rc)
5373 			break;
5374 	}
5375 
5376 	return rc;
5377 }
5378 
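/*
 * Parse a user CB when the MMU is enabled: copy it into a kernel-owned
 * patched CB, leaving room for the two trailing MSG_PROT packets when a
 * completion is required, and validate the copy. The patched CB handle is
 * removed from the IDR before returning.
 */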
5379 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5380 		struct hl_cs_parser *parser)
5381 {
5382 	u64 patched_cb_handle;
5383 	u32 patched_cb_size;
5384 	struct hl_cb *user_cb;
5385 	int rc;
5386 
5387 	/*
5388 	 * The new CB should have space at the end for two MSG_PROT pkt:
5389 	 * 1. A packet that will act as a completion packet
5390 	 * 2. A packet that will generate MSI interrupt
5391 	 */
5392 	if (parser->completion)
5393 		parser->patched_cb_size = parser->user_cb_size +
5394 				sizeof(struct packet_msg_prot) * 2;
5395 	else
5396 		parser->patched_cb_size = parser->user_cb_size;
5397 
5398 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5399 				parser->patched_cb_size, false, false,
5400 				&patched_cb_handle);
5401 
5402 	if (rc) {
5403 		dev_err(hdev->dev,
5404 			"Failed to allocate patched CB for DMA CS %d\n",
5405 			rc);
5406 		return rc;
5407 	}
5408 
5409 	patched_cb_handle >>= PAGE_SHIFT;
5410 	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5411 				(u32) patched_cb_handle);
5412 	/* hl_cb_get should never fail */
5413 	if (!parser->patched_cb) {
5414 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5415 			(u32) patched_cb_handle);
5416 		rc = -EFAULT;
5417 		goto out;
5418 	}
5419 
5420 	/*
5421 	 * The check that parser->user_cb_size <= parser->user_cb->size was done
5422 	 * in validate_queue_index().
5423 	 */
5424 	memcpy(parser->patched_cb->kernel_address,
5425 		parser->user_cb->kernel_address,
5426 		parser->user_cb_size);
5427 
5428 	patched_cb_size = parser->patched_cb_size;
5429 
5430 	/* Validate patched CB instead of user CB */
5431 	user_cb = parser->user_cb;
5432 	parser->user_cb = parser->patched_cb;
5433 	rc = gaudi_validate_cb(hdev, parser, true);
5434 	parser->user_cb = user_cb;
5435 
5436 	if (rc) {
5437 		hl_cb_put(parser->patched_cb);
5438 		goto out;
5439 	}
5440 
5441 	if (patched_cb_size != parser->patched_cb_size) {
5442 		dev_err(hdev->dev, "user CB size mismatch\n");
5443 		hl_cb_put(parser->patched_cb);
5444 		rc = -EINVAL;
5445 		goto out;
5446 	}
5447 
5448 out:
5449 	/*
5450 	 * Always call cb destroy here because we still have 1 reference
5451 	 * to it by calling cb_get earlier. After the job will be completed,
5452 	 * cb_put will release it, but here we want to remove it from the
5453 	 * idr
5454 	 */
5455 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5456 					patched_cb_handle << PAGE_SHIFT);
5457 
5458 	return rc;
5459 }
5460 
5461 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5462 		struct hl_cs_parser *parser)
5463 {
5464 	u64 patched_cb_handle;
5465 	int rc;
5466 
5467 	rc = gaudi_validate_cb(hdev, parser, false);
5468 
5469 	if (rc)
5470 		goto free_userptr;
5471 
5472 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5473 				parser->patched_cb_size, false, false,
5474 				&patched_cb_handle);
5475 	if (rc) {
5476 		dev_err(hdev->dev,
5477 			"Failed to allocate patched CB for DMA CS %d\n", rc);
5478 		goto free_userptr;
5479 	}
5480 
5481 	patched_cb_handle >>= PAGE_SHIFT;
5482 	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5483 				(u32) patched_cb_handle);
5484 	/* hl_cb_get should never fail here */
5485 	if (!parser->patched_cb) {
5486 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5487 				(u32) patched_cb_handle);
5488 		rc = -EFAULT;
5489 		goto out;
5490 	}
5491 
5492 	rc = gaudi_patch_cb(hdev, parser);
5493 
5494 	if (rc)
5495 		hl_cb_put(parser->patched_cb);
5496 
5497 out:
5498 	/*
5499 	 * Always call cb destroy here because we still have 1 reference
5500 	 * to it by calling cb_get earlier. After the job will be completed,
5501 	 * cb_put will release it, but here we want to remove it from the
5502 	 * idr
5503 	 */
5504 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5505 				patched_cb_handle << PAGE_SHIFT);
5506 
5507 free_userptr:
5508 	if (rc)
5509 		hl_userptr_delete_list(hdev, parser->job_userptr_list);
5510 	return rc;
5511 }
5512 
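/*
 * For internal queues no patching is done - only verify that the target NIC
 * queue is enabled and that the CB address falls inside SRAM, DRAM or the
 * PMMU virtual address range.
 */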
5513 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5514 					struct hl_cs_parser *parser)
5515 {
5516 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5517 	struct gaudi_device *gaudi = hdev->asic_specific;
5518 	u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5519 		((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5520 
5521 	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5522 			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5523 			(!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5524 		dev_err(hdev->dev, "h/w queue %d is disabled\n",
5525 				parser->hw_queue_id);
5526 		return -EINVAL;
5527 	}
5528 
5529 	/* For internal queue jobs just check if CB address is valid */
5530 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5531 					parser->user_cb_size,
5532 					asic_prop->sram_user_base_address,
5533 					asic_prop->sram_end_address))
5534 		return 0;
5535 
5536 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5537 					parser->user_cb_size,
5538 					asic_prop->dram_user_base_address,
5539 					asic_prop->dram_end_address))
5540 		return 0;
5541 
5542 	/* PMMU and HPMMU addresses are equal, check only one of them */
5543 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5544 					parser->user_cb_size,
5545 					asic_prop->pmmu.start_addr,
5546 					asic_prop->pmmu.end_addr))
5547 		return 0;
5548 
5549 	dev_err(hdev->dev,
5550 		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5551 		parser->user_cb, parser->user_cb_size);
5552 
5553 	return -EFAULT;
5554 }
5555 
5556 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5557 {
5558 	struct gaudi_device *gaudi = hdev->asic_specific;
5559 
5560 	if (parser->queue_type == QUEUE_TYPE_INT)
5561 		return gaudi_parse_cb_no_ext_queue(hdev, parser);
5562 
5563 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5564 		return gaudi_parse_cb_mmu(hdev, parser);
5565 	else
5566 		return gaudi_parse_cb_no_mmu(hdev, parser);
5567 }
5568 
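/*
 * Append the two trailing MSG_PROT packets to a job CB: one that writes the
 * completion value to the CQ and one that triggers the MSI/MSI-X interrupt
 * (vector 0 unless multi-MSI mode is enabled).
 */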
5569 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5570 					void *kernel_address, u32 len,
5571 					u64 cq_addr, u32 cq_val, u32 msi_vec,
5572 					bool eb)
5573 {
5574 	struct gaudi_device *gaudi = hdev->asic_specific;
5575 	struct packet_msg_prot *cq_pkt;
5576 	u32 tmp;
5577 
5578 	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5579 
5580 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5581 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5582 
5583 	if (eb)
5584 		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5585 
5586 	cq_pkt->ctl = cpu_to_le32(tmp);
5587 	cq_pkt->value = cpu_to_le32(cq_val);
5588 	cq_pkt->addr = cpu_to_le64(cq_addr);
5589 
5590 	cq_pkt++;
5591 
5592 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5593 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5594 	cq_pkt->ctl = cpu_to_le32(tmp);
5595 	cq_pkt->value = cpu_to_le32(1);
5596 
5597 	if (!gaudi->multi_msi_mode)
5598 		msi_vec = 0;
5599 
5600 	cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5601 }
5602 
5603 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5604 {
5605 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5606 }
5607 
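/*
 * Fill a device memory range with a 64-bit value using a single memset
 * LIN_DMA packet submitted on the DMA 0 queue via QMAN0, clearing and
 * checking the DMA0 error cause register around the transfer.
 */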
5608 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5609 					u32 size, u64 val)
5610 {
5611 	struct packet_lin_dma *lin_dma_pkt;
5612 	struct hl_cs_job *job;
5613 	u32 cb_size, ctl, err_cause;
5614 	struct hl_cb *cb;
5615 	int rc;
5616 
5617 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5618 	if (!cb)
5619 		return -EFAULT;
5620 
5621 	lin_dma_pkt = cb->kernel_address;
5622 	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5623 	cb_size = sizeof(*lin_dma_pkt);
5624 
5625 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5626 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5627 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5628 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5629 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5630 
5631 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
5632 	lin_dma_pkt->src_addr = cpu_to_le64(val);
5633 	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5634 	lin_dma_pkt->tsize = cpu_to_le32(size);
5635 
5636 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5637 	if (!job) {
5638 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5639 		rc = -ENOMEM;
5640 		goto release_cb;
5641 	}
5642 
5643 	/* Verify DMA is OK */
5644 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5645 	if (err_cause && !hdev->init_done) {
5646 		dev_dbg(hdev->dev,
5647 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5648 			err_cause);
5649 		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5650 	}
5651 
5652 	job->id = 0;
5653 	job->user_cb = cb;
5654 	atomic_inc(&job->user_cb->cs_cnt);
5655 	job->user_cb_size = cb_size;
5656 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5657 	job->patched_cb = job->user_cb;
5658 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5659 
5660 	hl_debugfs_add_job(hdev, job);
5661 
5662 	rc = gaudi_send_job_on_qman0(hdev, job);
5663 	hl_debugfs_remove_job(hdev, job);
5664 	kfree(job);
5665 	atomic_dec(&cb->cs_cnt);
5666 
5667 	/* Verify DMA is OK */
5668 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5669 	if (err_cause) {
5670 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5671 		rc = -EIO;
5672 		if (!hdev->init_done) {
5673 			dev_dbg(hdev->dev,
5674 				"Clearing DMA0 engine from errors (cause 0x%x)\n",
5675 				err_cause);
5676 			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5677 		}
5678 	}
5679 
5680 release_cb:
5681 	hl_cb_put(cb);
5682 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5683 
5684 	return rc;
5685 }
5686 
5687 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5688 					u32 num_regs, u32 val)
5689 {
5690 	struct packet_msg_long *pkt;
5691 	struct hl_cs_job *job;
5692 	u32 cb_size, ctl;
5693 	struct hl_cb *cb;
5694 	int i, rc;
5695 
5696 	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5697 
5698 	if (cb_size > SZ_2M) {
5699 		dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M >> 20);
5700 		return -ENOMEM;
5701 	}
5702 
5703 	cb = hl_cb_kernel_create(hdev, cb_size, false);
5704 	if (!cb)
5705 		return -EFAULT;
5706 
5707 	pkt = cb->kernel_address;
5708 
5709 	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5710 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5711 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5712 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5713 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5714 
5715 	for (i = 0; i < num_regs ; i++, pkt++) {
5716 		pkt->ctl = cpu_to_le32(ctl);
5717 		pkt->value = cpu_to_le32(val);
5718 		pkt->addr = cpu_to_le64(reg_base + (i * 4));
5719 	}
5720 
5721 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5722 	if (!job) {
5723 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5724 		rc = -ENOMEM;
5725 		goto release_cb;
5726 	}
5727 
5728 	job->id = 0;
5729 	job->user_cb = cb;
5730 	atomic_inc(&job->user_cb->cs_cnt);
5731 	job->user_cb_size = cb_size;
5732 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5733 	job->patched_cb = job->user_cb;
5734 	job->job_cb_size = cb_size;
5735 
5736 	hl_debugfs_add_job(hdev, job);
5737 
5738 	rc = gaudi_send_job_on_qman0(hdev, job);
5739 	hl_debugfs_remove_job(hdev, job);
5740 	kfree(job);
5741 	atomic_dec(&cb->cs_cnt);
5742 
5743 release_cb:
5744 	hl_cb_put(cb);
5745 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5746 
5747 	return rc;
5748 }
5749 
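/*
 * Memset a block of registers to a given value. If no compute context is
 * active the registers are written immediately; otherwise a CB of MSG_LONG
 * packets is queued on the context's pending CB list to be executed on the
 * given (external) H/W queue.
 */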
5750 static int gaudi_schedule_register_memset(struct hl_device *hdev,
5751 		u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
5752 {
5753 	struct hl_ctx *ctx;
5754 	struct hl_pending_cb *pending_cb;
5755 	struct packet_msg_long *pkt;
5756 	u32 cb_size, ctl;
5757 	struct hl_cb *cb;
5758 	int i, rc;
5759 
5760 	mutex_lock(&hdev->fpriv_list_lock);
5761 	ctx = hdev->compute_ctx;
5762 
5763 	/* If no compute context available or context is going down
5764 	 * memset registers directly
5765 	 */
5766 	if (!ctx || kref_read(&ctx->refcount) == 0) {
5767 		rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
5768 		mutex_unlock(&hdev->fpriv_list_lock);
5769 		return rc;
5770 	}
5771 
5772 	mutex_unlock(&hdev->fpriv_list_lock);
5773 
5774 	cb_size = (sizeof(*pkt) * num_regs) +
5775 			sizeof(struct packet_msg_prot) * 2;
5776 
5777 	if (cb_size > SZ_2M) {
5778 		dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M >> 20);
5779 		return -ENOMEM;
5780 	}
5781 
5782 	pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
5783 	if (!pending_cb)
5784 		return -ENOMEM;
5785 
5786 	cb = hl_cb_kernel_create(hdev, cb_size, false);
5787 	if (!cb) {
5788 		kfree(pending_cb);
5789 		return -EFAULT;
5790 	}
5791 
5792 	pkt = cb->kernel_address;
5793 
5794 	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5795 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5796 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5797 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5798 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5799 
5800 	for (i = 0; i < num_regs ; i++, pkt++) {
5801 		pkt->ctl = cpu_to_le32(ctl);
5802 		pkt->value = cpu_to_le32(val);
5803 		pkt->addr = cpu_to_le64(reg_base + (i * 4));
5804 	}
5805 
5806 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5807 
5808 	pending_cb->cb = cb;
5809 	pending_cb->cb_size = cb_size;
5810 	/* The queue ID MUST be an external queue ID. Otherwise, we will
5811 	 * have undefined behavior
5812 	 */
5813 	pending_cb->hw_queue_id = hw_queue_id;
5814 
5815 	spin_lock(&ctx->pending_cb_lock);
5816 	list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
5817 	spin_unlock(&ctx->pending_cb_lock);
5818 
5819 	return 0;
5820 }
5821 
5822 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5823 {
5824 	u64 base_addr;
5825 	u32 num_regs;
5826 	int rc;
5827 
5828 	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5829 	num_regs = NUM_OF_SOB_IN_BLOCK;
5830 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5831 	if (rc) {
5832 		dev_err(hdev->dev, "failed resetting SM registers");
5833 		return -ENOMEM;
5834 	}
5835 
5836 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5837 	num_regs = NUM_OF_SOB_IN_BLOCK;
5838 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5839 	if (rc) {
5840 		dev_err(hdev->dev, "failed resetting SM registers");
5841 		return -ENOMEM;
5842 	}
5843 
5844 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5845 	num_regs = NUM_OF_SOB_IN_BLOCK;
5846 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5847 	if (rc) {
5848 		dev_err(hdev->dev, "failed resetting SM registers");
5849 		return -ENOMEM;
5850 	}
5851 
5852 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5853 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5854 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5855 	if (rc) {
5856 		dev_err(hdev->dev, "failed resetting SM registers");
5857 		return -ENOMEM;
5858 	}
5859 
5860 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5861 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5862 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5863 	if (rc) {
5864 		dev_err(hdev->dev, "failed resetting SM registers");
5865 		return -ENOMEM;
5866 	}
5867 
5868 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5869 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5870 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5871 	if (rc) {
5872 		dev_err(hdev->dev, "failed resetting SM registers");
5873 		return -ENOMEM;
5874 	}
5875 
5876 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5877 			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5878 	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5879 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5880 	if (rc) {
5881 		dev_err(hdev->dev, "failed resetting SM registers");
5882 		return -ENOMEM;
5883 	}
5884 
5885 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5886 			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5887 	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5888 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5889 	if (rc) {
5890 		dev_err(hdev->dev, "failed resetting SM registers");
5891 		return -ENOMEM;
5892 	}
5893 
5894 	return 0;
5895 }
5896 
5897 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5898 {
5899 	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5900 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5901 	int i;
5902 
5903 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5904 		u64 sob_addr = CFG_BASE +
5905 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5906 				(i * sob_delta);
5907 		u32 dma_offset = i * DMA_CORE_OFFSET;
5908 
5909 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5910 				lower_32_bits(sob_addr));
5911 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5912 				upper_32_bits(sob_addr));
5913 		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5914 
5915 		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5916 		 * modified by the user for SRAM reduction
5917 		 */
5918 		if (i > 1)
5919 			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5920 								0x00000001);
5921 	}
5922 }
5923 
5924 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5925 {
5926 	u32 qman_offset;
5927 	int i;
5928 
5929 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5930 		qman_offset = i * DMA_QMAN_OFFSET;
5931 		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5932 	}
5933 
5934 	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5935 		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5936 		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5937 	}
5938 
5939 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5940 		qman_offset = i * TPC_QMAN_OFFSET;
5941 		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5942 	}
5943 
5944 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5945 		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5946 				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5947 		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5948 	}
5949 }
5950 
5951 static int gaudi_restore_user_registers(struct hl_device *hdev)
5952 {
5953 	int rc;
5954 
5955 	rc = gaudi_restore_sm_registers(hdev);
5956 	if (rc)
5957 		return rc;
5958 
5959 	gaudi_restore_dma_registers(hdev);
5960 	gaudi_restore_qm_registers(hdev);
5961 
5962 	return 0;
5963 }
5964 
5965 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5966 {
5967 	return gaudi_restore_user_registers(hdev);
5968 }
5969 
5970 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5971 {
5972 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5973 	struct gaudi_device *gaudi = hdev->asic_specific;
5974 	u64 addr = prop->mmu_pgt_addr;
5975 	u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
5976 
5977 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5978 		return 0;
5979 
5980 	return gaudi_memset_device_memory(hdev, addr, size, 0);
5981 }
5982 
5983 static void gaudi_restore_phase_topology(struct hl_device *hdev)
5984 {
5985 
5986 }
5987 
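/*
 * debugfs access helper: read a 32-bit value from a device address. CFG
 * space is read through register access (unless blocked by clock gating),
 * SRAM and HBM through the PCI BARs, and host physical addresses directly
 * when no IOMMU is present.
 */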
5988 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
5989 			bool user_address, u32 *val)
5990 {
5991 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5992 	struct gaudi_device *gaudi = hdev->asic_specific;
5993 	u64 hbm_bar_addr, host_phys_end;
5994 	int rc = 0;
5995 
5996 	host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
5997 
5998 	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
5999 
6000 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6001 				(hdev->clock_gating_mask &
6002 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6003 
6004 			dev_err_ratelimited(hdev->dev,
6005 				"Can't read register - clock gating is enabled!\n");
6006 			rc = -EFAULT;
6007 		} else {
6008 			*val = RREG32(addr - CFG_BASE);
6009 		}
6010 
6011 	} else if ((addr >= SRAM_BASE_ADDR) &&
6012 			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6013 		*val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6014 				(addr - SRAM_BASE_ADDR));
6015 	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6016 		u64 bar_base_addr = DRAM_PHYS_BASE +
6017 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
6018 
6019 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6020 		if (hbm_bar_addr != U64_MAX) {
6021 			*val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6022 						(addr - bar_base_addr));
6023 
6024 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6025 						hbm_bar_addr);
6026 		}
6027 		if (hbm_bar_addr == U64_MAX)
6028 			rc = -EIO;
6029 	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6030 			user_address && !iommu_present(&pci_bus_type)) {
6031 		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6032 	} else {
6033 		rc = -EFAULT;
6034 	}
6035 
6036 	return rc;
6037 }
6038 
6039 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6040 			bool user_address, u32 val)
6041 {
6042 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6043 	struct gaudi_device *gaudi = hdev->asic_specific;
6044 	u64 hbm_bar_addr, host_phys_end;
6045 	int rc = 0;
6046 
6047 	host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6048 
6049 	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6050 
6051 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6052 				(hdev->clock_gating_mask &
6053 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6054 
6055 			dev_err_ratelimited(hdev->dev,
6056 				"Can't write register - clock gating is enabled!\n");
6057 			rc = -EFAULT;
6058 		} else {
6059 			WREG32(addr - CFG_BASE, val);
6060 		}
6061 
6062 	} else if ((addr >= SRAM_BASE_ADDR) &&
6063 			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6064 		writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6065 					(addr - SRAM_BASE_ADDR));
6066 	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6067 		u64 bar_base_addr = DRAM_PHYS_BASE +
6068 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
6069 
6070 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6071 		if (hbm_bar_addr != U64_MAX) {
6072 			writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6073 						(addr - bar_base_addr));
6074 
6075 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6076 						hbm_bar_addr);
6077 		}
6078 		if (hbm_bar_addr == U64_MAX)
6079 			rc = -EIO;
6080 	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6081 			user_address && !iommu_present(&pci_bus_type)) {
6082 		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6083 	} else {
6084 		rc = -EFAULT;
6085 	}
6086 
6087 	return rc;
6088 }
6089 
6090 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6091 				bool user_address, u64 *val)
6092 {
6093 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6094 	struct gaudi_device *gaudi = hdev->asic_specific;
6095 	u64 hbm_bar_addr, host_phys_end;
6096 	int rc = 0;
6097 
6098 	host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6099 
6100 	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6101 
6102 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6103 				(hdev->clock_gating_mask &
6104 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6105 
6106 			dev_err_ratelimited(hdev->dev,
6107 				"Can't read register - clock gating is enabled!\n");
6108 			rc = -EFAULT;
6109 		} else {
6110 			u32 val_l = RREG32(addr - CFG_BASE);
6111 			u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6112 
6113 			*val = (((u64) val_h) << 32) | val_l;
6114 		}
6115 
6116 	} else if ((addr >= SRAM_BASE_ADDR) &&
6117 		   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6118 		*val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6119 				(addr - SRAM_BASE_ADDR));
6120 	} else if (addr <=
6121 		    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6122 		u64 bar_base_addr = DRAM_PHYS_BASE +
6123 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
6124 
6125 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6126 		if (hbm_bar_addr != U64_MAX) {
6127 			*val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6128 						(addr - bar_base_addr));
6129 
6130 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6131 						hbm_bar_addr);
6132 		}
6133 		if (hbm_bar_addr == U64_MAX)
6134 			rc = -EIO;
6135 	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6136 			user_address && !iommu_present(&pci_bus_type)) {
6137 		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6138 	} else {
6139 		rc = -EFAULT;
6140 	}
6141 
6142 	return rc;
6143 }
6144 
6145 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6146 				bool user_address, u64 val)
6147 {
6148 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6149 	struct gaudi_device *gaudi = hdev->asic_specific;
6150 	u64 hbm_bar_addr, host_phys_end;
6151 	int rc = 0;
6152 
6153 	host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6154 
6155 	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6156 
6157 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6158 				(hdev->clock_gating_mask &
6159 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6160 
6161 			dev_err_ratelimited(hdev->dev,
6162 				"Can't write register - clock gating is enabled!\n");
6163 			rc = -EFAULT;
6164 		} else {
6165 			WREG32(addr - CFG_BASE, lower_32_bits(val));
6166 			WREG32(addr + sizeof(u32) - CFG_BASE,
6167 				upper_32_bits(val));
6168 		}
6169 
6170 	} else if ((addr >= SRAM_BASE_ADDR) &&
6171 		   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6172 		writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6173 					(addr - SRAM_BASE_ADDR));
6174 	} else if (addr <=
6175 		    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6176 		u64 bar_base_addr = DRAM_PHYS_BASE +
6177 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
6178 
6179 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6180 		if (hbm_bar_addr != U64_MAX) {
6181 			writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6182 						(addr - bar_base_addr));
6183 
6184 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6185 						hbm_bar_addr);
6186 		}
6187 		if (hbm_bar_addr == U64_MAX)
6188 			rc = -EIO;
6189 	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6190 			user_address && !iommu_present(&pci_bus_type)) {
6191 		*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6192 	} else {
6193 		rc = -EFAULT;
6194 	}
6195 
6196 	return rc;
6197 }
6198 
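/*
 * Perform a single transfer on a DMA core engine by programming its
 * SRC/DST/TSIZE registers directly, committing in linear mode and polling
 * the status register for completion.
 */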
6199 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6200 					u32 size_to_dma, dma_addr_t dma_addr)
6201 {
6202 	u32 err_cause, val;
6203 	u64 dma_offset;
6204 	int rc;
6205 
6206 	dma_offset = dma_id * DMA_CORE_OFFSET;
6207 
6208 	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6209 	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6210 	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6211 	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6212 	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6213 	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6214 			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6215 
6216 	rc = hl_poll_timeout(
6217 		hdev,
6218 		mmDMA0_CORE_STS0 + dma_offset,
6219 		val,
6220 		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6221 		0,
6222 		1000000);
6223 
6224 	if (rc) {
6225 		dev_err(hdev->dev,
6226 			"DMA %d timed-out during reading of 0x%llx\n",
6227 			dma_id, addr);
6228 		return -EIO;
6229 	}
6230 
6231 	/* Verify DMA is OK */
6232 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6233 	if (err_cause) {
6234 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6235 		dev_dbg(hdev->dev,
6236 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
6237 			err_cause);
6238 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6239 
6240 		return -EIO;
6241 	}
6242 
6243 	return 0;
6244 }
6245 
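/*
 * Read an arbitrary device address range into a host blob for debugfs,
 * using an idle PCI DMA engine in 2MB chunks through a temporary coherent
 * buffer. Fails with -EAGAIN if both PCI DMA engines are busy.
 */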
6246 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6247 				void *blob_addr)
6248 {
6249 	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6250 	struct gaudi_device *gaudi = hdev->asic_specific;
6251 	u64 dma_offset, qm_offset;
6252 	dma_addr_t dma_addr;
6253 	void *kernel_addr;
6254 	bool is_eng_idle;
6255 	int rc = 0, dma_id;
6256 
6257 	kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6258 						hdev, SZ_2M,
6259 						&dma_addr,
6260 						GFP_KERNEL | __GFP_ZERO);
6261 
6262 	if (!kernel_addr)
6263 		return -ENOMEM;
6264 
6265 	mutex_lock(&gaudi->clk_gate_mutex);
6266 
6267 	hdev->asic_funcs->disable_clock_gating(hdev);
6268 
6269 	hdev->asic_funcs->hw_queues_lock(hdev);
6270 
6271 	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6272 	dma_offset = dma_id * DMA_CORE_OFFSET;
6273 	qm_offset = dma_id * DMA_QMAN_OFFSET;
6274 	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6275 	is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6276 
6277 	if (!is_eng_idle) {
6278 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6279 		dma_offset = dma_id * DMA_CORE_OFFSET;
6280 		qm_offset = dma_id * DMA_QMAN_OFFSET;
6281 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6282 		is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6283 
6284 		if (!is_eng_idle) {
6285 			dev_err_ratelimited(hdev->dev,
6286 				"Can't read via DMA because it is BUSY\n");
6287 			rc = -EAGAIN;
6288 			goto out;
6289 		}
6290 	}
6291 
6292 	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6293 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6294 			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6295 
6296 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6297 	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6298 	 * ASID
6299 	 */
6300 	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6301 
6302 	/* Verify DMA is OK */
6303 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6304 	if (err_cause) {
6305 		dev_dbg(hdev->dev,
6306 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
6307 			err_cause);
6308 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6309 	}
6310 
6311 	pos = 0;
6312 	size_left = size;
6313 	size_to_dma = SZ_2M;
6314 
6315 	while (size_left > 0) {
6316 
6317 		if (size_left < SZ_2M)
6318 			size_to_dma = size_left;
6319 
6320 		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6321 						dma_addr);
6322 		if (rc)
6323 			break;
6324 
6325 		memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6326 
6327 		if (size_left <= SZ_2M)
6328 			break;
6329 
6330 		pos += SZ_2M;
6331 		addr += SZ_2M;
6332 		size_left -= SZ_2M;
6333 	}
6334 
6335 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6336 	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6337 	 * ASID
6338 	 */
6339 	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6340 			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6341 
6342 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6343 
6344 out:
6345 	hdev->asic_funcs->hw_queues_unlock(hdev);
6346 
6347 	hdev->asic_funcs->set_clock_gating(hdev);
6348 
6349 	mutex_unlock(&gaudi->clk_gate_mutex);
6350 
6351 	hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6352 						dma_addr);
6353 
6354 	return rc;
6355 }
6356 
6357 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6358 {
6359 	struct gaudi_device *gaudi = hdev->asic_specific;
6360 
6361 	if (hdev->hard_reset_pending)
6362 		return U64_MAX;
6363 
6364 	return readq(hdev->pcie_bar[HBM_BAR_ID] +
6365 			(addr - gaudi->hbm_bar_cur_addr));
6366 }
6367 
6368 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6369 {
6370 	struct gaudi_device *gaudi = hdev->asic_specific;
6371 
6372 	if (hdev->hard_reset_pending)
6373 		return;
6374 
6375 	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6376 			(addr - gaudi->hbm_bar_cur_addr));
6377 }
6378 
6379 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6380 {
6381 	/* mask to zero the MMBP and ASID bits */
6382 	WREG32_AND(reg, ~0x7FF);
6383 	WREG32_OR(reg, asid);
6384 }
6385 
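/*
 * Program the given ASID, with MMU bypass cleared, into the non-secure
 * properties and ARUSER/AWUSER registers of the engine QMANs and cores so
 * their transactions are translated with that ASID.
 */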
6386 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6387 {
6388 	struct gaudi_device *gaudi = hdev->asic_specific;
6389 
6390 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6391 		return;
6392 
6393 	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6394 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6395 		return;
6396 	}
6397 
6398 	mutex_lock(&gaudi->clk_gate_mutex);
6399 
6400 	hdev->asic_funcs->disable_clock_gating(hdev);
6401 
6402 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6403 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6404 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6405 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6406 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6407 
6408 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6409 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6410 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6411 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6412 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6413 
6414 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6415 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6416 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6417 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6418 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6419 
6420 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6421 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6422 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6423 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6424 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6425 
6426 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6427 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6428 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6429 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6430 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6431 
6432 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6433 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6434 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6435 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6436 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6437 
6438 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6439 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6440 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6441 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6442 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6443 
6444 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6445 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6446 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6447 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6448 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6449 
6450 	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6451 	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6452 	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6453 	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6454 	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6455 	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6456 	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6457 	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6458 
6459 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6460 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6461 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6462 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6463 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6464 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6465 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6466 
6467 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6468 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6469 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6470 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6471 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6472 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6473 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6474 
6475 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6476 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6477 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6478 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6479 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6480 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6481 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6482 
6483 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6484 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6485 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6486 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6487 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6488 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6489 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6490 
6491 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6492 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6493 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6494 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6495 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6496 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6497 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6498 
6499 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6500 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6501 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6502 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6503 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6504 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6505 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6506 
6507 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6508 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6509 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6510 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6511 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6512 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6513 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6514 
6515 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6516 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6517 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6518 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6519 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6520 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6521 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6522 
6523 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6524 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6525 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6526 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6527 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6528 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6529 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6530 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6531 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6532 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6533 
6534 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6535 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6536 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6537 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6538 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6539 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6540 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6541 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6542 	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6543 	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6544 	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6545 	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6546 
6547 	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC0) {
6548 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6549 				asid);
6550 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6551 				asid);
6552 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6553 				asid);
6554 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6555 				asid);
6556 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6557 				asid);
6558 	}
6559 
6560 	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC1) {
6561 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6562 				asid);
6563 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6564 				asid);
6565 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6566 				asid);
6567 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6568 				asid);
6569 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6570 				asid);
6571 	}
6572 
6573 	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC2) {
6574 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6575 				asid);
6576 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6577 				asid);
6578 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6579 				asid);
6580 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6581 				asid);
6582 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6583 				asid);
6584 	}
6585 
6586 	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC3) {
6587 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6588 				asid);
6589 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6590 				asid);
6591 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6592 				asid);
6593 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6594 				asid);
6595 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6596 				asid);
6597 	}
6598 
6599 	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC4) {
6600 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6601 				asid);
6602 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6603 				asid);
6604 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6605 				asid);
6606 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6607 				asid);
6608 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6609 				asid);
6610 	}
6611 
6612 	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC5) {
6613 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6614 				asid);
6615 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6616 				asid);
6617 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6618 				asid);
6619 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6620 				asid);
6621 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6622 				asid);
6623 	}
6624 
6625 	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC6) {
6626 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6627 				asid);
6628 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6629 				asid);
6630 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6631 				asid);
6632 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6633 				asid);
6634 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6635 				asid);
6636 	}
6637 
6638 	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC7) {
6639 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6640 				asid);
6641 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6642 				asid);
6643 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6644 				asid);
6645 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6646 				asid);
6647 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6648 				asid);
6649 	}
6650 
6651 	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC8) {
6652 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6653 				asid);
6654 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6655 				asid);
6656 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6657 				asid);
6658 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6659 				asid);
6660 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6661 				asid);
6662 	}
6663 
6664 	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC9) {
6665 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6666 				asid);
6667 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6668 				asid);
6669 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6670 				asid);
6671 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6672 				asid);
6673 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6674 				asid);
6675 	}
6676 
6677 	hdev->asic_funcs->set_clock_gating(hdev);
6678 
6679 	mutex_unlock(&gaudi->clk_gate_mutex);
6680 }
6681 
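/*
 * Send a driver-internal command buffer on the internal QMAN0 DMA queue.
 * The device must be idle. Completion is detected by polling a fence value
 * that a MSG_PROT packet, appended at the end of the patched CB, writes to
 * a DMA-pool allocation. The DMA core protection bit is raised for the
 * duration of the job and cleared before returning.
 */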
6682 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6683 		struct hl_cs_job *job)
6684 {
6685 	struct packet_msg_prot *fence_pkt;
6686 	u32 *fence_ptr;
6687 	dma_addr_t fence_dma_addr;
6688 	struct hl_cb *cb;
6689 	u32 tmp, timeout, dma_offset;
6690 	int rc;
6691 
6692 	if (hdev->pldm)
6693 		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6694 	else
6695 		timeout = HL_DEVICE_TIMEOUT_USEC;
6696 
6697 	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6698 		dev_err_ratelimited(hdev->dev,
6699 			"Can't send driver job on QMAN0 because the device is not idle\n");
6700 		return -EBUSY;
6701 	}
6702 
6703 	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6704 							&fence_dma_addr);
6705 	if (!fence_ptr) {
6706 		dev_err(hdev->dev,
6707 			"Failed to allocate fence memory for QMAN0\n");
6708 		return -ENOMEM;
6709 	}
6710 
6711 	cb = job->patched_cb;
6712 
6713 	fence_pkt = cb->kernel_address +
6714 			job->job_cb_size - sizeof(struct packet_msg_prot);
6715 
6716 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6717 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6718 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6719 
6720 	fence_pkt->ctl = cpu_to_le32(tmp);
6721 	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6722 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6723 
6724 	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6725 
6726 	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6727 
6728 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6729 					job->job_cb_size, cb->bus_address);
6730 	if (rc) {
6731 		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6732 		goto free_fence_ptr;
6733 	}
6734 
6735 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6736 				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6737 				timeout, true);
6738 
6739 	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6740 
6741 	if (rc == -ETIMEDOUT) {
6742 		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6743 		goto free_fence_ptr;
6744 	}
6745 
6746 free_fence_ptr:
6747 	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6748 			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6749 
6750 	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6751 					fence_dma_addr);
6752 	return rc;
6753 }
6754 
6755 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6756 {
6757 	if (event_type >= GAUDI_EVENT_SIZE)
6758 		goto event_not_supported;
6759 
6760 	if (!gaudi_irq_map_table[event_type].valid)
6761 		goto event_not_supported;
6762 
6763 	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6764 
6765 	return;
6766 
6767 event_not_supported:
6768 	snprintf(desc, size, "N/A");
6769 }
6770 
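/*
 * A DMA_IF RAZWI initiator ID is shared by two DMA engines. Disambiguate by
 * reading each engine's CORE_ERR_CAUSE register and checking its HBW
 * read/write error bit; if both (or neither) are set, report both engines.
 */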
6771 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6772 							u32 x_y, bool is_write)
6773 {
6774 	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6775 
6776 	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6777 				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6778 
6779 	switch (x_y) {
6780 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6781 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6782 		dma_id[0] = 0;
6783 		dma_id[1] = 2;
6784 		break;
6785 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6786 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6787 		dma_id[0] = 1;
6788 		dma_id[1] = 3;
6789 		break;
6790 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6791 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6792 		dma_id[0] = 4;
6793 		dma_id[1] = 6;
6794 		break;
6795 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6796 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6797 		dma_id[0] = 5;
6798 		dma_id[1] = 7;
6799 		break;
6800 	default:
6801 		goto unknown_initiator;
6802 	}
6803 
6804 	for (i = 0 ; i < 2 ; i++) {
6805 		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6806 		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6807 	}
6808 
6809 	switch (x_y) {
6810 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6811 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6812 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6813 			return "DMA0";
6814 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6815 			return "DMA2";
6816 		else
6817 			return "DMA0 or DMA2";
6818 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6819 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6820 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6821 			return "DMA1";
6822 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6823 			return "DMA3";
6824 		else
6825 			return "DMA1 or DMA3";
6826 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6827 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6828 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6829 			return "DMA4";
6830 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6831 			return "DMA6";
6832 		else
6833 			return "DMA4 or DMA6";
6834 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6835 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6836 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6837 			return "DMA5";
6838 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6839 			return "DMA7";
6840 		else
6841 			return "DMA5 or DMA7";
6842 	}
6843 
6844 unknown_initiator:
6845 	return "unknown initiator";
6846 }
6847 
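/*
 * Translate the initiator captured in the MMU RAZWI ID register to a block
 * name. The X/Y coordinates select the routing point; where several blocks
 * share the same coordinates, the AXI ID distinguishes between them.
 */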
6848 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
6849 							bool is_write)
6850 {
6851 	u32 val, x_y, axi_id;
6852 
6853 	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6854 				RREG32(mmMMU_UP_RAZWI_READ_ID);
6855 	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6856 			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6857 	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6858 			RAZWI_INITIATOR_AXI_ID_SHIFT);
6859 
6860 	switch (x_y) {
6861 	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6862 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6863 			return "TPC0";
6864 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6865 			return "NIC0";
6866 		break;
6867 	case RAZWI_INITIATOR_ID_X_Y_TPC1:
6868 		return "TPC1";
6869 	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6870 	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6871 		return "MME0";
6872 	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6873 	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6874 		return "MME1";
6875 	case RAZWI_INITIATOR_ID_X_Y_TPC2:
6876 		return "TPC2";
6877 	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6878 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6879 			return "TPC3";
6880 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6881 			return "PCI";
6882 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6883 			return "CPU";
6884 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6885 			return "PSOC";
6886 		break;
6887 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6888 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6889 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6890 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6891 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6892 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6893 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6894 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6895 		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
6896 	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6897 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6898 			return "TPC4";
6899 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6900 			return "NIC1";
6901 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6902 			return "NIC2";
6903 		break;
6904 	case RAZWI_INITIATOR_ID_X_Y_TPC5:
6905 		return "TPC5";
6906 	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6907 	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6908 		return "MME2";
6909 	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6910 	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6911 		return "MME3";
6912 	case RAZWI_INITIATOR_ID_X_Y_TPC6:
6913 		return "TPC6";
6914 	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6915 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6916 			return "TPC7";
6917 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6918 			return "NIC4";
6919 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6920 			return "NIC5";
6921 		break;
6922 	default:
6923 		break;
6924 	}
6925 
6926 	dev_err(hdev->dev,
6927 		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6928 		val,
6929 		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6930 		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6931 		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6932 			RAZWI_INITIATOR_AXI_ID_MASK);
6933 
6934 	return "unknown initiator";
6935 }
6936 
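/*
 * Print pending RAZWI (illegal transaction) events. The write/read VLD
 * registers are cleared after reporting so the next event can be captured.
 */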
6937 static void gaudi_print_razwi_info(struct hl_device *hdev)
6938 {
6939 	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6940 		dev_err_ratelimited(hdev->dev,
6941 			"RAZWI event caused by illegal write of %s\n",
6942 			gaudi_get_razwi_initiator_name(hdev, true));
6943 		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6944 	}
6945 
6946 	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6947 		dev_err_ratelimited(hdev->dev,
6948 			"RAZWI event caused by illegal read of %s\n",
6949 			gaudi_get_razwi_initiator_name(hdev, false));
6950 		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6951 	}
6952 }
6953 
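/*
 * Print MMU page-fault and access-error captures. The faulting VA is
 * reconstructed from the capture register (VA bits 49:32) and the matching
 * _VA register (VA bits 31:0); the capture is cleared after reporting.
 */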
6954 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
6955 {
6956 	struct gaudi_device *gaudi = hdev->asic_specific;
6957 	u64 addr;
6958 	u32 val;
6959 
6960 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6961 		return;
6962 
6963 	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6964 	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6965 		addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6966 		addr <<= 32;
6967 		addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6968 
6969 		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
6970 					addr);
6971 
6972 		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6973 	}
6974 
6975 	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6976 	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6977 		addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6978 		addr <<= 32;
6979 		addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6980 
6981 		dev_err_ratelimited(hdev->dev,
6982 				"MMU access error on va 0x%llx\n", addr);
6983 
6984 		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6985 	}
6986 }
6987 
6988 /*
6989  *  +-------------------+------------------------------------------------------+
6990  *  | Configuration Reg |                     Description                      |
6991  *  |      Address      |                                                      |
6992  *  +-------------------+------------------------------------------------------+
6993  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6994  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6995  *  |                   |0xF34 memory wrappers 63:32                           |
6996  *  |                   |0xF38 memory wrappers 95:64                           |
6997  *  |                   |0xF3C memory wrappers 127:96                          |
6998  *  +-------------------+------------------------------------------------------+
6999  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
7000  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
7001  *  |                   |0xF44 memory wrappers 63:32                           |
7002  *  |                   |0xF48 memory wrappers 95:64                           |
7003  *  |                   |0xF4C memory wrappers 127:96                          |
7004  *  +-------------------+------------------------------------------------------+
7005  */
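/*
 * Extraction flow: locate the single set bit in the SERR/DERR indication
 * registers to find the failing memory wrapper, select that wrapper via the
 * MEM_SEL register, read the captured address and syndrome, and finally
 * clear the error indication through the MEM_INFO_CLR register.
 */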
7006 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7007 		struct ecc_info_extract_params *params, u64 *ecc_address,
7008 		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7009 {
7010 	struct gaudi_device *gaudi = hdev->asic_specific;
7011 	u32 i, num_mem_regs, reg, err_bit;
7012 	u64 err_addr, err_word = 0;
7013 	int rc = 0;
7014 
7015 	num_mem_regs = params->num_memories / 32 +
7016 			((params->num_memories % 32) ? 1 : 0);
7017 
7018 	if (params->block_address >= CFG_BASE)
7019 		params->block_address -= CFG_BASE;
7020 
7021 	if (params->derr)
7022 		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7023 	else
7024 		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7025 
7026 	if (params->disable_clock_gating) {
7027 		mutex_lock(&gaudi->clk_gate_mutex);
7028 		hdev->asic_funcs->disable_clock_gating(hdev);
7029 	}
7030 
7031 	/* Set invalid wrapper index */
7032 	*memory_wrapper_idx = 0xFF;
7033 
7034 	/* Iterate through memory wrappers, a single bit must be set */
7035 	for (i = 0 ; i < num_mem_regs ; i++) {
7036 		/* The indication registers are consecutive, 32 wrappers each */
7037 		err_word = RREG32(err_addr + i * 4);
7038 		if (err_word) {
7039 			err_bit = __ffs(err_word);
7040 			*memory_wrapper_idx = err_bit + (32 * i);
7041 			break;
7042 		}
7043 	}
7044 
7045 	if (*memory_wrapper_idx == 0xFF) {
7046 		dev_err(hdev->dev, "ECC error information cannot be found\n");
7047 		rc = -EINVAL;
7048 		goto enable_clk_gate;
7049 	}
7050 
7051 	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7052 			*memory_wrapper_idx);
7053 
7054 	*ecc_address =
7055 		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7056 	*ecc_syndrom =
7057 		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7058 
7059 	/* Clear error indication */
7060 	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7061 	if (params->derr)
7062 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7063 	else
7064 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7065 
7066 	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7067 
7068 enable_clk_gate:
7069 	if (params->disable_clock_gating) {
7070 		hdev->asic_funcs->set_clock_gating(hdev);
7071 
7072 		mutex_unlock(&gaudi->clk_gate_mutex);
7073 	}
7074 
7075 	return rc;
7076 }
7077 
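/*
 * Decode QMAN errors: scan GLBL_STS1 of the four streams plus the lower CP,
 * print every asserted cause bit (and write-1-clear it unless stop_on_err is
 * set), then decode the arbiter error cause register.
 */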
7078 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7079 					  const char *qm_name,
7080 					  u64 glbl_sts_addr,
7081 					  u64 arb_err_addr)
7082 {
7083 	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7084 	char reg_desc[32];
7085 
7086 	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
7087 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7088 		glbl_sts_clr_val = 0;
7089 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7090 
7091 		if (!glbl_sts_val)
7092 			continue;
7093 
7094 		if (i == QMAN_STREAMS)
7095 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7096 		else
7097 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7098 
7099 		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7100 			if (glbl_sts_val & BIT(j)) {
7101 				dev_err_ratelimited(hdev->dev,
7102 						"%s %s. err cause: %s\n",
7103 						qm_name, reg_desc,
7104 						gaudi_qman_error_cause[j]);
7105 				glbl_sts_clr_val |= BIT(j);
7106 			}
7107 		}
7108 
7109 		/* Write 1 to clear errors */
7110 		if (!hdev->stop_on_err)
7111 			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7112 	}
7113 
7114 	arb_err_val = RREG32(arb_err_addr);
7115 
7116 	if (!arb_err_val)
7117 		return;
7118 
7119 	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7120 		if (arb_err_val & BIT(j)) {
7121 			dev_err_ratelimited(hdev->dev,
7122 					"%s ARB_ERR. err cause: %s\n",
7123 					qm_name,
7124 					gaudi_qman_arb_error_cause[j]);
7125 		}
7126 	}
7127 }
7128 
7129 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7130 		struct hl_eq_sm_sei_data *sei_data)
7131 {
7132 	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7133 
7134 	switch (sei_data->sei_cause) {
7135 	case SM_SEI_SO_OVERFLOW:
7136 		dev_err(hdev->dev,
7137 			"SM %u SEI Error: SO %u overflow/underflow",
7138 			index, le32_to_cpu(sei_data->sei_log));
7139 		break;
7140 	case SM_SEI_LBW_4B_UNALIGNED:
7141 		dev_err(hdev->dev,
7142 			"SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7143 			index, le32_to_cpu(sei_data->sei_log));
7144 		break;
7145 	case SM_SEI_AXI_RESPONSE_ERR:
7146 		dev_err(hdev->dev,
7147 			"SM %u SEI Error: AXI ID %u response error",
7148 			index, le32_to_cpu(sei_data->sei_log));
7149 		break;
7150 	default:
7151 		dev_err(hdev->dev, "Unknown SM SEI cause %u",
7152 				le32_to_cpu(sei_data->sei_log));
7153 		break;
7154 	}
7155 }
7156 
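/*
 * For events whose ECC details are delivered by the firmware, take the
 * address/syndrome/wrapper index from the EQ entry; for TPC and MME events
 * the driver extracts them directly from the block's ECC registers.
 */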
7157 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7158 		struct hl_eq_ecc_data *ecc_data)
7159 {
7160 	struct ecc_info_extract_params params;
7161 	u64 ecc_address = 0, ecc_syndrom = 0;
7162 	u8 index, memory_wrapper_idx = 0;
7163 	bool extract_info_from_fw;
7164 	int rc;
7165 
7166 	switch (event_type) {
7167 	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7168 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7169 		extract_info_from_fw = true;
7170 		break;
7171 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7172 		index = event_type - GAUDI_EVENT_TPC0_SERR;
7173 		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7174 		params.num_memories = 90;
7175 		params.derr = false;
7176 		params.disable_clock_gating = true;
7177 		extract_info_from_fw = false;
7178 		break;
7179 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7180 		index = event_type - GAUDI_EVENT_TPC0_DERR;
7181 		params.block_address =
7182 			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7183 		params.num_memories = 90;
7184 		params.derr = true;
7185 		params.disable_clock_gating = true;
7186 		extract_info_from_fw = false;
7187 		break;
7188 	case GAUDI_EVENT_MME0_ACC_SERR:
7189 	case GAUDI_EVENT_MME1_ACC_SERR:
7190 	case GAUDI_EVENT_MME2_ACC_SERR:
7191 	case GAUDI_EVENT_MME3_ACC_SERR:
7192 		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7193 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7194 		params.num_memories = 128;
7195 		params.derr = false;
7196 		params.disable_clock_gating = true;
7197 		extract_info_from_fw = false;
7198 		break;
7199 	case GAUDI_EVENT_MME0_ACC_DERR:
7200 	case GAUDI_EVENT_MME1_ACC_DERR:
7201 	case GAUDI_EVENT_MME2_ACC_DERR:
7202 	case GAUDI_EVENT_MME3_ACC_DERR:
7203 		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7204 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7205 		params.num_memories = 128;
7206 		params.derr = true;
7207 		params.disable_clock_gating = true;
7208 		extract_info_from_fw = false;
7209 		break;
7210 	case GAUDI_EVENT_MME0_SBAB_SERR:
7211 	case GAUDI_EVENT_MME1_SBAB_SERR:
7212 	case GAUDI_EVENT_MME2_SBAB_SERR:
7213 	case GAUDI_EVENT_MME3_SBAB_SERR:
7214 		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7215 		params.block_address =
7216 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7217 		params.num_memories = 33;
7218 		params.derr = false;
7219 		params.disable_clock_gating = true;
7220 		extract_info_from_fw = false;
7221 		break;
7222 	case GAUDI_EVENT_MME0_SBAB_DERR:
7223 	case GAUDI_EVENT_MME1_SBAB_DERR:
7224 	case GAUDI_EVENT_MME2_SBAB_DERR:
7225 	case GAUDI_EVENT_MME3_SBAB_DERR:
7226 		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7227 		params.block_address =
7228 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7229 		params.num_memories = 33;
7230 		params.derr = true;
7231 		params.disable_clock_gating = true;
7232 		extract_info_from_fw = false;
7233 		break;
7234 	default:
7235 		return;
7236 	}
7237 
7238 	if (extract_info_from_fw) {
7239 		ecc_address = le64_to_cpu(ecc_data->ecc_address);
7240 		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7241 		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7242 	} else {
7243 		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7244 				&ecc_syndrom, &memory_wrapper_idx);
7245 		if (rc)
7246 			return;
7247 	}
7248 
7249 	dev_err(hdev->dev,
7250 		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7251 		ecc_address, ecc_syndrom, memory_wrapper_idx);
7252 }
7253 
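/*
 * Map a QMAN error event to the block's GLBL_STS1_0 and ARB_ERR_CAUSE
 * register addresses and hand off to the generic QMAN error decoder.
 */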
7254 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7255 {
7256 	u64 glbl_sts_addr, arb_err_addr;
7257 	u8 index;
7258 	char desc[32];
7259 
7260 	switch (event_type) {
7261 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7262 		index = event_type - GAUDI_EVENT_TPC0_QM;
7263 		glbl_sts_addr =
7264 			mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
7265 		arb_err_addr =
7266 			mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
7267 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7268 		break;
7269 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7270 		index = event_type - GAUDI_EVENT_MME0_QM;
7271 		glbl_sts_addr =
7272 			mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
7273 		arb_err_addr =
7274 			mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
7275 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7276 		break;
7277 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7278 		index = event_type - GAUDI_EVENT_DMA0_QM;
7279 		glbl_sts_addr =
7280 			mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
7281 		arb_err_addr =
7282 			mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
7283 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7284 		break;
7285 	case GAUDI_EVENT_NIC0_QM0:
7286 		glbl_sts_addr = mmNIC0_QM0_GLBL_STS1_0;
7287 		arb_err_addr = mmNIC0_QM0_ARB_ERR_CAUSE;
7288 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7289 		break;
7290 	case GAUDI_EVENT_NIC0_QM1:
7291 		glbl_sts_addr = mmNIC0_QM1_GLBL_STS1_0;
7292 		arb_err_addr = mmNIC0_QM1_ARB_ERR_CAUSE;
7293 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7294 		break;
7295 	case GAUDI_EVENT_NIC1_QM0:
7296 		glbl_sts_addr = mmNIC1_QM0_GLBL_STS1_0;
7297 		arb_err_addr = mmNIC1_QM0_ARB_ERR_CAUSE;
7298 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7299 		break;
7300 	case GAUDI_EVENT_NIC1_QM1:
7301 		glbl_sts_addr = mmNIC1_QM1_GLBL_STS1_0;
7302 		arb_err_addr = mmNIC1_QM1_ARB_ERR_CAUSE;
7303 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7304 		break;
7305 	case GAUDI_EVENT_NIC2_QM0:
7306 		glbl_sts_addr = mmNIC2_QM0_GLBL_STS1_0;
7307 		arb_err_addr = mmNIC2_QM0_ARB_ERR_CAUSE;
7308 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7309 		break;
7310 	case GAUDI_EVENT_NIC2_QM1:
7311 		glbl_sts_addr = mmNIC2_QM1_GLBL_STS1_0;
7312 		arb_err_addr = mmNIC2_QM1_ARB_ERR_CAUSE;
7313 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7314 		break;
7315 	case GAUDI_EVENT_NIC3_QM0:
7316 		glbl_sts_addr = mmNIC3_QM0_GLBL_STS1_0;
7317 		arb_err_addr = mmNIC3_QM0_ARB_ERR_CAUSE;
7318 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7319 		break;
7320 	case GAUDI_EVENT_NIC3_QM1:
7321 		glbl_sts_addr = mmNIC3_QM1_GLBL_STS1_0;
7322 		arb_err_addr = mmNIC3_QM1_ARB_ERR_CAUSE;
7323 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7324 		break;
7325 	case GAUDI_EVENT_NIC4_QM0:
7326 		glbl_sts_addr = mmNIC4_QM0_GLBL_STS1_0;
7327 		arb_err_addr = mmNIC4_QM0_ARB_ERR_CAUSE;
7328 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7329 		break;
7330 	case GAUDI_EVENT_NIC4_QM1:
7331 		glbl_sts_addr = mmNIC4_QM1_GLBL_STS1_0;
7332 		arb_err_addr = mmNIC4_QM1_ARB_ERR_CAUSE;
7333 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7334 		break;
7335 	default:
7336 		return;
7337 	}
7338 
7339 	gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
7340 }
7341 
7342 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7343 					bool razwi)
7344 {
7345 	char desc[64] = "";
7346 
7347 	gaudi_get_event_desc(event_type, desc, sizeof(desc));
7348 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7349 		event_type, desc);
7350 
7351 	if (razwi) {
7352 		gaudi_print_razwi_info(hdev);
7353 		gaudi_print_mmu_error_info(hdev);
7354 	}
7355 }
7356 
7357 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7358 					struct cpucp_pkt_sync_err *sync_err)
7359 {
7360 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7361 
7362 	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7363 			sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7364 }
7365 
7366 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7367 {
7368 	struct gaudi_device *gaudi = hdev->asic_specific;
7369 
7370 	/* Unmask all IRQs since some could have been received
7371 	 * during the soft reset
7372 	 */
7373 	return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7374 }
7375 
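/*
 * Report HBM ECC/parity interrupts. When the firmware handles HBM ECC, the
 * data arrives in the EQ entry; otherwise the driver reads the HBM MC
 * registers of each channel (two pseudo-channels per channel) directly and
 * clears the interrupts. Returns non-zero if an error was found while
 * reading the registers directly.
 */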
7376 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7377 			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7378 {
7379 	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7380 	int err = 0;
7381 
7382 	if (hdev->asic_prop.fw_security_status_valid &&
7383 			(hdev->asic_prop.fw_app_security_map &
7384 				CPU_BOOT_DEV_STS0_HBM_ECC_EN)) {
7385 		if (!hbm_ecc_data) {
7386 			dev_err(hdev->dev, "No FW ECC data");
7387 			return 0;
7388 		}
7389 
7390 		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7391 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7392 		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7393 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7394 		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7395 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7396 		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7397 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7398 		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7399 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7400 		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7401 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7402 		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7403 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7404 
7405 		dev_err(hdev->dev,
7406 			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7407 			device, ch, wr_par, rd_par, ca_par, serr, derr);
7408 		dev_err(hdev->dev,
7409 			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7410 			device, ch, hbm_ecc_data->first_addr, type,
7411 			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7412 			hbm_ecc_data->dec_cnt);
7413 
7414 		err = 1;
7415 
7416 		return 0;
7417 	}
7418 
7419 	if (!hdev->asic_prop.fw_security_disabled) {
7420 		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7421 		return 0;
7422 	}
7423 
7424 	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7425 	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7426 		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7427 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7428 		if (val) {
7429 			err = 1;
7430 			dev_err(hdev->dev,
7431 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7432 				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7433 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7434 				(val >> 4) & 0x1);
7435 
7436 			val2 = RREG32(base + ch * 0x1000 + 0x060);
7437 			dev_err(hdev->dev,
7438 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7439 				device, ch * 2,
7440 				RREG32(base + ch * 0x1000 + 0x064),
7441 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7442 				(val2 & 0xFF0000) >> 16,
7443 				(val2 & 0xFF000000) >> 24);
7444 		}
7445 
7446 		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7447 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7448 		if (val) {
7449 			err = 1;
7450 			dev_err(hdev->dev,
7451 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7452 				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7453 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7454 				(val >> 4) & 0x1);
7455 
7456 			val2 = RREG32(base + ch * 0x1000 + 0x070);
7457 			dev_err(hdev->dev,
7458 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7459 				device, ch * 2 + 1,
7460 				RREG32(base + ch * 0x1000 + 0x074),
7461 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7462 				(val2 & 0xFF0000) >> 16,
7463 				(val2 & 0xFF000000) >> 24);
7464 		}
7465 
7466 		/* Clear interrupts */
7467 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7468 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7469 		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7470 		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7471 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7472 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7473 	}
7474 
7475 	val  = RREG32(base + 0x8F30);
7476 	val2 = RREG32(base + 0x8F34);
7477 	if (val | val2) {
7478 		err = 1;
7479 		dev_err(hdev->dev,
7480 			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7481 			device, val, val2);
7482 	}
7483 	val  = RREG32(base + 0x8F40);
7484 	val2 = RREG32(base + 0x8F44);
7485 	if (val | val2) {
7486 		err = 1;
7487 		dev_err(hdev->dev,
7488 			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7489 			device, val, val2);
7490 	}
7491 
7492 	return err;
7493 }
7494 
7495 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7496 {
7497 	switch (hbm_event_type) {
7498 	case GAUDI_EVENT_HBM0_SPI_0:
7499 	case GAUDI_EVENT_HBM0_SPI_1:
7500 		return 0;
7501 	case GAUDI_EVENT_HBM1_SPI_0:
7502 	case GAUDI_EVENT_HBM1_SPI_1:
7503 		return 1;
7504 	case GAUDI_EVENT_HBM2_SPI_0:
7505 	case GAUDI_EVENT_HBM2_SPI_1:
7506 		return 2;
7507 	case GAUDI_EVENT_HBM3_SPI_0:
7508 	case GAUDI_EVENT_HBM3_SPI_1:
7509 		return 3;
7510 	default:
7511 		break;
7512 	}
7513 
7514 	/* Should never happen */
7515 	return 0;
7516 }
7517 
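/*
 * Read and clear a TPC's interrupt cause register and print each asserted
 * cause. Returns true if a cause that requires a soft reset was hit.
 */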
7518 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7519 					char *interrupt_name)
7520 {
7521 	struct gaudi_device *gaudi = hdev->asic_specific;
7522 	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7523 	bool soft_reset_required = false;
7524 
7525 	/* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
7526 	 * gating, which cannot be done by CPU-CP, so the driver does it
7527 	 * instead.
7528 	 */
7529 
7530 	mutex_lock(&gaudi->clk_gate_mutex);
7531 
7532 	hdev->asic_funcs->disable_clock_gating(hdev);
7533 
7534 	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7535 				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7536 
7537 	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7538 		if (tpc_interrupts_cause & BIT(i)) {
7539 			dev_err_ratelimited(hdev->dev,
7540 					"TPC%d_%s interrupt cause: %s\n",
7541 					tpc_id, interrupt_name,
7542 					gaudi_tpc_interrupts_cause[i]);
7543 			/* If this is a QM error, we need to soft-reset */
7544 			if (i == 15)
7545 				soft_reset_required = true;
7546 		}
7547 
7548 	/* Clear interrupts */
7549 	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7550 
7551 	hdev->asic_funcs->set_clock_gating(hdev);
7552 
7553 	mutex_unlock(&gaudi->clk_gate_mutex);
7554 
7555 	return soft_reset_required;
7556 }
7557 
7558 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7559 {
7560 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7561 }
7562 
7563 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7564 {
7565 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7566 }
7567 
7568 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7569 					u16 event_type)
7570 {
7571 	switch (event_type) {
7572 	case GAUDI_EVENT_FIX_POWER_ENV_S:
7573 		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7574 		dev_info_ratelimited(hdev->dev,
7575 			"Clock throttling due to power consumption\n");
7576 		break;
7577 
7578 	case GAUDI_EVENT_FIX_POWER_ENV_E:
7579 		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7580 		dev_info_ratelimited(hdev->dev,
7581 			"Power envelope is safe, back to optimal clock\n");
7582 		break;
7583 
7584 	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7585 		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7586 		dev_info_ratelimited(hdev->dev,
7587 			"Clock throttling due to overheating\n");
7588 		break;
7589 
7590 	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7591 		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7592 		dev_info_ratelimited(hdev->dev,
7593 			"Thermal envelope is safe, back to optimal clock\n");
7594 		break;
7595 
7596 	default:
7597 		dev_err(hdev->dev, "Received invalid clock change event %d\n",
7598 			event_type);
7599 		break;
7600 	}
7601 }
7602 
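/*
 * Main event-queue dispatcher. Updates the per-event statistics, prints the
 * event information and, depending on the event type, either unmasks the IRQ
 * in the firmware or requests a hard reset.
 */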
7603 static void gaudi_handle_eqe(struct hl_device *hdev,
7604 				struct hl_eq_entry *eq_entry)
7605 {
7606 	struct gaudi_device *gaudi = hdev->asic_specific;
7607 	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7608 	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7609 			>> EQ_CTL_EVENT_TYPE_SHIFT);
7610 	u8 cause;
7611 	bool reset_required;
7612 
7613 	gaudi->events_stat[event_type]++;
7614 	gaudi->events_stat_aggregate[event_type]++;
7615 
7616 	switch (event_type) {
7617 	case GAUDI_EVENT_PCIE_CORE_DERR:
7618 	case GAUDI_EVENT_PCIE_IF_DERR:
7619 	case GAUDI_EVENT_PCIE_PHY_DERR:
7620 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7621 	case GAUDI_EVENT_MME0_ACC_DERR:
7622 	case GAUDI_EVENT_MME0_SBAB_DERR:
7623 	case GAUDI_EVENT_MME1_ACC_DERR:
7624 	case GAUDI_EVENT_MME1_SBAB_DERR:
7625 	case GAUDI_EVENT_MME2_ACC_DERR:
7626 	case GAUDI_EVENT_MME2_SBAB_DERR:
7627 	case GAUDI_EVENT_MME3_ACC_DERR:
7628 	case GAUDI_EVENT_MME3_SBAB_DERR:
7629 	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7630 		fallthrough;
7631 	case GAUDI_EVENT_CPU_IF_ECC_DERR:
7632 	case GAUDI_EVENT_PSOC_MEM_DERR:
7633 	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7634 	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7635 	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7636 	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7637 	case GAUDI_EVENT_MMU_DERR:
7638 		gaudi_print_irq_info(hdev, event_type, true);
7639 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7640 		goto reset_device;
7641 
7642 	case GAUDI_EVENT_GIC500:
7643 	case GAUDI_EVENT_AXI_ECC:
7644 	case GAUDI_EVENT_L2_RAM_ECC:
7645 	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7646 		gaudi_print_irq_info(hdev, event_type, false);
7647 		goto reset_device;
7648 
7649 	case GAUDI_EVENT_HBM0_SPI_0:
7650 	case GAUDI_EVENT_HBM1_SPI_0:
7651 	case GAUDI_EVENT_HBM2_SPI_0:
7652 	case GAUDI_EVENT_HBM3_SPI_0:
7653 		gaudi_print_irq_info(hdev, event_type, false);
7654 		gaudi_hbm_read_interrupts(hdev,
7655 				gaudi_hbm_event_to_dev(event_type),
7656 				&eq_entry->hbm_ecc_data);
7657 		goto reset_device;
7658 
7659 	case GAUDI_EVENT_HBM0_SPI_1:
7660 	case GAUDI_EVENT_HBM1_SPI_1:
7661 	case GAUDI_EVENT_HBM2_SPI_1:
7662 	case GAUDI_EVENT_HBM3_SPI_1:
7663 		gaudi_print_irq_info(hdev, event_type, false);
7664 		gaudi_hbm_read_interrupts(hdev,
7665 				gaudi_hbm_event_to_dev(event_type),
7666 				&eq_entry->hbm_ecc_data);
7667 		hl_fw_unmask_irq(hdev, event_type);
7668 		break;
7669 
7670 	case GAUDI_EVENT_TPC0_DEC:
7671 	case GAUDI_EVENT_TPC1_DEC:
7672 	case GAUDI_EVENT_TPC2_DEC:
7673 	case GAUDI_EVENT_TPC3_DEC:
7674 	case GAUDI_EVENT_TPC4_DEC:
7675 	case GAUDI_EVENT_TPC5_DEC:
7676 	case GAUDI_EVENT_TPC6_DEC:
7677 	case GAUDI_EVENT_TPC7_DEC:
7678 		gaudi_print_irq_info(hdev, event_type, true);
7679 		reset_required = gaudi_tpc_read_interrupts(hdev,
7680 					tpc_dec_event_to_tpc_id(event_type),
7681 					"AXI_SLV_DEC_Error");
7682 		if (reset_required) {
7683 			dev_err(hdev->dev, "hard reset required due to %s\n",
7684 				gaudi_irq_map_table[event_type].name);
7685 
7686 			goto reset_device;
7687 		} else {
7688 			hl_fw_unmask_irq(hdev, event_type);
7689 		}
7690 		break;
7691 
7692 	case GAUDI_EVENT_TPC0_KRN_ERR:
7693 	case GAUDI_EVENT_TPC1_KRN_ERR:
7694 	case GAUDI_EVENT_TPC2_KRN_ERR:
7695 	case GAUDI_EVENT_TPC3_KRN_ERR:
7696 	case GAUDI_EVENT_TPC4_KRN_ERR:
7697 	case GAUDI_EVENT_TPC5_KRN_ERR:
7698 	case GAUDI_EVENT_TPC6_KRN_ERR:
7699 	case GAUDI_EVENT_TPC7_KRN_ERR:
7700 		gaudi_print_irq_info(hdev, event_type, true);
7701 		reset_required = gaudi_tpc_read_interrupts(hdev,
7702 					tpc_krn_event_to_tpc_id(event_type),
7703 					"KRN_ERR");
7704 		if (reset_required) {
7705 			dev_err(hdev->dev, "hard reset required due to %s\n",
7706 				gaudi_irq_map_table[event_type].name);
7707 
7708 			goto reset_device;
7709 		} else {
7710 			hl_fw_unmask_irq(hdev, event_type);
7711 		}
7712 		break;
7713 
7714 	case GAUDI_EVENT_PCIE_CORE_SERR:
7715 	case GAUDI_EVENT_PCIE_IF_SERR:
7716 	case GAUDI_EVENT_PCIE_PHY_SERR:
7717 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7718 	case GAUDI_EVENT_MME0_ACC_SERR:
7719 	case GAUDI_EVENT_MME0_SBAB_SERR:
7720 	case GAUDI_EVENT_MME1_ACC_SERR:
7721 	case GAUDI_EVENT_MME1_SBAB_SERR:
7722 	case GAUDI_EVENT_MME2_ACC_SERR:
7723 	case GAUDI_EVENT_MME2_SBAB_SERR:
7724 	case GAUDI_EVENT_MME3_ACC_SERR:
7725 	case GAUDI_EVENT_MME3_SBAB_SERR:
7726 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7727 	case GAUDI_EVENT_CPU_IF_ECC_SERR:
7728 	case GAUDI_EVENT_PSOC_MEM_SERR:
7729 	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7730 	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7731 	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7732 	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7733 		fallthrough;
7734 	case GAUDI_EVENT_MMU_SERR:
7735 		gaudi_print_irq_info(hdev, event_type, true);
7736 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7737 		hl_fw_unmask_irq(hdev, event_type);
7738 		break;
7739 
7740 	case GAUDI_EVENT_PCIE_DEC:
7741 	case GAUDI_EVENT_MME0_WBC_RSP:
7742 	case GAUDI_EVENT_MME0_SBAB0_RSP:
7743 	case GAUDI_EVENT_MME1_WBC_RSP:
7744 	case GAUDI_EVENT_MME1_SBAB0_RSP:
7745 	case GAUDI_EVENT_MME2_WBC_RSP:
7746 	case GAUDI_EVENT_MME2_SBAB0_RSP:
7747 	case GAUDI_EVENT_MME3_WBC_RSP:
7748 	case GAUDI_EVENT_MME3_SBAB0_RSP:
7749 	case GAUDI_EVENT_CPU_AXI_SPLITTER:
7750 	case GAUDI_EVENT_PSOC_AXI_DEC:
7751 	case GAUDI_EVENT_PSOC_PRSTN_FALL:
7752 	case GAUDI_EVENT_MMU_PAGE_FAULT:
7753 	case GAUDI_EVENT_MMU_WR_PERM:
7754 	case GAUDI_EVENT_RAZWI_OR_ADC:
7755 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7756 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7757 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7758 		fallthrough;
7759 	case GAUDI_EVENT_NIC0_QM0:
7760 	case GAUDI_EVENT_NIC0_QM1:
7761 	case GAUDI_EVENT_NIC1_QM0:
7762 	case GAUDI_EVENT_NIC1_QM1:
7763 	case GAUDI_EVENT_NIC2_QM0:
7764 	case GAUDI_EVENT_NIC2_QM1:
7765 	case GAUDI_EVENT_NIC3_QM0:
7766 	case GAUDI_EVENT_NIC3_QM1:
7767 	case GAUDI_EVENT_NIC4_QM0:
7768 	case GAUDI_EVENT_NIC4_QM1:
7769 	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7770 		gaudi_print_irq_info(hdev, event_type, true);
7771 		gaudi_handle_qman_err(hdev, event_type);
7772 		hl_fw_unmask_irq(hdev, event_type);
7773 		break;
7774 
7775 	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7776 		gaudi_print_irq_info(hdev, event_type, true);
7777 		goto reset_device;
7778 
7779 	case GAUDI_EVENT_TPC0_BMON_SPMU:
7780 	case GAUDI_EVENT_TPC1_BMON_SPMU:
7781 	case GAUDI_EVENT_TPC2_BMON_SPMU:
7782 	case GAUDI_EVENT_TPC3_BMON_SPMU:
7783 	case GAUDI_EVENT_TPC4_BMON_SPMU:
7784 	case GAUDI_EVENT_TPC5_BMON_SPMU:
7785 	case GAUDI_EVENT_TPC6_BMON_SPMU:
7786 	case GAUDI_EVENT_TPC7_BMON_SPMU:
7787 	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7788 		gaudi_print_irq_info(hdev, event_type, false);
7789 		hl_fw_unmask_irq(hdev, event_type);
7790 		break;
7791 
7792 	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7793 		gaudi_print_irq_info(hdev, event_type, false);
7794 		gaudi_print_sm_sei_info(hdev, event_type,
7795 					&eq_entry->sm_sei_data);
7796 		hl_fw_unmask_irq(hdev, event_type);
7797 		break;
7798 
7799 	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7800 		gaudi_print_clk_change_info(hdev, event_type);
7801 		hl_fw_unmask_irq(hdev, event_type);
7802 		break;
7803 
7804 	case GAUDI_EVENT_PSOC_GPIO_U16_0:
7805 		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7806 		dev_err(hdev->dev,
7807 			"Received high temp H/W interrupt %d (cause %d)\n",
7808 			event_type, cause);
7809 		break;
7810 
7811 	case GAUDI_EVENT_DEV_RESET_REQ:
7812 		gaudi_print_irq_info(hdev, event_type, false);
7813 		goto reset_device;
7814 
7815 	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7816 		gaudi_print_irq_info(hdev, event_type, false);
7817 		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7818 		goto reset_device;
7819 
7820 	default:
7821 		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7822 				event_type);
7823 		break;
7824 	}
7825 
7826 	return;
7827 
7828 reset_device:
7829 	if (hdev->hard_reset_on_fw_events)
7830 		hl_device_reset(hdev, HL_RESET_HARD);
7831 	else
7832 		hl_fw_unmask_irq(hdev, event_type);
7833 }
7834 
7835 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
7836 					u32 *size)
7837 {
7838 	struct gaudi_device *gaudi = hdev->asic_specific;
7839 
7840 	if (aggregate) {
7841 		*size = (u32) sizeof(gaudi->events_stat_aggregate);
7842 		return gaudi->events_stat_aggregate;
7843 	}
7844 
7845 	*size = (u32) sizeof(gaudi->events_stat);
7846 	return gaudi->events_stat;
7847 }
7848 
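/*
 * Full MMU cache (L0 & L1) invalidation: bracket the producer-index write to
 * STLB_CACHE_INV with writes of 3 and 2 to STLB_INV_PS, then poll INV_PS
 * until it drops to zero. A timeout triggers a hard reset.
 */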
7849 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
7850 					u32 flags)
7851 {
7852 	struct gaudi_device *gaudi = hdev->asic_specific;
7853 	u32 status, timeout_usec;
7854 	int rc;
7855 
7856 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7857 		hdev->hard_reset_pending)
7858 		return 0;
7859 
7860 	if (hdev->pldm)
7861 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7862 	else
7863 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7864 
7865 	/* L0 & L1 invalidation */
7866 	WREG32(mmSTLB_INV_PS, 3);
7867 	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7868 	WREG32(mmSTLB_INV_PS, 2);
7869 
7870 	rc = hl_poll_timeout(
7871 		hdev,
7872 		mmSTLB_INV_PS,
7873 		status,
7874 		!status,
7875 		1000,
7876 		timeout_usec);
7877 
7878 	WREG32(mmSTLB_INV_SET, 0);
7879 
7880 	if (rc) {
7881 		dev_err_ratelimited(hdev->dev,
7882 					"MMU cache invalidation timeout\n");
7883 		hl_device_reset(hdev, HL_RESET_HARD);
7884 	}
7885 
7886 	return rc;
7887 }
7888 
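/*
 * Range invalidation entry point. Advances the 8-bit producer index in
 * STLB_CACHE_INV and polls the consumer index until it catches up; see the
 * TODO below, the entire L0 & L1 caches are currently invalidated.
 */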
7889 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7890 				bool is_hard, u32 asid, u64 va, u64 size)
7891 {
7892 	struct gaudi_device *gaudi = hdev->asic_specific;
7893 	u32 status, timeout_usec;
7894 	u32 inv_data;
7895 	u32 pi;
7896 	int rc;
7897 
7898 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7899 		hdev->hard_reset_pending)
7900 		return 0;
7901 
7902 	if (hdev->pldm)
7903 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7904 	else
7905 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7906 
7907 	/*
7908 	 * TODO: currently invalidate entire L0 & L1 as in regular hard
7909 	 * invalidation. Need to apply invalidation of specific cache
7910 	 * lines with mask of ASID & VA & size.
7911 	 * Note that L1 will be flushed entirely in any case.
7912 	 */
7913 
7914 	/* L0 & L1 invalidation */
7915 	inv_data = RREG32(mmSTLB_CACHE_INV);
7916 	/* PI is 8 bit */
7917 	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
7918 	WREG32(mmSTLB_CACHE_INV,
7919 		(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
7920 
7921 	rc = hl_poll_timeout(
7922 		hdev,
7923 		mmSTLB_INV_CONSUMER_INDEX,
7924 		status,
7925 		status == pi,
7926 		1000,
7927 		timeout_usec);
7928 
7929 	if (rc) {
7930 		dev_err_ratelimited(hdev->dev,
7931 					"MMU cache invalidation timeout\n");
7932 		hl_device_reset(hdev, HL_RESET_HARD);
7933 	}
7934 
7935 	return rc;
7936 }
7937 
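/*
 * Program the hop-0 page-table physical address for an ASID and poll
 * MMU_BUSY until the MMU acknowledges the update.
 */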
7938 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
7939 					u32 asid, u64 phys_addr)
7940 {
7941 	u32 status, timeout_usec;
7942 	int rc;
7943 
7944 	if (hdev->pldm)
7945 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7946 	else
7947 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7948 
7949 	WREG32(MMU_ASID, asid);
7950 	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7951 	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
7952 	WREG32(MMU_BUSY, 0x80000000);
7953 
7954 	rc = hl_poll_timeout(
7955 		hdev,
7956 		MMU_BUSY,
7957 		status,
7958 		!(status & 0x80000000),
7959 		1000,
7960 		timeout_usec);
7961 
7962 	if (rc) {
7963 		dev_err(hdev->dev,
7964 			"Timeout during MMU hop0 config of asid %d\n", asid);
7965 		return rc;
7966 	}
7967 
7968 	return 0;
7969 }
7970 
7971 static int gaudi_send_heartbeat(struct hl_device *hdev)
7972 {
7973 	struct gaudi_device *gaudi = hdev->asic_specific;
7974 
7975 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7976 		return 0;
7977 
7978 	return hl_fw_send_heartbeat(hdev);
7979 }
7980 
7981 static int gaudi_cpucp_info_get(struct hl_device *hdev)
7982 {
7983 	struct gaudi_device *gaudi = hdev->asic_specific;
7984 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7985 	int rc;
7986 
7987 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7988 		return 0;
7989 
7990 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
7991 	if (rc)
7992 		return rc;
7993 
7994 	if (!strlen(prop->cpucp_info.card_name))
7995 		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
7996 				CARD_NAME_MAX_LEN);
7997 
7998 	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
7999 
8000 	set_default_power_values(hdev);
8001 
8002 	hdev->max_power = prop->max_power_default;
8003 
8004 	return 0;
8005 }
8006 
8007 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8008 					u8 mask_len, struct seq_file *s)
8009 {
8010 	struct gaudi_device *gaudi = hdev->asic_specific;
8011 	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8012 	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8013 	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8014 	unsigned long *mask = (unsigned long *)mask_arr;
8015 	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8016 	bool is_idle = true, is_eng_idle, is_slave;
8017 	u64 offset;
8018 	int i, dma_id, port;
8019 
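	/*
	 * When mask_arr is provided it is filled as a bitmap of
	 * GAUDI_ENGINE_ID_* bits, one bit set per engine that is found
	 * busy. When a seq_file is provided, a human-readable table is
	 * printed per engine class (DMA, TPC, MME, NIC).
	 */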
8020 	mutex_lock(&gaudi->clk_gate_mutex);
8021 
8022 	hdev->asic_funcs->disable_clock_gating(hdev);
8023 
8024 	if (s)
8025 		seq_puts(s,
8026 			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8027 			"---  -------  ------------  ----------  -------------\n");
8028 
8029 	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8030 		dma_id = gaudi_dma_assignment[i];
8031 		offset = dma_id * DMA_QMAN_OFFSET;
8032 
8033 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8034 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8035 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8036 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8037 				IS_DMA_IDLE(dma_core_sts0);
8038 		is_idle &= is_eng_idle;
8039 
8040 		if (mask && !is_eng_idle)
8041 			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8042 		if (s)
8043 			seq_printf(s, fmt, dma_id,
8044 				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8045 				qm_cgm_sts, dma_core_sts0);
8046 	}
8047 
8048 	if (s)
8049 		seq_puts(s,
8050 			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8051 			"---  -------  ------------  ----------  ----------\n");
8052 
8053 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8054 		offset = i * TPC_QMAN_OFFSET;
8055 		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8056 		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8057 		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8058 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8059 				IS_TPC_IDLE(tpc_cfg_sts);
8060 		is_idle &= is_eng_idle;
8061 
8062 		if (mask && !is_eng_idle)
8063 			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8064 		if (s)
8065 			seq_printf(s, fmt, i,
8066 				is_eng_idle ? "Y" : "N",
8067 				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8068 	}
8069 
8070 	if (s)
8071 		seq_puts(s,
8072 			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8073 			"---  -------  ------------  ----------  -----------\n");
8074 
8075 	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8076 		offset = i * MME_QMAN_OFFSET;
8077 		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8078 		is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8079 
8080 		/* MME 1 & 3 are slaves, no need to check their QMANs */
8081 		is_slave = i % 2;
8082 		if (!is_slave) {
8083 			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8084 			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8085 			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8086 		}
8087 
8088 		is_idle &= is_eng_idle;
8089 
8090 		if (mask && !is_eng_idle)
8091 			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8092 		if (s) {
8093 			if (!is_slave)
8094 				seq_printf(s, fmt, i,
8095 					is_eng_idle ? "Y" : "N",
8096 					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8097 			else
8098 				seq_printf(s, mme_slave_fmt, i,
8099 					is_eng_idle ? "Y" : "N", "-",
8100 					"-", mme_arch_sts);
8101 		}
8102 	}
8103 
8104 	if (s)
8105 		seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8106 				"---  -------  ------------  ----------\n");
8107 
8108 	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8109 		offset = i * NIC_MACRO_QMAN_OFFSET;
8110 		port = 2 * i;
8111 		if (hdev->nic_ports_mask & BIT(port)) {
8112 			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8113 			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8114 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8115 			is_idle &= is_eng_idle;
8116 
8117 			if (mask && !is_eng_idle)
8118 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8119 			if (s)
8120 				seq_printf(s, nic_fmt, port,
8121 						is_eng_idle ? "Y" : "N",
8122 						qm_glbl_sts0, qm_cgm_sts);
8123 		}
8124 
8125 		port = 2 * i + 1;
8126 		if (hdev->nic_ports_mask & BIT(port)) {
8127 			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8128 			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8129 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8130 			is_idle &= is_eng_idle;
8131 
8132 			if (mask && !is_eng_idle)
8133 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8134 			if (s)
8135 				seq_printf(s, nic_fmt, port,
8136 						is_eng_idle ? "Y" : "N",
8137 						qm_glbl_sts0, qm_cgm_sts);
8138 		}
8139 	}
8140 
8141 	if (s)
8142 		seq_puts(s, "\n");
8143 
8144 	hdev->asic_funcs->set_clock_gating(hdev);
8145 
8146 	mutex_unlock(&gaudi->clk_gate_mutex);
8147 
8148 	return is_idle;
8149 }
8150 
8151 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8152 	__acquires(&gaudi->hw_queues_lock)
8153 {
8154 	struct gaudi_device *gaudi = hdev->asic_specific;
8155 
8156 	spin_lock(&gaudi->hw_queues_lock);
8157 }
8158 
8159 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8160 	__releases(&gaudi->hw_queues_lock)
8161 {
8162 	struct gaudi_device *gaudi = hdev->asic_specific;
8163 
8164 	spin_unlock(&gaudi->hw_queues_lock);
8165 }
8166 
8167 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8168 {
8169 	return hdev->pdev->device;
8170 }
8171 
8172 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8173 				size_t max_size)
8174 {
8175 	struct gaudi_device *gaudi = hdev->asic_specific;
8176 
8177 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8178 		return 0;
8179 
8180 	return hl_fw_get_eeprom_data(hdev, data, max_size);
8181 }
8182 
8183 /*
8184  * this function should be used only during initialization and/or after reset,
8185  * when there are no active users.
8186  */
8187 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8188 				u32 tpc_id)
8189 {
8190 	struct gaudi_device *gaudi = hdev->asic_specific;
8191 	u64 kernel_timeout;
8192 	u32 status, offset;
8193 	int rc;
8194 
8195 	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8196 
8197 	if (hdev->pldm)
8198 		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8199 	else
8200 		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8201 
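	/*
	 * Rough flow of what follows: point the QMAN kernel base, the
	 * icache base and a (don't-care) LUT base at the kernel address,
	 * invalidate + prefetch the icache and wait for the vector pipe to
	 * drain, then kick TPC_EXECUTE and wait for the vector pipe and the
	 * work-queue in-flight counter to drain again.
	 */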
8202 	mutex_lock(&gaudi->clk_gate_mutex);
8203 
8204 	hdev->asic_funcs->disable_clock_gating(hdev);
8205 
8206 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8207 			lower_32_bits(tpc_kernel));
8208 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8209 			upper_32_bits(tpc_kernel));
8210 
8211 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8212 			lower_32_bits(tpc_kernel));
8213 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8214 			upper_32_bits(tpc_kernel));
8215 	/* set a valid LUT pointer, content is of no significance */
8216 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8217 			lower_32_bits(tpc_kernel));
8218 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8219 			upper_32_bits(tpc_kernel));
8220 
8221 	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8222 			lower_32_bits(CFG_BASE +
8223 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8224 
8225 	WREG32(mmTPC0_CFG_TPC_CMD + offset,
8226 			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8227 			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8228 	/* wait a bit for the engine to start executing */
8229 	usleep_range(1000, 1500);
8230 
8231 	/* wait until engine has finished executing */
8232 	rc = hl_poll_timeout(
8233 		hdev,
8234 		mmTPC0_CFG_STATUS + offset,
8235 		status,
8236 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8237 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8238 		1000,
8239 		kernel_timeout);
8240 
8241 	if (rc) {
8242 		dev_err(hdev->dev,
8243 			"Timeout while waiting for TPC%d icache prefetch\n",
8244 			tpc_id);
8245 		hdev->asic_funcs->set_clock_gating(hdev);
8246 		mutex_unlock(&gaudi->clk_gate_mutex);
8247 		return -EIO;
8248 	}
8249 
8250 	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8251 			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8252 
8253 	/* wait a bit for the engine to start executing */
8254 	usleep_range(1000, 1500);
8255 
8256 	/* wait until engine has finished executing */
8257 	rc = hl_poll_timeout(
8258 		hdev,
8259 		mmTPC0_CFG_STATUS + offset,
8260 		status,
8261 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8262 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8263 		1000,
8264 		kernel_timeout);
8265 
8266 	if (rc) {
8267 		dev_err(hdev->dev,
8268 			"Timeout while waiting for TPC%d vector pipe\n",
8269 			tpc_id);
8270 		hdev->asic_funcs->set_clock_gating(hdev);
8271 		mutex_unlock(&gaudi->clk_gate_mutex);
8272 		return -EIO;
8273 	}
8274 
8275 	rc = hl_poll_timeout(
8276 		hdev,
8277 		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8278 		status,
8279 		(status == 0),
8280 		1000,
8281 		kernel_timeout);
8282 
8283 	hdev->asic_funcs->set_clock_gating(hdev);
8284 	mutex_unlock(&gaudi->clk_gate_mutex);
8285 
8286 	if (rc) {
8287 		dev_err(hdev->dev,
8288 			"Timeout while waiting for TPC%d kernel to execute\n",
8289 			tpc_id);
8290 		return -EIO;
8291 	}
8292 
8293 	return 0;
8294 }
8295 
8296 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8297 		struct hl_ctx *ctx)
8298 {
8299 	struct gaudi_device *gaudi = hdev->asic_specific;
8300 	int min_alloc_order, rc, collective_cb_size;
8301 
8302 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8303 		return 0;
8304 
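	/*
	 * The pool is built in four steps: allocate a coherent host buffer,
	 * wrap it in a gen_pool whose minimal chunk fits one collective CB,
	 * reserve a device VA block in the host range and finally MMU-map
	 * the buffer contiguously into that block.
	 */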
8305 	hdev->internal_cb_pool_virt_addr =
8306 			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8307 					HOST_SPACE_INTERNAL_CB_SZ,
8308 					&hdev->internal_cb_pool_dma_addr,
8309 					GFP_KERNEL | __GFP_ZERO);
8310 
8311 	if (!hdev->internal_cb_pool_virt_addr)
8312 		return -ENOMEM;
8313 
8314 	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8315 			sizeof(struct packet_fence);
8316 	min_alloc_order = ilog2(collective_cb_size);
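	/*
	 * Rough sizing example, assuming both packet_msg_short and
	 * packet_fence are 8 bytes: collective_cb_size = 5 * 8 + 8 = 48,
	 * so min_alloc_order = ilog2(48) = 5 and the pool hands out chunks
	 * of at least 32 bytes.
	 */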
8317 
8318 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8319 	if (!hdev->internal_cb_pool) {
8320 		dev_err(hdev->dev,
8321 			"Failed to create internal CB pool\n");
8322 		rc = -ENOMEM;
8323 		goto free_internal_cb_pool;
8324 	}
8325 
8326 	rc = gen_pool_add(hdev->internal_cb_pool,
8327 				(uintptr_t) hdev->internal_cb_pool_virt_addr,
8328 				HOST_SPACE_INTERNAL_CB_SZ, -1);
8329 	if (rc) {
8330 		dev_err(hdev->dev,
8331 			"Failed to add memory to internal CB pool\n");
8332 		rc = -EFAULT;
8333 		goto destroy_internal_cb_pool;
8334 	}
8335 
8336 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8337 			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8338 			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8339 
8340 	if (!hdev->internal_cb_va_base) {
8341 		rc = -ENOMEM;
		goto destroy_internal_cb_pool;
	}
8342 
8343 	mutex_lock(&ctx->mmu_lock);
8344 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8345 			hdev->internal_cb_pool_dma_addr,
8346 			HOST_SPACE_INTERNAL_CB_SZ);
8347 
8348 	hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
8349 	mutex_unlock(&ctx->mmu_lock);
8350 
8351 	if (rc)
8352 		goto unreserve_internal_cb_pool;
8353 
8354 	return 0;
8355 
8356 unreserve_internal_cb_pool:
8357 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8358 			HOST_SPACE_INTERNAL_CB_SZ);
8359 destroy_internal_cb_pool:
8360 	gen_pool_destroy(hdev->internal_cb_pool);
8361 free_internal_cb_pool:
8362 	hdev->asic_funcs->asic_dma_free_coherent(hdev,
8363 			HOST_SPACE_INTERNAL_CB_SZ,
8364 			hdev->internal_cb_pool_virt_addr,
8365 			hdev->internal_cb_pool_dma_addr);
8366 
8367 	return rc;
8368 }
8369 
8370 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8371 		struct hl_ctx *ctx)
8372 {
8373 	struct gaudi_device *gaudi = hdev->asic_specific;
8374 
8375 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8376 		return;
8377 
8378 	mutex_lock(&ctx->mmu_lock);
8379 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8380 			HOST_SPACE_INTERNAL_CB_SZ);
8381 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8382 			HOST_SPACE_INTERNAL_CB_SZ);
8383 	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
8384 	mutex_unlock(&ctx->mmu_lock);
8385 
8386 	gen_pool_destroy(hdev->internal_cb_pool);
8387 
8388 	hdev->asic_funcs->asic_dma_free_coherent(hdev,
8389 			HOST_SPACE_INTERNAL_CB_SZ,
8390 			hdev->internal_cb_pool_virt_addr,
8391 			hdev->internal_cb_pool_dma_addr);
8392 }
8393 
8394 static int gaudi_ctx_init(struct hl_ctx *ctx)
8395 {
8396 	if (ctx->asid == HL_KERNEL_ASID_ID)
8397 		return 0;
8398 
8399 	gaudi_mmu_prepare(ctx->hdev, ctx->asid);
8400 	return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8401 }
8402 
8403 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8404 {
8405 	if (ctx->asid == HL_KERNEL_ASID_ID)
8406 		return;
8407 
8408 	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8409 }
8410 
8411 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8412 {
8413 	return gaudi_cq_assignment[cq_idx];
8414 }
8415 
8416 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8417 {
8418 	return sizeof(struct packet_msg_short) +
8419 			sizeof(struct packet_msg_prot) * 2;
8420 }
8421 
8422 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8423 {
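	/*
	 * The wait CB built by gaudi_gen_wait_cb() consists of three
	 * monitor-config MSG_SHORT packets plus one monitor-arm MSG_SHORT
	 * and a FENCE packet; the two MSG_PROT packets are presumably the
	 * completion packets appended by add_end_of_cb_packets().
	 */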
8424 	return sizeof(struct packet_msg_short) * 4 +
8425 			sizeof(struct packet_fence) +
8426 			sizeof(struct packet_msg_prot) * 2;
8427 }
8428 
8429 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8430 				u32 size, bool eb)
8431 {
8432 	struct hl_cb *cb = (struct hl_cb *) data;
8433 	struct packet_msg_short *pkt;
8434 	u32 value, ctl, pkt_size = sizeof(*pkt);
8435 
8436 	pkt = cb->kernel_address + size;
8437 	memset(pkt, 0, pkt_size);
8438 
8439 	/* Inc by 1, Mode ADD */
8440 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8441 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8442 
8443 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8444 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8445 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8446 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8447 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8448 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8449 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8450 
8451 	pkt->value = cpu_to_le32(value);
8452 	pkt->ctl = cpu_to_le32(ctl);
8453 
8454 	return size + pkt_size;
8455 }
8456 
8457 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8458 					u16 addr)
8459 {
8460 	u32 ctl, pkt_size = sizeof(*pkt);
8461 
8462 	memset(pkt, 0, pkt_size);
8463 
8464 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8465 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8466 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8467 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8468 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8469 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8470 
8471 	pkt->value = cpu_to_le32(value);
8472 	pkt->ctl = cpu_to_le32(ctl);
8473 
8474 	return pkt_size;
8475 }
8476 
8477 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8478 		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8479 		u16 sob_val, u16 mon_id)
8480 {
8481 	u64 monitor_base;
8482 	u32 ctl, value, pkt_size = sizeof(*pkt);
8483 	u16 msg_addr_offset;
8484 	u8 mask;
8485 
8486 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8487 		dev_err(hdev->dev,
8488 			"sob_base %u (mask %#x) is not valid\n",
8489 			sob_base, sob_mask);
8490 		return 0;
8491 	}
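	/*
	 * A monitor watches a group of 8 consecutive sync objects: the
	 * group id below is sob_base / 8 and the 8-bit mask (built by
	 * hl_gen_sob_mask() above) selects which objects inside the group
	 * take part in the >= sob_val comparison.
	 */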
8492 
8493 	/*
8494 	 * monitor_base should be the content of the base0 address registers,
8495 	 * so it will be added to the msg short offsets
8496 	 */
8497 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8498 
8499 	msg_addr_offset =
8500 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8501 				monitor_base;
8502 
8503 	memset(pkt, 0, pkt_size);
8504 
8505 	/* Monitor config packet: bind the monitor to a sync object */
8506 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8507 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8508 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8509 			0); /* GREATER OR EQUAL */
8510 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8511 
8512 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8513 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8514 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8515 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8516 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8517 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8518 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8519 
8520 	pkt->value = cpu_to_le32(value);
8521 	pkt->ctl = cpu_to_le32(ctl);
8522 
8523 	return pkt_size;
8524 }
8525 
8526 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8527 {
8528 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
8529 
8530 	memset(pkt, 0, pkt_size);
8531 
8532 	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8533 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8534 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
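	/*
	 * The CP will stall on this packet until fence counter ID 2 reaches
	 * the target value of 1. That happens when the armed monitor writes
	 * its payload (1, see gaudi_add_mon_pkts()) to the queue's
	 * CP_FENCE2_RDATA register; the fence then decrements the counter
	 * back by 1.
	 */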
8535 
8536 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8537 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8538 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8539 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8540 
8541 	pkt->cfg = cpu_to_le32(cfg);
8542 	pkt->ctl = cpu_to_le32(ctl);
8543 
8544 	return pkt_size;
8545 }
8546 
8547 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8548 {
8549 	u32 offset, nic_index;
8550 
8551 	switch (queue_id) {
8552 	case GAUDI_QUEUE_ID_DMA_0_0:
8553 		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8554 		break;
8555 	case GAUDI_QUEUE_ID_DMA_0_1:
8556 		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8557 		break;
8558 	case GAUDI_QUEUE_ID_DMA_0_2:
8559 		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8560 		break;
8561 	case GAUDI_QUEUE_ID_DMA_0_3:
8562 		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8563 		break;
8564 	case GAUDI_QUEUE_ID_DMA_1_0:
8565 		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8566 		break;
8567 	case GAUDI_QUEUE_ID_DMA_1_1:
8568 		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8569 		break;
8570 	case GAUDI_QUEUE_ID_DMA_1_2:
8571 		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8572 		break;
8573 	case GAUDI_QUEUE_ID_DMA_1_3:
8574 		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8575 		break;
8576 	case GAUDI_QUEUE_ID_DMA_5_0:
8577 		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8578 		break;
8579 	case GAUDI_QUEUE_ID_DMA_5_1:
8580 		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8581 		break;
8582 	case GAUDI_QUEUE_ID_DMA_5_2:
8583 		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8584 		break;
8585 	case GAUDI_QUEUE_ID_DMA_5_3:
8586 		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8587 		break;
8588 	case GAUDI_QUEUE_ID_TPC_7_0:
8589 		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8590 		break;
8591 	case GAUDI_QUEUE_ID_TPC_7_1:
8592 		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8593 		break;
8594 	case GAUDI_QUEUE_ID_TPC_7_2:
8595 		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8596 		break;
8597 	case GAUDI_QUEUE_ID_TPC_7_3:
8598 		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8599 		break;
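	/*
	 * The NIC cases below decode the queue id into a NIC port and then
	 * into a macro/engine pair. As a rough example, GAUDI_QUEUE_ID_NIC_3_0
	 * gives nic_index = 3, i.e. NIC macro 1 (nic_index >> 1) and engine
	 * QM1 inside it (nic_index & 1), so the fence register is
	 * mmNIC0_QM0_CP_FENCE2_RDATA_0 + 1 * NIC_MACRO_QMAN_OFFSET +
	 * 1 * NIC_ENGINE_QMAN_OFFSET.
	 */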
8600 	case GAUDI_QUEUE_ID_NIC_0_0:
8601 	case GAUDI_QUEUE_ID_NIC_1_0:
8602 	case GAUDI_QUEUE_ID_NIC_2_0:
8603 	case GAUDI_QUEUE_ID_NIC_3_0:
8604 	case GAUDI_QUEUE_ID_NIC_4_0:
8605 	case GAUDI_QUEUE_ID_NIC_5_0:
8606 	case GAUDI_QUEUE_ID_NIC_6_0:
8607 	case GAUDI_QUEUE_ID_NIC_7_0:
8608 	case GAUDI_QUEUE_ID_NIC_8_0:
8609 	case GAUDI_QUEUE_ID_NIC_9_0:
8610 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8611 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8612 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8613 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8614 		break;
8615 	case GAUDI_QUEUE_ID_NIC_0_1:
8616 	case GAUDI_QUEUE_ID_NIC_1_1:
8617 	case GAUDI_QUEUE_ID_NIC_2_1:
8618 	case GAUDI_QUEUE_ID_NIC_3_1:
8619 	case GAUDI_QUEUE_ID_NIC_4_1:
8620 	case GAUDI_QUEUE_ID_NIC_5_1:
8621 	case GAUDI_QUEUE_ID_NIC_6_1:
8622 	case GAUDI_QUEUE_ID_NIC_7_1:
8623 	case GAUDI_QUEUE_ID_NIC_8_1:
8624 	case GAUDI_QUEUE_ID_NIC_9_1:
8625 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8626 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8627 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8628 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8629 		break;
8630 	case GAUDI_QUEUE_ID_NIC_0_2:
8631 	case GAUDI_QUEUE_ID_NIC_1_2:
8632 	case GAUDI_QUEUE_ID_NIC_2_2:
8633 	case GAUDI_QUEUE_ID_NIC_3_2:
8634 	case GAUDI_QUEUE_ID_NIC_4_2:
8635 	case GAUDI_QUEUE_ID_NIC_5_2:
8636 	case GAUDI_QUEUE_ID_NIC_6_2:
8637 	case GAUDI_QUEUE_ID_NIC_7_2:
8638 	case GAUDI_QUEUE_ID_NIC_8_2:
8639 	case GAUDI_QUEUE_ID_NIC_9_2:
8640 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8641 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8642 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8643 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8644 		break;
8645 	case GAUDI_QUEUE_ID_NIC_0_3:
8646 	case GAUDI_QUEUE_ID_NIC_1_3:
8647 	case GAUDI_QUEUE_ID_NIC_2_3:
8648 	case GAUDI_QUEUE_ID_NIC_3_3:
8649 	case GAUDI_QUEUE_ID_NIC_4_3:
8650 	case GAUDI_QUEUE_ID_NIC_5_3:
8651 	case GAUDI_QUEUE_ID_NIC_6_3:
8652 	case GAUDI_QUEUE_ID_NIC_7_3:
8653 	case GAUDI_QUEUE_ID_NIC_8_3:
8654 	case GAUDI_QUEUE_ID_NIC_9_3:
8655 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8656 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8657 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8658 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8659 		break;
8660 	default:
8661 		return -EINVAL;
8662 	}
8663 
8664 	*addr = CFG_BASE + offset;
8665 
8666 	return 0;
8667 }
8668 
8669 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8670 {
8671 	u64 monitor_base;
8672 	u32 size = 0;
8673 	u16 msg_addr_offset;
8674 
8675 	/*
8676 	 * monitor_base should be the content of the base0 address registers,
8677 	 * so it will be added to the msg short offsets
8678 	 */
8679 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8680 
8681 	/* First monitor config packet: low address of the sync */
8682 	msg_addr_offset =
8683 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8684 				monitor_base;
8685 
8686 	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8687 					msg_addr_offset);
8688 
8689 	/* Second monitor config packet: high address of the sync */
8690 	msg_addr_offset =
8691 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8692 				monitor_base;
8693 
8694 	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8695 					msg_addr_offset);
8696 
8697 	/*
8698 	 * Third monitor config packet: the payload, i.e. what to write when the
8699 	 * sync triggers
8700 	 */
8701 	msg_addr_offset =
8702 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8703 				monitor_base;
8704 
8705 	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8706 
8707 	return size;
8708 }
8709 
8710 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8711 				struct hl_gen_wait_properties *prop)
8712 {
8713 	struct hl_cb *cb = (struct hl_cb *) prop->data;
8714 	void *buf = cb->kernel_address;
8715 	u64 fence_addr = 0;
8716 	u32 size = prop->size;
8717 
8718 	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8719 		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8720 				prop->q_idx);
8721 		return 0;
8722 	}
8723 
8724 	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8725 	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8726 			prop->sob_mask, prop->sob_val, prop->mon_id);
8727 	size += gaudi_add_fence_pkt(buf + size);
8728 
8729 	return size;
8730 }
8731 
8732 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8733 {
8734 	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8735 	int rc;
8736 
8737 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8738 		hw_sob->sob_id);
8739 
8740 	rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
8741 			CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8742 			hw_sob->sob_id * 4, 1, 0);
8743 	if (rc)
8744 		dev_err(hdev->dev, "failed resetting sob %u\n", hw_sob->sob_id);
8745 
8746 	kref_init(&hw_sob->kref);
8747 }
8748 
8749 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
8750 {
8751 	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
8752 							HL_POWER9_HOST_MAGIC) {
8753 		hdev->power9_64bit_dma_enable = 1;
8754 		hdev->dma_mask = 64;
8755 	} else {
8756 		hdev->power9_64bit_dma_enable = 0;
8757 		hdev->dma_mask = 48;
8758 	}
8759 }
8760 
8761 static u64 gaudi_get_device_time(struct hl_device *hdev)
8762 {
8763 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8764 
8765 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8766 }
8767 
8768 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8769 				u32 *block_size, u32 *block_id)
8770 {
8771 	return -EPERM;
8772 }
8773 
8774 static int gaudi_block_mmap(struct hl_device *hdev,
8775 				struct vm_area_struct *vma,
8776 				u32 block_id, u32 block_size)
8777 {
8778 	return -EPERM;
8779 }
8780 
8781 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8782 {
8783 	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
8784 }
8785 
8786 static const struct hl_asic_funcs gaudi_funcs = {
8787 	.early_init = gaudi_early_init,
8788 	.early_fini = gaudi_early_fini,
8789 	.late_init = gaudi_late_init,
8790 	.late_fini = gaudi_late_fini,
8791 	.sw_init = gaudi_sw_init,
8792 	.sw_fini = gaudi_sw_fini,
8793 	.hw_init = gaudi_hw_init,
8794 	.hw_fini = gaudi_hw_fini,
8795 	.halt_engines = gaudi_halt_engines,
8796 	.suspend = gaudi_suspend,
8797 	.resume = gaudi_resume,
8798 	.cb_mmap = gaudi_cb_mmap,
8799 	.ring_doorbell = gaudi_ring_doorbell,
8800 	.pqe_write = gaudi_pqe_write,
8801 	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
8802 	.asic_dma_free_coherent = gaudi_dma_free_coherent,
8803 	.scrub_device_mem = gaudi_scrub_device_mem,
8804 	.get_int_queue_base = gaudi_get_int_queue_base,
8805 	.test_queues = gaudi_test_queues,
8806 	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
8807 	.asic_dma_pool_free = gaudi_dma_pool_free,
8808 	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
8809 	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
8810 	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
8811 	.cs_parser = gaudi_cs_parser,
8812 	.asic_dma_map_sg = gaudi_dma_map_sg,
8813 	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
8814 	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
8815 	.update_eq_ci = gaudi_update_eq_ci,
8816 	.context_switch = gaudi_context_switch,
8817 	.restore_phase_topology = gaudi_restore_phase_topology,
8818 	.debugfs_read32 = gaudi_debugfs_read32,
8819 	.debugfs_write32 = gaudi_debugfs_write32,
8820 	.debugfs_read64 = gaudi_debugfs_read64,
8821 	.debugfs_write64 = gaudi_debugfs_write64,
8822 	.debugfs_read_dma = gaudi_debugfs_read_dma,
8823 	.add_device_attr = gaudi_add_device_attr,
8824 	.handle_eqe = gaudi_handle_eqe,
8825 	.set_pll_profile = gaudi_set_pll_profile,
8826 	.get_events_stat = gaudi_get_events_stat,
8827 	.read_pte = gaudi_read_pte,
8828 	.write_pte = gaudi_write_pte,
8829 	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
8830 	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
8831 	.send_heartbeat = gaudi_send_heartbeat,
8832 	.set_clock_gating = gaudi_set_clock_gating,
8833 	.disable_clock_gating = gaudi_disable_clock_gating,
8834 	.debug_coresight = gaudi_debug_coresight,
8835 	.is_device_idle = gaudi_is_device_idle,
8836 	.soft_reset_late_init = gaudi_soft_reset_late_init,
8837 	.hw_queues_lock = gaudi_hw_queues_lock,
8838 	.hw_queues_unlock = gaudi_hw_queues_unlock,
8839 	.get_pci_id = gaudi_get_pci_id,
8840 	.get_eeprom_data = gaudi_get_eeprom_data,
8841 	.send_cpu_message = gaudi_send_cpu_message,
8842 	.pci_bars_map = gaudi_pci_bars_map,
8843 	.init_iatu = gaudi_init_iatu,
8844 	.rreg = hl_rreg,
8845 	.wreg = hl_wreg,
8846 	.halt_coresight = gaudi_halt_coresight,
8847 	.ctx_init = gaudi_ctx_init,
8848 	.ctx_fini = gaudi_ctx_fini,
8849 	.get_clk_rate = gaudi_get_clk_rate,
8850 	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
8851 	.read_device_fw_version = gaudi_read_device_fw_version,
8852 	.load_firmware_to_device = gaudi_load_firmware_to_device,
8853 	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
8854 	.get_signal_cb_size = gaudi_get_signal_cb_size,
8855 	.get_wait_cb_size = gaudi_get_wait_cb_size,
8856 	.gen_signal_cb = gaudi_gen_signal_cb,
8857 	.gen_wait_cb = gaudi_gen_wait_cb,
8858 	.reset_sob = gaudi_reset_sob,
8859 	.reset_sob_group = gaudi_reset_sob_group,
8860 	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
8861 	.get_device_time = gaudi_get_device_time,
8862 	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
8863 	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
8864 	.scramble_addr = hl_mmu_scramble_addr,
8865 	.descramble_addr = hl_mmu_descramble_addr,
8866 	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
8867 	.get_hw_block_id = gaudi_get_hw_block_id,
8868 	.hw_block_mmap = gaudi_block_mmap,
8869 	.enable_events_from_fw = gaudi_enable_events_from_fw
8870 };
8871 
8872 /**
8873  * gaudi_set_asic_funcs - set GAUDI function pointers
8874  *
8875  * @hdev: pointer to hl_device structure
8876  *
8877  */
8878 void gaudi_set_asic_funcs(struct hl_device *hdev)
8879 {
8880 	hdev->asic_funcs = &gaudi_funcs;
8881 }
8882