/*	$NetBSD: amdgpu_vce_v4_0.c,v 1.3 2021/12/19 12:21:29 riastradh Exp $	*/

/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_vce_v4_0.c,v 1.3 2021/12/19 12:21:29 riastradh Exp $");

#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vce/vce_4_0_offset.h"
#include "vce/vce_4_0_default.h"
#include "vce/vce_4_0_sh_mask.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"

#include "ivsrcid/vce/irqsrcs_vce_4_0.h"

#include <linux/nbsd-namespace.h>

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->me == 0)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
	else if (ring->me == 1)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];

	if (ring->me == 0)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
	else if (ring->me == 1)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		return;
	}

	if (ring->me == 0)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
			lower_32_bits(ring->wptr));
	else if (ring->me == 1)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
			lower_32_bits(ring->wptr));
	else
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
			lower_32_bits(ring->wptr));
}

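/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to finish booting
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS until the VCPU reports that the firmware is loaded,
 * soft-resetting the ECPU between retry rounds.
 *
 * Returns 0 on success or -ETIMEDOUT if the firmware never comes up.
 */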
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

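/**
 * vce_v4_0_mmsch_start - hand the init table over to the MMSCH
 *
 * @adev: amdgpu_device pointer
 * @table: memory descriptor table built by the caller
 *
 * Point the MM scheduler (MMSCH) at the descriptor table, kick off the
 * initialization through the mailbox registers and wait for the response.
 *
 * Returns 0 on success or -EBUSY if the MMSCH does not acknowledge in time.
 */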
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}

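/**
 * vce_v4_0_sriov_start - start VCE through the MMSCH under SRIOV
 *
 * @adev: amdgpu_device pointer
 *
 * Instead of programming the VCE registers directly, build an MMSCH v1.0
 * init table of register write/poll commands (ring setup, MC_RESUME and
 * the firmware-loaded handshake) and let vce_v4_0_mmsch_start() execute it.
 */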
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* begin of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(tmr_mc_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
						offset & ~0x0f000000);

		}
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
						(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
						(adev->vce.gpu_addr >> 40) & 0xff);

		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					(offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					(offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
			~VCE_STATUS__JOB_BUSY_MASK);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}

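/**
 * vce_v4_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the VCPU, hold the ECPU in soft reset and clear VCE_STATUS.
 */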
static int vce_v4_0_stop(struct amdgpu_device *adev)
{

	/* Disable VCPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear VCE_STATUS */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}

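/**
 * vce_v4_0_early_init - set up IP block callbacks
 *
 * @handle: amdgpu_device pointer
 *
 * Pick the number of rings (one under SRIOV, three otherwise) and hook up
 * the ring and interrupt functions.
 */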
static int vce_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SRIOV */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}

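/**
 * vce_v4_0_sw_init - software init
 *
 * @handle: amdgpu_device pointer
 *
 * Register the VCE interrupt source, allocate the VCPU BO, set up firmware
 * loading (directly or via the PSP), initialize the rings and allocate the
 * SRIOV MM table.
 */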
static int vce_v4_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;

	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vce.saved_bo)
			return -ENOMEM;

		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		snprintf(ring->name, sizeof(ring->name), "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SRIOV */
			ring->use_doorbell = true;

			/* Currently only the first encoding ring is used
			 * under SRIOV, so park the other rings at an unused
			 * doorbell location.
			 */
			if (i == 0)
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
			else
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	r = amdgpu_vce_entity_init(adev);
	if (r)
		return r;

	r = amdgpu_virt_alloc_mm_table(adev);
	if (r)
		return r;

	return r;
}

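/**
 * vce_v4_0_sw_fini - software fini
 *
 * @handle: amdgpu_device pointer
 *
 * Free the MM table and the PSP save buffer, then tear down the VCE
 * software state.
 */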
static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		kvfree(adev->vce.saved_bo);
		adev->vce.saved_bo = NULL;
	}

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

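/**
 * vce_v4_0_hw_init - hardware init
 *
 * @handle: amdgpu_device pointer
 *
 * Start the VCE block (through the MMSCH under SRIOV, directly otherwise)
 * and run a ring test on every ring.
 */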
static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
		if (r)
			return r;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

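/**
 * vce_v4_0_hw_fini - hardware fini
 *
 * @handle: amdgpu_device pointer
 *
 * Stop the VCE block on bare metal (under SRIOV the host owns the VCE
 * registers) and mark all rings as not ready.
 */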
static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	if (!amdgpu_sriov_vf(adev)) {
		/* vce_v4_0_wait_for_idle(handle); */
		vce_v4_0_stop(adev);
	} else {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
	}

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].sched.ready = false;

	return 0;
}

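/**
 * vce_v4_0_suspend - suspend VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Save the VCPU BO contents when the firmware was loaded by the PSP, then
 * tear down the hardware state.
 */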
static int vce_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_fromio(adev->vce.saved_bo, ptr, size);
	}

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

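/**
 * vce_v4_0_resume - resume VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Restore the saved VCPU BO contents (or reload the firmware when it is
 * not handled by the PSP) and reinitialize the hardware.
 */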
static int vce_v4_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_toio(ptr, adev->vce.saved_bo, size);
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	return vce_v4_0_hw_init(adev);
}

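/**
 * vce_v4_0_mc_resume - memory controller programming
 *
 * @adev: amdgpu_device pointer
 *
 * Program clock gating, the LMI, and the VCPU cache base addresses and
 * sizes for the firmware, stack and data regions, then enable the system
 * interrupt.
 */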
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;
	uint64_t tmr_mc_addr;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
										adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(tmr_mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(tmr_mc_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	}

	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for the 1st instance, 0x10 for the 2nd instance).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bit 3 through bit 6 for the busy
	 * status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}


static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	if (tmp != data)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE);
	int i;

	if ((adev->asic_type == CHIP_POLARIS10) ||
		(adev->asic_type == CHIP_TONGA) ||
		(adev->asic_type == CHIP_FIJI))
		vce_v4_0_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
		}

		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}
#endif

static int vce_v4_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (state == AMD_PG_STATE_GATE)
		return vce_v4_0_stop(adev);
	else
		return vce_v4_0_start(adev);
}

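/**
 * vce_v4_0_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @job: job to retrieve the VMID from
 * @ib: indirect buffer to execute
 * @flags: unused
 *
 * Write the ring commands to execute the indirect buffer.
 */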
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
					struct amdgpu_ib *ib, uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

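/**
 * vce_v4_0_ring_emit_fence - add a fence command to the ring
 *
 * @ring: amdgpu_ring pointer
 * @addr: address to write the fence value to
 * @seq: sequence number to write
 * @flags: fence flags (64-bit fences are not supported)
 *
 * Write a fence followed by a trap command.
 */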
static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
			u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				   uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, val);
}

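/**
 * vce_v4_0_emit_vm_flush - flush the TLB for a given VMID
 *
 * @ring: amdgpu_ring pointer
 * @vmid: VMID to flush
 * @pd_addr: page directory address
 *
 * Emit the GMC TLB flush and then wait for the page table base register
 * of the VMID to be updated.
 */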
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for reg writes */
	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
			       lower_32_bits(pd_addr), 0xffffffff);
}

static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
			       uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (!amdgpu_sriov_vf(adev)) {
		if (state == AMDGPU_IRQ_STATE_ENABLE)
			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	}
	return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = vce_v4_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
	.emit_wreg = vce_v4_0_emit_wreg,
	.emit_reg_wait = vce_v4_0_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vce.num_rings; i++) {
		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
		adev->vce.ring[i].me = i;
	}
	DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};