xref: /openbsd/sys/dev/pci/drm/amd/amdgpu/vcn_v4_0_3.c (revision f005ef32)
1*f005ef32Sjsg /*
2*f005ef32Sjsg  * Copyright 2022 Advanced Micro Devices, Inc.
3*f005ef32Sjsg  *
4*f005ef32Sjsg  * Permission is hereby granted, free of charge, to any person obtaining a
5*f005ef32Sjsg  * copy of this software and associated documentation files (the "Software"),
6*f005ef32Sjsg  * to deal in the Software without restriction, including without limitation
7*f005ef32Sjsg  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8*f005ef32Sjsg  * and/or sell copies of the Software, and to permit persons to whom the
9*f005ef32Sjsg  * Software is furnished to do so, subject to the following conditions:
10*f005ef32Sjsg  *
11*f005ef32Sjsg  * The above copyright notice and this permission notice shall be included in
12*f005ef32Sjsg  * all copies or substantial portions of the Software.
13*f005ef32Sjsg  *
14*f005ef32Sjsg  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15*f005ef32Sjsg  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16*f005ef32Sjsg  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17*f005ef32Sjsg  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18*f005ef32Sjsg  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19*f005ef32Sjsg  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20*f005ef32Sjsg  * OTHER DEALINGS IN THE SOFTWARE.
21*f005ef32Sjsg  *
22*f005ef32Sjsg  */
23*f005ef32Sjsg 
24*f005ef32Sjsg #include <linux/firmware.h>
25*f005ef32Sjsg #include <drm/drm_drv.h>
26*f005ef32Sjsg 
27*f005ef32Sjsg #include "amdgpu.h"
28*f005ef32Sjsg #include "amdgpu_vcn.h"
29*f005ef32Sjsg #include "amdgpu_pm.h"
30*f005ef32Sjsg #include "soc15.h"
31*f005ef32Sjsg #include "soc15d.h"
32*f005ef32Sjsg #include "soc15_hw_ip.h"
33*f005ef32Sjsg #include "vcn_v2_0.h"
34*f005ef32Sjsg #include "mmsch_v4_0_3.h"
35*f005ef32Sjsg 
36*f005ef32Sjsg #include "vcn/vcn_4_0_3_offset.h"
37*f005ef32Sjsg #include "vcn/vcn_4_0_3_sh_mask.h"
38*f005ef32Sjsg #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
39*f005ef32Sjsg 
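/*
 * The shared DPG register-access helpers are written against the legacy
 * mmUVD_DPG_LMA_* names, so the aliases below map them onto the VCN 4.0.3
 * reg* offsets pulled in from vcn_4_0_3_offset.h above.
 */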
40*f005ef32Sjsg #define mmUVD_DPG_LMA_CTL		regUVD_DPG_LMA_CTL
41*f005ef32Sjsg #define mmUVD_DPG_LMA_CTL_BASE_IDX	regUVD_DPG_LMA_CTL_BASE_IDX
42*f005ef32Sjsg #define mmUVD_DPG_LMA_DATA		regUVD_DPG_LMA_DATA
43*f005ef32Sjsg #define mmUVD_DPG_LMA_DATA_BASE_IDX	regUVD_DPG_LMA_DATA_BASE_IDX
44*f005ef32Sjsg 
45*f005ef32Sjsg #define VCN_VID_SOC_ADDRESS_2_0		0x1fb00
46*f005ef32Sjsg #define VCN1_VID_SOC_ADDRESS_3_0	0x48300
47*f005ef32Sjsg 
48*f005ef32Sjsg static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
49*f005ef32Sjsg static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
50*f005ef32Sjsg static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
51*f005ef32Sjsg static int vcn_v4_0_3_set_powergating_state(void *handle,
52*f005ef32Sjsg 		enum amd_powergating_state state);
53*f005ef32Sjsg static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev,
54*f005ef32Sjsg 		int inst_idx, struct dpg_pause_state *new_state);
55*f005ef32Sjsg static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring);
56*f005ef32Sjsg static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
57*f005ef32Sjsg static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev,
58*f005ef32Sjsg 				  int inst_idx, bool indirect);
59*f005ef32Sjsg /**
60*f005ef32Sjsg  * vcn_v4_0_3_early_init - set function pointers
61*f005ef32Sjsg  *
62*f005ef32Sjsg  * @handle: amdgpu_device pointer
63*f005ef32Sjsg  *
64*f005ef32Sjsg  * Set ring and irq function pointers
65*f005ef32Sjsg  */
66*f005ef32Sjsg static int vcn_v4_0_3_early_init(void *handle)
67*f005ef32Sjsg {
68*f005ef32Sjsg 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
69*f005ef32Sjsg 
70*f005ef32Sjsg 	/* re-use enc ring as unified ring */
71*f005ef32Sjsg 	adev->vcn.num_enc_rings = 1;
72*f005ef32Sjsg 
73*f005ef32Sjsg 	vcn_v4_0_3_set_unified_ring_funcs(adev);
74*f005ef32Sjsg 	vcn_v4_0_3_set_irq_funcs(adev);
75*f005ef32Sjsg 	vcn_v4_0_3_set_ras_funcs(adev);
76*f005ef32Sjsg 
77*f005ef32Sjsg 	return amdgpu_vcn_early_init(adev);
78*f005ef32Sjsg }
79*f005ef32Sjsg 
80*f005ef32Sjsg /**
81*f005ef32Sjsg  * vcn_v4_0_3_sw_init - sw init for VCN block
82*f005ef32Sjsg  *
83*f005ef32Sjsg  * @handle: amdgpu_device pointer
84*f005ef32Sjsg  *
85*f005ef32Sjsg  * Load firmware and sw initialization
86*f005ef32Sjsg  */
87*f005ef32Sjsg static int vcn_v4_0_3_sw_init(void *handle)
88*f005ef32Sjsg {
89*f005ef32Sjsg 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
90*f005ef32Sjsg 	struct amdgpu_ring *ring;
91*f005ef32Sjsg 	int i, r, vcn_inst;
92*f005ef32Sjsg 
93*f005ef32Sjsg 	r = amdgpu_vcn_sw_init(adev);
94*f005ef32Sjsg 	if (r)
95*f005ef32Sjsg 		return r;
96*f005ef32Sjsg 
97*f005ef32Sjsg 	amdgpu_vcn_setup_ucode(adev);
98*f005ef32Sjsg 
99*f005ef32Sjsg 	r = amdgpu_vcn_resume(adev);
100*f005ef32Sjsg 	if (r)
101*f005ef32Sjsg 		return r;
102*f005ef32Sjsg 
103*f005ef32Sjsg 	/* VCN DEC TRAP */
104*f005ef32Sjsg 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
105*f005ef32Sjsg 		VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq);
106*f005ef32Sjsg 	if (r)
107*f005ef32Sjsg 		return r;
108*f005ef32Sjsg 
109*f005ef32Sjsg 	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
110*f005ef32Sjsg 		volatile struct amdgpu_vcn4_fw_shared *fw_shared;
111*f005ef32Sjsg 
112*f005ef32Sjsg 		vcn_inst = GET_INST(VCN, i);
113*f005ef32Sjsg 
114*f005ef32Sjsg 		ring = &adev->vcn.inst[i].ring_enc[0];
115*f005ef32Sjsg 		ring->use_doorbell = true;
116*f005ef32Sjsg 
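		/*
		 * Doorbell layout per the computation below: on bare metal each
		 * VCN instance gets a stride of 9 doorbells past the shared
		 * vcn_ring0_1 base, while SR-IOV VFs use a stride of 32.
		 */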
117*f005ef32Sjsg 		if (!amdgpu_sriov_vf(adev))
118*f005ef32Sjsg 			ring->doorbell_index =
119*f005ef32Sjsg 				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
120*f005ef32Sjsg 				9 * vcn_inst;
121*f005ef32Sjsg 		else
122*f005ef32Sjsg 			ring->doorbell_index =
123*f005ef32Sjsg 				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
124*f005ef32Sjsg 				32 * vcn_inst;
125*f005ef32Sjsg 
126*f005ef32Sjsg 		ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
127*f005ef32Sjsg 		snprintf(ring->name, sizeof(ring->name), "vcn_unified_%d", adev->vcn.inst[i].aid_id);
128*f005ef32Sjsg 		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
129*f005ef32Sjsg 				     AMDGPU_RING_PRIO_DEFAULT,
130*f005ef32Sjsg 				     &adev->vcn.inst[i].sched_score);
131*f005ef32Sjsg 		if (r)
132*f005ef32Sjsg 			return r;
133*f005ef32Sjsg 
134*f005ef32Sjsg 		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
135*f005ef32Sjsg 		fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
136*f005ef32Sjsg 		fw_shared->sq.is_enabled = true;
137*f005ef32Sjsg 
138*f005ef32Sjsg 		if (amdgpu_vcnfw_log)
139*f005ef32Sjsg 			amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
140*f005ef32Sjsg 	}
141*f005ef32Sjsg 
142*f005ef32Sjsg 	if (amdgpu_sriov_vf(adev)) {
143*f005ef32Sjsg 		r = amdgpu_virt_alloc_mm_table(adev);
144*f005ef32Sjsg 		if (r)
145*f005ef32Sjsg 			return r;
146*f005ef32Sjsg 	}
147*f005ef32Sjsg 
148*f005ef32Sjsg 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
149*f005ef32Sjsg 		adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
150*f005ef32Sjsg 
151*f005ef32Sjsg 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
152*f005ef32Sjsg 		r = amdgpu_vcn_ras_sw_init(adev);
153*f005ef32Sjsg 		if (r) {
154*f005ef32Sjsg 			dev_err(adev->dev, "Failed to initialize vcn ras block!\n");
155*f005ef32Sjsg 			return r;
156*f005ef32Sjsg 		}
157*f005ef32Sjsg 	}
158*f005ef32Sjsg 
159*f005ef32Sjsg 	return 0;
160*f005ef32Sjsg }
161*f005ef32Sjsg 
162*f005ef32Sjsg /**
163*f005ef32Sjsg  * vcn_v4_0_3_sw_fini - sw fini for VCN block
164*f005ef32Sjsg  *
165*f005ef32Sjsg  * @handle: amdgpu_device pointer
166*f005ef32Sjsg  *
167*f005ef32Sjsg  * VCN suspend and free up sw allocation
168*f005ef32Sjsg  */
169*f005ef32Sjsg static int vcn_v4_0_3_sw_fini(void *handle)
170*f005ef32Sjsg {
171*f005ef32Sjsg 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
172*f005ef32Sjsg 	int i, r, idx;
173*f005ef32Sjsg 
174*f005ef32Sjsg 	if (drm_dev_enter(&adev->ddev, &idx)) {
175*f005ef32Sjsg 		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
176*f005ef32Sjsg 			volatile struct amdgpu_vcn4_fw_shared *fw_shared;
177*f005ef32Sjsg 
178*f005ef32Sjsg 			fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
179*f005ef32Sjsg 			fw_shared->present_flag_0 = 0;
180*f005ef32Sjsg 			fw_shared->sq.is_enabled = cpu_to_le32(false);
181*f005ef32Sjsg 		}
182*f005ef32Sjsg 		drm_dev_exit(idx);
183*f005ef32Sjsg 	}
184*f005ef32Sjsg 
185*f005ef32Sjsg 	if (amdgpu_sriov_vf(adev))
186*f005ef32Sjsg 		amdgpu_virt_free_mm_table(adev);
187*f005ef32Sjsg 
188*f005ef32Sjsg 	r = amdgpu_vcn_suspend(adev);
189*f005ef32Sjsg 	if (r)
190*f005ef32Sjsg 		return r;
191*f005ef32Sjsg 
192*f005ef32Sjsg 	r = amdgpu_vcn_sw_fini(adev);
193*f005ef32Sjsg 
194*f005ef32Sjsg 	return r;
195*f005ef32Sjsg }
196*f005ef32Sjsg 
197*f005ef32Sjsg /**
198*f005ef32Sjsg  * vcn_v4_0_3_hw_init - start and test VCN block
199*f005ef32Sjsg  *
200*f005ef32Sjsg  * @handle: amdgpu_device pointer
201*f005ef32Sjsg  *
202*f005ef32Sjsg  * Initialize the hardware, boot up the VCPU and do some testing
203*f005ef32Sjsg  */
204*f005ef32Sjsg static int vcn_v4_0_3_hw_init(void *handle)
205*f005ef32Sjsg {
206*f005ef32Sjsg 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
207*f005ef32Sjsg 	struct amdgpu_ring *ring;
208*f005ef32Sjsg 	int i, r, vcn_inst;
209*f005ef32Sjsg 
210*f005ef32Sjsg 	if (amdgpu_sriov_vf(adev)) {
211*f005ef32Sjsg 		r = vcn_v4_0_3_start_sriov(adev);
212*f005ef32Sjsg 		if (r)
213*f005ef32Sjsg 			goto done;
214*f005ef32Sjsg 
215*f005ef32Sjsg 		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
216*f005ef32Sjsg 			ring = &adev->vcn.inst[i].ring_enc[0];
217*f005ef32Sjsg 			ring->wptr = 0;
218*f005ef32Sjsg 			ring->wptr_old = 0;
219*f005ef32Sjsg 			vcn_v4_0_3_unified_ring_set_wptr(ring);
220*f005ef32Sjsg 			ring->sched.ready = true;
221*f005ef32Sjsg 		}
222*f005ef32Sjsg 	} else {
223*f005ef32Sjsg 		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
224*f005ef32Sjsg 			vcn_inst = GET_INST(VCN, i);
225*f005ef32Sjsg 			ring = &adev->vcn.inst[i].ring_enc[0];
226*f005ef32Sjsg 
227*f005ef32Sjsg 			if (ring->use_doorbell) {
228*f005ef32Sjsg 				adev->nbio.funcs->vcn_doorbell_range(
229*f005ef32Sjsg 					adev, ring->use_doorbell,
230*f005ef32Sjsg 					(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
231*f005ef32Sjsg 						9 * vcn_inst,
232*f005ef32Sjsg 					adev->vcn.inst[i].aid_id);
233*f005ef32Sjsg 
234*f005ef32Sjsg 				WREG32_SOC15(
235*f005ef32Sjsg 					VCN, GET_INST(VCN, ring->me),
236*f005ef32Sjsg 					regVCN_RB1_DB_CTRL,
237*f005ef32Sjsg 					ring->doorbell_index
238*f005ef32Sjsg 							<< VCN_RB1_DB_CTRL__OFFSET__SHIFT |
239*f005ef32Sjsg 						VCN_RB1_DB_CTRL__EN_MASK);
240*f005ef32Sjsg 
241*f005ef32Sjsg 				/* Read DB_CTRL to flush the write DB_CTRL command. */
242*f005ef32Sjsg 				RREG32_SOC15(
243*f005ef32Sjsg 					VCN, GET_INST(VCN, ring->me),
244*f005ef32Sjsg 					regVCN_RB1_DB_CTRL);
245*f005ef32Sjsg 			}
246*f005ef32Sjsg 
247*f005ef32Sjsg 			r = amdgpu_ring_test_helper(ring);
248*f005ef32Sjsg 			if (r)
249*f005ef32Sjsg 				goto done;
250*f005ef32Sjsg 		}
251*f005ef32Sjsg 	}
252*f005ef32Sjsg 
253*f005ef32Sjsg done:
254*f005ef32Sjsg 	if (!r)
255*f005ef32Sjsg 		DRM_DEV_INFO(adev->dev, "VCN decode initialized successfully (under %s).\n",
256*f005ef32Sjsg 			(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
257*f005ef32Sjsg 
258*f005ef32Sjsg 	return r;
259*f005ef32Sjsg }
260*f005ef32Sjsg 
261*f005ef32Sjsg /**
262*f005ef32Sjsg  * vcn_v4_0_3_hw_fini - stop the hardware block
263*f005ef32Sjsg  *
264*f005ef32Sjsg  * @handle: amdgpu_device pointer
265*f005ef32Sjsg  *
266*f005ef32Sjsg  * Stop the VCN block, mark ring as not ready any more
267*f005ef32Sjsg  */
268*f005ef32Sjsg static int vcn_v4_0_3_hw_fini(void *handle)
269*f005ef32Sjsg {
270*f005ef32Sjsg 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
271*f005ef32Sjsg 
272*f005ef32Sjsg 	cancel_delayed_work_sync(&adev->vcn.idle_work);
273*f005ef32Sjsg 
274*f005ef32Sjsg 	if (adev->vcn.cur_state != AMD_PG_STATE_GATE)
275*f005ef32Sjsg 		vcn_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);
276*f005ef32Sjsg 
277*f005ef32Sjsg 	return 0;
278*f005ef32Sjsg }
279*f005ef32Sjsg 
280*f005ef32Sjsg /**
281*f005ef32Sjsg  * vcn_v4_0_3_suspend - suspend VCN block
282*f005ef32Sjsg  *
283*f005ef32Sjsg  * @handle: amdgpu_device pointer
284*f005ef32Sjsg  *
285*f005ef32Sjsg  * HW fini and suspend VCN block
286*f005ef32Sjsg  */
287*f005ef32Sjsg static int vcn_v4_0_3_suspend(void *handle)
288*f005ef32Sjsg {
289*f005ef32Sjsg 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
290*f005ef32Sjsg 	int r;
291*f005ef32Sjsg 
292*f005ef32Sjsg 	r = vcn_v4_0_3_hw_fini(adev);
293*f005ef32Sjsg 	if (r)
294*f005ef32Sjsg 		return r;
295*f005ef32Sjsg 
296*f005ef32Sjsg 	r = amdgpu_vcn_suspend(adev);
297*f005ef32Sjsg 
298*f005ef32Sjsg 	return r;
299*f005ef32Sjsg }
300*f005ef32Sjsg 
301*f005ef32Sjsg /**
302*f005ef32Sjsg  * vcn_v4_0_3_resume - resume VCN block
303*f005ef32Sjsg  *
304*f005ef32Sjsg  * @handle: amdgpu_device pointer
305*f005ef32Sjsg  *
306*f005ef32Sjsg  * Resume firmware and hw init VCN block
307*f005ef32Sjsg  */
308*f005ef32Sjsg static int vcn_v4_0_3_resume(void *handle)
309*f005ef32Sjsg {
310*f005ef32Sjsg 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
311*f005ef32Sjsg 	int r;
312*f005ef32Sjsg 
313*f005ef32Sjsg 	r = amdgpu_vcn_resume(adev);
314*f005ef32Sjsg 	if (r)
315*f005ef32Sjsg 		return r;
316*f005ef32Sjsg 
317*f005ef32Sjsg 	r = vcn_v4_0_3_hw_init(adev);
318*f005ef32Sjsg 
319*f005ef32Sjsg 	return r;
320*f005ef32Sjsg }
321*f005ef32Sjsg 
322*f005ef32Sjsg /**
323*f005ef32Sjsg  * vcn_v4_0_3_mc_resume - memory controller programming
324*f005ef32Sjsg  *
325*f005ef32Sjsg  * @adev: amdgpu_device pointer
326*f005ef32Sjsg  * @inst_idx: instance number
327*f005ef32Sjsg  *
328*f005ef32Sjsg  * Let the VCN memory controller know its offsets
329*f005ef32Sjsg  */
330*f005ef32Sjsg static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev, int inst_idx)
331*f005ef32Sjsg {
332*f005ef32Sjsg 	uint32_t offset, size, vcn_inst;
333*f005ef32Sjsg 	const struct common_firmware_header *hdr;
334*f005ef32Sjsg 
335*f005ef32Sjsg 	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
336*f005ef32Sjsg 	size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
337*f005ef32Sjsg 
338*f005ef32Sjsg 	vcn_inst = GET_INST(VCN, inst_idx);
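	/*
	 * Program the VCPU memory windows: window 0 holds the firmware image
	 * (from the PSP TMR when PSP loads the ucode, otherwise from the
	 * driver's VCN BO), window 1 the stack, window 2 the context, and the
	 * non-cache window points at the shared firmware structure.
	 */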
339*f005ef32Sjsg 	/* cache window 0: fw */
340*f005ef32Sjsg 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
341*f005ef32Sjsg 		WREG32_SOC15(
342*f005ef32Sjsg 			VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
343*f005ef32Sjsg 			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx]
344*f005ef32Sjsg 				 .tmr_mc_addr_lo));
345*f005ef32Sjsg 		WREG32_SOC15(
346*f005ef32Sjsg 			VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
347*f005ef32Sjsg 			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx]
348*f005ef32Sjsg 				 .tmr_mc_addr_hi));
349*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 0);
350*f005ef32Sjsg 		offset = 0;
351*f005ef32Sjsg 	} else {
352*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
353*f005ef32Sjsg 			     lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr));
354*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst,
355*f005ef32Sjsg 			     regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
356*f005ef32Sjsg 			     upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr));
357*f005ef32Sjsg 		offset = size;
358*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0,
359*f005ef32Sjsg 			     AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
360*f005ef32Sjsg 	}
361*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, size);
362*f005ef32Sjsg 
363*f005ef32Sjsg 	/* cache window 1: stack */
364*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
365*f005ef32Sjsg 		     lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset));
366*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
367*f005ef32Sjsg 		     upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset));
368*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0);
369*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1,
370*f005ef32Sjsg 		     AMDGPU_VCN_STACK_SIZE);
371*f005ef32Sjsg 
372*f005ef32Sjsg 	/* cache window 2: context */
373*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
374*f005ef32Sjsg 		     lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
375*f005ef32Sjsg 				   AMDGPU_VCN_STACK_SIZE));
376*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
377*f005ef32Sjsg 		     upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
378*f005ef32Sjsg 				   AMDGPU_VCN_STACK_SIZE));
379*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0);
380*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2,
381*f005ef32Sjsg 		     AMDGPU_VCN_CONTEXT_SIZE);
382*f005ef32Sjsg 
383*f005ef32Sjsg 	/* non-cache window */
384*f005ef32Sjsg 	WREG32_SOC15(
385*f005ef32Sjsg 		VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
386*f005ef32Sjsg 		lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr));
387*f005ef32Sjsg 	WREG32_SOC15(
388*f005ef32Sjsg 		VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
389*f005ef32Sjsg 		upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr));
390*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
391*f005ef32Sjsg 	WREG32_SOC15(
392*f005ef32Sjsg 		VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0,
393*f005ef32Sjsg 		AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
394*f005ef32Sjsg }
395*f005ef32Sjsg 
396*f005ef32Sjsg /**
397*f005ef32Sjsg  * vcn_v4_0_3_mc_resume_dpg_mode - memory controller programming for dpg mode
398*f005ef32Sjsg  *
399*f005ef32Sjsg  * @adev: amdgpu_device pointer
400*f005ef32Sjsg  * @inst_idx: instance number index
401*f005ef32Sjsg  * @indirect: indirectly write sram
402*f005ef32Sjsg  *
403*f005ef32Sjsg  * Let the VCN memory controller know its offsets with dpg mode
404*f005ef32Sjsg  */
405*f005ef32Sjsg static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
406*f005ef32Sjsg {
407*f005ef32Sjsg 	uint32_t offset, size;
408*f005ef32Sjsg 	const struct common_firmware_header *hdr;
409*f005ef32Sjsg 
410*f005ef32Sjsg 	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
411*f005ef32Sjsg 	size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
412*f005ef32Sjsg 
413*f005ef32Sjsg 	/* cache window 0: fw */
414*f005ef32Sjsg 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
415*f005ef32Sjsg 		if (!indirect) {
416*f005ef32Sjsg 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
417*f005ef32Sjsg 				VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
418*f005ef32Sjsg 				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
419*f005ef32Sjsg 					inst_idx].tmr_mc_addr_lo), 0, indirect);
420*f005ef32Sjsg 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
421*f005ef32Sjsg 				VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
422*f005ef32Sjsg 				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
423*f005ef32Sjsg 					inst_idx].tmr_mc_addr_hi), 0, indirect);
424*f005ef32Sjsg 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
425*f005ef32Sjsg 				VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
426*f005ef32Sjsg 		} else {
427*f005ef32Sjsg 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
428*f005ef32Sjsg 				VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
429*f005ef32Sjsg 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
430*f005ef32Sjsg 				VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
431*f005ef32Sjsg 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
432*f005ef32Sjsg 				VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
433*f005ef32Sjsg 		}
434*f005ef32Sjsg 		offset = 0;
435*f005ef32Sjsg 	} else {
436*f005ef32Sjsg 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
437*f005ef32Sjsg 			VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
438*f005ef32Sjsg 			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
439*f005ef32Sjsg 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
440*f005ef32Sjsg 			VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
441*f005ef32Sjsg 			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
442*f005ef32Sjsg 		offset = size;
443*f005ef32Sjsg 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
444*f005ef32Sjsg 			VCN, 0, regUVD_VCPU_CACHE_OFFSET0),
445*f005ef32Sjsg 			AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
446*f005ef32Sjsg 	}
447*f005ef32Sjsg 
448*f005ef32Sjsg 	if (!indirect)
449*f005ef32Sjsg 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
450*f005ef32Sjsg 			VCN, 0, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
451*f005ef32Sjsg 	else
452*f005ef32Sjsg 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
453*f005ef32Sjsg 			VCN, 0, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
454*f005ef32Sjsg 
455*f005ef32Sjsg 	/* cache window 1: stack */
456*f005ef32Sjsg 	if (!indirect) {
457*f005ef32Sjsg 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
458*f005ef32Sjsg 			VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
459*f005ef32Sjsg 			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
460*f005ef32Sjsg 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
461*f005ef32Sjsg 			VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
462*f005ef32Sjsg 			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
463*f005ef32Sjsg 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
464*f005ef32Sjsg 			VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
465*f005ef32Sjsg 	} else {
466*f005ef32Sjsg 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
467*f005ef32Sjsg 			VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
468*f005ef32Sjsg 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
469*f005ef32Sjsg 			VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
470*f005ef32Sjsg 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
471*f005ef32Sjsg 			VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
472*f005ef32Sjsg 	}
473*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
474*f005ef32Sjsg 			VCN, 0, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
475*f005ef32Sjsg 
476*f005ef32Sjsg 	/* cache window 2: context */
477*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
478*f005ef32Sjsg 			VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
479*f005ef32Sjsg 			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
480*f005ef32Sjsg 				AMDGPU_VCN_STACK_SIZE), 0, indirect);
481*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
482*f005ef32Sjsg 			VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
483*f005ef32Sjsg 			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
484*f005ef32Sjsg 				AMDGPU_VCN_STACK_SIZE), 0, indirect);
485*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
486*f005ef32Sjsg 			VCN, 0, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
487*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
488*f005ef32Sjsg 			VCN, 0, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
489*f005ef32Sjsg 
490*f005ef32Sjsg 	/* non-cache window */
491*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
492*f005ef32Sjsg 			VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
493*f005ef32Sjsg 			lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
494*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
495*f005ef32Sjsg 			VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
496*f005ef32Sjsg 			upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
497*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
498*f005ef32Sjsg 			VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
499*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
500*f005ef32Sjsg 			VCN, 0, regUVD_VCPU_NONCACHE_SIZE0),
501*f005ef32Sjsg 			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect);
502*f005ef32Sjsg 
503*f005ef32Sjsg 	/* VCN global tiling registers */
504*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
505*f005ef32Sjsg 		VCN, 0, regUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
506*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
507*f005ef32Sjsg 		VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
508*f005ef32Sjsg }
509*f005ef32Sjsg 
510*f005ef32Sjsg /**
511*f005ef32Sjsg  * vcn_v4_0_3_disable_clock_gating - disable VCN clock gating
512*f005ef32Sjsg  *
513*f005ef32Sjsg  * @adev: amdgpu_device pointer
514*f005ef32Sjsg  * @inst_idx: instance number
515*f005ef32Sjsg  *
516*f005ef32Sjsg  * Disable clock gating for VCN block
517*f005ef32Sjsg  */
518*f005ef32Sjsg static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx)
519*f005ef32Sjsg {
520*f005ef32Sjsg 	uint32_t data;
521*f005ef32Sjsg 	int vcn_inst;
522*f005ef32Sjsg 
523*f005ef32Sjsg 	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
524*f005ef32Sjsg 		return;
525*f005ef32Sjsg 
526*f005ef32Sjsg 	vcn_inst = GET_INST(VCN, inst_idx);
527*f005ef32Sjsg 
528*f005ef32Sjsg 	/* VCN disable CGC */
529*f005ef32Sjsg 	data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
530*f005ef32Sjsg 	data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
531*f005ef32Sjsg 	data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
532*f005ef32Sjsg 	data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
533*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);
534*f005ef32Sjsg 
535*f005ef32Sjsg 	data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_GATE);
536*f005ef32Sjsg 	data &= ~(UVD_CGC_GATE__SYS_MASK
537*f005ef32Sjsg 		| UVD_CGC_GATE__MPEG2_MASK
538*f005ef32Sjsg 		| UVD_CGC_GATE__REGS_MASK
539*f005ef32Sjsg 		| UVD_CGC_GATE__RBC_MASK
540*f005ef32Sjsg 		| UVD_CGC_GATE__LMI_MC_MASK
541*f005ef32Sjsg 		| UVD_CGC_GATE__LMI_UMC_MASK
542*f005ef32Sjsg 		| UVD_CGC_GATE__MPC_MASK
543*f005ef32Sjsg 		| UVD_CGC_GATE__LBSI_MASK
544*f005ef32Sjsg 		| UVD_CGC_GATE__LRBBM_MASK
545*f005ef32Sjsg 		| UVD_CGC_GATE__WCB_MASK
546*f005ef32Sjsg 		| UVD_CGC_GATE__VCPU_MASK
547*f005ef32Sjsg 		| UVD_CGC_GATE__MMSCH_MASK);
548*f005ef32Sjsg 
549*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_GATE, data);
550*f005ef32Sjsg 	SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_CGC_GATE, 0, 0xFFFFFFFF);
551*f005ef32Sjsg 
552*f005ef32Sjsg 	data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
553*f005ef32Sjsg 	data &= ~(UVD_CGC_CTRL__SYS_MODE_MASK
554*f005ef32Sjsg 		| UVD_CGC_CTRL__MPEG2_MODE_MASK
555*f005ef32Sjsg 		| UVD_CGC_CTRL__REGS_MODE_MASK
556*f005ef32Sjsg 		| UVD_CGC_CTRL__RBC_MODE_MASK
557*f005ef32Sjsg 		| UVD_CGC_CTRL__LMI_MC_MODE_MASK
558*f005ef32Sjsg 		| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
559*f005ef32Sjsg 		| UVD_CGC_CTRL__MPC_MODE_MASK
560*f005ef32Sjsg 		| UVD_CGC_CTRL__LBSI_MODE_MASK
561*f005ef32Sjsg 		| UVD_CGC_CTRL__LRBBM_MODE_MASK
562*f005ef32Sjsg 		| UVD_CGC_CTRL__WCB_MODE_MASK
563*f005ef32Sjsg 		| UVD_CGC_CTRL__VCPU_MODE_MASK
564*f005ef32Sjsg 		| UVD_CGC_CTRL__MMSCH_MODE_MASK);
565*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);
566*f005ef32Sjsg 
567*f005ef32Sjsg 	data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_GATE);
568*f005ef32Sjsg 	data |= (UVD_SUVD_CGC_GATE__SRE_MASK
569*f005ef32Sjsg 		| UVD_SUVD_CGC_GATE__SIT_MASK
570*f005ef32Sjsg 		| UVD_SUVD_CGC_GATE__SMP_MASK
571*f005ef32Sjsg 		| UVD_SUVD_CGC_GATE__SCM_MASK
572*f005ef32Sjsg 		| UVD_SUVD_CGC_GATE__SDB_MASK
573*f005ef32Sjsg 		| UVD_SUVD_CGC_GATE__SRE_H264_MASK
574*f005ef32Sjsg 		| UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
575*f005ef32Sjsg 		| UVD_SUVD_CGC_GATE__SIT_H264_MASK
576*f005ef32Sjsg 		| UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
577*f005ef32Sjsg 		| UVD_SUVD_CGC_GATE__SCM_H264_MASK
578*f005ef32Sjsg 		| UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
579*f005ef32Sjsg 		| UVD_SUVD_CGC_GATE__SDB_H264_MASK
580*f005ef32Sjsg 		| UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
581*f005ef32Sjsg 		| UVD_SUVD_CGC_GATE__ENT_MASK
582*f005ef32Sjsg 		| UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
583*f005ef32Sjsg 		| UVD_SUVD_CGC_GATE__SITE_MASK
584*f005ef32Sjsg 		| UVD_SUVD_CGC_GATE__SRE_VP9_MASK
585*f005ef32Sjsg 		| UVD_SUVD_CGC_GATE__SCM_VP9_MASK
586*f005ef32Sjsg 		| UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
587*f005ef32Sjsg 		| UVD_SUVD_CGC_GATE__SDB_VP9_MASK
588*f005ef32Sjsg 		| UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
589*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_GATE, data);
590*f005ef32Sjsg 
591*f005ef32Sjsg 	data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL);
592*f005ef32Sjsg 	data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
593*f005ef32Sjsg 		| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
594*f005ef32Sjsg 		| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
595*f005ef32Sjsg 		| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
596*f005ef32Sjsg 		| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
597*f005ef32Sjsg 		| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
598*f005ef32Sjsg 		| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
599*f005ef32Sjsg 		| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
600*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL, data);
601*f005ef32Sjsg }
602*f005ef32Sjsg 
603*f005ef32Sjsg /**
604*f005ef32Sjsg  * vcn_v4_0_3_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
605*f005ef32Sjsg  *
606*f005ef32Sjsg  * @adev: amdgpu_device pointer
607*f005ef32Sjsg  * @sram_sel: sram select
608*f005ef32Sjsg  * @inst_idx: instance number index
609*f005ef32Sjsg  * @indirect: indirectly write sram
610*f005ef32Sjsg  *
611*f005ef32Sjsg  * Disable clock gating for VCN block with dpg mode
612*f005ef32Sjsg  */
613*f005ef32Sjsg static void vcn_v4_0_3_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel,
614*f005ef32Sjsg 				int inst_idx, uint8_t indirect)
615*f005ef32Sjsg {
616*f005ef32Sjsg 	uint32_t reg_data = 0;
617*f005ef32Sjsg 
618*f005ef32Sjsg 	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
619*f005ef32Sjsg 		return;
620*f005ef32Sjsg 
621*f005ef32Sjsg 	/* enable sw clock gating control */
622*f005ef32Sjsg 	reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
623*f005ef32Sjsg 	reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
624*f005ef32Sjsg 	reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
625*f005ef32Sjsg 	reg_data &= ~(UVD_CGC_CTRL__SYS_MODE_MASK |
626*f005ef32Sjsg 		 UVD_CGC_CTRL__MPEG2_MODE_MASK |
627*f005ef32Sjsg 		 UVD_CGC_CTRL__REGS_MODE_MASK |
628*f005ef32Sjsg 		 UVD_CGC_CTRL__RBC_MODE_MASK |
629*f005ef32Sjsg 		 UVD_CGC_CTRL__LMI_MC_MODE_MASK |
630*f005ef32Sjsg 		 UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
631*f005ef32Sjsg 		 UVD_CGC_CTRL__IDCT_MODE_MASK |
632*f005ef32Sjsg 		 UVD_CGC_CTRL__MPRD_MODE_MASK |
633*f005ef32Sjsg 		 UVD_CGC_CTRL__MPC_MODE_MASK |
634*f005ef32Sjsg 		 UVD_CGC_CTRL__LBSI_MODE_MASK |
635*f005ef32Sjsg 		 UVD_CGC_CTRL__LRBBM_MODE_MASK |
636*f005ef32Sjsg 		 UVD_CGC_CTRL__WCB_MODE_MASK |
637*f005ef32Sjsg 		 UVD_CGC_CTRL__VCPU_MODE_MASK);
638*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
639*f005ef32Sjsg 		VCN, 0, regUVD_CGC_CTRL), reg_data, sram_sel, indirect);
640*f005ef32Sjsg 
641*f005ef32Sjsg 	/* turn off clock gating */
642*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
643*f005ef32Sjsg 		VCN, 0, regUVD_CGC_GATE), 0, sram_sel, indirect);
644*f005ef32Sjsg 
645*f005ef32Sjsg 	/* turn on SUVD clock gating */
646*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
647*f005ef32Sjsg 		VCN, 0, regUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
648*f005ef32Sjsg 
649*f005ef32Sjsg 	/* turn on sw mode in UVD_SUVD_CGC_CTRL */
650*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
651*f005ef32Sjsg 		VCN, 0, regUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
652*f005ef32Sjsg }
653*f005ef32Sjsg 
654*f005ef32Sjsg /**
655*f005ef32Sjsg  * vcn_v4_0_3_enable_clock_gating - enable VCN clock gating
656*f005ef32Sjsg  *
657*f005ef32Sjsg  * @adev: amdgpu_device pointer
658*f005ef32Sjsg  * @inst_idx: instance number
659*f005ef32Sjsg  *
660*f005ef32Sjsg  * Enable clock gating for VCN block
661*f005ef32Sjsg  */
662*f005ef32Sjsg static void vcn_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx)
663*f005ef32Sjsg {
664*f005ef32Sjsg 	uint32_t data;
665*f005ef32Sjsg 	int vcn_inst;
666*f005ef32Sjsg 
667*f005ef32Sjsg 	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
668*f005ef32Sjsg 		return;
669*f005ef32Sjsg 
670*f005ef32Sjsg 	vcn_inst = GET_INST(VCN, inst_idx);
671*f005ef32Sjsg 
672*f005ef32Sjsg 	/* enable VCN CGC */
673*f005ef32Sjsg 	data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
674*f005ef32Sjsg 	data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
675*f005ef32Sjsg 	data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
676*f005ef32Sjsg 	data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
677*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);
678*f005ef32Sjsg 
679*f005ef32Sjsg 	data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
680*f005ef32Sjsg 	data |= (UVD_CGC_CTRL__SYS_MODE_MASK
681*f005ef32Sjsg 		| UVD_CGC_CTRL__MPEG2_MODE_MASK
682*f005ef32Sjsg 		| UVD_CGC_CTRL__REGS_MODE_MASK
683*f005ef32Sjsg 		| UVD_CGC_CTRL__RBC_MODE_MASK
684*f005ef32Sjsg 		| UVD_CGC_CTRL__LMI_MC_MODE_MASK
685*f005ef32Sjsg 		| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
686*f005ef32Sjsg 		| UVD_CGC_CTRL__MPC_MODE_MASK
687*f005ef32Sjsg 		| UVD_CGC_CTRL__LBSI_MODE_MASK
688*f005ef32Sjsg 		| UVD_CGC_CTRL__LRBBM_MODE_MASK
689*f005ef32Sjsg 		| UVD_CGC_CTRL__WCB_MODE_MASK
690*f005ef32Sjsg 		| UVD_CGC_CTRL__VCPU_MODE_MASK);
691*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);
692*f005ef32Sjsg 
693*f005ef32Sjsg 	data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL);
694*f005ef32Sjsg 	data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
695*f005ef32Sjsg 		| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
696*f005ef32Sjsg 		| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
697*f005ef32Sjsg 		| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
698*f005ef32Sjsg 		| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
699*f005ef32Sjsg 		| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
700*f005ef32Sjsg 		| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
701*f005ef32Sjsg 		| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
702*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL, data);
703*f005ef32Sjsg }
704*f005ef32Sjsg 
705*f005ef32Sjsg /**
706*f005ef32Sjsg  * vcn_v4_0_3_start_dpg_mode - VCN start with dpg mode
707*f005ef32Sjsg  *
708*f005ef32Sjsg  * @adev: amdgpu_device pointer
709*f005ef32Sjsg  * @inst_idx: instance number index
710*f005ef32Sjsg  * @indirect: indirectly write sram
711*f005ef32Sjsg  *
712*f005ef32Sjsg  * Start VCN block with dpg mode
713*f005ef32Sjsg  */
714*f005ef32Sjsg static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
715*f005ef32Sjsg {
716*f005ef32Sjsg 	volatile struct amdgpu_vcn4_fw_shared *fw_shared =
717*f005ef32Sjsg 						adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
718*f005ef32Sjsg 	struct amdgpu_ring *ring;
719*f005ef32Sjsg 	int vcn_inst;
720*f005ef32Sjsg 	uint32_t tmp;
721*f005ef32Sjsg 
722*f005ef32Sjsg 	vcn_inst = GET_INST(VCN, inst_idx);
723*f005ef32Sjsg 	/* disable register anti-hang mechanism */
724*f005ef32Sjsg 	WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1,
725*f005ef32Sjsg 		 ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
726*f005ef32Sjsg 	/* enable dynamic power gating mode */
727*f005ef32Sjsg 	tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS);
728*f005ef32Sjsg 	tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
729*f005ef32Sjsg 	tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
730*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp);
731*f005ef32Sjsg 
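	/*
	 * With indirect programming, the DPG register writes below are staged
	 * in this instance's DPG SRAM buffer and handed to PSP afterwards (see
	 * amdgpu_vcn_psp_update_sram() at the end of this sequence); otherwise
	 * they go straight to MMIO.
	 */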
732*f005ef32Sjsg 	if (indirect) {
733*f005ef32Sjsg 		DRM_DEV_DEBUG(adev->dev, "VCN %d start: on AID %d",
734*f005ef32Sjsg 			inst_idx, adev->vcn.inst[inst_idx].aid_id);
735*f005ef32Sjsg 		adev->vcn.inst[inst_idx].dpg_sram_curr_addr =
736*f005ef32Sjsg 				(uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
737*f005ef32Sjsg 		/* Use dummy register 0xDEADBEEF passing AID selection to PSP FW */
738*f005ef32Sjsg 		WREG32_SOC15_DPG_MODE(inst_idx, 0xDEADBEEF,
739*f005ef32Sjsg 			adev->vcn.inst[inst_idx].aid_id, 0, true);
740*f005ef32Sjsg 	}
741*f005ef32Sjsg 
742*f005ef32Sjsg 	/* disable clock gating (switch CGC to sw control) */
743*f005ef32Sjsg 	vcn_v4_0_3_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
744*f005ef32Sjsg 
745*f005ef32Sjsg 	/* enable VCPU clock */
746*f005ef32Sjsg 	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
747*f005ef32Sjsg 	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
748*f005ef32Sjsg 	tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
749*f005ef32Sjsg 
750*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
751*f005ef32Sjsg 		VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
752*f005ef32Sjsg 
753*f005ef32Sjsg 	/* disable master interrupt */
754*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
755*f005ef32Sjsg 		VCN, 0, regUVD_MASTINT_EN), 0, 0, indirect);
756*f005ef32Sjsg 
757*f005ef32Sjsg 	/* setup regUVD_LMI_CTRL */
758*f005ef32Sjsg 	tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
759*f005ef32Sjsg 		UVD_LMI_CTRL__REQ_MODE_MASK |
760*f005ef32Sjsg 		UVD_LMI_CTRL__CRC_RESET_MASK |
761*f005ef32Sjsg 		UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
762*f005ef32Sjsg 		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
763*f005ef32Sjsg 		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
764*f005ef32Sjsg 		(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
765*f005ef32Sjsg 		0x00100000L);
766*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
767*f005ef32Sjsg 		VCN, 0, regUVD_LMI_CTRL), tmp, 0, indirect);
768*f005ef32Sjsg 
769*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
770*f005ef32Sjsg 		VCN, 0, regUVD_MPC_CNTL),
771*f005ef32Sjsg 		0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
772*f005ef32Sjsg 
773*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
774*f005ef32Sjsg 		VCN, 0, regUVD_MPC_SET_MUXA0),
775*f005ef32Sjsg 		((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
776*f005ef32Sjsg 		 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
777*f005ef32Sjsg 		 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
778*f005ef32Sjsg 		 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
779*f005ef32Sjsg 
780*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
781*f005ef32Sjsg 		VCN, 0, regUVD_MPC_SET_MUXB0),
782*f005ef32Sjsg 		 ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
783*f005ef32Sjsg 		 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
784*f005ef32Sjsg 		 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
785*f005ef32Sjsg 		 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
786*f005ef32Sjsg 
787*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
788*f005ef32Sjsg 		VCN, 0, regUVD_MPC_SET_MUX),
789*f005ef32Sjsg 		((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
790*f005ef32Sjsg 		 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
791*f005ef32Sjsg 		 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
792*f005ef32Sjsg 
793*f005ef32Sjsg 	vcn_v4_0_3_mc_resume_dpg_mode(adev, inst_idx, indirect);
794*f005ef32Sjsg 
795*f005ef32Sjsg 	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
796*f005ef32Sjsg 	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
797*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
798*f005ef32Sjsg 		VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
799*f005ef32Sjsg 
800*f005ef32Sjsg 	/* enable LMI MC and UMC channels */
801*f005ef32Sjsg 	tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
802*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
803*f005ef32Sjsg 		VCN, 0, regUVD_LMI_CTRL2), tmp, 0, indirect);
804*f005ef32Sjsg 
805*f005ef32Sjsg 	vcn_v4_0_3_enable_ras(adev, inst_idx, indirect);
806*f005ef32Sjsg 
807*f005ef32Sjsg 	/* enable master interrupt */
808*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
809*f005ef32Sjsg 		VCN, 0, regUVD_MASTINT_EN),
810*f005ef32Sjsg 		UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
811*f005ef32Sjsg 
812*f005ef32Sjsg 	if (indirect)
813*f005ef32Sjsg 		amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM);
814*f005ef32Sjsg 
815*f005ef32Sjsg 	ring = &adev->vcn.inst[inst_idx].ring_enc[0];
816*f005ef32Sjsg 
817*f005ef32Sjsg 	/* program the RB_BASE for ring buffer */
818*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO,
819*f005ef32Sjsg 		     lower_32_bits(ring->gpu_addr));
820*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI,
821*f005ef32Sjsg 		     upper_32_bits(ring->gpu_addr));
822*f005ef32Sjsg 
823*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE,
824*f005ef32Sjsg 		     ring->ring_size / sizeof(uint32_t));
825*f005ef32Sjsg 
826*f005ef32Sjsg 	/* resetting ring, fw should not check RB ring */
827*f005ef32Sjsg 	tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
828*f005ef32Sjsg 	tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK);
829*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
830*f005ef32Sjsg 	fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
831*f005ef32Sjsg 
832*f005ef32Sjsg 	/* Initialize the ring buffer's read and write pointers */
833*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
834*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
835*f005ef32Sjsg 	ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
836*f005ef32Sjsg 
837*f005ef32Sjsg 	tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
838*f005ef32Sjsg 	tmp |= VCN_RB_ENABLE__RB_EN_MASK;
839*f005ef32Sjsg 	WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
840*f005ef32Sjsg 	fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
841*f005ef32Sjsg 
842*f005ef32Sjsg 	/* resetting done, fw can check RB ring */
843*f005ef32Sjsg 	fw_shared->sq.queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
844*f005ef32Sjsg 
845*f005ef32Sjsg 	return 0;
846*f005ef32Sjsg }
847*f005ef32Sjsg 
848*f005ef32Sjsg static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
849*f005ef32Sjsg {
850*f005ef32Sjsg 	int i, vcn_inst;
851*f005ef32Sjsg 	struct amdgpu_ring *ring_enc;
852*f005ef32Sjsg 	uint64_t cache_addr;
853*f005ef32Sjsg 	uint64_t rb_enc_addr;
854*f005ef32Sjsg 	uint64_t ctx_addr;
855*f005ef32Sjsg 	uint32_t param, resp, expected;
856*f005ef32Sjsg 	uint32_t offset, cache_size;
857*f005ef32Sjsg 	uint32_t tmp, timeout;
858*f005ef32Sjsg 
859*f005ef32Sjsg 	struct amdgpu_mm_table *table = &adev->virt.mm_table;
860*f005ef32Sjsg 	uint32_t *table_loc;
861*f005ef32Sjsg 	uint32_t table_size;
862*f005ef32Sjsg 	uint32_t size, size_dw;
863*f005ef32Sjsg 	uint32_t init_status;
864*f005ef32Sjsg 	uint32_t enabled_vcn;
865*f005ef32Sjsg 
866*f005ef32Sjsg 	struct mmsch_v4_0_cmd_direct_write
867*f005ef32Sjsg 		direct_wt = { {0} };
868*f005ef32Sjsg 	struct mmsch_v4_0_cmd_direct_read_modify_write
869*f005ef32Sjsg 		direct_rd_mod_wt = { {0} };
870*f005ef32Sjsg 	struct mmsch_v4_0_cmd_end end = { {0} };
871*f005ef32Sjsg 	struct mmsch_v4_0_3_init_header header;
872*f005ef32Sjsg 
873*f005ef32Sjsg 	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
874*f005ef32Sjsg 	volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
875*f005ef32Sjsg 
876*f005ef32Sjsg 	direct_wt.cmd_header.command_type =
877*f005ef32Sjsg 		MMSCH_COMMAND__DIRECT_REG_WRITE;
878*f005ef32Sjsg 	direct_rd_mod_wt.cmd_header.command_type =
879*f005ef32Sjsg 		MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
880*f005ef32Sjsg 	end.cmd_header.command_type = MMSCH_COMMAND__END;
881*f005ef32Sjsg 
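	/*
	 * For each instance, build an MMSCH init table of direct register
	 * writes in the shared mm_table, then point MMSCH at it and wait for
	 * the mailbox handshake so the programming is replayed on the VF's
	 * behalf.
	 */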
882*f005ef32Sjsg 	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
883*f005ef32Sjsg 		vcn_inst = GET_INST(VCN, i);
884*f005ef32Sjsg 
885*f005ef32Sjsg 		memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
886*f005ef32Sjsg 		header.version = MMSCH_VERSION;
887*f005ef32Sjsg 		header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
888*f005ef32Sjsg 
889*f005ef32Sjsg 		table_loc = (uint32_t *)table->cpu_addr;
890*f005ef32Sjsg 		table_loc += header.total_size;
891*f005ef32Sjsg 
892*f005ef32Sjsg 		table_size = 0;
893*f005ef32Sjsg 
894*f005ef32Sjsg 		MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
895*f005ef32Sjsg 			~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
896*f005ef32Sjsg 
897*f005ef32Sjsg 		cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
898*f005ef32Sjsg 
899*f005ef32Sjsg 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
900*f005ef32Sjsg 			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
901*f005ef32Sjsg 				regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
902*f005ef32Sjsg 				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
903*f005ef32Sjsg 
904*f005ef32Sjsg 			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
905*f005ef32Sjsg 				regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
906*f005ef32Sjsg 				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
907*f005ef32Sjsg 
908*f005ef32Sjsg 			offset = 0;
909*f005ef32Sjsg 			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
910*f005ef32Sjsg 				regUVD_VCPU_CACHE_OFFSET0), 0);
911*f005ef32Sjsg 		} else {
912*f005ef32Sjsg 			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
913*f005ef32Sjsg 				regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
914*f005ef32Sjsg 				lower_32_bits(adev->vcn.inst[i].gpu_addr));
915*f005ef32Sjsg 			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
916*f005ef32Sjsg 				regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
917*f005ef32Sjsg 				upper_32_bits(adev->vcn.inst[i].gpu_addr));
918*f005ef32Sjsg 			offset = cache_size;
919*f005ef32Sjsg 			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
920*f005ef32Sjsg 				regUVD_VCPU_CACHE_OFFSET0),
921*f005ef32Sjsg 				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
922*f005ef32Sjsg 		}
923*f005ef32Sjsg 
924*f005ef32Sjsg 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
925*f005ef32Sjsg 			regUVD_VCPU_CACHE_SIZE0),
926*f005ef32Sjsg 			cache_size);
927*f005ef32Sjsg 
928*f005ef32Sjsg 		cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset;
929*f005ef32Sjsg 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
930*f005ef32Sjsg 			regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr));
931*f005ef32Sjsg 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
932*f005ef32Sjsg 			regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
933*f005ef32Sjsg 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
934*f005ef32Sjsg 			regUVD_VCPU_CACHE_OFFSET1), 0);
935*f005ef32Sjsg 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
936*f005ef32Sjsg 			regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE);
937*f005ef32Sjsg 
938*f005ef32Sjsg 		cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset +
939*f005ef32Sjsg 			AMDGPU_VCN_STACK_SIZE;
940*f005ef32Sjsg 
941*f005ef32Sjsg 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
942*f005ef32Sjsg 			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr));
943*f005ef32Sjsg 
944*f005ef32Sjsg 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
945*f005ef32Sjsg 			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
946*f005ef32Sjsg 
947*f005ef32Sjsg 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
948*f005ef32Sjsg 			regUVD_VCPU_CACHE_OFFSET2), 0);
949*f005ef32Sjsg 
950*f005ef32Sjsg 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
951*f005ef32Sjsg 			regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE);
952*f005ef32Sjsg 
953*f005ef32Sjsg 		fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr;
954*f005ef32Sjsg 		rb_setup = &fw_shared->rb_setup;
955*f005ef32Sjsg 
956*f005ef32Sjsg 		ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0];
957*f005ef32Sjsg 		ring_enc->wptr = 0;
958*f005ef32Sjsg 		rb_enc_addr = ring_enc->gpu_addr;
959*f005ef32Sjsg 
960*f005ef32Sjsg 		rb_setup->is_rb_enabled_flags |= RB_ENABLED;
961*f005ef32Sjsg 		rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
962*f005ef32Sjsg 		rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
963*f005ef32Sjsg 		rb_setup->rb_size = ring_enc->ring_size / 4;
964*f005ef32Sjsg 		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
965*f005ef32Sjsg 
966*f005ef32Sjsg 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
967*f005ef32Sjsg 			regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
968*f005ef32Sjsg 			lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
969*f005ef32Sjsg 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
970*f005ef32Sjsg 			regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
971*f005ef32Sjsg 			upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
972*f005ef32Sjsg 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
973*f005ef32Sjsg 			regUVD_VCPU_NONCACHE_SIZE0),
974*f005ef32Sjsg 			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
975*f005ef32Sjsg 		MMSCH_V4_0_INSERT_END();
976*f005ef32Sjsg 
977*f005ef32Sjsg 		header.vcn0.init_status = 0;
978*f005ef32Sjsg 		header.vcn0.table_offset = header.total_size;
979*f005ef32Sjsg 		header.vcn0.table_size = table_size;
980*f005ef32Sjsg 		header.total_size += table_size;
981*f005ef32Sjsg 
982*f005ef32Sjsg 		/* Send init table to mmsch */
983*f005ef32Sjsg 		size = sizeof(struct mmsch_v4_0_3_init_header);
984*f005ef32Sjsg 		table_loc = (uint32_t *)table->cpu_addr;
985*f005ef32Sjsg 		memcpy((void *)table_loc, &header, size);
986*f005ef32Sjsg 
987*f005ef32Sjsg 		ctx_addr = table->gpu_addr;
988*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
989*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
990*f005ef32Sjsg 
991*f005ef32Sjsg 		tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID);
992*f005ef32Sjsg 		tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
993*f005ef32Sjsg 		tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
994*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp);
995*f005ef32Sjsg 
996*f005ef32Sjsg 		size = header.total_size;
997*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size);
998*f005ef32Sjsg 
999*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);
1000*f005ef32Sjsg 
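		/*
		 * Kick MMSCH via the host mailbox, then poll the response
		 * register in 10us steps for up to ~1000us.
		 */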
1001*f005ef32Sjsg 		param = 0x00000001;
1002*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param);
1003*f005ef32Sjsg 		tmp = 0;
1004*f005ef32Sjsg 		timeout = 1000;
1005*f005ef32Sjsg 		resp = 0;
1006*f005ef32Sjsg 		expected = MMSCH_VF_MAILBOX_RESP__OK;
1007*f005ef32Sjsg 		while (resp != expected) {
1008*f005ef32Sjsg 			resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
1009*f005ef32Sjsg 			if (resp != 0)
1010*f005ef32Sjsg 				break;
1011*f005ef32Sjsg 
1012*f005ef32Sjsg 			udelay(10);
1013*f005ef32Sjsg 			tmp = tmp + 10;
1014*f005ef32Sjsg 			if (tmp >= timeout) {
1015*f005ef32Sjsg 				DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
1016*f005ef32Sjsg 					" waiting for regMMSCH_VF_MAILBOX_RESP "\
1017*f005ef32Sjsg 					"(expected=0x%08x, readback=0x%08x)\n",
1018*f005ef32Sjsg 					tmp, expected, resp);
1019*f005ef32Sjsg 				return -EBUSY;
1020*f005ef32Sjsg 			}
1021*f005ef32Sjsg 		}
1022*f005ef32Sjsg 
1023*f005ef32Sjsg 		enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
1024*f005ef32Sjsg 		init_status = ((struct mmsch_v4_0_3_init_header *)(table_loc))->vcn0.init_status;
1025*f005ef32Sjsg 		if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
1026*f005ef32Sjsg 					&& init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
1027*f005ef32Sjsg 			DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
1028*f005ef32Sjsg 				"status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
1029*f005ef32Sjsg 		}
1030*f005ef32Sjsg 	}
1031*f005ef32Sjsg 
1032*f005ef32Sjsg 	return 0;
1033*f005ef32Sjsg }
1034*f005ef32Sjsg 
1035*f005ef32Sjsg /**
1036*f005ef32Sjsg  * vcn_v4_0_3_start - VCN start
1037*f005ef32Sjsg  *
1038*f005ef32Sjsg  * @adev: amdgpu_device pointer
1039*f005ef32Sjsg  *
1040*f005ef32Sjsg  * Start VCN block
1041*f005ef32Sjsg  */
1042*f005ef32Sjsg static int vcn_v4_0_3_start(struct amdgpu_device *adev)
1043*f005ef32Sjsg {
1044*f005ef32Sjsg 	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
1045*f005ef32Sjsg 	struct amdgpu_ring *ring;
1046*f005ef32Sjsg 	int i, j, k, r, vcn_inst;
1047*f005ef32Sjsg 	uint32_t tmp;
1048*f005ef32Sjsg 
1049*f005ef32Sjsg 	if (adev->pm.dpm_enabled)
1050*f005ef32Sjsg 		amdgpu_dpm_enable_uvd(adev, true);
1051*f005ef32Sjsg 
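	/*
	 * Bring up each instance: DPG-capable configurations take the
	 * start_dpg_mode() path; otherwise the full static bring-up below is
	 * used.
	 */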
1052*f005ef32Sjsg 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1053*f005ef32Sjsg 		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
1054*f005ef32Sjsg 			r = vcn_v4_0_3_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
1055*f005ef32Sjsg 			continue;
1056*f005ef32Sjsg 		}
1057*f005ef32Sjsg 
1058*f005ef32Sjsg 		vcn_inst = GET_INST(VCN, i);
1059*f005ef32Sjsg 		/* set VCN status busy */
1060*f005ef32Sjsg 		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) |
1061*f005ef32Sjsg 		      UVD_STATUS__UVD_BUSY;
1062*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp);
1063*f005ef32Sjsg 
1064*f005ef32Sjsg 		/* SW clock gating */
1065*f005ef32Sjsg 		vcn_v4_0_3_disable_clock_gating(adev, i);
1066*f005ef32Sjsg 
1067*f005ef32Sjsg 		/* enable VCPU clock */
1068*f005ef32Sjsg 		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
1069*f005ef32Sjsg 			 UVD_VCPU_CNTL__CLK_EN_MASK,
1070*f005ef32Sjsg 			 ~UVD_VCPU_CNTL__CLK_EN_MASK);
1071*f005ef32Sjsg 
1072*f005ef32Sjsg 		/* disable master interrupt */
1073*f005ef32Sjsg 		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0,
1074*f005ef32Sjsg 			 ~UVD_MASTINT_EN__VCPU_EN_MASK);
1075*f005ef32Sjsg 
1076*f005ef32Sjsg 		/* enable LMI MC and UMC channels */
1077*f005ef32Sjsg 		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0,
1078*f005ef32Sjsg 			 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
1079*f005ef32Sjsg 
1080*f005ef32Sjsg 		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
1081*f005ef32Sjsg 		tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
1082*f005ef32Sjsg 		tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
1083*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
1084*f005ef32Sjsg 
1085*f005ef32Sjsg 		/* setup regUVD_LMI_CTRL */
1086*f005ef32Sjsg 		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL);
1087*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL,
1088*f005ef32Sjsg 			     tmp | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
1089*f005ef32Sjsg 				     UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
1090*f005ef32Sjsg 				     UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
1091*f005ef32Sjsg 				     UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
1092*f005ef32Sjsg 
1093*f005ef32Sjsg 		/* setup regUVD_MPC_CNTL */
1094*f005ef32Sjsg 		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL);
1095*f005ef32Sjsg 		tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
1096*f005ef32Sjsg 		tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
1097*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL, tmp);
1098*f005ef32Sjsg 
1099*f005ef32Sjsg 		/* setup UVD_MPC_SET_MUXA0 */
1100*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXA0,
1101*f005ef32Sjsg 			     ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
1102*f005ef32Sjsg 			      (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
1103*f005ef32Sjsg 			      (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
1104*f005ef32Sjsg 			      (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
1105*f005ef32Sjsg 
1106*f005ef32Sjsg 		/* setup UVD_MPC_SET_MUXB0 */
1107*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXB0,
1108*f005ef32Sjsg 			     ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
1109*f005ef32Sjsg 			      (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
1110*f005ef32Sjsg 			      (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
1111*f005ef32Sjsg 			      (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
1112*f005ef32Sjsg 
1113*f005ef32Sjsg 		/* setup UVD_MPC_SET_MUX */
1114*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUX,
1115*f005ef32Sjsg 			     ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
1116*f005ef32Sjsg 			      (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
1117*f005ef32Sjsg 			      (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
1118*f005ef32Sjsg 
1119*f005ef32Sjsg 		vcn_v4_0_3_mc_resume(adev, i);
1120*f005ef32Sjsg 
1121*f005ef32Sjsg 		/* VCN global tiling registers */
1122*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_GFX8_ADDR_CONFIG,
1123*f005ef32Sjsg 			     adev->gfx.config.gb_addr_config);
1124*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG,
1125*f005ef32Sjsg 			     adev->gfx.config.gb_addr_config);
1126*f005ef32Sjsg 
1127*f005ef32Sjsg 		/* unblock VCPU register access */
1128*f005ef32Sjsg 		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0,
1129*f005ef32Sjsg 			 ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
1130*f005ef32Sjsg 
1131*f005ef32Sjsg 		/* release VCPU reset to boot */
1132*f005ef32Sjsg 		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
1133*f005ef32Sjsg 			 ~UVD_VCPU_CNTL__BLK_RST_MASK);
1134*f005ef32Sjsg 
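		/*
		 * Poll UVD_STATUS for the VCPU report bit (bit 1): each attempt
		 * polls up to 100 times with a 10 ms delay, and if the VCPU still
		 * has not reported in, its block reset is toggled and the whole
		 * sequence is retried up to 10 times.
		 */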
1135*f005ef32Sjsg 		for (j = 0; j < 10; ++j) {
1136*f005ef32Sjsg 			uint32_t status;
1137*f005ef32Sjsg 
1138*f005ef32Sjsg 			for (k = 0; k < 100; ++k) {
1139*f005ef32Sjsg 				status = RREG32_SOC15(VCN, vcn_inst,
1140*f005ef32Sjsg 						      regUVD_STATUS);
1141*f005ef32Sjsg 				if (status & 2)
1142*f005ef32Sjsg 					break;
1143*f005ef32Sjsg 				mdelay(10);
1144*f005ef32Sjsg 			}
1145*f005ef32Sjsg 			r = 0;
1146*f005ef32Sjsg 			if (status & 2)
1147*f005ef32Sjsg 				break;
1148*f005ef32Sjsg 
1149*f005ef32Sjsg 			DRM_DEV_ERROR(adev->dev,
1150*f005ef32Sjsg 				"VCN decode not responding, trying to reset the VCPU!!!\n");
1151*f005ef32Sjsg 			WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
1152*f005ef32Sjsg 						  regUVD_VCPU_CNTL),
1153*f005ef32Sjsg 				 UVD_VCPU_CNTL__BLK_RST_MASK,
1154*f005ef32Sjsg 				 ~UVD_VCPU_CNTL__BLK_RST_MASK);
1155*f005ef32Sjsg 			mdelay(10);
1156*f005ef32Sjsg 			WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
1157*f005ef32Sjsg 						  regUVD_VCPU_CNTL),
1158*f005ef32Sjsg 				 0, ~UVD_VCPU_CNTL__BLK_RST_MASK);
1159*f005ef32Sjsg 
1160*f005ef32Sjsg 			mdelay(10);
1161*f005ef32Sjsg 			r = -1;
1162*f005ef32Sjsg 		}
1163*f005ef32Sjsg 
1164*f005ef32Sjsg 		if (r) {
1165*f005ef32Sjsg 			DRM_DEV_ERROR(adev->dev, "VCN decode not responding, giving up!!!\n");
1166*f005ef32Sjsg 			return r;
1167*f005ef32Sjsg 		}
1168*f005ef32Sjsg 
1169*f005ef32Sjsg 		/* enable master interrupt */
1170*f005ef32Sjsg 		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN),
1171*f005ef32Sjsg 			 UVD_MASTINT_EN__VCPU_EN_MASK,
1172*f005ef32Sjsg 			 ~UVD_MASTINT_EN__VCPU_EN_MASK);
1173*f005ef32Sjsg 
1174*f005ef32Sjsg 		/* clear the busy bit of VCN_STATUS */
1175*f005ef32Sjsg 		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0,
1176*f005ef32Sjsg 			 ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
1177*f005ef32Sjsg 
1178*f005ef32Sjsg 		ring = &adev->vcn.inst[i].ring_enc[0];
1179*f005ef32Sjsg 		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
1180*f005ef32Sjsg 
1181*f005ef32Sjsg 		/* program the RB_BASE for ring buffer */
1182*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO,
1183*f005ef32Sjsg 			     lower_32_bits(ring->gpu_addr));
1184*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI,
1185*f005ef32Sjsg 			     upper_32_bits(ring->gpu_addr));
1186*f005ef32Sjsg 
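		/* ring size is programmed in dwords */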
1187*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE,
1188*f005ef32Sjsg 			     ring->ring_size / sizeof(uint32_t));
1189*f005ef32Sjsg 
1190*f005ef32Sjsg 		/* resetting ring, fw should not check RB ring */
1191*f005ef32Sjsg 		tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
1192*f005ef32Sjsg 		tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK);
1193*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
1194*f005ef32Sjsg 
1195*f005ef32Sjsg 		/* Initialize the ring buffer's read and write pointers */
1196*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
1197*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
1198*f005ef32Sjsg 
1199*f005ef32Sjsg 		tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
1200*f005ef32Sjsg 		tmp |= VCN_RB_ENABLE__RB_EN_MASK;
1201*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
1202*f005ef32Sjsg 
1203*f005ef32Sjsg 		ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
1204*f005ef32Sjsg 		fw_shared->sq.queue_mode &=
1205*f005ef32Sjsg 			cpu_to_le32(~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF));
1206*f005ef32Sjsg 
1207*f005ef32Sjsg 	}
1208*f005ef32Sjsg 	return 0;
1209*f005ef32Sjsg }
1210*f005ef32Sjsg 
1211*f005ef32Sjsg /**
1212*f005ef32Sjsg  * vcn_v4_0_3_stop_dpg_mode - VCN stop with dpg mode
1213*f005ef32Sjsg  *
1214*f005ef32Sjsg  * @adev: amdgpu_device pointer
1215*f005ef32Sjsg  * @inst_idx: instance number index
1216*f005ef32Sjsg  *
1217*f005ef32Sjsg  * Stop VCN block with dpg mode
1218*f005ef32Sjsg  */
1219*f005ef32Sjsg static int vcn_v4_0_3_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
1220*f005ef32Sjsg {
1221*f005ef32Sjsg 	uint32_t tmp;
1222*f005ef32Sjsg 	int vcn_inst;
1223*f005ef32Sjsg 
1224*f005ef32Sjsg 	vcn_inst = GET_INST(VCN, inst_idx);
1225*f005ef32Sjsg 
1226*f005ef32Sjsg 	/* Wait for power status to be 1 */
1227*f005ef32Sjsg 	SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1,
1228*f005ef32Sjsg 			   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1229*f005ef32Sjsg 
1230*f005ef32Sjsg 	/* wait for read ptr to be equal to write ptr */
1231*f005ef32Sjsg 	tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
1232*f005ef32Sjsg 	SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);
1233*f005ef32Sjsg 
1234*f005ef32Sjsg 	SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1,
1235*f005ef32Sjsg 			   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1236*f005ef32Sjsg 
1237*f005ef32Sjsg 	/* disable dynamic power gating mode */
1238*f005ef32Sjsg 	WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0,
1239*f005ef32Sjsg 		 ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
1240*f005ef32Sjsg 	return 0;
1241*f005ef32Sjsg }
1242*f005ef32Sjsg 
1243*f005ef32Sjsg /**
1244*f005ef32Sjsg  * vcn_v4_0_3_stop - VCN stop
1245*f005ef32Sjsg  *
1246*f005ef32Sjsg  * @adev: amdgpu_device pointer
1247*f005ef32Sjsg  *
1248*f005ef32Sjsg  * Stop VCN block
1249*f005ef32Sjsg  */
1250*f005ef32Sjsg static int vcn_v4_0_3_stop(struct amdgpu_device *adev)
1251*f005ef32Sjsg {
1252*f005ef32Sjsg 	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
1253*f005ef32Sjsg 	int i, r = 0, vcn_inst;
1254*f005ef32Sjsg 	uint32_t tmp;
1255*f005ef32Sjsg 
1256*f005ef32Sjsg 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1257*f005ef32Sjsg 		vcn_inst = GET_INST(VCN, i);
1258*f005ef32Sjsg 
1259*f005ef32Sjsg 		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
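		/* tell firmware to hold off the DPG queue while this instance is being stopped */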
1260*f005ef32Sjsg 		fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
1261*f005ef32Sjsg 
1262*f005ef32Sjsg 		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
1263*f005ef32Sjsg 			vcn_v4_0_3_stop_dpg_mode(adev, i);
1264*f005ef32Sjsg 			continue;
1265*f005ef32Sjsg 		}
1266*f005ef32Sjsg 
1267*f005ef32Sjsg 		/* wait for vcn idle */
1268*f005ef32Sjsg 		r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS,
1269*f005ef32Sjsg 				       UVD_STATUS__IDLE, 0x7);
1270*f005ef32Sjsg 		if (r)
1271*f005ef32Sjsg 			goto Done;
1272*f005ef32Sjsg 
1273*f005ef32Sjsg 		tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
1274*f005ef32Sjsg 			UVD_LMI_STATUS__READ_CLEAN_MASK |
1275*f005ef32Sjsg 			UVD_LMI_STATUS__WRITE_CLEAN_MASK |
1276*f005ef32Sjsg 			UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
1277*f005ef32Sjsg 		r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
1278*f005ef32Sjsg 				       tmp);
1279*f005ef32Sjsg 		if (r)
1280*f005ef32Sjsg 			goto Done;
1281*f005ef32Sjsg 
1282*f005ef32Sjsg 		/* stall UMC channel */
1283*f005ef32Sjsg 		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2);
1284*f005ef32Sjsg 		tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
1285*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp);
1286*f005ef32Sjsg 		tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
1287*f005ef32Sjsg 			UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
1288*f005ef32Sjsg 		r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
1289*f005ef32Sjsg 				       tmp);
1290*f005ef32Sjsg 		if (r)
1291*f005ef32Sjsg 			goto Done;
1292*f005ef32Sjsg 
1293*f005ef32Sjsg 		/* Unblock VCPU Register access */
1294*f005ef32Sjsg 		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL),
1295*f005ef32Sjsg 			 UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
1296*f005ef32Sjsg 			 ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
1297*f005ef32Sjsg 
1298*f005ef32Sjsg 		/* release VCPU reset to boot */
1299*f005ef32Sjsg 		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
1300*f005ef32Sjsg 			 UVD_VCPU_CNTL__BLK_RST_MASK,
1301*f005ef32Sjsg 			 ~UVD_VCPU_CNTL__BLK_RST_MASK);
1302*f005ef32Sjsg 
1303*f005ef32Sjsg 		/* disable VCPU clock */
1304*f005ef32Sjsg 		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
1305*f005ef32Sjsg 			 ~(UVD_VCPU_CNTL__CLK_EN_MASK));
1306*f005ef32Sjsg 
1307*f005ef32Sjsg 		/* reset LMI UMC/LMI/VCPU */
1308*f005ef32Sjsg 		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
1309*f005ef32Sjsg 		tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
1310*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
1311*f005ef32Sjsg 
1312*f005ef32Sjsg 		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
1313*f005ef32Sjsg 		tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
1314*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
1315*f005ef32Sjsg 
1316*f005ef32Sjsg 		/* clear VCN status */
1317*f005ef32Sjsg 		WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0);
1318*f005ef32Sjsg 
1319*f005ef32Sjsg 		/* apply HW clock gating */
1320*f005ef32Sjsg 		vcn_v4_0_3_enable_clock_gating(adev, i);
1321*f005ef32Sjsg 	}
1322*f005ef32Sjsg Done:
1323*f005ef32Sjsg 	if (adev->pm.dpm_enabled)
1324*f005ef32Sjsg 		amdgpu_dpm_enable_uvd(adev, false);
1325*f005ef32Sjsg 
1326*f005ef32Sjsg 	return 0;
1327*f005ef32Sjsg }
1328*f005ef32Sjsg 
1329*f005ef32Sjsg /**
1330*f005ef32Sjsg  * vcn_v4_0_3_pause_dpg_mode - VCN pause with dpg mode
1331*f005ef32Sjsg  *
1332*f005ef32Sjsg  * @adev: amdgpu_device pointer
1333*f005ef32Sjsg  * @inst_idx: instance number index
1334*f005ef32Sjsg  * @new_state: pause state
1335*f005ef32Sjsg  *
1336*f005ef32Sjsg  * Pause dpg mode for VCN block
1337*f005ef32Sjsg  */
1338*f005ef32Sjsg static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
1339*f005ef32Sjsg 				struct dpg_pause_state *new_state)
1340*f005ef32Sjsg {
1341*f005ef32Sjsg 
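	/* DPG pause transitions are not handled on VCN 4.0.3; nothing to do here */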
1342*f005ef32Sjsg 	return 0;
1343*f005ef32Sjsg }
1344*f005ef32Sjsg 
1345*f005ef32Sjsg /**
1346*f005ef32Sjsg  * vcn_v4_0_3_unified_ring_get_rptr - get unified read pointer
1347*f005ef32Sjsg  *
1348*f005ef32Sjsg  * @ring: amdgpu_ring pointer
1349*f005ef32Sjsg  *
1350*f005ef32Sjsg  * Returns the current hardware unified read pointer
1351*f005ef32Sjsg  */
1352*f005ef32Sjsg static uint64_t vcn_v4_0_3_unified_ring_get_rptr(struct amdgpu_ring *ring)
1353*f005ef32Sjsg {
1354*f005ef32Sjsg 	struct amdgpu_device *adev = ring->adev;
1355*f005ef32Sjsg 
1356*f005ef32Sjsg 	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
1357*f005ef32Sjsg 		DRM_ERROR("wrong ring id is identified in %s", __func__);
1358*f005ef32Sjsg 
1359*f005ef32Sjsg 	return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR);
1360*f005ef32Sjsg }
1361*f005ef32Sjsg 
1362*f005ef32Sjsg /**
1363*f005ef32Sjsg  * vcn_v4_0_3_unified_ring_get_wptr - get unified write pointer
1364*f005ef32Sjsg  *
1365*f005ef32Sjsg  * @ring: amdgpu_ring pointer
1366*f005ef32Sjsg  *
1367*f005ef32Sjsg  * Returns the current hardware unified write pointer
1368*f005ef32Sjsg  */
1369*f005ef32Sjsg static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring)
1370*f005ef32Sjsg {
1371*f005ef32Sjsg 	struct amdgpu_device *adev = ring->adev;
1372*f005ef32Sjsg 
1373*f005ef32Sjsg 	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
1374*f005ef32Sjsg 		DRM_ERROR("wrong ring id is identified in %s", __func__);
1375*f005ef32Sjsg 
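	/* when doorbells are in use the CPU-side wptr copy is authoritative; otherwise read the register back */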
1376*f005ef32Sjsg 	if (ring->use_doorbell)
1377*f005ef32Sjsg 		return *ring->wptr_cpu_addr;
1378*f005ef32Sjsg 	else
1379*f005ef32Sjsg 		return RREG32_SOC15(VCN, GET_INST(VCN, ring->me),
1380*f005ef32Sjsg 				    regUVD_RB_WPTR);
1381*f005ef32Sjsg }
1382*f005ef32Sjsg 
1383*f005ef32Sjsg /**
1384*f005ef32Sjsg  * vcn_v4_0_3_unified_ring_set_wptr - set enc write pointer
1385*f005ef32Sjsg  *
1386*f005ef32Sjsg  * @ring: amdgpu_ring pointer
1387*f005ef32Sjsg  *
1388*f005ef32Sjsg  * Commits the enc write pointer to the hardware
1389*f005ef32Sjsg  */
1390*f005ef32Sjsg static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring)
1391*f005ef32Sjsg {
1392*f005ef32Sjsg 	struct amdgpu_device *adev = ring->adev;
1393*f005ef32Sjsg 
1394*f005ef32Sjsg 	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
1395*f005ef32Sjsg 		DRM_ERROR("wrong ring id is identified in %s", __func__);
1396*f005ef32Sjsg 
1397*f005ef32Sjsg 	if (ring->use_doorbell) {
1398*f005ef32Sjsg 		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
1399*f005ef32Sjsg 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
1400*f005ef32Sjsg 	} else {
1401*f005ef32Sjsg 		WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR,
1402*f005ef32Sjsg 			     lower_32_bits(ring->wptr));
1403*f005ef32Sjsg 	}
1404*f005ef32Sjsg }
1405*f005ef32Sjsg 
1406*f005ef32Sjsg static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = {
1407*f005ef32Sjsg 	.type = AMDGPU_RING_TYPE_VCN_ENC,
1408*f005ef32Sjsg 	.align_mask = 0x3f,
1409*f005ef32Sjsg 	.nop = VCN_ENC_CMD_NO_OP,
1410*f005ef32Sjsg 	.get_rptr = vcn_v4_0_3_unified_ring_get_rptr,
1411*f005ef32Sjsg 	.get_wptr = vcn_v4_0_3_unified_ring_get_wptr,
1412*f005ef32Sjsg 	.set_wptr = vcn_v4_0_3_unified_ring_set_wptr,
1413*f005ef32Sjsg 	.emit_frame_size =
1414*f005ef32Sjsg 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1415*f005ef32Sjsg 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1416*f005ef32Sjsg 		4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
1417*f005ef32Sjsg 		5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
1418*f005ef32Sjsg 		1, /* vcn_v2_0_enc_ring_insert_end */
1419*f005ef32Sjsg 	.emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
1420*f005ef32Sjsg 	.emit_ib = vcn_v2_0_enc_ring_emit_ib,
1421*f005ef32Sjsg 	.emit_fence = vcn_v2_0_enc_ring_emit_fence,
1422*f005ef32Sjsg 	.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
1423*f005ef32Sjsg 	.test_ring = amdgpu_vcn_enc_ring_test_ring,
1424*f005ef32Sjsg 	.test_ib = amdgpu_vcn_unified_ring_test_ib,
1425*f005ef32Sjsg 	.insert_nop = amdgpu_ring_insert_nop,
1426*f005ef32Sjsg 	.insert_end = vcn_v2_0_enc_ring_insert_end,
1427*f005ef32Sjsg 	.pad_ib = amdgpu_ring_generic_pad_ib,
1428*f005ef32Sjsg 	.begin_use = amdgpu_vcn_ring_begin_use,
1429*f005ef32Sjsg 	.end_use = amdgpu_vcn_ring_end_use,
1430*f005ef32Sjsg 	.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
1431*f005ef32Sjsg 	.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
1432*f005ef32Sjsg 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1433*f005ef32Sjsg };
1434*f005ef32Sjsg 
1435*f005ef32Sjsg /**
1436*f005ef32Sjsg  * vcn_v4_0_3_set_unified_ring_funcs - set unified ring functions
1437*f005ef32Sjsg  *
1438*f005ef32Sjsg  * @adev: amdgpu_device pointer
1439*f005ef32Sjsg  *
1440*f005ef32Sjsg  * Set unified ring functions
1441*f005ef32Sjsg  */
1442*f005ef32Sjsg static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev)
1443*f005ef32Sjsg {
1444*f005ef32Sjsg 	int i, vcn_inst;
1445*f005ef32Sjsg 
1446*f005ef32Sjsg 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1447*f005ef32Sjsg 		adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_3_unified_ring_vm_funcs;
1448*f005ef32Sjsg 		adev->vcn.inst[i].ring_enc[0].me = i;
1449*f005ef32Sjsg 		vcn_inst = GET_INST(VCN, i);
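		/* derive which AID this instance belongs to from its physical instance number */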
1450*f005ef32Sjsg 		adev->vcn.inst[i].aid_id =
1451*f005ef32Sjsg 			vcn_inst / adev->vcn.num_inst_per_aid;
1452*f005ef32Sjsg 	}
1453*f005ef32Sjsg 	DRM_DEV_INFO(adev->dev, "VCN decode is enabled in VM mode\n");
1454*f005ef32Sjsg }
1455*f005ef32Sjsg 
1456*f005ef32Sjsg /**
1457*f005ef32Sjsg  * vcn_v4_0_3_is_idle - check whether VCN block is idle
1458*f005ef32Sjsg  *
1459*f005ef32Sjsg  * @handle: amdgpu_device pointer
1460*f005ef32Sjsg  *
1461*f005ef32Sjsg  * Check whether VCN block is idle
1462*f005ef32Sjsg  */
1463*f005ef32Sjsg static bool vcn_v4_0_3_is_idle(void *handle)
1464*f005ef32Sjsg {
1465*f005ef32Sjsg 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1466*f005ef32Sjsg 	int i, ret = 1;
1467*f005ef32Sjsg 
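	/* report idle only if every VCN instance reports UVD_STATUS__IDLE */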
1468*f005ef32Sjsg 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1469*f005ef32Sjsg 		ret &= (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) ==
1470*f005ef32Sjsg 			UVD_STATUS__IDLE);
1471*f005ef32Sjsg 	}
1472*f005ef32Sjsg 
1473*f005ef32Sjsg 	return ret;
1474*f005ef32Sjsg }
1475*f005ef32Sjsg 
1476*f005ef32Sjsg /**
1477*f005ef32Sjsg  * vcn_v4_0_3_wait_for_idle - wait for VCN block idle
1478*f005ef32Sjsg  *
1479*f005ef32Sjsg  * @handle: amdgpu_device pointer
1480*f005ef32Sjsg  *
1481*f005ef32Sjsg  * Wait for VCN block idle
1482*f005ef32Sjsg  */
1483*f005ef32Sjsg static int vcn_v4_0_3_wait_for_idle(void *handle)
1484*f005ef32Sjsg {
1485*f005ef32Sjsg 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1486*f005ef32Sjsg 	int i, ret = 0;
1487*f005ef32Sjsg 
1488*f005ef32Sjsg 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1489*f005ef32Sjsg 		ret = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, i), regUVD_STATUS,
1490*f005ef32Sjsg 					 UVD_STATUS__IDLE, UVD_STATUS__IDLE);
1491*f005ef32Sjsg 		if (ret)
1492*f005ef32Sjsg 			return ret;
1493*f005ef32Sjsg 	}
1494*f005ef32Sjsg 
1495*f005ef32Sjsg 	return ret;
1496*f005ef32Sjsg }
1497*f005ef32Sjsg 
1498*f005ef32Sjsg /** vcn_v4_0_3_set_clockgating_state - set VCN block clockgating state
1499*f005ef32Sjsg  *
1500*f005ef32Sjsg  * @handle: amdgpu_device pointer
1501*f005ef32Sjsg  * @state: clock gating state
1502*f005ef32Sjsg  *
1503*f005ef32Sjsg  * Set VCN block clockgating state
1504*f005ef32Sjsg  */
1505*f005ef32Sjsg static int vcn_v4_0_3_set_clockgating_state(void *handle,
1506*f005ef32Sjsg 					  enum amd_clockgating_state state)
1507*f005ef32Sjsg {
1508*f005ef32Sjsg 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1509*f005ef32Sjsg 	bool enable = state == AMD_CG_STATE_GATE;
1510*f005ef32Sjsg 	int i;
1511*f005ef32Sjsg 
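	/* clock gating may only be enabled while an instance is idle; otherwise report busy */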
1512*f005ef32Sjsg 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1513*f005ef32Sjsg 		if (enable) {
1514*f005ef32Sjsg 			if (RREG32_SOC15(VCN, GET_INST(VCN, i),
1515*f005ef32Sjsg 					 regUVD_STATUS) != UVD_STATUS__IDLE)
1516*f005ef32Sjsg 				return -EBUSY;
1517*f005ef32Sjsg 			vcn_v4_0_3_enable_clock_gating(adev, i);
1518*f005ef32Sjsg 		} else {
1519*f005ef32Sjsg 			vcn_v4_0_3_disable_clock_gating(adev, i);
1520*f005ef32Sjsg 		}
1521*f005ef32Sjsg 	}
1522*f005ef32Sjsg 	return 0;
1523*f005ef32Sjsg }
1524*f005ef32Sjsg 
1525*f005ef32Sjsg /**
1526*f005ef32Sjsg  * vcn_v4_0_3_set_powergating_state - set VCN block powergating state
1527*f005ef32Sjsg  *
1528*f005ef32Sjsg  * @handle: amdgpu_device pointer
1529*f005ef32Sjsg  * @state: power gating state
1530*f005ef32Sjsg  *
1531*f005ef32Sjsg  * Set VCN block powergating state
1532*f005ef32Sjsg  */
1533*f005ef32Sjsg static int vcn_v4_0_3_set_powergating_state(void *handle,
1534*f005ef32Sjsg 					  enum amd_powergating_state state)
1535*f005ef32Sjsg {
1536*f005ef32Sjsg 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1537*f005ef32Sjsg 	int ret;
1538*f005ef32Sjsg 
1539*f005ef32Sjsg 	/* for SRIOV, guest should not control VCN Power-gating
1540*f005ef32Sjsg 	 * MMSCH FW should control Power-gating and clock-gating
1541*f005ef32Sjsg 	 * guest should avoid touching CGC and PG
1542*f005ef32Sjsg 	 */
1543*f005ef32Sjsg 	if (amdgpu_sriov_vf(adev)) {
1544*f005ef32Sjsg 		adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
1545*f005ef32Sjsg 		return 0;
1546*f005ef32Sjsg 	}
1547*f005ef32Sjsg 
1548*f005ef32Sjsg 	if (state == adev->vcn.cur_state)
1549*f005ef32Sjsg 		return 0;
1550*f005ef32Sjsg 
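	/* gating maps to a full block stop, ungating to a restart; cache the new state only on success */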
1551*f005ef32Sjsg 	if (state == AMD_PG_STATE_GATE)
1552*f005ef32Sjsg 		ret = vcn_v4_0_3_stop(adev);
1553*f005ef32Sjsg 	else
1554*f005ef32Sjsg 		ret = vcn_v4_0_3_start(adev);
1555*f005ef32Sjsg 
1556*f005ef32Sjsg 	if (!ret)
1557*f005ef32Sjsg 		adev->vcn.cur_state = state;
1558*f005ef32Sjsg 
1559*f005ef32Sjsg 	return ret;
1560*f005ef32Sjsg }
1561*f005ef32Sjsg 
1562*f005ef32Sjsg /**
1563*f005ef32Sjsg  * vcn_v4_0_3_set_interrupt_state - set VCN block interrupt state
1564*f005ef32Sjsg  *
1565*f005ef32Sjsg  * @adev: amdgpu_device pointer
1566*f005ef32Sjsg  * @source: interrupt sources
1567*f005ef32Sjsg  * @type: interrupt types
1568*f005ef32Sjsg  * @state: interrupt states
1569*f005ef32Sjsg  *
1570*f005ef32Sjsg  * Set VCN block interrupt state
1571*f005ef32Sjsg  */
1572*f005ef32Sjsg static int vcn_v4_0_3_set_interrupt_state(struct amdgpu_device *adev,
1573*f005ef32Sjsg 					struct amdgpu_irq_src *source,
1574*f005ef32Sjsg 					unsigned int type,
1575*f005ef32Sjsg 					enum amdgpu_interrupt_state state)
1576*f005ef32Sjsg {
1577*f005ef32Sjsg 	return 0;
1578*f005ef32Sjsg }
1579*f005ef32Sjsg 
1580*f005ef32Sjsg /**
1581*f005ef32Sjsg  * vcn_v4_0_3_process_interrupt - process VCN block interrupt
1582*f005ef32Sjsg  *
1583*f005ef32Sjsg  * @adev: amdgpu_device pointer
1584*f005ef32Sjsg  * @source: interrupt sources
1585*f005ef32Sjsg  * @entry: interrupt entry from clients and sources
1586*f005ef32Sjsg  *
1587*f005ef32Sjsg  * Process VCN block interrupt
1588*f005ef32Sjsg  */
1589*f005ef32Sjsg static int vcn_v4_0_3_process_interrupt(struct amdgpu_device *adev,
1590*f005ef32Sjsg 				      struct amdgpu_irq_src *source,
1591*f005ef32Sjsg 				      struct amdgpu_iv_entry *entry)
1592*f005ef32Sjsg {
1593*f005ef32Sjsg 	uint32_t i, inst;
1594*f005ef32Sjsg 
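	/* look up the physical AID for the IH node id, then find the VCN instance that lives on it */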
1595*f005ef32Sjsg 	i = node_id_to_phys_map[entry->node_id];
1596*f005ef32Sjsg 
1597*f005ef32Sjsg 	DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n");
1598*f005ef32Sjsg 
1599*f005ef32Sjsg 	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst)
1600*f005ef32Sjsg 		if (adev->vcn.inst[inst].aid_id == i)
1601*f005ef32Sjsg 			break;
1602*f005ef32Sjsg 
1603*f005ef32Sjsg 	if (inst >= adev->vcn.num_vcn_inst) {
1604*f005ef32Sjsg 		dev_WARN_ONCE(adev->dev, 1,
1605*f005ef32Sjsg 			      "Interrupt received for unknown VCN instance %d",
1606*f005ef32Sjsg 			      entry->node_id);
1607*f005ef32Sjsg 		return 0;
1608*f005ef32Sjsg 	}
1609*f005ef32Sjsg 
1610*f005ef32Sjsg 	switch (entry->src_id) {
1611*f005ef32Sjsg 	case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
1612*f005ef32Sjsg 		amdgpu_fence_process(&adev->vcn.inst[inst].ring_enc[0]);
1613*f005ef32Sjsg 		break;
1614*f005ef32Sjsg 	default:
1615*f005ef32Sjsg 		DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
1616*f005ef32Sjsg 			  entry->src_id, entry->src_data[0]);
1617*f005ef32Sjsg 		break;
1618*f005ef32Sjsg 	}
1619*f005ef32Sjsg 
1620*f005ef32Sjsg 	return 0;
1621*f005ef32Sjsg }
1622*f005ef32Sjsg 
1623*f005ef32Sjsg static const struct amdgpu_irq_src_funcs vcn_v4_0_3_irq_funcs = {
1624*f005ef32Sjsg 	.set = vcn_v4_0_3_set_interrupt_state,
1625*f005ef32Sjsg 	.process = vcn_v4_0_3_process_interrupt,
1626*f005ef32Sjsg };
1627*f005ef32Sjsg 
1628*f005ef32Sjsg /**
1629*f005ef32Sjsg  * vcn_v4_0_3_set_irq_funcs - set VCN block interrupt irq functions
1630*f005ef32Sjsg  *
1631*f005ef32Sjsg  * @adev: amdgpu_device pointer
1632*f005ef32Sjsg  *
1633*f005ef32Sjsg  * Set VCN block interrupt irq functions
1634*f005ef32Sjsg  */
1635*f005ef32Sjsg static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
1636*f005ef32Sjsg {
1637*f005ef32Sjsg 	int i;
1638*f005ef32Sjsg 
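	/* all instances share the first instance's IRQ source; account one interrupt type per instance */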
1639*f005ef32Sjsg 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1640*f005ef32Sjsg 		adev->vcn.inst->irq.num_types++;
1641*f005ef32Sjsg 	}
1642*f005ef32Sjsg 	adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs;
1643*f005ef32Sjsg }
1644*f005ef32Sjsg 
1645*f005ef32Sjsg static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = {
1646*f005ef32Sjsg 	.name = "vcn_v4_0_3",
1647*f005ef32Sjsg 	.early_init = vcn_v4_0_3_early_init,
1648*f005ef32Sjsg 	.late_init = NULL,
1649*f005ef32Sjsg 	.sw_init = vcn_v4_0_3_sw_init,
1650*f005ef32Sjsg 	.sw_fini = vcn_v4_0_3_sw_fini,
1651*f005ef32Sjsg 	.hw_init = vcn_v4_0_3_hw_init,
1652*f005ef32Sjsg 	.hw_fini = vcn_v4_0_3_hw_fini,
1653*f005ef32Sjsg 	.suspend = vcn_v4_0_3_suspend,
1654*f005ef32Sjsg 	.resume = vcn_v4_0_3_resume,
1655*f005ef32Sjsg 	.is_idle = vcn_v4_0_3_is_idle,
1656*f005ef32Sjsg 	.wait_for_idle = vcn_v4_0_3_wait_for_idle,
1657*f005ef32Sjsg 	.check_soft_reset = NULL,
1658*f005ef32Sjsg 	.pre_soft_reset = NULL,
1659*f005ef32Sjsg 	.soft_reset = NULL,
1660*f005ef32Sjsg 	.post_soft_reset = NULL,
1661*f005ef32Sjsg 	.set_clockgating_state = vcn_v4_0_3_set_clockgating_state,
1662*f005ef32Sjsg 	.set_powergating_state = vcn_v4_0_3_set_powergating_state,
1663*f005ef32Sjsg };
1664*f005ef32Sjsg 
1665*f005ef32Sjsg const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block = {
1666*f005ef32Sjsg 	.type = AMD_IP_BLOCK_TYPE_VCN,
1667*f005ef32Sjsg 	.major = 4,
1668*f005ef32Sjsg 	.minor = 0,
1669*f005ef32Sjsg 	.rev = 3,
1670*f005ef32Sjsg 	.funcs = &vcn_v4_0_3_ip_funcs,
1671*f005ef32Sjsg };
1672*f005ef32Sjsg 
1673*f005ef32Sjsg static const struct amdgpu_ras_err_status_reg_entry vcn_v4_0_3_ue_reg_list[] = {
1674*f005ef32Sjsg 	{AMDGPU_RAS_REG_ENTRY(VCN, 0, regVCN_UE_ERR_STATUS_LO_VIDD, regVCN_UE_ERR_STATUS_HI_VIDD),
1675*f005ef32Sjsg 	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "VIDD"},
1676*f005ef32Sjsg 	{AMDGPU_RAS_REG_ENTRY(VCN, 0, regVCN_UE_ERR_STATUS_LO_VIDV, regVCN_UE_ERR_STATUS_HI_VIDV),
1677*f005ef32Sjsg 	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "VIDV"},
1678*f005ef32Sjsg };
1679*f005ef32Sjsg 
1680*f005ef32Sjsg static void vcn_v4_0_3_inst_query_ras_error_count(struct amdgpu_device *adev,
1681*f005ef32Sjsg 						  uint32_t vcn_inst,
1682*f005ef32Sjsg 						  void *ras_err_status)
1683*f005ef32Sjsg {
1684*f005ef32Sjsg 	struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
1685*f005ef32Sjsg 
1686*f005ef32Sjsg 	/* vcn v4_0_3 only supports querying uncorrectable errors */
1687*f005ef32Sjsg 	amdgpu_ras_inst_query_ras_error_count(adev,
1688*f005ef32Sjsg 			vcn_v4_0_3_ue_reg_list,
1689*f005ef32Sjsg 			ARRAY_SIZE(vcn_v4_0_3_ue_reg_list),
1690*f005ef32Sjsg 			NULL, 0, GET_INST(VCN, vcn_inst),
1691*f005ef32Sjsg 			AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
1692*f005ef32Sjsg 			&err_data->ue_count);
1693*f005ef32Sjsg }
1694*f005ef32Sjsg 
1695*f005ef32Sjsg static void vcn_v4_0_3_query_ras_error_count(struct amdgpu_device *adev,
1696*f005ef32Sjsg 					     void *ras_err_status)
1697*f005ef32Sjsg {
1698*f005ef32Sjsg 	uint32_t i;
1699*f005ef32Sjsg 
1700*f005ef32Sjsg 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
1701*f005ef32Sjsg 		dev_warn(adev->dev, "VCN RAS is not supported\n");
1702*f005ef32Sjsg 		return;
1703*f005ef32Sjsg 	}
1704*f005ef32Sjsg 
1705*f005ef32Sjsg 	for (i = 0; i < adev->vcn.num_vcn_inst; i++)
1706*f005ef32Sjsg 		vcn_v4_0_3_inst_query_ras_error_count(adev, i, ras_err_status);
1707*f005ef32Sjsg }
1708*f005ef32Sjsg 
1709*f005ef32Sjsg static void vcn_v4_0_3_inst_reset_ras_error_count(struct amdgpu_device *adev,
1710*f005ef32Sjsg 						  uint32_t vcn_inst)
1711*f005ef32Sjsg {
1712*f005ef32Sjsg 	amdgpu_ras_inst_reset_ras_error_count(adev,
1713*f005ef32Sjsg 					vcn_v4_0_3_ue_reg_list,
1714*f005ef32Sjsg 					ARRAY_SIZE(vcn_v4_0_3_ue_reg_list),
1715*f005ef32Sjsg 					GET_INST(VCN, vcn_inst));
1716*f005ef32Sjsg }
1717*f005ef32Sjsg 
1718*f005ef32Sjsg static void vcn_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev)
1719*f005ef32Sjsg {
1720*f005ef32Sjsg 	uint32_t i;
1721*f005ef32Sjsg 
1722*f005ef32Sjsg 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
1723*f005ef32Sjsg 		dev_warn(adev->dev, "VCN RAS is not supported\n");
1724*f005ef32Sjsg 		return;
1725*f005ef32Sjsg 	}
1726*f005ef32Sjsg 
1727*f005ef32Sjsg 	for (i = 0; i < adev->vcn.num_vcn_inst; i++)
1728*f005ef32Sjsg 		vcn_v4_0_3_inst_reset_ras_error_count(adev, i);
1729*f005ef32Sjsg }
1730*f005ef32Sjsg 
1731*f005ef32Sjsg static const struct amdgpu_ras_block_hw_ops vcn_v4_0_3_ras_hw_ops = {
1732*f005ef32Sjsg 	.query_ras_error_count = vcn_v4_0_3_query_ras_error_count,
1733*f005ef32Sjsg 	.reset_ras_error_count = vcn_v4_0_3_reset_ras_error_count,
1734*f005ef32Sjsg };
1735*f005ef32Sjsg 
1736*f005ef32Sjsg static struct amdgpu_vcn_ras vcn_v4_0_3_ras = {
1737*f005ef32Sjsg 	.ras_block = {
1738*f005ef32Sjsg 		.hw_ops = &vcn_v4_0_3_ras_hw_ops,
1739*f005ef32Sjsg 	},
1740*f005ef32Sjsg };
1741*f005ef32Sjsg 
1742*f005ef32Sjsg static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
1743*f005ef32Sjsg {
1744*f005ef32Sjsg 	adev->vcn.ras = &vcn_v4_0_3_ras;
1745*f005ef32Sjsg }
1746*f005ef32Sjsg 
1747*f005ef32Sjsg static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev,
1748*f005ef32Sjsg 				  int inst_idx, bool indirect)
1749*f005ef32Sjsg {
1750*f005ef32Sjsg 	uint32_t tmp;
1751*f005ef32Sjsg 
1752*f005ef32Sjsg 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
1753*f005ef32Sjsg 		return;
1754*f005ef32Sjsg 
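	/* enable rearm, IH routing, PMI and stall for VCPU/VCODEC RAS events, then unmask the RAS system interrupt */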
1755*f005ef32Sjsg 	tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
1756*f005ef32Sjsg 	      VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
1757*f005ef32Sjsg 	      VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
1758*f005ef32Sjsg 	      VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
1759*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx,
1760*f005ef32Sjsg 			      SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL),
1761*f005ef32Sjsg 			      tmp, 0, indirect);
1762*f005ef32Sjsg 
1763*f005ef32Sjsg 	tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
1764*f005ef32Sjsg 	WREG32_SOC15_DPG_MODE(inst_idx,
1765*f005ef32Sjsg 			      SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN),
1766*f005ef32Sjsg 			      tmp, 0, indirect);
1767*f005ef32Sjsg }
1768