/*	$NetBSD: amdgpu_vce_v4_0.c,v 1.3 2021/12/19 12:21:29 riastradh Exp $	*/

/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_vce_v4_0.c,v 1.3 2021/12/19 12:21:29 riastradh Exp $");

#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vce/vce_4_0_offset.h"
#include "vce/vce_4_0_default.h"
#include "vce/vce_4_0_sh_mask.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"

#include "ivsrcid/vce/irqsrcs_vce_4_0.h"

#include <linux/nbsd-namespace.h>

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
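/*
 * The VCPU address space is split into three regions, programmed through
 * VCE_VCPU_CACHE_OFFSETn/SIZEn in vce_v4_0_mc_resume() and
 * vce_v4_0_sriov_start(): region 0 holds the firmware image (FW_SIZE),
 * region 1 the stack (STACK_SIZE) and region 2 the per-handle data
 * segments (DATA_SIZE).
 */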

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->me == 0)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
	else if (ring->me == 1)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];

	if (ring->me == 0)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
	else if (ring->me == 1)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		return;
	}

	if (ring->me == 0)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
			lower_32_bits(ring->wptr));
	else if (ring->me == 1)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
			lower_32_bits(ring->wptr));
	else
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
			lower_32_bits(ring->wptr));
}

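/*
 * Poll VCE_STATUS until the VCPU firmware reports itself loaded.  Each
 * attempt polls for up to a second (100 * 10ms); if that fails, the ECPU
 * is pulsed through soft reset and the poll retried, up to ten attempts,
 * before giving up with -ETIMEDOUT.
 */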
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

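/*
 * Hand the prepared init table over to the MMSCH (multimedia scheduler)
 * firmware: publish the table's GPU address, VMID and size, then write
 * the start request (0x10000001) to the host mailbox and poll the
 * response mailbox for the completion pattern (0x10000002) for up to
 * ~10ms (1000 * 10us) before giving up with -EBUSY.
 */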
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}

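/*
 * Under SR-IOV the driver cannot program the VCE registers directly at
 * start-up.  Instead, the whole ring setup and MC_RESUME sequence is
 * recorded as MMSCH packets (direct write, read-modify-write and poll)
 * in a shared init table, which the MMSCH firmware replays on our
 * behalf.  The table is built only once, guarded by the
 * vce_table_offset/vce_table_size pair in the header, and is kicked off
 * through vce_v4_0_mmsch_start().
 */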
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGIN OF MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(tmr_mc_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
						offset & ~0x0f000000);
		}
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
						(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
						(adev->vce.gpu_addr >> 40) & 0xff);

		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					(offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					(offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
			~VCE_STATUS__JOB_BUSY_MASK);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}

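/**
 * vce_v4_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the VCPU, hold the ECPU in soft reset and clear VCE_STATUS.
 */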
static int vce_v4_0_stop(struct amdgpu_device *adev)
{
	/* Disable VCPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear VCE_STATUS */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}

static int vce_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) /* currently only the first VCE ring supports SRIOV */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}


static int vce_v4_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vce.saved_bo)
			return -ENOMEM;

		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		snprintf(ring->name, sizeof(ring->name), "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SRIOV */
			ring->use_doorbell = true;

			/* currently only use the first encoding ring for sriov,
			 * so set unused location for other unused rings.
			 */
			if (i == 0)
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
			else
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	r = amdgpu_vce_entity_init(adev);
	if (r)
		return r;

	r = amdgpu_virt_alloc_mm_table(adev);
	if (r)
		return r;

	return r;
}

static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		kvfree(adev->vce.saved_bo);
		adev->vce.saved_bo = NULL;
	}

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
		if (r)
			return r;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	if (!amdgpu_sriov_vf(adev)) {
		/* vce_v4_0_wait_for_idle(handle); */
		vce_v4_0_stop(adev);
	} else {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
	}

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].sched.ready = false;

	return 0;
}

static int vce_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_fromio(adev->vce.saved_bo, ptr, size);
	}

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v4_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_toio(ptr, adev->vce.saved_bo, size);
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	return vce_v4_0_hw_init(adev);
}

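/**
 * vce_v4_0_mc_resume - program VCE memory controller state
 *
 * @adev: amdgpu_device pointer
 *
 * Set up clock gating, the LMI, and the three VCPU cache regions
 * (firmware, stack and data).  With PSP firmware loading the firmware
 * region is taken from the PSP TMR address; otherwise it points at the
 * VCE BO.
 */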
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;
	uint64_t tmr_mc_addr;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
										adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(tmr_mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(tmr_mc_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	}

	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for 1st instance, 0x10 for 2nd instance).
	 *
	 *VCE_STATUS
	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 *|----+----+-----------+----+----+----+----------+---------+----|
	 *|bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bits 3--6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}

static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	if (tmp != data)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE);
	int i;

	if ((adev->asic_type == CHIP_POLARIS10) ||
		(adev->asic_type == CHIP_TONGA) ||
		(adev->asic_type == CHIP_FIJI))
		vce_v4_0_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
		}

		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}
#endif

static int vce_v4_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (state == AMD_PG_STATE_GATE)
		return vce_v4_0_stop(adev);
	else
		return vce_v4_0_start(adev);
}

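/*
 * An IB submission is a five-dword VCE_CMD_IB_VM packet: command, VMID,
 * IB GPU address (low/high dwords) and IB length in dwords.  This is
 * what .emit_ib_size = 5 in the ring funcs accounts for.
 */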
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
					struct amdgpu_ib *ib, uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
			u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}

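/*
 * Emit a VCE_CMD_REG_WAIT packet that stalls the ring until
 * (reg & mask) == val.  The register's dword index is converted to a
 * byte offset (reg << 2), which appears to be the form the engine
 * expects; vce_v4_0_emit_wreg() below does the same.
 */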
static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				   uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, val);
}

static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for reg writes */
	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
			       lower_32_bits(pd_addr), 0xffffffff);
}

static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
			       uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (!amdgpu_sriov_vf(adev)) {
		if (state == AMDGPU_IRQ_STATE_ENABLE)
			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	}
	return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = vce_v4_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
	.emit_wreg = vce_v4_0_emit_wreg,
	.emit_reg_wait = vce_v4_0_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vce.num_rings; i++) {
		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
		adev->vce.ring[i].me = i;
	}
	DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};