1*65332265Sriastradh /* $NetBSD: amdgpu_vce_v4_0.c,v 1.3 2021/12/19 12:21:29 riastradh Exp $ */
21571a7a1Sriastradh
31571a7a1Sriastradh /*
41571a7a1Sriastradh * Copyright 2016 Advanced Micro Devices, Inc.
51571a7a1Sriastradh * All Rights Reserved.
61571a7a1Sriastradh *
71571a7a1Sriastradh * Permission is hereby granted, free of charge, to any person obtaining a
81571a7a1Sriastradh * copy of this software and associated documentation files (the
91571a7a1Sriastradh * "Software"), to deal in the Software without restriction, including
101571a7a1Sriastradh * without limitation the rights to use, copy, modify, merge, publish,
111571a7a1Sriastradh * distribute, sub license, and/or sell copies of the Software, and to
121571a7a1Sriastradh * permit persons to whom the Software is furnished to do so, subject to
131571a7a1Sriastradh * the following conditions:
141571a7a1Sriastradh *
151571a7a1Sriastradh * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
161571a7a1Sriastradh * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
171571a7a1Sriastradh * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
181571a7a1Sriastradh * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
191571a7a1Sriastradh * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
201571a7a1Sriastradh * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
211571a7a1Sriastradh * USE OR OTHER DEALINGS IN THE SOFTWARE.
221571a7a1Sriastradh *
231571a7a1Sriastradh * The above copyright notice and this permission notice (including the
241571a7a1Sriastradh * next paragraph) shall be included in all copies or substantial portions
251571a7a1Sriastradh * of the Software.
261571a7a1Sriastradh *
271571a7a1Sriastradh */
281571a7a1Sriastradh
291571a7a1Sriastradh #include <sys/cdefs.h>
30*65332265Sriastradh __KERNEL_RCSID(0, "$NetBSD: amdgpu_vce_v4_0.c,v 1.3 2021/12/19 12:21:29 riastradh Exp $");
311571a7a1Sriastradh
321571a7a1Sriastradh #include <linux/firmware.h>
331571a7a1Sriastradh
341571a7a1Sriastradh #include "amdgpu.h"
351571a7a1Sriastradh #include "amdgpu_vce.h"
361571a7a1Sriastradh #include "soc15.h"
371571a7a1Sriastradh #include "soc15d.h"
381571a7a1Sriastradh #include "soc15_common.h"
391571a7a1Sriastradh #include "mmsch_v1_0.h"
401571a7a1Sriastradh
411571a7a1Sriastradh #include "vce/vce_4_0_offset.h"
421571a7a1Sriastradh #include "vce/vce_4_0_default.h"
431571a7a1Sriastradh #include "vce/vce_4_0_sh_mask.h"
441571a7a1Sriastradh #include "mmhub/mmhub_1_0_offset.h"
451571a7a1Sriastradh #include "mmhub/mmhub_1_0_sh_mask.h"
461571a7a1Sriastradh
471571a7a1Sriastradh #include "ivsrcid/vce/irqsrcs_vce_4_0.h"
481571a7a1Sriastradh
49*65332265Sriastradh #include <linux/nbsd-namespace.h>
50*65332265Sriastradh
511571a7a1Sriastradh #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
521571a7a1Sriastradh
531571a7a1Sriastradh #define VCE_V4_0_FW_SIZE (384 * 1024)
541571a7a1Sriastradh #define VCE_V4_0_STACK_SIZE (64 * 1024)
551571a7a1Sriastradh #define VCE_V4_0_DATA_SIZE ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
561571a7a1Sriastradh
571571a7a1Sriastradh static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
581571a7a1Sriastradh static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
591571a7a1Sriastradh static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
601571a7a1Sriastradh
611571a7a1Sriastradh /**
621571a7a1Sriastradh * vce_v4_0_ring_get_rptr - get read pointer
631571a7a1Sriastradh *
641571a7a1Sriastradh * @ring: amdgpu_ring pointer
651571a7a1Sriastradh *
661571a7a1Sriastradh * Returns the current hardware read pointer
671571a7a1Sriastradh */
vce_v4_0_ring_get_rptr(struct amdgpu_ring * ring)681571a7a1Sriastradh static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
691571a7a1Sriastradh {
701571a7a1Sriastradh struct amdgpu_device *adev = ring->adev;
711571a7a1Sriastradh
721571a7a1Sriastradh if (ring->me == 0)
731571a7a1Sriastradh return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
741571a7a1Sriastradh else if (ring->me == 1)
751571a7a1Sriastradh return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
761571a7a1Sriastradh else
771571a7a1Sriastradh return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
781571a7a1Sriastradh }
791571a7a1Sriastradh
801571a7a1Sriastradh /**
811571a7a1Sriastradh * vce_v4_0_ring_get_wptr - get write pointer
821571a7a1Sriastradh *
831571a7a1Sriastradh * @ring: amdgpu_ring pointer
841571a7a1Sriastradh *
851571a7a1Sriastradh * Returns the current hardware write pointer
861571a7a1Sriastradh */
vce_v4_0_ring_get_wptr(struct amdgpu_ring * ring)871571a7a1Sriastradh static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
881571a7a1Sriastradh {
891571a7a1Sriastradh struct amdgpu_device *adev = ring->adev;
901571a7a1Sriastradh
911571a7a1Sriastradh if (ring->use_doorbell)
921571a7a1Sriastradh return adev->wb.wb[ring->wptr_offs];
931571a7a1Sriastradh
941571a7a1Sriastradh if (ring->me == 0)
951571a7a1Sriastradh return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
961571a7a1Sriastradh else if (ring->me == 1)
971571a7a1Sriastradh return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
981571a7a1Sriastradh else
991571a7a1Sriastradh return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
1001571a7a1Sriastradh }
1011571a7a1Sriastradh
1021571a7a1Sriastradh /**
1031571a7a1Sriastradh * vce_v4_0_ring_set_wptr - set write pointer
1041571a7a1Sriastradh *
1051571a7a1Sriastradh * @ring: amdgpu_ring pointer
1061571a7a1Sriastradh *
1071571a7a1Sriastradh * Commits the write pointer to the hardware
1081571a7a1Sriastradh */
vce_v4_0_ring_set_wptr(struct amdgpu_ring * ring)1091571a7a1Sriastradh static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
1101571a7a1Sriastradh {
1111571a7a1Sriastradh struct amdgpu_device *adev = ring->adev;
1121571a7a1Sriastradh
1131571a7a1Sriastradh if (ring->use_doorbell) {
1141571a7a1Sriastradh /* XXX check if swapping is necessary on BE */
1151571a7a1Sriastradh adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
1161571a7a1Sriastradh WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
1171571a7a1Sriastradh return;
1181571a7a1Sriastradh }
1191571a7a1Sriastradh
1201571a7a1Sriastradh if (ring->me == 0)
1211571a7a1Sriastradh WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
1221571a7a1Sriastradh lower_32_bits(ring->wptr));
1231571a7a1Sriastradh else if (ring->me == 1)
1241571a7a1Sriastradh WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
1251571a7a1Sriastradh lower_32_bits(ring->wptr));
1261571a7a1Sriastradh else
1271571a7a1Sriastradh WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
1281571a7a1Sriastradh lower_32_bits(ring->wptr));
1291571a7a1Sriastradh }
1301571a7a1Sriastradh
/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to report ready
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS for the "firmware loaded" bit.  Up to 10 attempts are
 * made; each attempt polls up to 100 times with a 10ms delay between
 * reads.  If an attempt times out, the ECPU is pulsed through soft reset
 * before retrying.
 *
 * Returns 0 once the firmware reports in, -ETIMEDOUT otherwise.
 */
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		/* firmware never came up: assert then release ECPU soft
		 * reset, with settle delays, and poll again */
		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);

	}

	return -ETIMEDOUT;
}
1581571a7a1Sriastradh
/**
 * vce_v4_0_mmsch_start - hand the init descriptor table to the MMSCH
 *
 * @adev: amdgpu_device pointer
 * @table: MM scheduler table holding the init descriptors (GPU + CPU addr)
 *
 * SRIOV path: programs the MMSCH VF mailbox registers with the location
 * and size of the init table, kicks off initialization, and busy-waits
 * (up to 1000 * 10us) for the MMSCH to acknowledge.
 *
 * Returns 0 on success, -EBUSY if the MMSCH never responds.
 */
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	/* total descriptor size (in dwords): header + VCE and UVD tables */
	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	/* reset ring 0 software state before the MMSCH takes over */
	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}
2101571a7a1Sriastradh
/**
 * vce_v4_0_sriov_start - build the MMSCH init table and start VCE (SRIOV)
 *
 * @adev: amdgpu_device pointer
 *
 * Under SRIOV the host MMSCH programs the VCE registers on our behalf:
 * instead of writing registers directly, this function appends a table of
 * direct-write / read-modify-write / poll commands describing the full
 * MC resume and VCPU start sequence, then hands the table to the MMSCH
 * via vce_v4_0_mmsch_start().  The table is only built once (guarded by
 * vce_table_offset/size both being zero).
 *
 * Returns 0 on success or a negative error code from the MMSCH handshake.
 */
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	/* only populate the VCE section of the table once */
	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		/* the VCE table goes right after the header, or after the
		 * UVD table if one has already been written */
		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		/* ring buffer location and size (only ring 0 is used) */
		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGIN OF MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		/* cache region 0 holds the firmware image: either in the
		 * PSP-managed TMR, or in our own VCPU BO */
		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(tmr_mc_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
						offset & ~0x0f000000);

		}
		/* cache regions 1/2 (stack/data) always live in the VCPU BO */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
					adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
					(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
					adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
					(adev->vce.gpu_addr >> 40) & 0xff);

		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		/* with PSP loading, the fw image is not in the BO, so the
		 * stack starts at offset 0; bits 25:24 select the region */
		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					(offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					(offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		/* have the MMSCH poll until the firmware reports loaded */
		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}
3341571a7a1Sriastradh
/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block: programs the ring buffer registers for
 * all three rings, restores the memory controller state, enables the
 * VCPU clock, releases the ECPU from soft reset and waits for the
 * firmware to report loaded.
 *
 * Returns 0 on success, negative error code if the firmware never
 * comes up.
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	/* program ring buffer base/size for each of the three rings;
	 * rptr is primed to wptr so the rings start out empty */
	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
			~VCE_STATUS__JOB_BUSY_MASK);

	/* enable the VCPU clock */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	/* release the ECPU from soft reset and give it time to boot */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}
3931571a7a1Sriastradh
/**
 * vce_v4_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disables the VCPU, holds the ECPU in soft reset and clears the status
 * register.  Always returns 0.
 */
static int vce_v4_0_stop(struct amdgpu_device *adev)
{

	/* Disable VCPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear VCE_STATUS */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}
4151571a7a1Sriastradh
vce_v4_0_early_init(void * handle)4161571a7a1Sriastradh static int vce_v4_0_early_init(void *handle)
4171571a7a1Sriastradh {
4181571a7a1Sriastradh struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4191571a7a1Sriastradh
4201571a7a1Sriastradh if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
4211571a7a1Sriastradh adev->vce.num_rings = 1;
4221571a7a1Sriastradh else
4231571a7a1Sriastradh adev->vce.num_rings = 3;
4241571a7a1Sriastradh
4251571a7a1Sriastradh vce_v4_0_set_ring_funcs(adev);
4261571a7a1Sriastradh vce_v4_0_set_irq_funcs(adev);
4271571a7a1Sriastradh
4281571a7a1Sriastradh return 0;
4291571a7a1Sriastradh }
4301571a7a1Sriastradh
/**
 * vce_v4_0_sw_init - software-side VCE initialization
 *
 * @handle: amdgpu_device pointer
 *
 * Registers the VCE interrupt source, allocates the VCPU buffer object
 * (firmware image is included only when not loaded through the PSP),
 * sets up the firmware/ucode bookkeeping, initializes all rings and
 * allocates the SRIOV MM table.
 *
 * Returns 0 on success or a negative error code.
 */
static int vce_v4_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;

	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	/* stack and data always live in the VCPU BO; the firmware image
	 * is only carried there when the PSP does not load it */
	size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		/* NOTE(review): this inner 'size' shadows the outer one */
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

		/* scratch buffer used to save/restore the VCPU BO across
		 * suspend/resume (see vce_v4_0_suspend/resume) */
		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vce.saved_bo)
			return -ENOMEM;

		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		snprintf(ring->name, sizeof(ring->name), "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SRIOV */
			ring->use_doorbell = true;

			/* currently only use the first encoding ring for sriov,
			 * so set unused location for other unused rings.
			 */
			if (i == 0)
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
			else
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}


	r = amdgpu_vce_entity_init(adev);
	if (r)
		return r;

	r = amdgpu_virt_alloc_mm_table(adev);
	if (r)
		return r;

	return r;
}
5021571a7a1Sriastradh
/**
 * vce_v4_0_sw_fini - software-side VCE teardown
 *
 * @handle: amdgpu_device pointer
 *
 * Frees the SRIOV MM table and the suspend/resume scratch buffer (PSP
 * load path only), then suspends and tears down the common VCE state.
 *
 * Returns 0 on success or a negative error code.
 */
static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		/* allocated in vce_v4_0_sw_init() on the PSP path */
		kvfree(adev->vce.saved_bo);
		adev->vce.saved_bo = NULL;
	}

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}
5221571a7a1Sriastradh
vce_v4_0_hw_init(void * handle)5231571a7a1Sriastradh static int vce_v4_0_hw_init(void *handle)
5241571a7a1Sriastradh {
5251571a7a1Sriastradh int r, i;
5261571a7a1Sriastradh struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5271571a7a1Sriastradh
5281571a7a1Sriastradh if (amdgpu_sriov_vf(adev))
5291571a7a1Sriastradh r = vce_v4_0_sriov_start(adev);
5301571a7a1Sriastradh else
5311571a7a1Sriastradh r = vce_v4_0_start(adev);
5321571a7a1Sriastradh if (r)
5331571a7a1Sriastradh return r;
5341571a7a1Sriastradh
5351571a7a1Sriastradh for (i = 0; i < adev->vce.num_rings; i++) {
5361571a7a1Sriastradh r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
5371571a7a1Sriastradh if (r)
5381571a7a1Sriastradh return r;
5391571a7a1Sriastradh }
5401571a7a1Sriastradh
5411571a7a1Sriastradh DRM_INFO("VCE initialized successfully.\n");
5421571a7a1Sriastradh
5431571a7a1Sriastradh return 0;
5441571a7a1Sriastradh }
5451571a7a1Sriastradh
vce_v4_0_hw_fini(void * handle)5461571a7a1Sriastradh static int vce_v4_0_hw_fini(void *handle)
5471571a7a1Sriastradh {
5481571a7a1Sriastradh struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5491571a7a1Sriastradh int i;
5501571a7a1Sriastradh
5511571a7a1Sriastradh if (!amdgpu_sriov_vf(adev)) {
5521571a7a1Sriastradh /* vce_v4_0_wait_for_idle(handle); */
5531571a7a1Sriastradh vce_v4_0_stop(adev);
5541571a7a1Sriastradh } else {
5551571a7a1Sriastradh /* full access mode, so don't touch any VCE register */
5561571a7a1Sriastradh DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
5571571a7a1Sriastradh }
5581571a7a1Sriastradh
5591571a7a1Sriastradh for (i = 0; i < adev->vce.num_rings; i++)
5601571a7a1Sriastradh adev->vce.ring[i].sched.ready = false;
5611571a7a1Sriastradh
5621571a7a1Sriastradh return 0;
5631571a7a1Sriastradh }
5641571a7a1Sriastradh
/**
 * vce_v4_0_suspend - suspend the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * On the PSP load path, snapshots the VCPU buffer object into the
 * saved_bo scratch buffer (restored in vce_v4_0_resume()) before
 * stopping the hardware and suspending the common VCE state.
 *
 * Returns 0 on success or a negative error code.
 */
static int vce_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	/* nothing to save or stop if the block was never set up */
	if (adev->vce.vcpu_bo == NULL)
		return 0;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_fromio(adev->vce.saved_bo, ptr, size);
	}

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}
5861571a7a1Sriastradh
/**
 * vce_v4_0_resume - resume the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Restores the VCPU buffer object contents (from the snapshot taken in
 * vce_v4_0_suspend() on the PSP path, or via amdgpu_vce_resume()
 * otherwise) and re-runs hardware init.
 *
 * Returns 0 on success or a negative error code; -EINVAL if the VCPU
 * buffer object was never allocated.
 */
static int vce_v4_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_toio(ptr, adev->vce.saved_bo, size);
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	return vce_v4_0_hw_init(adev);
}
6081571a7a1Sriastradh
/*
 * vce_v4_0_mc_resume - program the VCE memory-controller interface
 *
 * Sets up clock gating and the LMI, then points the three VCPU cache
 * windows (0: firmware, 1: stack, 2: data) at the firmware image.
 * With PSP-loaded firmware, window 0 uses the PSP TMR address instead
 * of the driver-allocated BO.  Register write order follows the
 * original bring-up sequence and should not be rearranged.
 */
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;
	uint64_t tmr_mc_addr;

	/* Clock-gating setup (magic masks from the hardware bring-up code). */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	/* LMI: no byte swapping, VM control cleared. */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		/*
		 * Window 0 <- PSP TMR address.  The 48-bit MC address is
		 * split: bits 8..39 in the 40BIT_BAR, bits 40..47 in the
		 * 64BIT_BAR; window offset is 0 inside the TMR.
		 */
		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
			adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(tmr_mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(tmr_mc_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
	} else {
		/* Window 0 <- driver-allocated firmware BO. */
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	}

	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	/* Window 1: stack, tagged with window index 1 in bits 24.. */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	/* With PSP the stack starts at 0 in the BO; otherwise after the fw. */
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	/* Window 2: data, directly after the stack, window index 2. */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	/* Clear LMI_CTRL2 bit 8 and enable the system trap interrupt. */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}
6651571a7a1Sriastradh
/*
 * vce_v4_0_set_clockgating_state - amd_ip_funcs clockgating callback
 *
 * Intentionally a no-op: a full implementation exists below but is
 * disabled under "#if 0".  The stub must still be present so the IP
 * framework can invoke the callback during driver unload.
 */
static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload*/
	return 0;
}
6721571a7a1Sriastradh
6731571a7a1Sriastradh #if 0
6741571a7a1Sriastradh static bool vce_v4_0_is_idle(void *handle)
6751571a7a1Sriastradh {
6761571a7a1Sriastradh struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6771571a7a1Sriastradh u32 mask = 0;
6781571a7a1Sriastradh
6791571a7a1Sriastradh mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
6801571a7a1Sriastradh mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
6811571a7a1Sriastradh
6821571a7a1Sriastradh return !(RREG32(mmSRBM_STATUS2) & mask);
6831571a7a1Sriastradh }
6841571a7a1Sriastradh
6851571a7a1Sriastradh static int vce_v4_0_wait_for_idle(void *handle)
6861571a7a1Sriastradh {
6871571a7a1Sriastradh unsigned i;
6881571a7a1Sriastradh struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6891571a7a1Sriastradh
6901571a7a1Sriastradh for (i = 0; i < adev->usec_timeout; i++)
6911571a7a1Sriastradh if (vce_v4_0_is_idle(handle))
6921571a7a1Sriastradh return 0;
6931571a7a1Sriastradh
6941571a7a1Sriastradh return -ETIMEDOUT;
6951571a7a1Sriastradh }
6961571a7a1Sriastradh
6971571a7a1Sriastradh #define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */
6981571a7a1Sriastradh #define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */
6991571a7a1Sriastradh #define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */
7001571a7a1Sriastradh #define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
7011571a7a1Sriastradh VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
7021571a7a1Sriastradh
7031571a7a1Sriastradh static bool vce_v4_0_check_soft_reset(void *handle)
7041571a7a1Sriastradh {
7051571a7a1Sriastradh struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7061571a7a1Sriastradh u32 srbm_soft_reset = 0;
7071571a7a1Sriastradh
7081571a7a1Sriastradh /* According to VCE team , we should use VCE_STATUS instead
7091571a7a1Sriastradh * SRBM_STATUS.VCE_BUSY bit for busy status checking.
7101571a7a1Sriastradh * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
7111571a7a1Sriastradh * instance's registers are accessed
7121571a7a1Sriastradh * (0 for 1st instance, 10 for 2nd instance).
7131571a7a1Sriastradh *
7141571a7a1Sriastradh *VCE_STATUS
7151571a7a1Sriastradh *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB |
7161571a7a1Sriastradh *|----+----+-----------+----+----+----+----------+---------+----|
7171571a7a1Sriastradh *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0|
7181571a7a1Sriastradh *
7191571a7a1Sriastradh * VCE team suggest use bit 3--bit 6 for busy status check
7201571a7a1Sriastradh */
7211571a7a1Sriastradh mutex_lock(&adev->grbm_idx_mutex);
7221571a7a1Sriastradh WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
7231571a7a1Sriastradh if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
7241571a7a1Sriastradh srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
7251571a7a1Sriastradh srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
7261571a7a1Sriastradh }
7271571a7a1Sriastradh WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
7281571a7a1Sriastradh if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
7291571a7a1Sriastradh srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
7301571a7a1Sriastradh srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
7311571a7a1Sriastradh }
7321571a7a1Sriastradh WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
7331571a7a1Sriastradh mutex_unlock(&adev->grbm_idx_mutex);
7341571a7a1Sriastradh
7351571a7a1Sriastradh if (srbm_soft_reset) {
7361571a7a1Sriastradh adev->vce.srbm_soft_reset = srbm_soft_reset;
7371571a7a1Sriastradh return true;
7381571a7a1Sriastradh } else {
7391571a7a1Sriastradh adev->vce.srbm_soft_reset = 0;
7401571a7a1Sriastradh return false;
7411571a7a1Sriastradh }
7421571a7a1Sriastradh }
7431571a7a1Sriastradh
/*
 * vce_v4_0_soft_reset - pulse the SRBM soft-reset bits recorded by
 * vce_v4_0_check_soft_reset().
 *
 * Sets the bits, delays, clears them, and delays again.  Each write
 * is followed by a read-back of the same register (presumably to post
 * the write before the delay — TODO confirm against SRBM docs).
 */
static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		/* Assert the reset bits. */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		/* De-assert the reset bits. */
		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}
7741571a7a1Sriastradh
7751571a7a1Sriastradh static int vce_v4_0_pre_soft_reset(void *handle)
7761571a7a1Sriastradh {
7771571a7a1Sriastradh struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7781571a7a1Sriastradh
7791571a7a1Sriastradh if (!adev->vce.srbm_soft_reset)
7801571a7a1Sriastradh return 0;
7811571a7a1Sriastradh
7821571a7a1Sriastradh mdelay(5);
7831571a7a1Sriastradh
7841571a7a1Sriastradh return vce_v4_0_suspend(adev);
7851571a7a1Sriastradh }
7861571a7a1Sriastradh
7871571a7a1Sriastradh
7881571a7a1Sriastradh static int vce_v4_0_post_soft_reset(void *handle)
7891571a7a1Sriastradh {
7901571a7a1Sriastradh struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7911571a7a1Sriastradh
7921571a7a1Sriastradh if (!adev->vce.srbm_soft_reset)
7931571a7a1Sriastradh return 0;
7941571a7a1Sriastradh
7951571a7a1Sriastradh mdelay(5);
7961571a7a1Sriastradh
7971571a7a1Sriastradh return vce_v4_0_resume(adev);
7981571a7a1Sriastradh }
7991571a7a1Sriastradh
8001571a7a1Sriastradh static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
8011571a7a1Sriastradh {
8021571a7a1Sriastradh u32 tmp, data;
8031571a7a1Sriastradh
8041571a7a1Sriastradh tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
8051571a7a1Sriastradh if (override)
8061571a7a1Sriastradh data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
8071571a7a1Sriastradh else
8081571a7a1Sriastradh data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
8091571a7a1Sriastradh
8101571a7a1Sriastradh if (tmp != data)
8111571a7a1Sriastradh WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
8121571a7a1Sriastradh }
8131571a7a1Sriastradh
/*
 * vce_v4_0_set_vce_sw_clock_gating - enable/disable software-driven
 * medium-grain clock gating for the currently selected VCE instance.
 *
 * @gated: true to allow the firmware to gate clocks on the fly,
 *         false to force clocks on.
 *
 * The CGTT override is asserted around the register updates so the
 * programming itself is not affected by gating.  The magic masks come
 * from the hardware bring-up code; do not change them.
 */
static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (gated) {
		/* Gated: let firmware throttle — set gating enables. */
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		/* Force the DMA clocks on even while gated. */
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		/* Ungated: force clocks on, clear gating enables. */
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	/* Release the override so gating takes effect (or stays off). */
	vce_v4_0_override_vce_clock_gating(adev, false);
}
8801571a7a1Sriastradh
8811571a7a1Sriastradh static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
8821571a7a1Sriastradh {
8831571a7a1Sriastradh u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
8841571a7a1Sriastradh
8851571a7a1Sriastradh if (enable)
8861571a7a1Sriastradh tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
8871571a7a1Sriastradh else
8881571a7a1Sriastradh tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
8891571a7a1Sriastradh
8901571a7a1Sriastradh WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
8911571a7a1Sriastradh }
8921571a7a1Sriastradh
8931571a7a1Sriastradh static int vce_v4_0_set_clockgating_state(void *handle,
8941571a7a1Sriastradh enum amd_clockgating_state state)
8951571a7a1Sriastradh {
8961571a7a1Sriastradh struct amdgpu_device *adev = (struct amdgpu_device *)handle;
8971571a7a1Sriastradh bool enable = (state == AMD_CG_STATE_GATE);
8981571a7a1Sriastradh int i;
8991571a7a1Sriastradh
9001571a7a1Sriastradh if ((adev->asic_type == CHIP_POLARIS10) ||
9011571a7a1Sriastradh (adev->asic_type == CHIP_TONGA) ||
9021571a7a1Sriastradh (adev->asic_type == CHIP_FIJI))
9031571a7a1Sriastradh vce_v4_0_set_bypass_mode(adev, enable);
9041571a7a1Sriastradh
9051571a7a1Sriastradh if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
9061571a7a1Sriastradh return 0;
9071571a7a1Sriastradh
9081571a7a1Sriastradh mutex_lock(&adev->grbm_idx_mutex);
9091571a7a1Sriastradh for (i = 0; i < 2; i++) {
9101571a7a1Sriastradh /* Program VCE Instance 0 or 1 if not harvested */
9111571a7a1Sriastradh if (adev->vce.harvest_config & (1 << i))
9121571a7a1Sriastradh continue;
9131571a7a1Sriastradh
9141571a7a1Sriastradh WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
9151571a7a1Sriastradh
9161571a7a1Sriastradh if (enable) {
9171571a7a1Sriastradh /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
9181571a7a1Sriastradh uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
9191571a7a1Sriastradh data &= ~(0xf | 0xff0);
9201571a7a1Sriastradh data |= ((0x0 << 0) | (0x04 << 4));
9211571a7a1Sriastradh WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
9221571a7a1Sriastradh
9231571a7a1Sriastradh /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
9241571a7a1Sriastradh data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
9251571a7a1Sriastradh data &= ~(0xf | 0xff0);
9261571a7a1Sriastradh data |= ((0x0 << 0) | (0x04 << 4));
9271571a7a1Sriastradh WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
9281571a7a1Sriastradh }
9291571a7a1Sriastradh
9301571a7a1Sriastradh vce_v4_0_set_vce_sw_clock_gating(adev, enable);
9311571a7a1Sriastradh }
9321571a7a1Sriastradh
9331571a7a1Sriastradh WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
9341571a7a1Sriastradh mutex_unlock(&adev->grbm_idx_mutex);
9351571a7a1Sriastradh
9361571a7a1Sriastradh return 0;
9371571a7a1Sriastradh }
9381571a7a1Sriastradh #endif
9391571a7a1Sriastradh
vce_v4_0_set_powergating_state(void * handle,enum amd_powergating_state state)9401571a7a1Sriastradh static int vce_v4_0_set_powergating_state(void *handle,
9411571a7a1Sriastradh enum amd_powergating_state state)
9421571a7a1Sriastradh {
9431571a7a1Sriastradh /* This doesn't actually powergate the VCE block.
9441571a7a1Sriastradh * That's done in the dpm code via the SMC. This
9451571a7a1Sriastradh * just re-inits the block as necessary. The actual
9461571a7a1Sriastradh * gating still happens in the dpm code. We should
9471571a7a1Sriastradh * revisit this when there is a cleaner line between
9481571a7a1Sriastradh * the smc and the hw blocks
9491571a7a1Sriastradh */
9501571a7a1Sriastradh struct amdgpu_device *adev = (struct amdgpu_device *)handle;
9511571a7a1Sriastradh
9521571a7a1Sriastradh if (state == AMD_PG_STATE_GATE)
9531571a7a1Sriastradh return vce_v4_0_stop(adev);
9541571a7a1Sriastradh else
9551571a7a1Sriastradh return vce_v4_0_start(adev);
9561571a7a1Sriastradh }
9571571a7a1Sriastradh
/*
 * vce_v4_0_ring_emit_ib - emit an indirect-buffer command
 *
 * Packet layout (fixed by the VCE command format — do not reorder):
 * VCE_CMD_IB_VM, vmid, IB address low, IB address high, length in
 * dwords.
 */
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
					struct amdgpu_ib *ib, uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}
9691571a7a1Sriastradh
/*
 * vce_v4_0_ring_emit_fence - emit a fence write plus trap
 *
 * Packet: VCE_CMD_FENCE, address low (implicit truncation of addr),
 * address high, sequence value, then VCE_CMD_TRAP (presumably to
 * raise the trap interrupt handled in vce_v4_0_process_interrupt —
 * confirm against the VCE command spec).  64-bit fence sequence
 * values are not supported, hence the WARN_ON.
 */
static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
					u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}
9811571a7a1Sriastradh
/* Emit a VCE_CMD_END token terminating the command stream. */
static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}
9861571a7a1Sriastradh
/*
 * vce_v4_0_emit_reg_wait - emit a register-poll command
 *
 * Packet: VCE_CMD_REG_WAIT, register byte offset (dword offset << 2),
 * mask, expected value.  The engine stalls until (reg & mask) == val.
 */
static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, val);
}
9951571a7a1Sriastradh
/*
 * vce_v4_0_emit_vm_flush - emit a GPU TLB flush for a VM context
 *
 * Emits the generic GMC flush sequence, then polls the hub's
 * CTX0 page-table-base register for this vmid until it reflects the
 * new page-directory address, ensuring the flush writes have landed
 * before subsequent commands run.
 */
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for reg writes */
	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
			       lower_32_bits(pd_addr), 0xffffffff);
}
10071571a7a1Sriastradh
/*
 * vce_v4_0_emit_wreg - emit a register write command
 *
 * Packet: VCE_CMD_REG_WRITE, register byte offset (dword offset <<
 * 2), value.
 */
static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
			       uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}
10151571a7a1Sriastradh
vce_v4_0_set_interrupt_state(struct amdgpu_device * adev,struct amdgpu_irq_src * source,unsigned type,enum amdgpu_interrupt_state state)10161571a7a1Sriastradh static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
10171571a7a1Sriastradh struct amdgpu_irq_src *source,
10181571a7a1Sriastradh unsigned type,
10191571a7a1Sriastradh enum amdgpu_interrupt_state state)
10201571a7a1Sriastradh {
10211571a7a1Sriastradh uint32_t val = 0;
10221571a7a1Sriastradh
10231571a7a1Sriastradh if (!amdgpu_sriov_vf(adev)) {
10241571a7a1Sriastradh if (state == AMDGPU_IRQ_STATE_ENABLE)
10251571a7a1Sriastradh val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
10261571a7a1Sriastradh
10271571a7a1Sriastradh WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
10281571a7a1Sriastradh ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
10291571a7a1Sriastradh }
10301571a7a1Sriastradh return 0;
10311571a7a1Sriastradh }
10321571a7a1Sriastradh
vce_v4_0_process_interrupt(struct amdgpu_device * adev,struct amdgpu_irq_src * source,struct amdgpu_iv_entry * entry)10331571a7a1Sriastradh static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
10341571a7a1Sriastradh struct amdgpu_irq_src *source,
10351571a7a1Sriastradh struct amdgpu_iv_entry *entry)
10361571a7a1Sriastradh {
10371571a7a1Sriastradh DRM_DEBUG("IH: VCE\n");
10381571a7a1Sriastradh
10391571a7a1Sriastradh switch (entry->src_data[0]) {
10401571a7a1Sriastradh case 0:
10411571a7a1Sriastradh case 1:
10421571a7a1Sriastradh case 2:
10431571a7a1Sriastradh amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
10441571a7a1Sriastradh break;
10451571a7a1Sriastradh default:
10461571a7a1Sriastradh DRM_ERROR("Unhandled interrupt: %d %d\n",
10471571a7a1Sriastradh entry->src_id, entry->src_data[0]);
10481571a7a1Sriastradh break;
10491571a7a1Sriastradh }
10501571a7a1Sriastradh
10511571a7a1Sriastradh return 0;
10521571a7a1Sriastradh }
10531571a7a1Sriastradh
/*
 * amd_ip_funcs table for VCE 4.0.  The idle/soft-reset hooks are
 * NULL; candidate implementations exist above under "#if 0".
 */
const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = vce_v4_0_set_powergating_state,
};
10731571a7a1Sriastradh
/* Ring callbacks for the VCE 4.0 rings (VM mode). */
static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	/* Worst-case dword counts for ring-space reservation. */
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
	.emit_wreg = vce_v4_0_emit_wreg,
	.emit_reg_wait = vce_v4_0_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
11061571a7a1Sriastradh
vce_v4_0_set_ring_funcs(struct amdgpu_device * adev)11071571a7a1Sriastradh static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
11081571a7a1Sriastradh {
11091571a7a1Sriastradh int i;
11101571a7a1Sriastradh
11111571a7a1Sriastradh for (i = 0; i < adev->vce.num_rings; i++) {
11121571a7a1Sriastradh adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
11131571a7a1Sriastradh adev->vce.ring[i].me = i;
11141571a7a1Sriastradh }
11151571a7a1Sriastradh DRM_INFO("VCE enabled in VM mode\n");
11161571a7a1Sriastradh }
11171571a7a1Sriastradh
/* Interrupt-source callbacks: state toggle and dispatch. */
static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};
11221571a7a1Sriastradh
vce_v4_0_set_irq_funcs(struct amdgpu_device * adev)11231571a7a1Sriastradh static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
11241571a7a1Sriastradh {
11251571a7a1Sriastradh adev->vce.irq.num_types = 1;
11261571a7a1Sriastradh adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
11271571a7a1Sriastradh };
11281571a7a1Sriastradh
/* IP-block registration record: VCE version 4.0.0. */
const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};
1137