1 /* $NetBSD: amdgpu_vce_v4_0.c,v 1.3 2021/12/19 12:21:29 riastradh Exp $ */
2
3 /*
4 * Copyright 2016 Advanced Micro Devices, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * The above copyright notice and this permission notice (including the
24 * next paragraph) shall be included in all copies or substantial portions
25 * of the Software.
26 *
27 */
28
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: amdgpu_vce_v4_0.c,v 1.3 2021/12/19 12:21:29 riastradh Exp $");
31
32 #include <linux/firmware.h>
33
34 #include "amdgpu.h"
35 #include "amdgpu_vce.h"
36 #include "soc15.h"
37 #include "soc15d.h"
38 #include "soc15_common.h"
39 #include "mmsch_v1_0.h"
40
41 #include "vce/vce_4_0_offset.h"
42 #include "vce/vce_4_0_default.h"
43 #include "vce/vce_4_0_sh_mask.h"
44 #include "mmhub/mmhub_1_0_offset.h"
45 #include "mmhub/mmhub_1_0_sh_mask.h"
46
47 #include "ivsrcid/vce/irqsrcs_vce_4_0.h"
48
49 #include <linux/nbsd-namespace.h>
50
51 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
52
53 #define VCE_V4_0_FW_SIZE (384 * 1024)
54 #define VCE_V4_0_STACK_SIZE (64 * 1024)
55 #define VCE_V4_0_DATA_SIZE ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
56
57 static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
58 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
59 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
60
61 /**
62 * vce_v4_0_ring_get_rptr - get read pointer
63 *
64 * @ring: amdgpu_ring pointer
65 *
66 * Returns the current hardware read pointer
67 */
vce_v4_0_ring_get_rptr(struct amdgpu_ring * ring)68 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
69 {
70 struct amdgpu_device *adev = ring->adev;
71
72 if (ring->me == 0)
73 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
74 else if (ring->me == 1)
75 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
76 else
77 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
78 }
79
80 /**
81 * vce_v4_0_ring_get_wptr - get write pointer
82 *
83 * @ring: amdgpu_ring pointer
84 *
85 * Returns the current hardware write pointer
86 */
vce_v4_0_ring_get_wptr(struct amdgpu_ring * ring)87 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
88 {
89 struct amdgpu_device *adev = ring->adev;
90
91 if (ring->use_doorbell)
92 return adev->wb.wb[ring->wptr_offs];
93
94 if (ring->me == 0)
95 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
96 else if (ring->me == 1)
97 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
98 else
99 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
100 }
101
102 /**
103 * vce_v4_0_ring_set_wptr - set write pointer
104 *
105 * @ring: amdgpu_ring pointer
106 *
107 * Commits the write pointer to the hardware
108 */
vce_v4_0_ring_set_wptr(struct amdgpu_ring * ring)109 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
110 {
111 struct amdgpu_device *adev = ring->adev;
112
113 if (ring->use_doorbell) {
114 /* XXX check if swapping is necessary on BE */
115 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
116 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
117 return;
118 }
119
120 if (ring->me == 0)
121 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
122 lower_32_bits(ring->wptr));
123 else if (ring->me == 1)
124 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
125 lower_32_bits(ring->wptr));
126 else
127 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
128 lower_32_bits(ring->wptr));
129 }
130
vce_v4_0_firmware_loaded(struct amdgpu_device * adev)131 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
132 {
133 int i, j;
134
135 for (i = 0; i < 10; ++i) {
136 for (j = 0; j < 100; ++j) {
137 uint32_t status =
138 RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
139
140 if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
141 return 0;
142 mdelay(10);
143 }
144
145 DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
146 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
147 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
148 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
149 mdelay(10);
150 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
151 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
152 mdelay(10);
153
154 }
155
156 return -ETIMEDOUT;
157 }
158
/*
 * Hand the prepared init-table descriptor to the MM scheduler (MMSCH)
 * under SRIOV and wait for it to acknowledge.
 *
 * @adev:  amdgpu_device pointer
 * @table: descriptor table previously filled by vce_v4_0_sriov_start()
 *
 * Returns 0 on success, -EBUSY if MMSCH does not respond in time.
 */
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	/* total descriptor size in dwords: header + VCE table + UVD table */
	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	/* reset ring 0 state before MMSCH takes over */
	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	/* poll for the ack pattern, up to ~10ms (1000 * 10us) */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}
210
/*
 * Start VCE under SRIOV: build the MMSCH v1.0 init table (a sequence of
 * direct-write / read-modify-write / poll commands mirroring the bare-metal
 * MC resume and start sequences) and submit it via vce_v4_0_mmsch_start().
 *
 * The table is only built once; on later calls the existing table is
 * re-submitted.  Returns 0 on success or a negative errno from
 * vce_v4_0_mmsch_start().
 */
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	/* build the VCE section of the table only once */
	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		/* VCE section goes right after the header, or after the UVD
		 * section if UVD has already populated the table */
		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		/* only ring 0 is used under SRIOV */
		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGIN OF MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			/* PSP has placed the firmware in the TMR region */
			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(tmr_mc_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
		} else {
			/* driver-loaded firmware lives in the VCE BO */
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
						offset & ~0x0f000000);

		}
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
						(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
						(adev->vce.gpu_addr >> 40) & 0xff);

		/* cache 0: firmware image */
		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		/* cache 1: stack (offset 0 in TMR mode) */
		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					    (offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		/* cache 2: data */
		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					    (offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		/* MMSCH waits here for the firmware-loaded report */
		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}
334
/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block: program all three ring buffers,
 * restore the memory controller state, release the ECPU from reset
 * and wait for the firmware to report itself loaded.
 *
 * Returns 0 on success or a negative errno if the firmware never
 * comes up.
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	/* program ring 0 registers */
	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	/* program ring 1 registers */
	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	/* program ring 2 registers */
	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	/* mark the block busy while it boots */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
		 ~VCE_STATUS__JOB_BUSY_MASK);

	/* enable the VCPU clock */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	/* release the ECPU from soft reset and let the firmware boot */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
		 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}
393
/*
 * Stop the VCE block: disable the VCPU clock, hold the ECPU in soft
 * reset and clear the status register.  Always returns 0.
 */
static int vce_v4_0_stop(struct amdgpu_device *adev)
{

	/* Disable VCPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
		 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
		 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear VCE_STATUS */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}
415
vce_v4_0_early_init(void * handle)416 static int vce_v4_0_early_init(void *handle)
417 {
418 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
419
420 if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
421 adev->vce.num_rings = 1;
422 else
423 adev->vce.num_rings = 3;
424
425 vce_v4_0_set_ring_funcs(adev);
426 vce_v4_0_set_irq_funcs(adev);
427
428 return 0;
429 }
430
vce_v4_0_sw_init(void * handle)431 static int vce_v4_0_sw_init(void *handle)
432 {
433 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
434 struct amdgpu_ring *ring;
435
436 unsigned size;
437 int r, i;
438
439 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
440 if (r)
441 return r;
442
443 size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
444 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
445 size += VCE_V4_0_FW_SIZE;
446
447 r = amdgpu_vce_sw_init(adev, size);
448 if (r)
449 return r;
450
451 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
452 const struct common_firmware_header *hdr;
453 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
454
455 adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
456 if (!adev->vce.saved_bo)
457 return -ENOMEM;
458
459 hdr = (const struct common_firmware_header *)adev->vce.fw->data;
460 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
461 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
462 adev->firmware.fw_size +=
463 ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
464 DRM_INFO("PSP loading VCE firmware\n");
465 } else {
466 r = amdgpu_vce_resume(adev);
467 if (r)
468 return r;
469 }
470
471 for (i = 0; i < adev->vce.num_rings; i++) {
472 ring = &adev->vce.ring[i];
473 snprintf(ring->name, sizeof(ring->name), "vce%d", i);
474 if (amdgpu_sriov_vf(adev)) {
475 /* DOORBELL only works under SRIOV */
476 ring->use_doorbell = true;
477
478 /* currently only use the first encoding ring for sriov,
479 * so set unused location for other unused rings.
480 */
481 if (i == 0)
482 ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
483 else
484 ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
485 }
486 r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
487 if (r)
488 return r;
489 }
490
491
492 r = amdgpu_vce_entity_init(adev);
493 if (r)
494 return r;
495
496 r = amdgpu_virt_alloc_mm_table(adev);
497 if (r)
498 return r;
499
500 return r;
501 }
502
vce_v4_0_sw_fini(void * handle)503 static int vce_v4_0_sw_fini(void *handle)
504 {
505 int r;
506 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
507
508 /* free MM table */
509 amdgpu_virt_free_mm_table(adev);
510
511 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
512 kvfree(adev->vce.saved_bo);
513 adev->vce.saved_bo = NULL;
514 }
515
516 r = amdgpu_vce_suspend(adev);
517 if (r)
518 return r;
519
520 return amdgpu_vce_sw_fini(adev);
521 }
522
vce_v4_0_hw_init(void * handle)523 static int vce_v4_0_hw_init(void *handle)
524 {
525 int r, i;
526 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
527
528 if (amdgpu_sriov_vf(adev))
529 r = vce_v4_0_sriov_start(adev);
530 else
531 r = vce_v4_0_start(adev);
532 if (r)
533 return r;
534
535 for (i = 0; i < adev->vce.num_rings; i++) {
536 r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
537 if (r)
538 return r;
539 }
540
541 DRM_INFO("VCE initialized successfully.\n");
542
543 return 0;
544 }
545
vce_v4_0_hw_fini(void * handle)546 static int vce_v4_0_hw_fini(void *handle)
547 {
548 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
549 int i;
550
551 if (!amdgpu_sriov_vf(adev)) {
552 /* vce_v4_0_wait_for_idle(handle); */
553 vce_v4_0_stop(adev);
554 } else {
555 /* full access mode, so don't touch any VCE register */
556 DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
557 }
558
559 for (i = 0; i < adev->vce.num_rings; i++)
560 adev->vce.ring[i].sched.ready = false;
561
562 return 0;
563 }
564
vce_v4_0_suspend(void * handle)565 static int vce_v4_0_suspend(void *handle)
566 {
567 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
568 int r;
569
570 if (adev->vce.vcpu_bo == NULL)
571 return 0;
572
573 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
574 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
575 void *ptr = adev->vce.cpu_addr;
576
577 memcpy_fromio(adev->vce.saved_bo, ptr, size);
578 }
579
580 r = vce_v4_0_hw_fini(adev);
581 if (r)
582 return r;
583
584 return amdgpu_vce_suspend(adev);
585 }
586
vce_v4_0_resume(void * handle)587 static int vce_v4_0_resume(void *handle)
588 {
589 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
590 int r;
591
592 if (adev->vce.vcpu_bo == NULL)
593 return -EINVAL;
594
595 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
596 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
597 void *ptr = adev->vce.cpu_addr;
598
599 memcpy_toio(ptr, adev->vce.saved_bo, size);
600 } else {
601 r = amdgpu_vce_resume(adev);
602 if (r)
603 return r;
604 }
605
606 return vce_v4_0_hw_init(adev);
607 }
608
/*
 * Restore the VCE memory-controller state: clock-gating defaults, LMI
 * setup and the three VCPU cache windows (firmware image, stack, data).
 * Called from vce_v4_0_start() before the ECPU is released from reset.
 */
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;
	uint64_t tmr_mc_addr;

	/* clock-gating defaults */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	/* local memory interface setup */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		/* PSP mode: the firmware image lives in the TMR region */
		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
										adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(tmr_mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(tmr_mc_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
	} else {
		/* driver-load mode: the firmware image lives in the VCE BO */
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	}

	/* cache window 0: firmware image */
	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	/* cache window 1: stack (offset 0 in PSP mode) */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	/* cache window 2: data */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	/* enable the system interrupt */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
		VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
		~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}
665
/*
 * Clock-gating control stub.  VCE 4.0 clock gating is not programmed
 * here (see the disabled implementation below); the callback exists so
 * the IP framework can invoke it during driver unload.
 */
static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload*/
	return 0;
}
672
673 #if 0
/*
 * Report whether all non-harvested VCE instances are idle, based on the
 * SRBM busy bits.  (Disabled: part of the #if 0 soft-reset path.)
 */
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	/* only check instances that were not harvested away */
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}
684
/*
 * Busy-wait until VCE goes idle or the device timeout elapses.
 * (Disabled: part of the #if 0 soft-reset path.)
 */
static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}
696
697 #define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */
698 #define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */
699 #define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */
700 #define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
701 VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
702
703 static bool vce_v4_0_check_soft_reset(void *handle)
704 {
705 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
706 u32 srbm_soft_reset = 0;
707
708 /* According to VCE team , we should use VCE_STATUS instead
709 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
710 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
711 * instance's registers are accessed
712 * (0 for 1st instance, 10 for 2nd instance).
713 *
714 *VCE_STATUS
715 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB |
716 *|----+----+-----------+----+----+----+----------+---------+----|
717 *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0|
718 *
719 * VCE team suggest use bit 3--bit 6 for busy status check
720 */
721 mutex_lock(&adev->grbm_idx_mutex);
722 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
723 if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
724 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
725 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
726 }
727 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
728 if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
729 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
730 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
731 }
732 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
733 mutex_unlock(&adev->grbm_idx_mutex);
734
735 if (srbm_soft_reset) {
736 adev->vce.srbm_soft_reset = srbm_soft_reset;
737 return true;
738 } else {
739 adev->vce.srbm_soft_reset = 0;
740 return false;
741 }
742 }
743
/*
 * Apply the SRBM soft reset recorded by vce_v4_0_check_soft_reset():
 * assert the reset bits, wait, then release them.  (Disabled: part of
 * the #if 0 soft-reset path.)
 */
static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		/* assert the reset bits */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		/* release the reset bits */
		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}
774
/*
 * Suspend VCE before a pending soft reset is applied.  (Disabled: part
 * of the #if 0 soft-reset path.)
 */
static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}
786
787
/*
 * Resume VCE after a soft reset completed.  (Disabled: part of the
 * #if 0 soft-reset path.)
 */
static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}
799
/*
 * Set or clear the CGTT override bit in VCE_RB_ARB_CTRL; only writes
 * the register when the bit actually changes.  (Disabled: part of the
 * #if 0 clock-gating path.)
 */
static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	/* avoid a redundant register write */
	if (tmp != data)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}
813
/*
 * Program the firmware-controlled medium-grain clock gating (MGCG)
 * registers into the gated or ungated configuration.  (Disabled: part
 * of the #if 0 clock-gating path.)
 */
static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
		bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	/* re-enable hardware clock gating control */
	vce_v4_0_override_vce_clock_gating(adev, false);
}
880
/*
 * Toggle the ECLK bypass bit in the SMC's GCK_DFS_BYPASS_CNTL register.
 * (Disabled: part of the #if 0 clock-gating path.)
 */
static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}
892
893 static int vce_v4_0_set_clockgating_state(void *handle,
894 enum amd_clockgating_state state)
895 {
896 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
897 bool enable = (state == AMD_CG_STATE_GATE);
898 int i;
899
900 if ((adev->asic_type == CHIP_POLARIS10) ||
901 (adev->asic_type == CHIP_TONGA) ||
902 (adev->asic_type == CHIP_FIJI))
903 vce_v4_0_set_bypass_mode(adev, enable);
904
905 if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
906 return 0;
907
908 mutex_lock(&adev->grbm_idx_mutex);
909 for (i = 0; i < 2; i++) {
910 /* Program VCE Instance 0 or 1 if not harvested */
911 if (adev->vce.harvest_config & (1 << i))
912 continue;
913
914 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
915
916 if (enable) {
917 /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
918 uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
919 data &= ~(0xf | 0xff0);
920 data |= ((0x0 << 0) | (0x04 << 4));
921 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
922
923 /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
924 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
925 data &= ~(0xf | 0xff0);
926 data |= ((0x0 << 0) | (0x04 << 4));
927 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
928 }
929
930 vce_v4_0_set_vce_sw_clock_gating(adev, enable);
931 }
932
933 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
934 mutex_unlock(&adev->grbm_idx_mutex);
935
936 return 0;
937 }
938 #endif
939
vce_v4_0_set_powergating_state(void * handle,enum amd_powergating_state state)940 static int vce_v4_0_set_powergating_state(void *handle,
941 enum amd_powergating_state state)
942 {
943 /* This doesn't actually powergate the VCE block.
944 * That's done in the dpm code via the SMC. This
945 * just re-inits the block as necessary. The actual
946 * gating still happens in the dpm code. We should
947 * revisit this when there is a cleaner line between
948 * the smc and the hw blocks
949 */
950 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
951
952 if (state == AMD_PG_STATE_GATE)
953 return vce_v4_0_stop(adev);
954 else
955 return vce_v4_0_start(adev);
956 }
957
/*
 * vce_v4_0_ring_emit_ib - emit an indirect buffer on the VCE ring
 *
 * @ring: amdgpu ring pointer
 * @job: job the IB belongs to (supplies the VM id)
 * @ib: indirect buffer to execute
 * @flags: unused here
 *
 * Emits IB_VM followed by the VM id, the IB GPU address split into
 * low/high dwords, and the IB length in dwords.
 */
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
					struct amdgpu_ib *ib, uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	uint64_t va = ib->gpu_addr;

	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(va));
	amdgpu_ring_write(ring, upper_32_bits(va));
	amdgpu_ring_write(ring, ib->length_dw);
}
969
/*
 * vce_v4_0_ring_emit_fence - emit a fence write followed by a trap
 *
 * @ring: amdgpu ring pointer
 * @addr: GPU address to write the fence value to
 * @seq: fence sequence number
 * @flags: fence flags; 64-bit fences are not supported on this ring
 */
static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
					u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}
981
vce_v4_0_ring_insert_end(struct amdgpu_ring * ring)982 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
983 {
984 amdgpu_ring_write(ring, VCE_CMD_END);
985 }
986
/*
 * vce_v4_0_emit_reg_wait - emit a wait until (reg & mask) == val
 *
 * @ring: amdgpu ring pointer
 * @reg: register dword offset (converted to a byte address with << 2)
 * @val: value to wait for
 * @mask: bits of the register to compare
 */
static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				   uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, val);
}
995
/*
 * vce_v4_0_emit_vm_flush - emit a GPU TLB flush for a VM id
 *
 * @ring: amdgpu ring pointer
 * @vmid: VM id whose mappings are flushed
 * @pd_addr: page directory address
 *
 * Emits the common GMC TLB flush, then waits on the hub's per-VMID
 * page-table-base register to confirm the writes landed.
 */
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
	uint32_t wait_reg = hub->ctx0_ptb_addr_lo32 + vmid * 2;

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for reg writes */
	vce_v4_0_emit_reg_wait(ring, wait_reg, lower_32_bits(pd_addr),
			       0xffffffff);
}
1007
/*
 * vce_v4_0_emit_wreg - emit a register write command
 *
 * @ring: amdgpu ring pointer
 * @reg: register dword offset (converted to a byte address with << 2)
 * @val: value to write
 */
static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
			       uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}
1015
vce_v4_0_set_interrupt_state(struct amdgpu_device * adev,struct amdgpu_irq_src * source,unsigned type,enum amdgpu_interrupt_state state)1016 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1017 struct amdgpu_irq_src *source,
1018 unsigned type,
1019 enum amdgpu_interrupt_state state)
1020 {
1021 uint32_t val = 0;
1022
1023 if (!amdgpu_sriov_vf(adev)) {
1024 if (state == AMDGPU_IRQ_STATE_ENABLE)
1025 val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1026
1027 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1028 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1029 }
1030 return 0;
1031 }
1032
vce_v4_0_process_interrupt(struct amdgpu_device * adev,struct amdgpu_irq_src * source,struct amdgpu_iv_entry * entry)1033 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1034 struct amdgpu_irq_src *source,
1035 struct amdgpu_iv_entry *entry)
1036 {
1037 DRM_DEBUG("IH: VCE\n");
1038
1039 switch (entry->src_data[0]) {
1040 case 0:
1041 case 1:
1042 case 2:
1043 amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1044 break;
1045 default:
1046 DRM_ERROR("Unhandled interrupt: %d %d\n",
1047 entry->src_id, entry->src_data[0]);
1048 break;
1049 }
1050
1051 return 0;
1052 }
1053
/*
 * IP-block callbacks for VCE 4.0.  The idle/soft-reset hooks are
 * deliberately left NULL (their would-be implementations are named in
 * the comments); clock- and power-gating transitions are handled by the
 * two set_*gating_state callbacks above.
 */
const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = vce_v4_0_set_powergating_state,
};
1073
/*
 * Ring callbacks for the VCE 4.0 VM-mode rings.  Command-stream parsing
 * and ring/IB tests use the shared amdgpu_vce helpers; emit_frame_size
 * and emit_ib_size are dword budgets matching the emit functions above.
 */
static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
	.emit_wreg = vce_v4_0_emit_wreg,
	.emit_reg_wait = vce_v4_0_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
1106
vce_v4_0_set_ring_funcs(struct amdgpu_device * adev)1107 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1108 {
1109 int i;
1110
1111 for (i = 0; i < adev->vce.num_rings; i++) {
1112 adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1113 adev->vce.ring[i].me = i;
1114 }
1115 DRM_INFO("VCE enabled in VM mode\n");
1116 }
1117
/*
 * Interrupt-source callbacks: .set toggles the VCE trap-interrupt
 * enable, .process dispatches fence completions to the matching ring.
 */
static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};
1122
vce_v4_0_set_irq_funcs(struct amdgpu_device * adev)1123 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1124 {
1125 adev->vce.irq.num_types = 1;
1126 adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1127 };
1128
/* IP-block registration record for VCE 4.0.0. */
const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};
1137