1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * The above copyright notice and this permission notice (including the
22 * next paragraph) shall be included in all copies or substantial portions
23 * of the Software.
24 *
25 */
26
27 #include <linux/firmware.h>
28 #include <drm/drmP.h>
29 #include "amdgpu.h"
30 #include "amdgpu_vce.h"
31 #include "soc15.h"
32 #include "soc15d.h"
33 #include "soc15_common.h"
34 #include "mmsch_v1_0.h"
35
36 #include "vce/vce_4_0_offset.h"
37 #include "vce/vce_4_0_default.h"
38 #include "vce/vce_4_0_sh_mask.h"
39 #include "mmhub/mmhub_1_0_offset.h"
40 #include "mmhub/mmhub_1_0_sh_mask.h"
41
42 #include "ivsrcid/vce/irqsrcs_vce_4_0.h"
43
44 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
45
46 #define VCE_V4_0_FW_SIZE (384 * 1024)
47 #define VCE_V4_0_STACK_SIZE (64 * 1024)
48 #define VCE_V4_0_DATA_SIZE ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
49
50 static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
51 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
52 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
53
54 /**
55 * vce_v4_0_ring_get_rptr - get read pointer
56 *
57 * @ring: amdgpu_ring pointer
58 *
59 * Returns the current hardware read pointer
60 */
vce_v4_0_ring_get_rptr(struct amdgpu_ring * ring)61 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
62 {
63 struct amdgpu_device *adev = ring->adev;
64
65 if (ring->me == 0)
66 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
67 else if (ring->me == 1)
68 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
69 else
70 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
71 }
72
73 /**
74 * vce_v4_0_ring_get_wptr - get write pointer
75 *
76 * @ring: amdgpu_ring pointer
77 *
78 * Returns the current hardware write pointer
79 */
vce_v4_0_ring_get_wptr(struct amdgpu_ring * ring)80 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
81 {
82 struct amdgpu_device *adev = ring->adev;
83
84 if (ring->use_doorbell)
85 return adev->wb.wb[ring->wptr_offs];
86
87 if (ring->me == 0)
88 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
89 else if (ring->me == 1)
90 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
91 else
92 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
93 }
94
95 /**
96 * vce_v4_0_ring_set_wptr - set write pointer
97 *
98 * @ring: amdgpu_ring pointer
99 *
100 * Commits the write pointer to the hardware
101 */
vce_v4_0_ring_set_wptr(struct amdgpu_ring * ring)102 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
103 {
104 struct amdgpu_device *adev = ring->adev;
105
106 if (ring->use_doorbell) {
107 /* XXX check if swapping is necessary on BE */
108 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
109 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
110 return;
111 }
112
113 if (ring->me == 0)
114 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
115 lower_32_bits(ring->wptr));
116 else if (ring->me == 1)
117 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
118 lower_32_bits(ring->wptr));
119 else
120 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
121 lower_32_bits(ring->wptr));
122 }
123
vce_v4_0_firmware_loaded(struct amdgpu_device * adev)124 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
125 {
126 int i, j;
127
128 for (i = 0; i < 10; ++i) {
129 for (j = 0; j < 100; ++j) {
130 uint32_t status =
131 RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
132
133 if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
134 return 0;
135 mdelay(10);
136 }
137
138 DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
139 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
140 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
141 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
142 mdelay(10);
143 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
144 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
145 mdelay(10);
146
147 }
148
149 return -ETIMEDOUT;
150 }
151
vce_v4_0_mmsch_start(struct amdgpu_device * adev,struct amdgpu_mm_table * table)152 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
153 struct amdgpu_mm_table *table)
154 {
155 uint32_t data = 0, loop;
156 uint64_t addr = table->gpu_addr;
157 struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
158 uint32_t size;
159
160 size = header->header_size + header->vce_table_size + header->uvd_table_size;
161
162 /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
163 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
164 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
165
166 /* 2, update vmid of descriptor */
167 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
168 data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
169 data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
170 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
171
172 /* 3, notify mmsch about the size of this descriptor */
173 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
174
175 /* 4, set resp to zero */
176 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
177
178 WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
179 adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
180 adev->vce.ring[0].wptr = 0;
181 adev->vce.ring[0].wptr_old = 0;
182
183 /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
184 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
185
186 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
187 loop = 1000;
188 while ((data & 0x10000002) != 0x10000002) {
189 udelay(10);
190 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
191 loop--;
192 if (!loop)
193 break;
194 }
195
196 if (!loop) {
197 dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
198 return -EBUSY;
199 }
200
201 return 0;
202 }
203
vce_v4_0_sriov_start(struct amdgpu_device * adev)204 static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
205 {
206 struct amdgpu_ring *ring;
207 uint32_t offset, size;
208 uint32_t table_size = 0;
209 struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
210 struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
211 struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
212 struct mmsch_v1_0_cmd_end end = { { 0 } };
213 uint32_t *init_table = adev->virt.mm_table.cpu_addr;
214 struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
215
216 direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
217 direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
218 direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
219 end.cmd_header.command_type = MMSCH_COMMAND__END;
220
221 if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
222 header->version = MMSCH_VERSION;
223 header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
224
225 if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
226 header->vce_table_offset = header->header_size;
227 else
228 header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
229
230 init_table += header->vce_table_offset;
231
232 ring = &adev->vce.ring[0];
233 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
234 lower_32_bits(ring->gpu_addr));
235 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
236 upper_32_bits(ring->gpu_addr));
237 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
238 ring->ring_size / 4);
239
240 /* BEGING OF MC_RESUME */
241 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
242 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
243 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
244 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
245 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
246
247 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
248 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
249 mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
250 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
251 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
252 mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
253 (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
254 } else {
255 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
256 mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
257 adev->vce.gpu_addr >> 8);
258 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
259 mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
260 (adev->vce.gpu_addr >> 40) & 0xff);
261 }
262 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
263 mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
264 adev->vce.gpu_addr >> 8);
265 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
266 mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
267 (adev->vce.gpu_addr >> 40) & 0xff);
268 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
269 mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
270 adev->vce.gpu_addr >> 8);
271 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
272 mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
273 (adev->vce.gpu_addr >> 40) & 0xff);
274
275 offset = AMDGPU_VCE_FIRMWARE_OFFSET;
276 size = VCE_V4_0_FW_SIZE;
277 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
278 offset & ~0x0f000000);
279 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
280
281 offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
282 size = VCE_V4_0_STACK_SIZE;
283 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
284 (offset & ~0x0f000000) | (1 << 24));
285 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
286
287 offset += size;
288 size = VCE_V4_0_DATA_SIZE;
289 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
290 (offset & ~0x0f000000) | (2 << 24));
291 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
292
293 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
294 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
295 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
296 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
297
298 /* end of MC_RESUME */
299 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
300 VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
301 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
302 ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
303 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
304 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
305
306 MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
307 VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
308 VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
309
310 /* clear BUSY flag */
311 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
312 ~VCE_STATUS__JOB_BUSY_MASK, 0);
313
314 /* add end packet */
315 memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
316 table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
317 header->vce_table_size = table_size;
318 }
319
320 return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
321 }
322
323 /**
324 * vce_v4_0_start - start VCE block
325 *
326 * @adev: amdgpu_device pointer
327 *
328 * Setup and start the VCE block
329 */
vce_v4_0_start(struct amdgpu_device * adev)330 static int vce_v4_0_start(struct amdgpu_device *adev)
331 {
332 struct amdgpu_ring *ring;
333 int r;
334
335 ring = &adev->vce.ring[0];
336
337 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
338 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
339 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
340 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
341 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
342
343 ring = &adev->vce.ring[1];
344
345 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
346 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
347 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
348 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
349 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
350
351 ring = &adev->vce.ring[2];
352
353 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
354 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
355 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
356 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
357 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
358
359 vce_v4_0_mc_resume(adev);
360 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
361 ~VCE_STATUS__JOB_BUSY_MASK);
362
363 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
364
365 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
366 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
367 mdelay(100);
368
369 r = vce_v4_0_firmware_loaded(adev);
370
371 /* clear BUSY flag */
372 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
373
374 if (r) {
375 DRM_ERROR("VCE not responding, giving up!!!\n");
376 return r;
377 }
378
379 return 0;
380 }
381
vce_v4_0_stop(struct amdgpu_device * adev)382 static int vce_v4_0_stop(struct amdgpu_device *adev)
383 {
384
385 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
386
387 /* hold on ECPU */
388 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
389 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
390 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
391
392 /* clear BUSY flag */
393 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
394
395 /* Set Clock-Gating off */
396 /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
397 vce_v4_0_set_vce_sw_clock_gating(adev, false);
398 */
399
400 return 0;
401 }
402
vce_v4_0_early_init(void * handle)403 static int vce_v4_0_early_init(void *handle)
404 {
405 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
406
407 if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
408 adev->vce.num_rings = 1;
409 else
410 adev->vce.num_rings = 3;
411
412 vce_v4_0_set_ring_funcs(adev);
413 vce_v4_0_set_irq_funcs(adev);
414
415 return 0;
416 }
417
vce_v4_0_sw_init(void * handle)418 static int vce_v4_0_sw_init(void *handle)
419 {
420 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
421 struct amdgpu_ring *ring;
422
423 unsigned size;
424 int r, i;
425
426 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
427 if (r)
428 return r;
429
430 size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
431 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
432 size += VCE_V4_0_FW_SIZE;
433
434 r = amdgpu_vce_sw_init(adev, size);
435 if (r)
436 return r;
437
438 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
439 const struct common_firmware_header *hdr;
440 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
441
442 adev->vce.saved_bo = kmalloc(size, M_DRM, GFP_KERNEL);
443 if (!adev->vce.saved_bo)
444 return -ENOMEM;
445
446 hdr = (const struct common_firmware_header *)adev->vce.fw->data;
447 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
448 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
449 adev->firmware.fw_size +=
450 ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
451 DRM_INFO("PSP loading VCE firmware\n");
452 } else {
453 r = amdgpu_vce_resume(adev);
454 if (r)
455 return r;
456 }
457
458 for (i = 0; i < adev->vce.num_rings; i++) {
459 ring = &adev->vce.ring[i];
460 ksprintf(ring->name, "vce%d", i);
461 if (amdgpu_sriov_vf(adev)) {
462 /* DOORBELL only works under SRIOV */
463 ring->use_doorbell = true;
464
465 /* currently only use the first encoding ring for sriov,
466 * so set unused location for other unused rings.
467 */
468 if (i == 0)
469 ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2;
470 else
471 ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1;
472 }
473 r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
474 if (r)
475 return r;
476 }
477
478
479 r = amdgpu_vce_entity_init(adev);
480 if (r)
481 return r;
482
483 r = amdgpu_virt_alloc_mm_table(adev);
484 if (r)
485 return r;
486
487 return r;
488 }
489
vce_v4_0_sw_fini(void * handle)490 static int vce_v4_0_sw_fini(void *handle)
491 {
492 int r;
493 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
494
495 /* free MM table */
496 amdgpu_virt_free_mm_table(adev);
497
498 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
499 kvfree(adev->vce.saved_bo);
500 adev->vce.saved_bo = NULL;
501 }
502
503 r = amdgpu_vce_suspend(adev);
504 if (r)
505 return r;
506
507 return amdgpu_vce_sw_fini(adev);
508 }
509
vce_v4_0_hw_init(void * handle)510 static int vce_v4_0_hw_init(void *handle)
511 {
512 int r, i;
513 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
514
515 if (amdgpu_sriov_vf(adev))
516 r = vce_v4_0_sriov_start(adev);
517 else
518 r = vce_v4_0_start(adev);
519 if (r)
520 return r;
521
522 for (i = 0; i < adev->vce.num_rings; i++)
523 adev->vce.ring[i].ready = false;
524
525 for (i = 0; i < adev->vce.num_rings; i++) {
526 r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
527 if (r)
528 return r;
529 else
530 adev->vce.ring[i].ready = true;
531 }
532
533 DRM_INFO("VCE initialized successfully.\n");
534
535 return 0;
536 }
537
vce_v4_0_hw_fini(void * handle)538 static int vce_v4_0_hw_fini(void *handle)
539 {
540 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
541 int i;
542
543 if (!amdgpu_sriov_vf(adev)) {
544 /* vce_v4_0_wait_for_idle(handle); */
545 vce_v4_0_stop(adev);
546 } else {
547 /* full access mode, so don't touch any VCE register */
548 DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
549 }
550
551 for (i = 0; i < adev->vce.num_rings; i++)
552 adev->vce.ring[i].ready = false;
553
554 return 0;
555 }
556
vce_v4_0_suspend(void * handle)557 static int vce_v4_0_suspend(void *handle)
558 {
559 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
560 int r;
561
562 if (adev->vce.vcpu_bo == NULL)
563 return 0;
564
565 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
566 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
567 void *ptr = adev->vce.cpu_addr;
568
569 memcpy_fromio(adev->vce.saved_bo, ptr, size);
570 }
571
572 r = vce_v4_0_hw_fini(adev);
573 if (r)
574 return r;
575
576 return amdgpu_vce_suspend(adev);
577 }
578
vce_v4_0_resume(void * handle)579 static int vce_v4_0_resume(void *handle)
580 {
581 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
582 int r;
583
584 if (adev->vce.vcpu_bo == NULL)
585 return -EINVAL;
586
587 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
588 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
589 void *ptr = adev->vce.cpu_addr;
590
591 memcpy_toio(ptr, adev->vce.saved_bo, size);
592 } else {
593 r = amdgpu_vce_resume(adev);
594 if (r)
595 return r;
596 }
597
598 return vce_v4_0_hw_init(adev);
599 }
600
vce_v4_0_mc_resume(struct amdgpu_device * adev)601 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
602 {
603 uint32_t offset, size;
604
605 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
606 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
607 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
608 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
609
610 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
611 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
612 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
613 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
614 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
615
616 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
617 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
618 (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
619 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
620 (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
621 } else {
622 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
623 (adev->vce.gpu_addr >> 8));
624 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
625 (adev->vce.gpu_addr >> 40) & 0xff);
626 }
627
628 offset = AMDGPU_VCE_FIRMWARE_OFFSET;
629 size = VCE_V4_0_FW_SIZE;
630 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
631 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
632
633 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
634 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
635 offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
636 size = VCE_V4_0_STACK_SIZE;
637 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
638 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
639
640 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
641 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
642 offset += size;
643 size = VCE_V4_0_DATA_SIZE;
644 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
645 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
646
647 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
648 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
649 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
650 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
651 }
652
/*
 * Clock-gating callback stub.
 *
 * @handle: amdgpu_device pointer (unused)
 * @state:  requested clock-gating state (unused)
 *
 * Performs no work; the callback exists because driver unload needs it.
 *
 * Always returns 0.
 */
static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	return 0;
}
659
660 #if 0
661 static bool vce_v4_0_is_idle(void *handle)
662 {
663 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
664 u32 mask = 0;
665
666 mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
667 mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
668
669 return !(RREG32(mmSRBM_STATUS2) & mask);
670 }
671
672 static int vce_v4_0_wait_for_idle(void *handle)
673 {
674 unsigned i;
675 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
676
677 for (i = 0; i < adev->usec_timeout; i++)
678 if (vce_v4_0_is_idle(handle))
679 return 0;
680
681 return -ETIMEDOUT;
682 }
683
684 #define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */
685 #define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */
686 #define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */
687 #define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
688 VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
689
690 static bool vce_v4_0_check_soft_reset(void *handle)
691 {
692 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
693 u32 srbm_soft_reset = 0;
694
695 /* According to VCE team , we should use VCE_STATUS instead
696 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
697 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
698 * instance's registers are accessed
699 * (0 for 1st instance, 10 for 2nd instance).
700 *
701 *VCE_STATUS
702 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB |
703 *|----+----+-----------+----+----+----+----------+---------+----|
704 *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0|
705 *
706 * VCE team suggest use bit 3--bit 6 for busy status check
707 */
708 mutex_lock(&adev->grbm_idx_mutex);
709 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
710 if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
711 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
712 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
713 }
714 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
715 if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
716 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
717 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
718 }
719 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
720 mutex_unlock(&adev->grbm_idx_mutex);
721
722 if (srbm_soft_reset) {
723 adev->vce.srbm_soft_reset = srbm_soft_reset;
724 return true;
725 } else {
726 adev->vce.srbm_soft_reset = 0;
727 return false;
728 }
729 }
730
731 static int vce_v4_0_soft_reset(void *handle)
732 {
733 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
734 u32 srbm_soft_reset;
735
736 if (!adev->vce.srbm_soft_reset)
737 return 0;
738 srbm_soft_reset = adev->vce.srbm_soft_reset;
739
740 if (srbm_soft_reset) {
741 u32 tmp;
742
743 tmp = RREG32(mmSRBM_SOFT_RESET);
744 tmp |= srbm_soft_reset;
745 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
746 WREG32(mmSRBM_SOFT_RESET, tmp);
747 tmp = RREG32(mmSRBM_SOFT_RESET);
748
749 udelay(50);
750
751 tmp &= ~srbm_soft_reset;
752 WREG32(mmSRBM_SOFT_RESET, tmp);
753 tmp = RREG32(mmSRBM_SOFT_RESET);
754
755 /* Wait a little for things to settle down */
756 udelay(50);
757 }
758
759 return 0;
760 }
761
762 static int vce_v4_0_pre_soft_reset(void *handle)
763 {
764 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
765
766 if (!adev->vce.srbm_soft_reset)
767 return 0;
768
769 mdelay(5);
770
771 return vce_v4_0_suspend(adev);
772 }
773
774
775 static int vce_v4_0_post_soft_reset(void *handle)
776 {
777 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
778
779 if (!adev->vce.srbm_soft_reset)
780 return 0;
781
782 mdelay(5);
783
784 return vce_v4_0_resume(adev);
785 }
786
787 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
788 {
789 u32 tmp, data;
790
791 tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
792 if (override)
793 data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
794 else
795 data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
796
797 if (tmp != data)
798 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
799 }
800
801 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
802 bool gated)
803 {
804 u32 data;
805
806 /* Set Override to disable Clock Gating */
807 vce_v4_0_override_vce_clock_gating(adev, true);
808
809 /* This function enables MGCG which is controlled by firmware.
810 With the clocks in the gated state the core is still
811 accessible but the firmware will throttle the clocks on the
812 fly as necessary.
813 */
814 if (gated) {
815 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
816 data |= 0x1ff;
817 data &= ~0xef0000;
818 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
819
820 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
821 data |= 0x3ff000;
822 data &= ~0xffc00000;
823 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
824
825 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
826 data |= 0x2;
827 data &= ~0x00010000;
828 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
829
830 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
831 data |= 0x37f;
832 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
833
834 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
835 data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
836 VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
837 VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
838 0x8;
839 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
840 } else {
841 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
842 data &= ~0x80010;
843 data |= 0xe70008;
844 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
845
846 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
847 data |= 0xffc00000;
848 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
849
850 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
851 data |= 0x10000;
852 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
853
854 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
855 data &= ~0xffc00000;
856 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
857
858 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
859 data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
860 VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
861 VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
862 0x8);
863 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
864 }
865 vce_v4_0_override_vce_clock_gating(adev, false);
866 }
867
868 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
869 {
870 u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
871
872 if (enable)
873 tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
874 else
875 tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
876
877 WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
878 }
879
880 static int vce_v4_0_set_clockgating_state(void *handle,
881 enum amd_clockgating_state state)
882 {
883 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
884 bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
885 int i;
886
887 if ((adev->asic_type == CHIP_POLARIS10) ||
888 (adev->asic_type == CHIP_TONGA) ||
889 (adev->asic_type == CHIP_FIJI))
890 vce_v4_0_set_bypass_mode(adev, enable);
891
892 if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
893 return 0;
894
895 mutex_lock(&adev->grbm_idx_mutex);
896 for (i = 0; i < 2; i++) {
897 /* Program VCE Instance 0 or 1 if not harvested */
898 if (adev->vce.harvest_config & (1 << i))
899 continue;
900
901 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
902
903 if (enable) {
904 /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
905 uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
906 data &= ~(0xf | 0xff0);
907 data |= ((0x0 << 0) | (0x04 << 4));
908 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
909
910 /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
911 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
912 data &= ~(0xf | 0xff0);
913 data |= ((0x0 << 0) | (0x04 << 4));
914 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
915 }
916
917 vce_v4_0_set_vce_sw_clock_gating(adev, enable);
918 }
919
920 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
921 mutex_unlock(&adev->grbm_idx_mutex);
922
923 return 0;
924 }
925
926 static int vce_v4_0_set_powergating_state(void *handle,
927 enum amd_powergating_state state)
928 {
929 /* This doesn't actually powergate the VCE block.
930 * That's done in the dpm code via the SMC. This
931 * just re-inits the block as necessary. The actual
932 * gating still happens in the dpm code. We should
933 * revisit this when there is a cleaner line between
934 * the smc and the hw blocks
935 */
936 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
937
938 if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
939 return 0;
940
941 if (state == AMD_PG_STATE_GATE)
942 /* XXX do we need a vce_v4_0_stop()? */
943 return 0;
944 else
945 return vce_v4_0_start(adev);
946 }
947 #endif
948
/**
 * vce_v4_0_ring_emit_ib - write an indirect buffer command to the ring
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to reference
 * @vmid: VMID written into the command
 * @ctx_switch: not used by this implementation
 *
 * Writes five dwords: VCE_CMD_IB_VM, the vmid, the low and high halves of
 * the IB GPU address, and the IB length in dwords.
 */
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
		struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
{
	const uint64_t ib_addr = ib->gpu_addr;

	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}
958
/**
 * vce_v4_0_ring_emit_fence - write a fence and trap command to the ring
 *
 * @ring: amdgpu_ring pointer
 * @addr: address written into the fence command (low then high dword)
 * @seq: sequence number; only its low 32 bits are written
 * @flags: fence flags; AMDGPU_FENCE_FLAG_64BIT triggers a WARN_ON
 *
 * Writes five dwords: VCE_CMD_FENCE, address low, address high, the
 * sequence number, and VCE_CMD_TRAP.
 */
static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, uint64_t addr,
			uint64_t seq, unsigned flags)
{
	/* the command only carries one dword of sequence number */
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}
970
vce_v4_0_ring_insert_end(struct amdgpu_ring * ring)971 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
972 {
973 amdgpu_ring_write(ring, VCE_CMD_END);
974 }
975
/**
 * vce_v4_0_emit_reg_wait - write a register wait command to the ring
 *
 * @ring: amdgpu_ring pointer
 * @reg: register offset; written shifted left by two
 * @val: value written as the last dword of the command
 * @mask: mask written before @val
 *
 * Writes four dwords: VCE_CMD_REG_WAIT, @reg << 2, @mask, @val.
 * Note that the mask precedes the value in the command.
 */
static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				   uint32_t val, uint32_t mask)
{
	const uint32_t reg_field = reg << 2;

	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, reg_field);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, val);
}
984
/**
 * vce_v4_0_emit_vm_flush - emit a TLB flush followed by a register wait
 *
 * @ring: amdgpu_ring pointer
 * @vmid: VMID to flush
 * @pd_addr: page directory address handed to the GMC flush helper
 *
 * Calls amdgpu_gmc_emit_flush_gpu_tlb() and then emits a register wait on
 * the hub's ctx0_ptb_addr_lo32 + @vmid * 2 register for the low 32 bits of
 * the address returned by the flush helper.
 */
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
	uint32_t ptb_lo_reg;

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for reg writes */
	ptb_lo_reg = hub->ctx0_ptb_addr_lo32 + vmid * 2;
	vce_v4_0_emit_reg_wait(ring, ptb_lo_reg, lower_32_bits(pd_addr),
			       0xffffffff);
}
996
/**
 * vce_v4_0_emit_wreg - write a register write command to the ring
 *
 * @ring: amdgpu_ring pointer
 * @reg: register offset; written shifted left by two
 * @val: value to write
 *
 * Writes three dwords: VCE_CMD_REG_WRITE, @reg << 2, @val.
 */
static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
			       uint32_t reg, uint32_t val)
{
	const uint32_t reg_field = reg << 2;

	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, reg_field);
	amdgpu_ring_write(ring, val);
}
1004
vce_v4_0_set_interrupt_state(struct amdgpu_device * adev,struct amdgpu_irq_src * source,unsigned type,enum amdgpu_interrupt_state state)1005 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1006 struct amdgpu_irq_src *source,
1007 unsigned type,
1008 enum amdgpu_interrupt_state state)
1009 {
1010 uint32_t val = 0;
1011
1012 if (!amdgpu_sriov_vf(adev)) {
1013 if (state == AMDGPU_IRQ_STATE_ENABLE)
1014 val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1015
1016 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1017 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1018 }
1019 return 0;
1020 }
1021
vce_v4_0_process_interrupt(struct amdgpu_device * adev,struct amdgpu_irq_src * source,struct amdgpu_iv_entry * entry)1022 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1023 struct amdgpu_irq_src *source,
1024 struct amdgpu_iv_entry *entry)
1025 {
1026 DRM_DEBUG("IH: VCE\n");
1027
1028 switch (entry->src_data[0]) {
1029 case 0:
1030 case 1:
1031 case 2:
1032 amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1033 break;
1034 default:
1035 DRM_ERROR("Unhandled interrupt: %d %d\n",
1036 entry->src_id, entry->src_data[0]);
1037 break;
1038 }
1039
1040 return 0;
1041 }
1042
1043 const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1044 .name = "vce_v4_0",
1045 .early_init = vce_v4_0_early_init,
1046 .late_init = NULL,
1047 .sw_init = vce_v4_0_sw_init,
1048 .sw_fini = vce_v4_0_sw_fini,
1049 .hw_init = vce_v4_0_hw_init,
1050 .hw_fini = vce_v4_0_hw_fini,
1051 .suspend = vce_v4_0_suspend,
1052 .resume = vce_v4_0_resume,
1053 .is_idle = NULL /* vce_v4_0_is_idle */,
1054 .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1055 .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1056 .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1057 .soft_reset = NULL /* vce_v4_0_soft_reset */,
1058 .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1059 .set_clockgating_state = vce_v4_0_set_clockgating_state,
1060 .set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
1061 };
1062
1063 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1064 .type = AMDGPU_RING_TYPE_VCE,
1065 .align_mask = 0x3f,
1066 .nop = VCE_CMD_NO_OP,
1067 .support_64bit_ptrs = false,
1068 .vmhub = AMDGPU_MMHUB,
1069 .get_rptr = vce_v4_0_ring_get_rptr,
1070 .get_wptr = vce_v4_0_ring_get_wptr,
1071 .set_wptr = vce_v4_0_ring_set_wptr,
1072 .parse_cs = amdgpu_vce_ring_parse_cs_vm,
1073 .emit_frame_size =
1074 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1075 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1076 4 + /* vce_v4_0_emit_vm_flush */
1077 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1078 1, /* vce_v4_0_ring_insert_end */
1079 .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1080 .emit_ib = vce_v4_0_ring_emit_ib,
1081 .emit_vm_flush = vce_v4_0_emit_vm_flush,
1082 .emit_fence = vce_v4_0_ring_emit_fence,
1083 .test_ring = amdgpu_vce_ring_test_ring,
1084 .test_ib = amdgpu_vce_ring_test_ib,
1085 .insert_nop = amdgpu_ring_insert_nop,
1086 .insert_end = vce_v4_0_ring_insert_end,
1087 .pad_ib = amdgpu_ring_generic_pad_ib,
1088 .begin_use = amdgpu_vce_ring_begin_use,
1089 .end_use = amdgpu_vce_ring_end_use,
1090 .emit_wreg = vce_v4_0_emit_wreg,
1091 .emit_reg_wait = vce_v4_0_emit_reg_wait,
1092 .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1093 };
1094
vce_v4_0_set_ring_funcs(struct amdgpu_device * adev)1095 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1096 {
1097 int i;
1098
1099 for (i = 0; i < adev->vce.num_rings; i++) {
1100 adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1101 adev->vce.ring[i].me = i;
1102 }
1103 DRM_INFO("VCE enabled in VM mode\n");
1104 }
1105
1106 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1107 .set = vce_v4_0_set_interrupt_state,
1108 .process = vce_v4_0_process_interrupt,
1109 };
1110
vce_v4_0_set_irq_funcs(struct amdgpu_device * adev)1111 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1112 {
1113 adev->vce.irq.num_types = 1;
1114 adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1115 };
1116
1117 const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1118 {
1119 .type = AMD_IP_BLOCK_TYPE_VCE,
1120 .major = 4,
1121 .minor = 0,
1122 .rev = 0,
1123 .funcs = &vce_v4_0_ip_funcs,
1124 };
1125