xref: /openbsd/sys/dev/pci/drm/amd/amdgpu/jpeg_v1_0.c (revision b5a27a99)
1 /*
2  * Copyright 2019 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include "amdgpu.h"
25 #include "amdgpu_jpeg.h"
26 #include "amdgpu_cs.h"
27 #include "soc15.h"
28 #include "soc15d.h"
29 #include "vcn_v1_0.h"
30 #include "jpeg_v1_0.h"
31 
32 #include "vcn/vcn_1_0_offset.h"
33 #include "vcn/vcn_1_0_sh_mask.h"
34 
35 static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
36 static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev);
37 static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring);
38 static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
39 				     struct amdgpu_job *job,
40 				     struct amdgpu_ib *ib);
41 
jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring * ring,uint32_t * ptr,uint32_t reg_offset,uint32_t val)42 static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val)
43 {
44 	struct amdgpu_device *adev = ring->adev;
45 	ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
46 	if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
47 		((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
48 		ring->ring[(*ptr)++] = 0;
49 		ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0);
50 	} else {
51 		ring->ring[(*ptr)++] = reg_offset;
52 		ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0);
53 	}
54 	ring->ring[(*ptr)++] = val;
55 }
56 
jpeg_v1_0_decode_ring_set_patch_ring(struct amdgpu_ring * ring,uint32_t ptr)57 static void jpeg_v1_0_decode_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr)
58 {
59 	struct amdgpu_device *adev = ring->adev;
60 
61 	uint32_t reg, reg_offset, val, mask, i;
62 
63 	// 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW
64 	reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW);
65 	reg_offset = (reg << 2);
66 	val = lower_32_bits(ring->gpu_addr);
67 	jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
68 
69 	// 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH
70 	reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH);
71 	reg_offset = (reg << 2);
72 	val = upper_32_bits(ring->gpu_addr);
73 	jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
74 
75 	// 3rd to 5th: issue MEM_READ commands
76 	for (i = 0; i <= 2; i++) {
77 		ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2);
78 		ring->ring[ptr++] = 0;
79 	}
80 
81 	// 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR write ability
82 	reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL);
83 	reg_offset = (reg << 2);
84 	val = 0x13;
85 	jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
86 
87 	// 7th: program mmUVD_JRBC_RB_REF_DATA
88 	reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA);
89 	reg_offset = (reg << 2);
90 	val = 0x1;
91 	jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
92 
93 	// 8th: issue conditional register read mmUVD_JRBC_RB_CNTL
94 	reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL);
95 	reg_offset = (reg << 2);
96 	val = 0x1;
97 	mask = 0x1;
98 
99 	ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0);
100 	ring->ring[ptr++] = 0x01400200;
101 	ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0);
102 	ring->ring[ptr++] = val;
103 	ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
104 	if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
105 		((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
106 		ring->ring[ptr++] = 0;
107 		ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3);
108 	} else {
109 		ring->ring[ptr++] = reg_offset;
110 		ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3);
111 	}
112 	ring->ring[ptr++] = mask;
113 
114 	//9th to 21st: insert no-op
115 	for (i = 0; i <= 12; i++) {
116 		ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
117 		ring->ring[ptr++] = 0;
118 	}
119 
120 	//22nd: reset mmUVD_JRBC_RB_RPTR
121 	reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_RPTR);
122 	reg_offset = (reg << 2);
123 	val = 0;
124 	jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
125 
126 	//23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch
127 	reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL);
128 	reg_offset = (reg << 2);
129 	val = 0x12;
130 	jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
131 }
132 
133 /**
134  * jpeg_v1_0_decode_ring_get_rptr - get read pointer
135  *
136  * @ring: amdgpu_ring pointer
137  *
138  * Returns the current hardware read pointer
139  */
jpeg_v1_0_decode_ring_get_rptr(struct amdgpu_ring * ring)140 static uint64_t jpeg_v1_0_decode_ring_get_rptr(struct amdgpu_ring *ring)
141 {
142 	struct amdgpu_device *adev = ring->adev;
143 
144 	return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR);
145 }
146 
147 /**
148  * jpeg_v1_0_decode_ring_get_wptr - get write pointer
149  *
150  * @ring: amdgpu_ring pointer
151  *
152  * Returns the current hardware write pointer
153  */
jpeg_v1_0_decode_ring_get_wptr(struct amdgpu_ring * ring)154 static uint64_t jpeg_v1_0_decode_ring_get_wptr(struct amdgpu_ring *ring)
155 {
156 	struct amdgpu_device *adev = ring->adev;
157 
158 	return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
159 }
160 
161 /**
162  * jpeg_v1_0_decode_ring_set_wptr - set write pointer
163  *
164  * @ring: amdgpu_ring pointer
165  *
166  * Commits the write pointer to the hardware
167  */
jpeg_v1_0_decode_ring_set_wptr(struct amdgpu_ring * ring)168 static void jpeg_v1_0_decode_ring_set_wptr(struct amdgpu_ring *ring)
169 {
170 	struct amdgpu_device *adev = ring->adev;
171 
172 	WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
173 }
174 
175 /**
176  * jpeg_v1_0_decode_ring_insert_start - insert a start command
177  *
178  * @ring: amdgpu_ring pointer
179  *
180  * Write a start command to the ring.
181  */
jpeg_v1_0_decode_ring_insert_start(struct amdgpu_ring * ring)182 static void jpeg_v1_0_decode_ring_insert_start(struct amdgpu_ring *ring)
183 {
184 	struct amdgpu_device *adev = ring->adev;
185 
186 	amdgpu_ring_write(ring,
187 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
188 	amdgpu_ring_write(ring, 0x68e04);
189 
190 	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
191 	amdgpu_ring_write(ring, 0x80010000);
192 }
193 
194 /**
195  * jpeg_v1_0_decode_ring_insert_end - insert a end command
196  *
197  * @ring: amdgpu_ring pointer
198  *
199  * Write a end command to the ring.
200  */
jpeg_v1_0_decode_ring_insert_end(struct amdgpu_ring * ring)201 static void jpeg_v1_0_decode_ring_insert_end(struct amdgpu_ring *ring)
202 {
203 	struct amdgpu_device *adev = ring->adev;
204 
205 	amdgpu_ring_write(ring,
206 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
207 	amdgpu_ring_write(ring, 0x68e04);
208 
209 	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
210 	amdgpu_ring_write(ring, 0x00010000);
211 }
212 
213 /**
214  * jpeg_v1_0_decode_ring_emit_fence - emit an fence & trap command
215  *
216  * @ring: amdgpu_ring pointer
217  * @addr: address
218  * @seq: sequence number
219  * @flags: fence related flags
220  *
221  * Write a fence and a trap command to the ring.
222  */
jpeg_v1_0_decode_ring_emit_fence(struct amdgpu_ring * ring,u64 addr,u64 seq,unsigned flags)223 static void jpeg_v1_0_decode_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
224 				     unsigned flags)
225 {
226 	struct amdgpu_device *adev = ring->adev;
227 
228 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
229 
230 	amdgpu_ring_write(ring,
231 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_DATA0), 0, 0, PACKETJ_TYPE0));
232 	amdgpu_ring_write(ring, seq);
233 
234 	amdgpu_ring_write(ring,
235 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_DATA1), 0, 0, PACKETJ_TYPE0));
236 	amdgpu_ring_write(ring, seq);
237 
238 	amdgpu_ring_write(ring,
239 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
240 	amdgpu_ring_write(ring, lower_32_bits(addr));
241 
242 	amdgpu_ring_write(ring,
243 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
244 	amdgpu_ring_write(ring, upper_32_bits(addr));
245 
246 	amdgpu_ring_write(ring,
247 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_CMD), 0, 0, PACKETJ_TYPE0));
248 	amdgpu_ring_write(ring, 0x8);
249 
250 	amdgpu_ring_write(ring,
251 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_CMD), 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
252 	amdgpu_ring_write(ring, 0);
253 
254 	amdgpu_ring_write(ring,
255 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
256 	amdgpu_ring_write(ring, 0x01400200);
257 
258 	amdgpu_ring_write(ring,
259 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
260 	amdgpu_ring_write(ring, seq);
261 
262 	amdgpu_ring_write(ring,
263 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
264 	amdgpu_ring_write(ring, lower_32_bits(addr));
265 
266 	amdgpu_ring_write(ring,
267 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
268 	amdgpu_ring_write(ring, upper_32_bits(addr));
269 
270 	amdgpu_ring_write(ring,
271 		PACKETJ(0, 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE2));
272 	amdgpu_ring_write(ring, 0xffffffff);
273 
274 	amdgpu_ring_write(ring,
275 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
276 	amdgpu_ring_write(ring, 0x3fbc);
277 
278 	amdgpu_ring_write(ring,
279 		PACKETJ(0, 0, 0, PACKETJ_TYPE0));
280 	amdgpu_ring_write(ring, 0x1);
281 
282 	/* emit trap */
283 	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
284 	amdgpu_ring_write(ring, 0);
285 }
286 
287 /**
288  * jpeg_v1_0_decode_ring_emit_ib - execute indirect buffer
289  *
290  * @ring: amdgpu_ring pointer
291  * @job: job to retrieve vmid from
292  * @ib: indirect buffer to execute
293  * @flags: unused
294  *
295  * Write ring commands to execute the indirect buffer.
296  */
jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring * ring,struct amdgpu_job * job,struct amdgpu_ib * ib,uint32_t flags)297 static void jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring *ring,
298 					struct amdgpu_job *job,
299 					struct amdgpu_ib *ib,
300 					uint32_t flags)
301 {
302 	struct amdgpu_device *adev = ring->adev;
303 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
304 
305 	amdgpu_ring_write(ring,
306 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0));
307 	if (ring->funcs->parse_cs)
308 		amdgpu_ring_write(ring, 0);
309 	else
310 		amdgpu_ring_write(ring, (vmid | (vmid << 4)));
311 
312 	amdgpu_ring_write(ring,
313 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0));
314 	amdgpu_ring_write(ring, (vmid | (vmid << 4)));
315 
316 	amdgpu_ring_write(ring,
317 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
318 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
319 
320 	amdgpu_ring_write(ring,
321 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
322 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
323 
324 	amdgpu_ring_write(ring,
325 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_IB_SIZE), 0, 0, PACKETJ_TYPE0));
326 	amdgpu_ring_write(ring, ib->length_dw);
327 
328 	amdgpu_ring_write(ring,
329 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
330 	amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
331 
332 	amdgpu_ring_write(ring,
333 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
334 	amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
335 
336 	amdgpu_ring_write(ring,
337 		PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
338 	amdgpu_ring_write(ring, 0);
339 
340 	amdgpu_ring_write(ring,
341 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
342 	amdgpu_ring_write(ring, 0x01400200);
343 
344 	amdgpu_ring_write(ring,
345 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
346 	amdgpu_ring_write(ring, 0x2);
347 
348 	amdgpu_ring_write(ring,
349 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_STATUS), 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
350 	amdgpu_ring_write(ring, 0x2);
351 }
352 
jpeg_v1_0_decode_ring_emit_reg_wait(struct amdgpu_ring * ring,uint32_t reg,uint32_t val,uint32_t mask)353 static void jpeg_v1_0_decode_ring_emit_reg_wait(struct amdgpu_ring *ring,
354 					    uint32_t reg, uint32_t val,
355 					    uint32_t mask)
356 {
357 	struct amdgpu_device *adev = ring->adev;
358 	uint32_t reg_offset = (reg << 2);
359 
360 	amdgpu_ring_write(ring,
361 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
362 	amdgpu_ring_write(ring, 0x01400200);
363 
364 	amdgpu_ring_write(ring,
365 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
366 	amdgpu_ring_write(ring, val);
367 
368 	amdgpu_ring_write(ring,
369 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
370 	if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
371 		((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
372 		amdgpu_ring_write(ring, 0);
373 		amdgpu_ring_write(ring,
374 			PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
375 	} else {
376 		amdgpu_ring_write(ring, reg_offset);
377 		amdgpu_ring_write(ring,
378 			PACKETJ(0, 0, 0, PACKETJ_TYPE3));
379 	}
380 	amdgpu_ring_write(ring, mask);
381 }
382 
jpeg_v1_0_decode_ring_emit_vm_flush(struct amdgpu_ring * ring,unsigned vmid,uint64_t pd_addr)383 static void jpeg_v1_0_decode_ring_emit_vm_flush(struct amdgpu_ring *ring,
384 		unsigned vmid, uint64_t pd_addr)
385 {
386 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
387 	uint32_t data0, data1, mask;
388 
389 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
390 
391 	/* wait for register write */
392 	data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
393 	data1 = lower_32_bits(pd_addr);
394 	mask = 0xffffffff;
395 	jpeg_v1_0_decode_ring_emit_reg_wait(ring, data0, data1, mask);
396 }
397 
jpeg_v1_0_decode_ring_emit_wreg(struct amdgpu_ring * ring,uint32_t reg,uint32_t val)398 static void jpeg_v1_0_decode_ring_emit_wreg(struct amdgpu_ring *ring,
399 					uint32_t reg, uint32_t val)
400 {
401 	struct amdgpu_device *adev = ring->adev;
402 	uint32_t reg_offset = (reg << 2);
403 
404 	amdgpu_ring_write(ring,
405 		PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
406 	if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
407 			((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
408 		amdgpu_ring_write(ring, 0);
409 		amdgpu_ring_write(ring,
410 			PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
411 	} else {
412 		amdgpu_ring_write(ring, reg_offset);
413 		amdgpu_ring_write(ring,
414 			PACKETJ(0, 0, 0, PACKETJ_TYPE0));
415 	}
416 	amdgpu_ring_write(ring, val);
417 }
418 
jpeg_v1_0_decode_ring_nop(struct amdgpu_ring * ring,uint32_t count)419 static void jpeg_v1_0_decode_ring_nop(struct amdgpu_ring *ring, uint32_t count)
420 {
421 	int i;
422 
423 	WARN_ON(ring->wptr % 2 || count % 2);
424 
425 	for (i = 0; i < count / 2; i++) {
426 		amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
427 		amdgpu_ring_write(ring, 0);
428 	}
429 }
430 
/*
 * Interrupt-state callback for the JPEG irq source.  None of the
 * arguments are used; the function always reports success.
 *
 * Returns 0.
 */
static int jpeg_v1_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	/* nothing to program */
	return 0;
}
438 
jpeg_v1_0_process_interrupt(struct amdgpu_device * adev,struct amdgpu_irq_src * source,struct amdgpu_iv_entry * entry)439 static int jpeg_v1_0_process_interrupt(struct amdgpu_device *adev,
440 				      struct amdgpu_irq_src *source,
441 				      struct amdgpu_iv_entry *entry)
442 {
443 	DRM_DEBUG("IH: JPEG decode TRAP\n");
444 
445 	switch (entry->src_id) {
446 	case 126:
447 		amdgpu_fence_process(adev->jpeg.inst->ring_dec);
448 		break;
449 	default:
450 		DRM_ERROR("Unhandled interrupt: %d %d\n",
451 			  entry->src_id, entry->src_data[0]);
452 		break;
453 	}
454 
455 	return 0;
456 }
457 
458 /**
459  * jpeg_v1_0_early_init - set function pointers
460  *
461  * @handle: amdgpu_device pointer
462  *
463  * Set ring and irq function pointers
464  */
jpeg_v1_0_early_init(void * handle)465 int jpeg_v1_0_early_init(void *handle)
466 {
467 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
468 
469 	adev->jpeg.num_jpeg_inst = 1;
470 	adev->jpeg.num_jpeg_rings = 1;
471 
472 	jpeg_v1_0_set_dec_ring_funcs(adev);
473 	jpeg_v1_0_set_irq_funcs(adev);
474 
475 	return 0;
476 }
477 
478 /**
479  * jpeg_v1_0_sw_init - sw init for JPEG block
480  *
481  * @handle: amdgpu_device pointer
482  *
483  */
jpeg_v1_0_sw_init(void * handle)484 int jpeg_v1_0_sw_init(void *handle)
485 {
486 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
487 	struct amdgpu_ring *ring;
488 	int r;
489 
490 	/* JPEG TRAP */
491 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->jpeg.inst->irq);
492 	if (r)
493 		return r;
494 
495 	ring = adev->jpeg.inst->ring_dec;
496 	ring->vm_hub = AMDGPU_MMHUB0(0);
497 	snprintf(ring->name, sizeof(ring->name), "jpeg_dec");
498 	r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq,
499 			     0, AMDGPU_RING_PRIO_DEFAULT, NULL);
500 	if (r)
501 		return r;
502 
503 	adev->jpeg.internal.jpeg_pitch[0] = adev->jpeg.inst->external.jpeg_pitch[0] =
504 		SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
505 
506 	return 0;
507 }
508 
509 /**
510  * jpeg_v1_0_sw_fini - sw fini for JPEG block
511  *
512  * @handle: amdgpu_device pointer
513  *
514  * JPEG free up sw allocation
515  */
jpeg_v1_0_sw_fini(void * handle)516 void jpeg_v1_0_sw_fini(void *handle)
517 {
518 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
519 
520 	amdgpu_ring_fini(adev->jpeg.inst->ring_dec);
521 }
522 
523 /**
524  * jpeg_v1_0_start - start JPEG block
525  *
526  * @adev: amdgpu_device pointer
527  * @mode: SPG or DPG mode
528  *
529  * Setup and start the JPEG block
530  */
jpeg_v1_0_start(struct amdgpu_device * adev,int mode)531 void jpeg_v1_0_start(struct amdgpu_device *adev, int mode)
532 {
533 	struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
534 
535 	if (mode == 0) {
536 		WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
537 		WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
538 				UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
539 		WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr));
540 		WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr));
541 		WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR, 0);
542 		WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, 0);
543 		WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
544 	}
545 
546 	/* initialize wptr */
547 	ring->wptr = RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
548 
549 	/* copy patch commands to the jpeg ring */
550 	jpeg_v1_0_decode_ring_set_patch_ring(ring,
551 		(ring->wptr + ring->max_dw * amdgpu_sched_hw_submission));
552 }
553 
554 static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = {
555 	.type = AMDGPU_RING_TYPE_VCN_JPEG,
556 	.align_mask = 0xf,
557 	.nop = PACKET0(0x81ff, 0),
558 	.support_64bit_ptrs = false,
559 	.no_user_fence = true,
560 	.extra_dw = 64,
561 	.get_rptr = jpeg_v1_0_decode_ring_get_rptr,
562 	.get_wptr = jpeg_v1_0_decode_ring_get_wptr,
563 	.set_wptr = jpeg_v1_0_decode_ring_set_wptr,
564 	.parse_cs = jpeg_v1_dec_ring_parse_cs,
565 	.emit_frame_size =
566 		6 + 6 + /* hdp invalidate / flush */
567 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
568 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
569 		8 + /* jpeg_v1_0_decode_ring_emit_vm_flush */
570 		26 + 26 + /* jpeg_v1_0_decode_ring_emit_fence x2 vm fence */
571 		6,
572 	.emit_ib_size = 22, /* jpeg_v1_0_decode_ring_emit_ib */
573 	.emit_ib = jpeg_v1_0_decode_ring_emit_ib,
574 	.emit_fence = jpeg_v1_0_decode_ring_emit_fence,
575 	.emit_vm_flush = jpeg_v1_0_decode_ring_emit_vm_flush,
576 	.test_ring = amdgpu_jpeg_dec_ring_test_ring,
577 	.test_ib = amdgpu_jpeg_dec_ring_test_ib,
578 	.insert_nop = jpeg_v1_0_decode_ring_nop,
579 	.insert_start = jpeg_v1_0_decode_ring_insert_start,
580 	.insert_end = jpeg_v1_0_decode_ring_insert_end,
581 	.pad_ib = amdgpu_ring_generic_pad_ib,
582 	.begin_use = jpeg_v1_0_ring_begin_use,
583 	.end_use = vcn_v1_0_ring_end_use,
584 	.emit_wreg = jpeg_v1_0_decode_ring_emit_wreg,
585 	.emit_reg_wait = jpeg_v1_0_decode_ring_emit_reg_wait,
586 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
587 };
588 
jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device * adev)589 static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
590 {
591 	adev->jpeg.inst->ring_dec->funcs = &jpeg_v1_0_decode_ring_vm_funcs;
592 	DRM_INFO("JPEG decode is enabled in VM mode\n");
593 }
594 
595 static const struct amdgpu_irq_src_funcs jpeg_v1_0_irq_funcs = {
596 	.set = jpeg_v1_0_set_interrupt_state,
597 	.process = jpeg_v1_0_process_interrupt,
598 };
599 
jpeg_v1_0_set_irq_funcs(struct amdgpu_device * adev)600 static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev)
601 {
602 	adev->jpeg.inst->irq.funcs = &jpeg_v1_0_irq_funcs;
603 }
604 
jpeg_v1_0_ring_begin_use(struct amdgpu_ring * ring)605 static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring)
606 {
607 	struct	amdgpu_device *adev = ring->adev;
608 	bool	set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
609 	int		cnt = 0;
610 
611 	mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);
612 
613 	if (amdgpu_fence_wait_empty(&adev->vcn.inst->ring_dec))
614 		DRM_ERROR("JPEG dec: vcn dec ring may not be empty\n");
615 
616 	for (cnt = 0; cnt < adev->vcn.num_enc_rings; cnt++) {
617 		if (amdgpu_fence_wait_empty(&adev->vcn.inst->ring_enc[cnt]))
618 			DRM_ERROR("JPEG dec: vcn enc ring[%d] may not be empty\n", cnt);
619 	}
620 
621 	vcn_v1_0_set_pg_for_begin_use(ring, set_clocks);
622 }
623 
624 /**
625  * jpeg_v1_dec_ring_parse_cs - command submission parser
626  *
627  * @parser: Command submission parser context
628  * @job: the job to parse
629  * @ib: the IB to parse
630  *
631  * Parse the command stream, return -EINVAL for invalid packet,
632  * 0 otherwise
633  */
jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser * parser,struct amdgpu_job * job,struct amdgpu_ib * ib)634 static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
635 				     struct amdgpu_job *job,
636 				     struct amdgpu_ib *ib)
637 {
638 	u32 i, reg, res, cond, type;
639 	int ret = 0;
640 	struct amdgpu_device *adev = parser->adev;
641 
642 	for (i = 0; i < ib->length_dw ; i += 2) {
643 		reg  = CP_PACKETJ_GET_REG(ib->ptr[i]);
644 		res  = CP_PACKETJ_GET_RES(ib->ptr[i]);
645 		cond = CP_PACKETJ_GET_COND(ib->ptr[i]);
646 		type = CP_PACKETJ_GET_TYPE(ib->ptr[i]);
647 
648 		if (res || cond != PACKETJ_CONDITION_CHECK0) /* only allow 0 for now */
649 			return -EINVAL;
650 
651 		if (reg >= JPEG_V1_REG_RANGE_START && reg <= JPEG_V1_REG_RANGE_END)
652 			continue;
653 
654 		switch (type) {
655 		case PACKETJ_TYPE0:
656 			if (reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH &&
657 			    reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW &&
658 			    reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH &&
659 			    reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW &&
660 			    reg != JPEG_V1_REG_CTX_INDEX &&
661 			    reg != JPEG_V1_REG_CTX_DATA) {
662 				ret = -EINVAL;
663 			}
664 			break;
665 		case PACKETJ_TYPE1:
666 			if (reg != JPEG_V1_REG_CTX_DATA)
667 				ret = -EINVAL;
668 			break;
669 		case PACKETJ_TYPE3:
670 			if (reg != JPEG_V1_REG_SOFT_RESET)
671 				ret = -EINVAL;
672 			break;
673 		case PACKETJ_TYPE6:
674 			if (ib->ptr[i] != CP_PACKETJ_NOP)
675 				ret = -EINVAL;
676 			break;
677 		default:
678 			ret = -EINVAL;
679 		}
680 
681 		if (ret) {
682 			dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
683 			break;
684 		}
685 	}
686 
687 	return ret;
688 }
689