1 /* $NetBSD: amdgpu_uvd.c,v 1.9 2021/12/19 12:21:29 riastradh Exp $ */
2
3 /*
4 * Copyright 2011 Advanced Micro Devices, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * The above copyright notice and this permission notice (including the
24 * next paragraph) shall be included in all copies or substantial portions
25 * of the Software.
26 *
27 */
28 /*
29 * Authors:
30 * Christian König <deathsimple@vodafone.de>
31 */
32
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: amdgpu_uvd.c,v 1.9 2021/12/19 12:21:29 riastradh Exp $");
35
36 #include <linux/firmware.h>
37 #include <linux/module.h>
38
39 #include <drm/drm.h>
40
41 #include "amdgpu.h"
42 #include "amdgpu_pm.h"
43 #include "amdgpu_uvd.h"
44 #include "cikd.h"
45 #include "uvd/uvd_4_2_d.h"
46
47 #include "amdgpu_ras.h"
48 #include <linux/nbsd-namespace.h>
49
50 /* 1 second timeout */
51 #define UVD_IDLE_TIMEOUT msecs_to_jiffies(1000)
52
53 /* Firmware versions for VI */
54 #define FW_1_65_10 ((1 << 24) | (65 << 16) | (10 << 8))
55 #define FW_1_87_11 ((1 << 24) | (87 << 16) | (11 << 8))
56 #define FW_1_87_12 ((1 << 24) | (87 << 16) | (12 << 8))
57 #define FW_1_37_15 ((1 << 24) | (37 << 16) | (15 << 8))
58
59 /* Polaris10/11 firmware version */
60 #define FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8))
61
62 /* Firmware Names */
63 #ifdef CONFIG_DRM_AMDGPU_CIK
64 #define FIRMWARE_BONAIRE "amdgpu/bonaire_uvd.bin"
65 #define FIRMWARE_KABINI "amdgpu/kabini_uvd.bin"
66 #define FIRMWARE_KAVERI "amdgpu/kaveri_uvd.bin"
67 #define FIRMWARE_HAWAII "amdgpu/hawaii_uvd.bin"
68 #define FIRMWARE_MULLINS "amdgpu/mullins_uvd.bin"
69 #endif
70 #define FIRMWARE_TONGA "amdgpu/tonga_uvd.bin"
71 #define FIRMWARE_CARRIZO "amdgpu/carrizo_uvd.bin"
72 #define FIRMWARE_FIJI "amdgpu/fiji_uvd.bin"
73 #define FIRMWARE_STONEY "amdgpu/stoney_uvd.bin"
74 #define FIRMWARE_POLARIS10 "amdgpu/polaris10_uvd.bin"
75 #define FIRMWARE_POLARIS11 "amdgpu/polaris11_uvd.bin"
76 #define FIRMWARE_POLARIS12 "amdgpu/polaris12_uvd.bin"
77 #define FIRMWARE_VEGAM "amdgpu/vegam_uvd.bin"
78
79 #define FIRMWARE_VEGA10 "amdgpu/vega10_uvd.bin"
80 #define FIRMWARE_VEGA12 "amdgpu/vega12_uvd.bin"
81 #define FIRMWARE_VEGA20 "amdgpu/vega20_uvd.bin"
82
83 /* These are common relative offsets for all asics, from uvd_7_0_offset.h */
84 #define UVD_GPCOM_VCPU_CMD 0x03c3
85 #define UVD_GPCOM_VCPU_DATA0 0x03c4
86 #define UVD_GPCOM_VCPU_DATA1 0x03c5
87 #define UVD_NO_OP 0x03ff
88 #define UVD_BASE_SI 0x3800
89
90 /**
91 * amdgpu_uvd_cs_ctx - Command submission parser context
92 *
93 * Used for emulating virtual memory support on UVD 4.2.
94 */
95 struct amdgpu_uvd_cs_ctx {
96 struct amdgpu_cs_parser *parser;
97 unsigned reg, count;
98 unsigned data0, data1;
99 unsigned idx;
100 unsigned ib_idx;
101
102 /* does the IB have a msg command */
103 bool has_msg_cmd;
104
105 /* minimum buffer sizes */
106 unsigned *buf_sizes;
107 };
108
109 #ifdef CONFIG_DRM_AMDGPU_CIK
110 MODULE_FIRMWARE(FIRMWARE_BONAIRE);
111 MODULE_FIRMWARE(FIRMWARE_KABINI);
112 MODULE_FIRMWARE(FIRMWARE_KAVERI);
113 MODULE_FIRMWARE(FIRMWARE_HAWAII);
114 MODULE_FIRMWARE(FIRMWARE_MULLINS);
115 #endif
116 MODULE_FIRMWARE(FIRMWARE_TONGA);
117 MODULE_FIRMWARE(FIRMWARE_CARRIZO);
118 MODULE_FIRMWARE(FIRMWARE_FIJI);
119 MODULE_FIRMWARE(FIRMWARE_STONEY);
120 MODULE_FIRMWARE(FIRMWARE_POLARIS10);
121 MODULE_FIRMWARE(FIRMWARE_POLARIS11);
122 MODULE_FIRMWARE(FIRMWARE_POLARIS12);
123 MODULE_FIRMWARE(FIRMWARE_VEGAM);
124
125 MODULE_FIRMWARE(FIRMWARE_VEGA10);
126 MODULE_FIRMWARE(FIRMWARE_VEGA12);
127 MODULE_FIRMWARE(FIRMWARE_VEGA20);
128
129 static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
130
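/**
 * amdgpu_uvd_sw_init - UVD software init
 *
 * @adev: amdgpu_device pointer
 *
 * Request and validate the UVD firmware for this ASIC, determine the
 * firmware version and the number of supported session handles, and
 * allocate the VCPU buffer object for every UVD instance.
 */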
131 int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
132 {
133 unsigned long bo_size;
134 const char *fw_name;
135 const struct common_firmware_header *hdr;
136 unsigned family_id;
137 int i, j, r;
138
139 INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
140
141 switch (adev->asic_type) {
142 #ifdef CONFIG_DRM_AMDGPU_CIK
143 case CHIP_BONAIRE:
144 fw_name = FIRMWARE_BONAIRE;
145 break;
146 case CHIP_KABINI:
147 fw_name = FIRMWARE_KABINI;
148 break;
149 case CHIP_KAVERI:
150 fw_name = FIRMWARE_KAVERI;
151 break;
152 case CHIP_HAWAII:
153 fw_name = FIRMWARE_HAWAII;
154 break;
155 case CHIP_MULLINS:
156 fw_name = FIRMWARE_MULLINS;
157 break;
158 #endif
159 case CHIP_TONGA:
160 fw_name = FIRMWARE_TONGA;
161 break;
162 case CHIP_FIJI:
163 fw_name = FIRMWARE_FIJI;
164 break;
165 case CHIP_CARRIZO:
166 fw_name = FIRMWARE_CARRIZO;
167 break;
168 case CHIP_STONEY:
169 fw_name = FIRMWARE_STONEY;
170 break;
171 case CHIP_POLARIS10:
172 fw_name = FIRMWARE_POLARIS10;
173 break;
174 case CHIP_POLARIS11:
175 fw_name = FIRMWARE_POLARIS11;
176 break;
177 case CHIP_POLARIS12:
178 fw_name = FIRMWARE_POLARIS12;
179 break;
180 case CHIP_VEGA10:
181 fw_name = FIRMWARE_VEGA10;
182 break;
183 case CHIP_VEGA12:
184 fw_name = FIRMWARE_VEGA12;
185 break;
186 case CHIP_VEGAM:
187 fw_name = FIRMWARE_VEGAM;
188 break;
189 case CHIP_VEGA20:
190 fw_name = FIRMWARE_VEGA20;
191 break;
192 default:
193 return -EINVAL;
194 }
195
196 r = request_firmware(&adev->uvd.fw, fw_name, adev->dev);
197 if (r) {
198 dev_err(adev->dev, "amdgpu_uvd: Can't load firmware \"%s\"\n",
199 fw_name);
200 return r;
201 }
202
203 r = amdgpu_ucode_validate(adev->uvd.fw);
204 if (r) {
205 dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n",
206 fw_name);
207 release_firmware(adev->uvd.fw);
208 adev->uvd.fw = NULL;
209 return r;
210 }
211
212 /* Set the default UVD handles that the firmware can handle */
213 adev->uvd.max_handles = AMDGPU_DEFAULT_UVD_HANDLES;
214
215 hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
216 family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
217
218 if (adev->asic_type < CHIP_VEGA20) {
219 unsigned version_major, version_minor;
220
221 version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
222 version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
223 DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n",
224 version_major, version_minor, family_id);
225
226 /*
227 * Limit the number of UVD handles depending on microcode major
228 * and minor versions. Firmware version 1.80 is the first with
229 * support for 40 UVD handles, so all later versions are assumed
230 * to support them as well.
231 */
232 if ((version_major > 0x01) ||
233 ((version_major == 0x01) && (version_minor >= 0x50)))
234 adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;
235
236 adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) |
237 (family_id << 8));
238
239 if ((adev->asic_type == CHIP_POLARIS10 ||
240 adev->asic_type == CHIP_POLARIS11) &&
241 (adev->uvd.fw_version < FW_1_66_16))
242 DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n",
243 version_major, version_minor);
244 } else {
245 unsigned int enc_major, enc_minor, dec_minor;
246
247 dec_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
248 enc_minor = (le32_to_cpu(hdr->ucode_version) >> 24) & 0x3f;
249 enc_major = (le32_to_cpu(hdr->ucode_version) >> 30) & 0x3;
250 DRM_INFO("Found UVD firmware ENC: %hu.%hu DEC: .%hu Family ID: %hu\n",
251 enc_major, enc_minor, dec_minor, family_id);
252
253 adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;
254
255 adev->uvd.fw_version = le32_to_cpu(hdr->ucode_version);
256 }
257
258 bo_size = AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE
259 + AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles;
260 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
261 bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
262
263 for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
264 if (adev->uvd.harvest_config & (1 << j))
265 continue;
266 r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
267 AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo,
268 &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr);
269 if (r) {
270 dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
271 return r;
272 }
273 }
274
275 for (i = 0; i < adev->uvd.max_handles; ++i) {
276 atomic_set(&adev->uvd.handles[i], 0);
277 adev->uvd.filp[i] = NULL;
278 }
279
280 /* from uvd v5.0 HW addressing capacity increased to 64 bits */
281 if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
282 adev->uvd.address_64_bit = true;
283
284 switch (adev->asic_type) {
285 case CHIP_TONGA:
286 adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_65_10;
287 break;
288 case CHIP_CARRIZO:
289 adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_87_11;
290 break;
291 case CHIP_FIJI:
292 adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_87_12;
293 break;
294 case CHIP_STONEY:
295 adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_37_15;
296 break;
297 default:
298 adev->uvd.use_ctx_buf = adev->asic_type >= CHIP_POLARIS10;
299 }
300
301 return 0;
302 }
303
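/**
 * amdgpu_uvd_sw_fini - UVD software teardown
 *
 * @adev: amdgpu_device pointer
 *
 * Destroy the scheduler entity, free the saved VCPU image, the VCPU
 * buffer objects and the rings of every instance, and release the
 * firmware.
 */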
304 int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
305 {
306 int i, j;
307
308 cancel_delayed_work_sync(&adev->uvd.idle_work);
309 drm_sched_entity_destroy(&adev->uvd.entity);
310
311 for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
312 if (adev->uvd.harvest_config & (1 << j))
313 continue;
314 kvfree(adev->uvd.inst[j].saved_bo);
315
316 amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo,
317 &adev->uvd.inst[j].gpu_addr,
318 (void **)&adev->uvd.inst[j].cpu_addr);
319
320 amdgpu_ring_fini(&adev->uvd.inst[j].ring);
321
322 for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
323 amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
324 }
325 release_firmware(adev->uvd.fw);
326
327 return 0;
328 }
329
330 /**
331 * amdgpu_uvd_entity_init - init entity
332 *
333 * @adev: amdgpu_device pointer
334 *
335 */
336 int amdgpu_uvd_entity_init(struct amdgpu_device *adev)
337 {
338 struct amdgpu_ring *ring;
339 struct drm_gpu_scheduler *sched;
340 int r;
341
342 ring = &adev->uvd.inst[0].ring;
343 sched = &ring->sched;
344 r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL,
345 &sched, 1, NULL);
346 if (r) {
347 DRM_ERROR("Failed setting up UVD kernel entity.\n");
348 return r;
349 }
350
351 return 0;
352 }
353
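/**
 * amdgpu_uvd_suspend - save UVD VCPU state
 *
 * @adev: amdgpu_device pointer
 *
 * Save the VCPU buffer of every instance into system memory so it can
 * be restored on resume; on ASICs before Polaris10 this is skipped
 * when no session handles are open. If a RAS ATHUB interrupt was
 * triggered, the saved copy is zeroed instead, since the VCPU buffer
 * contents may already be corrupted.
 */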
354 int amdgpu_uvd_suspend(struct amdgpu_device *adev)
355 {
356 unsigned size;
357 void *ptr;
358 int i, j;
359 bool in_ras_intr = amdgpu_ras_intr_triggered();
360
361 cancel_delayed_work_sync(&adev->uvd.idle_work);
362
363 /* only valid for physical mode */
364 if (adev->asic_type < CHIP_POLARIS10) {
365 for (i = 0; i < adev->uvd.max_handles; ++i)
366 if (atomic_read(&adev->uvd.handles[i]))
367 break;
368
369 if (i == adev->uvd.max_handles)
370 return 0;
371 }
372
373 for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
374 if (adev->uvd.harvest_config & (1 << j))
375 continue;
376 if (adev->uvd.inst[j].vcpu_bo == NULL)
377 continue;
378
379 size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo);
380 ptr = adev->uvd.inst[j].cpu_addr;
381
382 adev->uvd.inst[j].saved_bo = kvmalloc(size, GFP_KERNEL);
383 if (!adev->uvd.inst[j].saved_bo)
384 return -ENOMEM;
385
386 /* re-write 0 since err_event_athub will corrupt VCPU buffer */
387 if (in_ras_intr)
388 memset(adev->uvd.inst[j].saved_bo, 0, size);
389 else
390 memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
391 }
392
393 if (in_ras_intr)
394 DRM_WARN("UVD VCPU state may be lost due to RAS ERREVENT_ATHUB_INTERRUPT\n");
395
396 return 0;
397 }
398
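/**
 * amdgpu_uvd_resume - restore UVD VCPU state
 *
 * @adev: amdgpu_device pointer
 *
 * Copy the saved VCPU image back into the VCPU buffer if one exists;
 * otherwise re-copy the firmware image (for non-PSP loading), zero the
 * remainder and force fence completion to restore the fence sequence.
 */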
399 int amdgpu_uvd_resume(struct amdgpu_device *adev)
400 {
401 unsigned size;
402 void *ptr;
403 int i;
404
405 for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
406 if (adev->uvd.harvest_config & (1 << i))
407 continue;
408 if (adev->uvd.inst[i].vcpu_bo == NULL)
409 return -EINVAL;
410
411 size = amdgpu_bo_size(adev->uvd.inst[i].vcpu_bo);
412 ptr = adev->uvd.inst[i].cpu_addr;
413
414 if (adev->uvd.inst[i].saved_bo != NULL) {
415 memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size);
416 kvfree(adev->uvd.inst[i].saved_bo);
417 adev->uvd.inst[i].saved_bo = NULL;
418 } else {
419 const struct common_firmware_header *hdr;
420 unsigned offset;
421
422 hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
423 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
424 offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
425 memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset,
426 le32_to_cpu(hdr->ucode_size_bytes));
427 size -= le32_to_cpu(hdr->ucode_size_bytes);
428 ptr += le32_to_cpu(hdr->ucode_size_bytes);
429 }
430 memset_io(ptr, 0, size);
431 /* to restore uvd fence seq */
432 amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring);
433 }
434 }
435 return 0;
436 }
437
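/**
 * amdgpu_uvd_free_handles - free all handles owned by a DRM file
 *
 * @adev: amdgpu_device pointer
 * @filp: DRM file the handles belong to
 *
 * Send a destroy message for every session handle still owned by
 * @filp and mark those handles as free again.
 */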
438 void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
439 {
440 struct amdgpu_ring *ring = &adev->uvd.inst[0].ring;
441 int i, r;
442
443 for (i = 0; i < adev->uvd.max_handles; ++i) {
444 uint32_t handle = atomic_read(&adev->uvd.handles[i]);
445
446 if (handle != 0 && adev->uvd.filp[i] == filp) {
447 struct dma_fence *fence;
448
449 r = amdgpu_uvd_get_destroy_msg(ring, handle, false,
450 &fence);
451 if (r) {
452 DRM_ERROR("Error destroying UVD %d!\n", r);
453 continue;
454 }
455
456 dma_fence_wait(fence, false);
457 dma_fence_put(fence);
458
459 adev->uvd.filp[i] = NULL;
460 atomic_set(&adev->uvd.handles[i], 0);
461 }
462 }
463 }
464
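/**
 * amdgpu_uvd_force_into_uvd_segment - restrict placement to the UVD segment
 *
 * @abo: buffer object to place
 *
 * Restrict all placements of @abo to the first 256MB, the segment UVD
 * can address on chips without 64 bit address support.
 */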
465 static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo)
466 {
467 int i;
468 for (i = 0; i < abo->placement.num_placement; ++i) {
469 abo->placements[i].fpfn = 0 >> PAGE_SHIFT;
470 abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
471 }
472 }
473
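/**
 * amdgpu_uvd_get_addr_from_ctx - read the buffer address from the command
 *
 * @ctx: UVD parser context
 *
 * Read the DATA0 (low) and DATA1 (high) dwords recorded in the context
 * and combine them into a 64 bit GPU address.
 */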
474 static u64 amdgpu_uvd_get_addr_from_ctx(struct amdgpu_uvd_cs_ctx *ctx)
475 {
476 uint32_t lo, hi;
477 uint64_t addr;
478
479 lo = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data0);
480 hi = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data1);
481 addr = ((uint64_t)lo) | (((uint64_t)hi) << 32);
482
483 return addr;
484 }
485
486 /**
487 * amdgpu_uvd_cs_pass1 - first parsing round
488 *
489 * @ctx: UVD parser context
490 *
491 * Make sure UVD message and feedback buffers are in VRAM and
492 * nobody is violating a 256MB boundary.
493 */
494 static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
495 {
496 struct ttm_operation_ctx tctx = { false, false };
497 struct amdgpu_bo_va_mapping *mapping;
498 struct amdgpu_bo *bo;
499 uint32_t cmd;
500 uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx);
501 int r = 0;
502
503 r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
504 if (r) {
505 DRM_ERROR("Can't find BO for addr 0x%08"PRIx64"\n", addr);
506 return r;
507 }
508
509 if (!ctx->parser->adev->uvd.address_64_bit) {
510 /* check if it's a message or feedback command */
511 cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
512 if (cmd == 0x0 || cmd == 0x3) {
513 /* yes, force it into VRAM */
514 uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
515 amdgpu_bo_placement_from_domain(bo, domain);
516 }
517 amdgpu_uvd_force_into_uvd_segment(bo);
518
519 r = ttm_bo_validate(&bo->tbo, &bo->placement, &tctx);
520 }
521
522 return r;
523 }
524
525 /**
526 * amdgpu_uvd_cs_msg_decode - handle UVD decode message
527 *
528 * @msg: pointer to message structure
529 * @buf_sizes: returned buffer sizes
530 *
531 * Peek into the decode message and calculate the necessary buffer sizes.
532 */
533 static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
534 unsigned buf_sizes[])
535 {
536 unsigned stream_type = msg[4];
537 unsigned width = msg[6];
538 unsigned height = msg[7];
539 unsigned dpb_size = msg[9];
540 unsigned pitch = msg[28];
541 unsigned level = msg[57];
542
543 unsigned width_in_mb = width / 16;
544 unsigned height_in_mb = ALIGN(height / 16, 2);
545 unsigned fs_in_mb = width_in_mb * height_in_mb;
546
547 unsigned image_size, tmp, min_dpb_size, num_dpb_buffer;
548 unsigned min_ctx_size = ~0;
549
550 image_size = width * height;
551 image_size += image_size / 2;
552 image_size = ALIGN(image_size, 1024);
553
554 switch (stream_type) {
555 case 0: /* H264 */
556 switch(level) {
557 case 30:
558 num_dpb_buffer = 8100 / fs_in_mb;
559 break;
560 case 31:
561 num_dpb_buffer = 18000 / fs_in_mb;
562 break;
563 case 32:
564 num_dpb_buffer = 20480 / fs_in_mb;
565 break;
566 case 41:
567 num_dpb_buffer = 32768 / fs_in_mb;
568 break;
569 case 42:
570 num_dpb_buffer = 34816 / fs_in_mb;
571 break;
572 case 50:
573 num_dpb_buffer = 110400 / fs_in_mb;
574 break;
575 case 51:
576 num_dpb_buffer = 184320 / fs_in_mb;
577 break;
578 default:
579 num_dpb_buffer = 184320 / fs_in_mb;
580 break;
581 }
582 num_dpb_buffer++;
583 if (num_dpb_buffer > 17)
584 num_dpb_buffer = 17;
585
586 /* reference picture buffer */
587 min_dpb_size = image_size * num_dpb_buffer;
588
589 /* macroblock context buffer */
590 min_dpb_size += width_in_mb * height_in_mb * num_dpb_buffer * 192;
591
592 /* IT surface buffer */
593 min_dpb_size += width_in_mb * height_in_mb * 32;
594 break;
595
596 case 1: /* VC1 */
597
598 /* reference picture buffer */
599 min_dpb_size = image_size * 3;
600
601 /* CONTEXT_BUFFER */
602 min_dpb_size += width_in_mb * height_in_mb * 128;
603
604 /* IT surface buffer */
605 min_dpb_size += width_in_mb * 64;
606
607 /* DB surface buffer */
608 min_dpb_size += width_in_mb * 128;
609
610 /* BP */
611 tmp = max(width_in_mb, height_in_mb);
612 min_dpb_size += ALIGN(tmp * 7 * 16, 64);
613 break;
614
615 case 3: /* MPEG2 */
616
617 /* reference picture buffer */
618 min_dpb_size = image_size * 3;
619 break;
620
621 case 4: /* MPEG4 */
622
623 /* reference picture buffer */
624 min_dpb_size = image_size * 3;
625
626 /* CM */
627 min_dpb_size += width_in_mb * height_in_mb * 64;
628
629 /* IT surface buffer */
630 min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
631 break;
632
633 case 7: /* H264 Perf */
634 switch(level) {
635 case 30:
636 num_dpb_buffer = 8100 / fs_in_mb;
637 break;
638 case 31:
639 num_dpb_buffer = 18000 / fs_in_mb;
640 break;
641 case 32:
642 num_dpb_buffer = 20480 / fs_in_mb;
643 break;
644 case 41:
645 num_dpb_buffer = 32768 / fs_in_mb;
646 break;
647 case 42:
648 num_dpb_buffer = 34816 / fs_in_mb;
649 break;
650 case 50:
651 num_dpb_buffer = 110400 / fs_in_mb;
652 break;
653 case 51:
654 num_dpb_buffer = 184320 / fs_in_mb;
655 break;
656 default:
657 num_dpb_buffer = 184320 / fs_in_mb;
658 break;
659 }
660 num_dpb_buffer++;
661 if (num_dpb_buffer > 17)
662 num_dpb_buffer = 17;
663
664 /* reference picture buffer */
665 min_dpb_size = image_size * num_dpb_buffer;
666
667 if (!adev->uvd.use_ctx_buf){
668 /* macroblock context buffer */
669 min_dpb_size +=
670 width_in_mb * height_in_mb * num_dpb_buffer * 192;
671
672 /* IT surface buffer */
673 min_dpb_size += width_in_mb * height_in_mb * 32;
674 } else {
675 /* macroblock context buffer */
676 min_ctx_size =
677 width_in_mb * height_in_mb * num_dpb_buffer * 192;
678 }
679 break;
680
681 case 8: /* MJPEG */
682 min_dpb_size = 0;
683 break;
684
685 case 16: /* H265 */
686 image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2;
687 image_size = ALIGN(image_size, 256);
688
689 num_dpb_buffer = (le32_to_cpu(msg[59]) & 0xff) + 2;
690 min_dpb_size = image_size * num_dpb_buffer;
691 min_ctx_size = ((width + 255) / 16) * ((height + 255) / 16)
692 * 16 * num_dpb_buffer + 52 * 1024;
693 break;
694
695 default:
696 DRM_ERROR("UVD codec not handled %d!\n", stream_type);
697 return -EINVAL;
698 }
699
700 if (width > pitch) {
701 DRM_ERROR("Invalid UVD decoding target pitch!\n");
702 return -EINVAL;
703 }
704
705 if (dpb_size < min_dpb_size) {
706 DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
707 dpb_size, min_dpb_size);
708 return -EINVAL;
709 }
710
711 buf_sizes[0x1] = dpb_size;
712 buf_sizes[0x2] = image_size;
713 buf_sizes[0x4] = min_ctx_size;
714 /* store image width to adjust nb memory pstate */
715 adev->uvd.decode_image_width = width;
716 return 0;
717 }
718
719 /**
720 * amdgpu_uvd_cs_msg - handle UVD message
721 *
722 * @ctx: UVD parser context
723 * @bo: buffer object containing the message
724 * @offset: offset into the buffer object
725 *
726 * Peek into the UVD message and extract the session id.
727 * Make sure that we don't open up too many sessions.
728 */
729 static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
730 struct amdgpu_bo *bo, unsigned offset)
731 {
732 struct amdgpu_device *adev = ctx->parser->adev;
733 int32_t *msg, msg_type, handle;
734 void *ptr;
735 long r;
736 int i;
737
738 if (offset & 0x3F) {
739 DRM_ERROR("UVD messages must be 64 byte aligned!\n");
740 return -EINVAL;
741 }
742
743 r = amdgpu_bo_kmap(bo, &ptr);
744 if (r) {
745 DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
746 return r;
747 }
748
749 msg = ptr + offset;
750
751 msg_type = msg[1];
752 handle = msg[2];
753
754 if (handle == 0) {
755 DRM_ERROR("Invalid UVD handle!\n");
756 return -EINVAL;
757 }
758
759 switch (msg_type) {
760 case 0:
761 /* it's a create msg, calc image size (width * height) */
762 amdgpu_bo_kunmap(bo);
763
764 /* try to alloc a new handle */
765 for (i = 0; i < adev->uvd.max_handles; ++i) {
766 if (atomic_read(&adev->uvd.handles[i]) == handle) {
767 DRM_ERROR("Handle 0x%x already in use!\n",
768 handle);
769 return -EINVAL;
770 }
771
772 if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) {
773 adev->uvd.filp[i] = ctx->parser->filp;
774 return 0;
775 }
776 }
777
778 DRM_ERROR("No more free UVD handles!\n");
779 return -ENOSPC;
780
781 case 1:
782 /* it's a decode msg, calc buffer sizes */
783 r = amdgpu_uvd_cs_msg_decode(adev, msg, ctx->buf_sizes);
784 amdgpu_bo_kunmap(bo);
785 if (r)
786 return r;
787
788 /* validate the handle */
789 for (i = 0; i < adev->uvd.max_handles; ++i) {
790 if (atomic_read(&adev->uvd.handles[i]) == handle) {
791 if (adev->uvd.filp[i] != ctx->parser->filp) {
792 DRM_ERROR("UVD handle collision detected!\n");
793 return -EINVAL;
794 }
795 return 0;
796 }
797 }
798
799 DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
800 return -ENOENT;
801
802 case 2:
803 /* it's a destroy msg, free the handle */
804 for (i = 0; i < adev->uvd.max_handles; ++i)
805 atomic_cmpxchg(&adev->uvd.handles[i], handle, 0);
806 amdgpu_bo_kunmap(bo);
807 return 0;
808
809 default:
810 DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
811 return -EINVAL;
812 }
813 BUG();
814 return -EINVAL;
815 }
816
817 /**
818 * amdgpu_uvd_cs_pass2 - second parsing round
819 *
820 * @ctx: UVD parser context
821 *
822 * Patch buffer addresses, make sure buffer sizes are correct.
823 */
824 static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
825 {
826 struct amdgpu_bo_va_mapping *mapping;
827 struct amdgpu_bo *bo;
828 uint32_t cmd;
829 uint64_t start, end;
830 uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx);
831 int r;
832
833 r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
834 if (r) {
835 DRM_ERROR("Can't find BO for addr 0x%08"PRIx64"\n", addr);
836 return r;
837 }
838
839 start = amdgpu_bo_gpu_offset(bo);
840
841 end = (mapping->last + 1 - mapping->start);
842 end = end * AMDGPU_GPU_PAGE_SIZE + start;
843
844 addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
845 start += addr;
846
847 amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data0,
848 lower_32_bits(start));
849 amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data1,
850 upper_32_bits(start));
851
852 cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
853 if (cmd < 0x4) {
854 if ((end - start) < ctx->buf_sizes[cmd]) {
855 DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
856 (unsigned)(end - start),
857 ctx->buf_sizes[cmd]);
858 return -EINVAL;
859 }
860
861 } else if (cmd == 0x206) {
862 if ((end - start) < ctx->buf_sizes[4]) {
863 DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
864 (unsigned)(end - start),
865 ctx->buf_sizes[4]);
866 return -EINVAL;
867 }
868 } else if ((cmd != 0x100) && (cmd != 0x204)) {
869 DRM_ERROR("invalid UVD command %X!\n", cmd);
870 return -EINVAL;
871 }
872
873 if (!ctx->parser->adev->uvd.address_64_bit) {
874 if ((start >> 28) != ((end - 1) >> 28)) {
875 DRM_ERROR("reloc %"PRIX64"-%"PRIX64" crossing 256MB boundary!\n",
876 start, end);
877 return -EINVAL;
878 }
879
880 if ((cmd == 0 || cmd == 0x3) &&
881 (start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) {
882 DRM_ERROR("msg/fb buffer %"PRIX64"-%"PRIX64" out of 256MB segment!\n",
883 start, end);
884 return -EINVAL;
885 }
886 }
887
888 if (cmd == 0) {
889 ctx->has_msg_cmd = true;
890 r = amdgpu_uvd_cs_msg(ctx, bo, addr);
891 if (r)
892 return r;
893 } else if (!ctx->has_msg_cmd) {
894 DRM_ERROR("Message needed before other commands are sent!\n");
895 return -EINVAL;
896 }
897
898 return 0;
899 }
900
901 /**
902 * amdgpu_uvd_cs_reg - parse register writes
903 *
904 * @ctx: UVD parser context
905 * @cb: callback function
906 *
907 * Parse the register writes, call cb on each complete command.
908 */
909 static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
910 int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
911 {
912 struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
913 int i, r;
914
915 ctx->idx++;
916 for (i = 0; i <= ctx->count; ++i) {
917 unsigned reg = ctx->reg + i;
918
919 if (ctx->idx >= ib->length_dw) {
920 DRM_ERROR("Register command after end of CS!\n");
921 return -EINVAL;
922 }
923
924 switch (reg) {
925 case mmUVD_GPCOM_VCPU_DATA0:
926 ctx->data0 = ctx->idx;
927 break;
928 case mmUVD_GPCOM_VCPU_DATA1:
929 ctx->data1 = ctx->idx;
930 break;
931 case mmUVD_GPCOM_VCPU_CMD:
932 r = cb(ctx);
933 if (r)
934 return r;
935 break;
936 case mmUVD_ENGINE_CNTL:
937 case mmUVD_NO_OP:
938 break;
939 default:
940 DRM_ERROR("Invalid reg 0x%X!\n", reg);
941 return -EINVAL;
942 }
943 ctx->idx++;
944 }
945 return 0;
946 }
947
948 /**
949 * amdgpu_uvd_cs_packets - parse UVD packets
950 *
951 * @ctx: UVD parser context
952 * @cb: callback function
953 *
954 * Parse the command stream packets.
955 */
956 static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
957 int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
958 {
959 struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
960 int r;
961
962 for (ctx->idx = 0 ; ctx->idx < ib->length_dw; ) {
963 uint32_t cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx);
964 unsigned type = CP_PACKET_GET_TYPE(cmd);
965 switch (type) {
966 case PACKET_TYPE0:
967 ctx->reg = CP_PACKET0_GET_REG(cmd);
968 ctx->count = CP_PACKET_GET_COUNT(cmd);
969 r = amdgpu_uvd_cs_reg(ctx, cb);
970 if (r)
971 return r;
972 break;
973 case PACKET_TYPE2:
974 ++ctx->idx;
975 break;
976 default:
977 DRM_ERROR("Unknown packet type %d !\n", type);
978 return -EINVAL;
979 }
980 }
981 return 0;
982 }
983
984 /**
985 * amdgpu_uvd_ring_parse_cs - UVD command submission parser
986 *
987 * @parser: Command submission parser context
988 *
989 * Parse the command stream, patch in addresses as necessary.
990 */
991 int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
992 {
993 struct amdgpu_uvd_cs_ctx ctx = {};
994 unsigned buf_sizes[] = {
995 [0x00000000] = 2048,
996 [0x00000001] = 0xFFFFFFFF,
997 [0x00000002] = 0xFFFFFFFF,
998 [0x00000003] = 2048,
999 [0x00000004] = 0xFFFFFFFF,
1000 };
1001 struct amdgpu_ib *ib = &parser->job->ibs[ib_idx];
1002 int r;
1003
1004 parser->job->vm = NULL;
1005 ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
1006
1007 if (ib->length_dw % 16) {
1008 DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
1009 ib->length_dw);
1010 return -EINVAL;
1011 }
1012
1013 ctx.parser = parser;
1014 ctx.buf_sizes = buf_sizes;
1015 ctx.ib_idx = ib_idx;
1016
1017 /* first round only required on chips without UVD 64 bit address support */
1018 if (!parser->adev->uvd.address_64_bit) {
1019 /* first round, make sure the buffers are actually in the UVD segment */
1020 r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass1);
1021 if (r)
1022 return r;
1023 }
1024
1025 /* second round, patch buffer addresses into the command stream */
1026 r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass2);
1027 if (r)
1028 return r;
1029
1030 if (!ctx.has_msg_cmd) {
1031 DRM_ERROR("UVD-IBs need a msg command!\n");
1032 return -EINVAL;
1033 }
1034
1035 return 0;
1036 }
1037
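/**
 * amdgpu_uvd_send_msg - submit a UVD message buffer to the VCPU
 *
 * @ring: UVD ring to submit on
 * @bo: buffer object containing the message
 * @direct: submit directly to the ring instead of through the UVD entity
 * @fence: optional fence returned to the caller
 *
 * Build a small IB that hands the message buffer address to the VCPU
 * and submit it; the buffer is fenced with the resulting fence and
 * then released.
 */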
1038 static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
1039 bool direct, struct dma_fence **fence)
1040 {
1041 struct amdgpu_device *adev = ring->adev;
1042 struct dma_fence *f = NULL;
1043 struct amdgpu_job *job;
1044 struct amdgpu_ib *ib;
1045 uint32_t data[4];
1046 uint64_t addr;
1047 long r;
1048 int i;
1049 unsigned offset_idx = 0;
1050 unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
1051
1052 amdgpu_bo_kunmap(bo);
1053 amdgpu_bo_unpin(bo);
1054
1055 if (!ring->adev->uvd.address_64_bit) {
1056 struct ttm_operation_ctx ctx = { true, false };
1057
1058 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
1059 amdgpu_uvd_force_into_uvd_segment(bo);
1060 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
1061 if (r)
1062 goto err;
1063 }
1064
1065 r = amdgpu_job_alloc_with_ib(adev, 64, &job);
1066 if (r)
1067 goto err;
1068
1069 if (adev->asic_type >= CHIP_VEGA10) {
1070 offset_idx = 1 + ring->me;
1071 offset[1] = adev->reg_offset[UVD_HWIP][0][1];
1072 offset[2] = adev->reg_offset[UVD_HWIP][1][1];
1073 }
1074
1075 data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0);
1076 data[1] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA1, 0);
1077 data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0);
1078 data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
1079
1080 ib = &job->ibs[0];
1081 addr = amdgpu_bo_gpu_offset(bo);
1082 ib->ptr[0] = data[0];
1083 ib->ptr[1] = addr;
1084 ib->ptr[2] = data[1];
1085 ib->ptr[3] = addr >> 32;
1086 ib->ptr[4] = data[2];
1087 ib->ptr[5] = 0;
1088 for (i = 6; i < 16; i += 2) {
1089 ib->ptr[i] = data[3];
1090 ib->ptr[i+1] = 0;
1091 }
1092 ib->length_dw = 16;
1093
1094 if (direct) {
1095 r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv,
1096 true, false,
1097 msecs_to_jiffies(10));
1098 if (r == 0)
1099 r = -ETIMEDOUT;
1100 if (r < 0)
1101 goto err_free;
1102
1103 r = amdgpu_job_submit_direct(job, ring, &f);
1104 if (r)
1105 goto err_free;
1106 } else {
1107 r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv,
1108 AMDGPU_FENCE_OWNER_UNDEFINED, false);
1109 if (r)
1110 goto err_free;
1111
1112 r = amdgpu_job_submit(job, &adev->uvd.entity,
1113 AMDGPU_FENCE_OWNER_UNDEFINED, &f);
1114 if (r)
1115 goto err_free;
1116 }
1117
1118 amdgpu_bo_fence(bo, f, false);
1119 amdgpu_bo_unreserve(bo);
1120 amdgpu_bo_unref(&bo);
1121
1122 if (fence)
1123 *fence = dma_fence_get(f);
1124 dma_fence_put(f);
1125
1126 return 0;
1127
1128 err_free:
1129 amdgpu_job_free(job);
1130
1131 err:
1132 amdgpu_bo_unreserve(bo);
1133 amdgpu_bo_unref(&bo);
1134 return r;
1135 }
1136
1137 /* multiple fence commands without any stream commands in between can
1138 crash the vcpu so just try to emit a dummy create/destroy msg to
1139 avoid this */
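/**
 * amdgpu_uvd_get_create_msg - send a dummy session create message
 *
 * @ring: UVD ring to submit on
 * @handle: session handle to use
 * @fence: optional fence returned to the caller
 *
 * Build a dummy create message for @handle in a newly allocated buffer
 * object and submit it directly, e.g. for the IB test.
 */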
1140 int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
1141 struct dma_fence **fence)
1142 {
1143 struct amdgpu_device *adev = ring->adev;
1144 struct amdgpu_bo *bo = NULL;
1145 uint32_t *msg;
1146 int r, i;
1147
1148 r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
1149 AMDGPU_GEM_DOMAIN_VRAM,
1150 &bo, NULL, (void **)&msg);
1151 if (r)
1152 return r;
1153
1154 /* stitch together an UVD create msg */
1155 msg[0] = cpu_to_le32(0x00000de4);
1156 msg[1] = cpu_to_le32(0x00000000);
1157 msg[2] = cpu_to_le32(handle);
1158 msg[3] = cpu_to_le32(0x00000000);
1159 msg[4] = cpu_to_le32(0x00000000);
1160 msg[5] = cpu_to_le32(0x00000000);
1161 msg[6] = cpu_to_le32(0x00000000);
1162 msg[7] = cpu_to_le32(0x00000780);
1163 msg[8] = cpu_to_le32(0x00000440);
1164 msg[9] = cpu_to_le32(0x00000000);
1165 msg[10] = cpu_to_le32(0x01b37000);
1166 for (i = 11; i < 1024; ++i)
1167 msg[i] = cpu_to_le32(0x0);
1168
1169 return amdgpu_uvd_send_msg(ring, bo, true, fence);
1170 }
1171
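/**
 * amdgpu_uvd_get_destroy_msg - send a dummy session destroy message
 *
 * @ring: UVD ring to submit on
 * @handle: session handle to destroy
 * @direct: submit directly instead of through the UVD entity
 * @fence: optional fence returned to the caller
 *
 * Build a dummy destroy message for @handle and submit it.
 */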
1172 int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
1173 bool direct, struct dma_fence **fence)
1174 {
1175 struct amdgpu_device *adev = ring->adev;
1176 struct amdgpu_bo *bo = NULL;
1177 uint32_t *msg;
1178 int r, i;
1179
1180 r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
1181 AMDGPU_GEM_DOMAIN_VRAM,
1182 &bo, NULL, (void **)&msg);
1183 if (r)
1184 return r;
1185
1186 /* stitch together an UVD destroy msg */
1187 msg[0] = cpu_to_le32(0x00000de4);
1188 msg[1] = cpu_to_le32(0x00000002);
1189 msg[2] = cpu_to_le32(handle);
1190 msg[3] = cpu_to_le32(0x00000000);
1191 for (i = 4; i < 1024; ++i)
1192 msg[i] = cpu_to_le32(0x0);
1193
1194 return amdgpu_uvd_send_msg(ring, bo, direct, fence);
1195 }
1196
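/**
 * amdgpu_uvd_idle_work_handler - power down UVD when idle
 *
 * @work: the delayed work item
 *
 * Count the fences still outstanding on all UVD decode and encode
 * rings; if there are none, power down UVD via DPM or clock/power
 * gating, otherwise re-arm the idle timer.
 */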
1197 static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
1198 {
1199 struct amdgpu_device *adev =
1200 container_of(work, struct amdgpu_device, uvd.idle_work.work);
1201 unsigned fences = 0, i, j;
1202
1203 for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
1204 if (adev->uvd.harvest_config & (1 << i))
1205 continue;
1206 fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
1207 for (j = 0; j < adev->uvd.num_enc_rings; ++j) {
1208 fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
1209 }
1210 }
1211
1212 if (fences == 0) {
1213 if (adev->pm.dpm_enabled) {
1214 amdgpu_dpm_enable_uvd(adev, false);
1215 } else {
1216 amdgpu_asic_set_uvd_clocks(adev, 0, 0);
1217 /* shutdown the UVD block */
1218 amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
1219 AMD_PG_STATE_GATE);
1220 amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
1221 AMD_CG_STATE_GATE);
1222 }
1223 } else {
1224 schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
1225 }
1226 }
1227
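/**
 * amdgpu_uvd_ring_begin_use - power up UVD before ring use
 *
 * @ring: UVD ring about to be used
 *
 * Cancel the idle work; if none was pending (UVD may already be power
 * gated), ungate the UVD clocks and power before commands are
 * submitted. Does nothing under SR-IOV.
 */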
1228 void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
1229 {
1230 struct amdgpu_device *adev = ring->adev;
1231 bool set_clocks;
1232
1233 if (amdgpu_sriov_vf(adev))
1234 return;
1235
1236 set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
1237 if (set_clocks) {
1238 if (adev->pm.dpm_enabled) {
1239 amdgpu_dpm_enable_uvd(adev, true);
1240 } else {
1241 amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
1242 amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
1243 AMD_CG_STATE_UNGATE);
1244 amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
1245 AMD_PG_STATE_UNGATE);
1246 }
1247 }
1248 }
1249
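/**
 * amdgpu_uvd_ring_end_use - schedule UVD idle handling
 *
 * @ring: UVD ring we stopped using
 *
 * Re-arm the idle work so the UVD block can be powered down again
 * after the idle timeout expires.
 */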
1250 void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
1251 {
1252 if (!amdgpu_sriov_vf(ring->adev))
1253 schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
1254 }
1255
1256 /**
1257 * amdgpu_uvd_ring_test_ib - test ib execution
1258 *
1259 * @ring: amdgpu_ring pointer
1260 *
1261 * Test if we can successfully execute an IB
1262 */
1263 int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1264 {
1265 struct dma_fence *fence;
1266 long r;
1267
1268 r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
1269 if (r)
1270 goto error;
1271
1272 r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
1273 if (r)
1274 goto error;
1275
1276 r = dma_fence_wait_timeout(fence, false, timeout);
1277 if (r == 0)
1278 r = -ETIMEDOUT;
1279 else if (r > 0)
1280 r = 0;
1281
1282 dma_fence_put(fence);
1283
1284 error:
1285 return r;
1286 }
1287
1288 /**
1289 * amdgpu_uvd_used_handles - returns used UVD handles
1290 *
1291 * @adev: amdgpu_device pointer
1292 *
1293 * Returns the number of UVD handles in use
1294 */
1295 uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev)
1296 {
1297 unsigned i;
1298 uint32_t used_handles = 0;
1299
1300 for (i = 0; i < adev->uvd.max_handles; ++i) {
1301 /*
1302 * Handles can be freed in any order, and not
1303 * necessarily linear. So we need to count
1304 * all non-zero handles.
1305 */
1306 if (atomic_read(&adev->uvd.handles[i]))
1307 used_handles++;
1308 }
1309
1310 return used_handles;
1311 }
1312