/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <deathsimple@vodafone.de>
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>

#include "radeon.h"
#include "r600d.h"

/* 1 second timeout */
#define UVD_IDLE_TIMEOUT_MS	1000

/* Firmware Names */
#define FIRMWARE_R600		"radeonkmsfw_R600_uvd"
#define FIRMWARE_RS780		"radeonkmsfw_RS780_uvd"
#define FIRMWARE_RV770		"radeonkmsfw_RV770_uvd"
#define FIRMWARE_RV710		"radeonkmsfw_RV710_uvd"
#define FIRMWARE_CYPRESS	"radeonkmsfw_CYPRESS_uvd"
#define FIRMWARE_SUMO		"radeonkmsfw_SUMO_uvd"
#define FIRMWARE_TAHITI		"radeonkmsfw_TAHITI_uvd"
#define FIRMWARE_BONAIRE	"radeonkmsfw_BONAIRE_uvd"

MODULE_FIRMWARE(FIRMWARE_R600);
MODULE_FIRMWARE(FIRMWARE_RS780);
MODULE_FIRMWARE(FIRMWARE_RV770);
MODULE_FIRMWARE(FIRMWARE_RV710);
MODULE_FIRMWARE(FIRMWARE_CYPRESS);
MODULE_FIRMWARE(FIRMWARE_SUMO);
MODULE_FIRMWARE(FIRMWARE_TAHITI);
MODULE_FIRMWARE(FIRMWARE_BONAIRE);

static void radeon_uvd_idle_work_handler(struct work_struct *work);

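/**
 * radeon_uvd_init - start up the UVD block
 *
 * @rdev: radeon_device pointer
 *
 * Pick and load the UVD firmware for the chip family, allocate and
 * pin the buffer object holding the firmware image, stack and heap,
 * and clear the handle bookkeeping.
 */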
int radeon_uvd_init(struct radeon_device *rdev)
{
	unsigned long bo_size;
	const char *fw_name;
	int i, r;

	INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler);

	switch (rdev->family) {
	case CHIP_RV610:
	case CHIP_RV630:
	case CHIP_RV670:
	case CHIP_RV620:
	case CHIP_RV635:
		fw_name = FIRMWARE_R600;
		break;

	case CHIP_RS780:
	case CHIP_RS880:
		fw_name = FIRMWARE_RS780;
		break;

	case CHIP_RV770:
		fw_name = FIRMWARE_RV770;
		break;

	case CHIP_RV710:
	case CHIP_RV730:
	case CHIP_RV740:
		fw_name = FIRMWARE_RV710;
		break;

	case CHIP_CYPRESS:
	case CHIP_HEMLOCK:
	case CHIP_JUNIPER:
	case CHIP_REDWOOD:
	case CHIP_CEDAR:
		fw_name = FIRMWARE_CYPRESS;
		break;

	case CHIP_SUMO:
	case CHIP_SUMO2:
	case CHIP_PALM:
	case CHIP_CAYMAN:
	case CHIP_BARTS:
	case CHIP_TURKS:
	case CHIP_CAICOS:
		fw_name = FIRMWARE_SUMO;
		break;

	case CHIP_TAHITI:
	case CHIP_VERDE:
	case CHIP_PITCAIRN:
	case CHIP_ARUBA:
	case CHIP_OLAND:
		fw_name = FIRMWARE_TAHITI;
		break;

	case CHIP_BONAIRE:
	case CHIP_KABINI:
	case CHIP_KAVERI:
	case CHIP_HAWAII:
	case CHIP_MULLINS:
		fw_name = FIRMWARE_BONAIRE;
		break;

	default:
		return -EINVAL;
	}

	r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev);
	if (r) {
		dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->datasize + 8) +
		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE +
		  RADEON_GPU_PAGE_SIZE;
	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
			     RADEON_GEM_DOMAIN_VRAM, 0, NULL, &rdev->uvd.vcpu_bo);
	if (r) {
		dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
		return r;
	}

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (r) {
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r);
		return r;
	}

	r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->uvd.gpu_addr);
	if (r) {
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r);
		return r;
	}

	r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr);
	if (r) {
		dev_err(rdev->dev, "(%d) UVD map failed\n", r);
		return r;
	}

	radeon_bo_unreserve(rdev->uvd.vcpu_bo);

	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
		atomic_set(&rdev->uvd.handles[i], 0);
		rdev->uvd.filp[i] = NULL;
		rdev->uvd.img_size[i] = 0;
	}

	return 0;
}

/**
 * radeon_uvd_fini - tear down the UVD block
 *
 * @rdev: radeon_device pointer
 *
 * Unmap, unpin and free the VCPU buffer object, tear down the UVD
 * ring and release the firmware.
 */
void radeon_uvd_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->uvd.vcpu_bo == NULL)
		return;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (!r) {
		radeon_bo_kunmap(rdev->uvd.vcpu_bo);
		radeon_bo_unpin(rdev->uvd.vcpu_bo);
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	}

	radeon_bo_unref(&rdev->uvd.vcpu_bo);

	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX]);

	release_firmware(rdev->uvd_fw);
}

/**
 * radeon_uvd_suspend - save UVD state before suspend
 *
 * @rdev: radeon_device pointer
 *
 * If any streams are still open, save the session area behind the
 * firmware image so it can be restored on resume.
 */
int radeon_uvd_suspend(struct radeon_device *rdev)
{
	unsigned size;
	char *ptr;
	int i;

	if (rdev->uvd.vcpu_bo == NULL)
		return 0;

	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i)
		if (atomic_read(&rdev->uvd.handles[i]))
			break;

	if (i == RADEON_MAX_UVD_HANDLES)
		return 0;

	size = radeon_bo_size(rdev->uvd.vcpu_bo);
	size -= rdev->uvd_fw->datasize;

	ptr = rdev->uvd.cpu_addr;
	ptr += rdev->uvd_fw->datasize;

	rdev->uvd.saved_bo = kmalloc(size, M_DRM, M_WAITOK);
	memcpy(rdev->uvd.saved_bo, ptr, size);

	return 0;
}

/**
 * radeon_uvd_resume - restore UVD state after resume
 *
 * @rdev: radeon_device pointer
 *
 * Reupload the firmware image and restore the saved session area,
 * or clear it if nothing was saved.
 */
int radeon_uvd_resume(struct radeon_device *rdev)
{
	unsigned size;
	char *ptr;

	if (rdev->uvd.vcpu_bo == NULL)
		return -EINVAL;

	memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->datasize);

	size = radeon_bo_size(rdev->uvd.vcpu_bo);
	size -= rdev->uvd_fw->datasize;

	ptr = rdev->uvd.cpu_addr;
	ptr += rdev->uvd_fw->datasize;

	if (rdev->uvd.saved_bo != NULL) {
		memcpy(ptr, rdev->uvd.saved_bo, size);
		kfree(rdev->uvd.saved_bo);
		rdev->uvd.saved_bo = NULL;
	} else
		memset(ptr, 0, size);

	return 0;
}

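/**
 * radeon_uvd_force_into_uvd_segment - restrict a BO to the UVD segments
 *
 * @rbo: buffer object to restrict
 * @allowed_domains: domains the caller allows for this BO
 *
 * Limit all placements to the first 256MB segment; unless VRAM is
 * mandatory, add a second 256MB segment as an alternative placement.
 */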
void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo,
				       uint32_t allowed_domains)
{
	int i;

	for (i = 0; i < rbo->placement.num_placement; ++i) {
		rbo->placements[i].fpfn = 0 >> PAGE_SHIFT;
		rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
	}

	/* If it must be in VRAM, it must be in the first segment as well */
	if (allowed_domains == RADEON_GEM_DOMAIN_VRAM)
		return;

	/* abort if we already have more than one placement */
	if (rbo->placement.num_placement > 1)
		return;

	/* add another 256MB segment */
	rbo->placements[1] = rbo->placements[0];
	rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
	rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
	rbo->placement.num_placement++;
	rbo->placement.num_busy_placement++;
}

/**
 * radeon_uvd_free_handles - close all streams still owned by a file
 *
 * @rdev: radeon_device pointer
 * @filp: DRM file the handles belong to
 *
 * Send a destroy message for every open handle of the closing file
 * and wait for it to finish before releasing the handle.
 */
void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
{
	int i, r;
	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
		uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
		if (handle != 0 && rdev->uvd.filp[i] == filp) {
			struct radeon_fence *fence;

			radeon_uvd_note_usage(rdev);

			r = radeon_uvd_get_destroy_msg(rdev,
				R600_RING_TYPE_UVD_INDEX, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			radeon_fence_wait(fence, false);
			radeon_fence_unref(&fence);

			rdev->uvd.filp[i] = NULL;
			atomic_set(&rdev->uvd.handles[i], 0);
		}
	}
}

/**
 * radeon_uvd_cs_msg_decode - validate a decode message
 *
 * @msg: pointer to the mapped message
 * @buf_sizes: minimum buffer sizes to fill in
 *
 * Calculate the minimum DPB size for the stream type and check it
 * against the sizes given in the message.
 */
static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
{
	unsigned stream_type = msg[4];
	unsigned width = msg[6];
	unsigned height = msg[7];
	unsigned dpb_size = msg[9];
	unsigned pitch = msg[28];

	unsigned width_in_mb = width / 16;
	unsigned height_in_mb = ALIGN(height / 16, 2);

	unsigned image_size, tmp, min_dpb_size;

	image_size = width * height;
	image_size += image_size / 2;
	image_size = ALIGN(image_size, 1024);

	switch (stream_type) {
	case 0: /* H264 */

		/* reference picture buffer */
		min_dpb_size = image_size * 17;

		/* macroblock context buffer */
		min_dpb_size += width_in_mb * height_in_mb * 17 * 192;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * height_in_mb * 32;
		break;

	case 1: /* VC1 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CONTEXT_BUFFER */
		min_dpb_size += width_in_mb * height_in_mb * 128;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * 64;

		/* DB surface buffer */
		min_dpb_size += width_in_mb * 128;

		/* BP */
		tmp = max(width_in_mb, height_in_mb);
		min_dpb_size += ALIGN(tmp * 7 * 16, 64);
		break;

	case 3: /* MPEG2 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;
		break;

	case 4: /* MPEG4 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CM */
		min_dpb_size += width_in_mb * height_in_mb * 64;

		/* IT surface buffer */
		min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
		break;

	default:
		DRM_ERROR("UVD codec not handled %d!\n", stream_type);
		return -EINVAL;
	}

	if (width > pitch) {
		DRM_ERROR("Invalid UVD decoding target pitch!\n");
		return -EINVAL;
	}

	if (dpb_size < min_dpb_size) {
		DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
			  dpb_size, min_dpb_size);
		return -EINVAL;
	}

	buf_sizes[0x1] = dpb_size;
	buf_sizes[0x2] = image_size;
	return 0;
}

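/**
 * radeon_uvd_cs_msg - handle a UVD message from the command stream
 *
 * @p: parser context
 * @bo: buffer object containing the message
 * @offset: offset of the message inside the BO
 * @buf_sizes: minimum buffer sizes, filled in for decode messages
 *
 * Map and check the message, then track the handle it refers to:
 * decode messages update the buffer sizes, destroy messages free the
 * handle, create messages allocate a new one.
 */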
static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
			     unsigned offset, unsigned buf_sizes[])
{
	int32_t *msg, msg_type, handle;
	unsigned img_size = 0;
	void *ptr;

	int i, r;

	if (offset & 0x3F) {
		DRM_ERROR("UVD messages must be 64-byte aligned!\n");
		return -EINVAL;
	}

	if (bo->tbo.sync_obj) {
		r = radeon_fence_wait(bo->tbo.sync_obj, false);
		if (r) {
			DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
			return r;
		}
	}

	r = radeon_bo_kmap(bo, &ptr);
	if (r) {
		DRM_ERROR("Failed mapping the UVD message (%d)!\n", r);
		return r;
	}

	msg = (uint32_t*)((uint8_t*)ptr + offset);

	msg_type = msg[1];
	handle = msg[2];

	if (handle == 0) {
		DRM_ERROR("Invalid UVD handle!\n");
		return -EINVAL;
	}

	if (msg_type == 1) {
		/* it's a decode msg, calc buffer sizes */
		r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
		/* calc image size (width * height) */
		img_size = msg[6] * msg[7];
		radeon_bo_kunmap(bo);
		if (r)
			return r;

	} else if (msg_type == 2) {
		/* it's a destroy msg, free the handle */
		for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i)
			atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0);
		radeon_bo_kunmap(bo);
		return 0;
	} else {
		/* it's a create msg, calc image size (width * height) */
		img_size = msg[7] * msg[8];
		radeon_bo_kunmap(bo);

		if (msg_type != 0) {
			DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
			return -EINVAL;
		}

		/* it's a create msg, no special handling needed */
	}

	/* create or decode, validate the handle */
	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
		if (atomic_read(&p->rdev->uvd.handles[i]) == handle)
			return 0;
	}

	/* handle not found, try to allocate a new one */
	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
		if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
			p->rdev->uvd.filp[i] = p->filp;
			p->rdev->uvd.img_size[i] = img_size;
			return 0;
		}
	}

	DRM_ERROR("No more free UVD handles!\n");
	return -EINVAL;
}

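/**
 * radeon_uvd_cs_reloc - patch a relocated buffer address into the IB
 *
 * @p: parser context
 * @data0: IB index of the UVD_GPCOM_VCPU_DATA0 value
 * @data1: IB index of the UVD_GPCOM_VCPU_DATA1 value
 * @buf_sizes: minimum buffer sizes for the current command
 * @has_msg_cmd: set once a message command has been seen
 *
 * Write the GPU address of the relocated buffer into the IB and check
 * that the buffer is large enough and doesn't cross a 256MB boundary.
 */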
static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
			       int data0, int data1,
			       unsigned buf_sizes[], bool *has_msg_cmd)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_reloc *reloc;
	unsigned idx, cmd, offset;
	uint64_t start, end;
	int r;

	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	offset = radeon_get_ib_value(p, data0);
	idx = radeon_get_ib_value(p, data1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d!\n",
			  idx, relocs_chunk->length_dw);
		return -EINVAL;
	}

	reloc = p->relocs_ptr[(idx / 4)];
	start = reloc->gpu_offset;
	end = start + radeon_bo_size(reloc->robj);
	start += offset;

	p->ib.ptr[data0] = start & 0xFFFFFFFF;
	p->ib.ptr[data1] = start >> 32;

	cmd = radeon_get_ib_value(p, p->idx) >> 1;

	if (cmd < 0x4) {
		if (end <= start) {
			DRM_ERROR("invalid reloc offset %X!\n", offset);
			return -EINVAL;
		}
		if ((end - start) < buf_sizes[cmd]) {
			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
				  (unsigned)(end - start), buf_sizes[cmd]);
			return -EINVAL;
		}

	} else if (cmd != 0x100) {
		DRM_ERROR("invalid UVD command %X!\n", cmd);
		return -EINVAL;
	}

	if ((start >> 28) != ((end - 1) >> 28)) {
		DRM_ERROR("reloc %lX-%lX crossing 256MB boundary!\n",
			  start, end);
		return -EINVAL;
	}

	/* TODO: is this still necessary on NI+ ? */
	if ((cmd == 0 || cmd == 0x3) &&
	    (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) {
		DRM_ERROR("msg/fb buffer %lX-%lX out of 256MB segment!\n",
			  start, end);
		return -EINVAL;
	}

	if (cmd == 0) {
		if (*has_msg_cmd) {
			DRM_ERROR("More than one message in a UVD-IB!\n");
			return -EINVAL;
		}
		*has_msg_cmd = true;
		r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes);
		if (r)
			return r;
	} else if (!*has_msg_cmd) {
		DRM_ERROR("Message needed before other commands are sent!\n");
		return -EINVAL;
	}

	return 0;
}

static int radeon_uvd_cs_reg(struct radeon_cs_parser *p,
			     struct radeon_cs_packet *pkt,
			     int *data0, int *data1,
			     unsigned buf_sizes[],
			     bool *has_msg_cmd)
{
	int i, r;

	p->idx++;
	for (i = 0; i <= pkt->count; ++i) {
		switch (pkt->reg + i*4) {
		case UVD_GPCOM_VCPU_DATA0:
			*data0 = p->idx;
			break;
		case UVD_GPCOM_VCPU_DATA1:
			*data1 = p->idx;
			break;
		case UVD_GPCOM_VCPU_CMD:
			r = radeon_uvd_cs_reloc(p, *data0, *data1,
						buf_sizes, has_msg_cmd);
			if (r)
				return r;
			break;
		case UVD_ENGINE_CNTL:
			break;
		default:
			DRM_ERROR("Invalid reg 0x%X!\n",
				  pkt->reg + i*4);
			return -EINVAL;
		}
		p->idx++;
	}
	return 0;
}

/**
 * radeon_uvd_cs_parse - parse and validate a UVD command stream
 *
 * @p: parser context
 *
 * Walk all packets of the IB, resolve the relocations and make sure
 * exactly one message command is contained in the stream.
 */
int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	int r, data0 = 0, data1 = 0;

	/* does the IB have a msg command */
	bool has_msg_cmd = false;

	/* minimum buffer sizes */
	unsigned buf_sizes[] = {
		[0x00000000]	=	2048,
		[0x00000001]	=	32 * 1024 * 1024,
		[0x00000002]	=	2048 * 1152 * 3,
		[0x00000003]	=	2048,
	};

	if (p->chunks[p->chunk_ib_idx].length_dw % 16) {
		DRM_ERROR("UVD IB length (%d) not 16-dword aligned!\n",
			  p->chunks[p->chunk_ib_idx].length_dw);
		return -EINVAL;
	}

	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk!\n");
		return -EINVAL;
	}

	do {
		r = radeon_cs_packet_parse(p, &pkt, p->idx);
		if (r)
			return r;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1,
					      buf_sizes, &has_msg_cmd);
			if (r)
				return r;
			break;
		case RADEON_PACKET_TYPE2:
			p->idx += pkt.count + 2;
			break;
		default:
			DRM_ERROR("Unknown packet type %d!\n", pkt.type);
			return -EINVAL;
		}
	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);

	if (!has_msg_cmd) {
		DRM_ERROR("UVD-IBs need a msg command!\n");
		return -EINVAL;
	}

	return 0;
}

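/**
 * radeon_uvd_send_msg - submit a message buffer to the UVD ring
 *
 * @rdev: radeon_device pointer
 * @ring: ring index to submit on
 * @addr: GPU address of the message
 * @fence: optional resulting fence
 *
 * Build a small IB pointing the VCPU at the message and schedule it.
 */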
static int radeon_uvd_send_msg(struct radeon_device *rdev,
			       int ring, uint64_t addr,
			       struct radeon_fence **fence)
{
	struct radeon_ib ib;
	int i, r;

	r = radeon_ib_get(rdev, ring, &ib, NULL, 64);
	if (r)
		return r;

	ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0);
	ib.ptr[1] = addr;
	ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0);
	ib.ptr[3] = addr >> 32;
	ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0);
	ib.ptr[5] = 0;
	for (i = 6; i < 16; ++i)
		ib.ptr[i] = PACKET2(0);
	ib.length_dw = 16;

	r = radeon_ib_schedule(rdev, &ib, NULL, false);

	if (fence)
		*fence = radeon_fence_ref(ib.fence);

	radeon_ib_free(rdev, &ib);
	return r;
}

/* multiple fence commands without any stream commands in between can
   crash the vcpu so just try to emit a dummy create/destroy msg to
   avoid this */
int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
			      uint32_t handle, struct radeon_fence **fence)
{
	/* we use the last page of the vcpu bo for the UVD message */
	uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
		RADEON_GPU_PAGE_SIZE;

	uint32_t *msg = (uint32_t*)((uint8_t*)rdev->uvd.cpu_addr + offs);
	uint64_t addr = rdev->uvd.gpu_addr + offs;

	int r, i;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
	if (r)
		return r;

	/* stitch together a UVD create msg */
	msg[0] = cpu_to_le32(0x00000de4);
	msg[1] = cpu_to_le32(0x00000000);
	msg[2] = cpu_to_le32(handle);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(0x00000000);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000000);
	msg[7] = cpu_to_le32(0x00000780);
	msg[8] = cpu_to_le32(0x00000440);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x01b37000);
	for (i = 11; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	return r;
}

int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
			       uint32_t handle, struct radeon_fence **fence)
{
	/* we use the last page of the vcpu bo for the UVD message */
	uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
		RADEON_GPU_PAGE_SIZE;

	uint32_t *msg = (uint32_t*)((uint8_t*)rdev->uvd.cpu_addr + offs);
	uint64_t addr = rdev->uvd.gpu_addr + offs;

	int r, i;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
	if (r)
		return r;

	/* stitch together a UVD destroy msg */
	msg[0] = cpu_to_le32(0x00000de4);
	msg[1] = cpu_to_le32(0x00000002);
	msg[2] = cpu_to_le32(handle);
	msg[3] = cpu_to_le32(0x00000000);
	for (i = 4; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	return r;
}

/**
 * radeon_uvd_count_handles - count number of open streams
 *
 * @rdev: radeon_device pointer
 * @sd: number of SD streams
 * @hd: number of HD streams
 *
 * Count the number of open SD/HD streams as a hint for power management
 */
static void radeon_uvd_count_handles(struct radeon_device *rdev,
				     unsigned *sd, unsigned *hd)
{
	unsigned i;

	*sd = 0;
	*hd = 0;

	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
		if (!atomic_read(&rdev->uvd.handles[i]))
			continue;

		if (rdev->uvd.img_size[i] >= 720*576)
			++(*hd);
		else
			++(*sd);
	}
}

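/**
 * radeon_uvd_idle_work_handler - power down UVD when it is idle
 *
 * @work: delayed work item
 *
 * Once no emitted fences are outstanding, either let DPM power the
 * block down or drop the UVD clocks to zero; otherwise re-arm the
 * idle timer.
 */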
static void radeon_uvd_idle_work_handler(struct work_struct *work)
{
	struct radeon_device *rdev =
		container_of(work, struct radeon_device, uvd.idle_work.work);

	if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) {
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd,
						 &rdev->pm.dpm.hd);
			radeon_dpm_enable_uvd(rdev, false);
		} else {
			radeon_set_uvd_clocks(rdev, 0, 0);
		}
	} else {
		schedule_delayed_work(&rdev->uvd.idle_work,
				      msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
	}
}

/**
 * radeon_uvd_note_usage - note UVD activity
 *
 * @rdev: radeon_device pointer
 *
 * (Re)arm the idle timer and, if the block was idle, raise the UVD
 * clocks or tell DPM to power the block up again.
 */
void radeon_uvd_note_usage(struct radeon_device *rdev)
{
	bool streams_changed = false;
	bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work);
	set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work,
					    msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));

	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
		unsigned hd = 0, sd = 0;
		radeon_uvd_count_handles(rdev, &sd, &hd);
		if ((rdev->pm.dpm.sd != sd) ||
		    (rdev->pm.dpm.hd != hd)) {
			rdev->pm.dpm.sd = sd;
			rdev->pm.dpm.hd = hd;
			/* disable this for now */
			/*streams_changed = true;*/
		}
	}

	if (set_clocks || streams_changed) {
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			radeon_dpm_enable_uvd(rdev, true);
		} else {
			radeon_set_uvd_clocks(rdev, 53300, 40000);
		}
	}
}

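/**
 * radeon_uvd_calc_upll_post_div - calc UPLL post divider
 *
 * @vco_freq: VCO frequency
 * @target_freq: wanted output frequency
 * @pd_min: post divider minimum
 * @pd_even: post divider must be even above this value
 *
 * Pick a post divider that is at least pd_min, yields a frequency at
 * or below the target, and is even where required.
 */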
static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq,
					      unsigned target_freq,
					      unsigned pd_min,
					      unsigned pd_even)
{
	unsigned post_div = vco_freq / target_freq;

	/* adjust to post divider minimum value */
	if (post_div < pd_min)
		post_div = pd_min;

	/* we always need a frequency less than or equal to the target */
	if ((vco_freq / post_div) > target_freq)
		post_div += 1;

	/* post dividers above a certain value must be even */
	if (post_div > pd_even && post_div % 2)
		post_div += 1;

	return post_div;
}

/**
 * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers
 *
 * @rdev: radeon_device pointer
 * @vclk: wanted VCLK
 * @dclk: wanted DCLK
 * @vco_min: minimum VCO frequency
 * @vco_max: maximum VCO frequency
 * @fb_factor: factor to multiply vco freq with
 * @fb_mask: limit and bitmask for feedback divider
 * @pd_min: post divider minimum
 * @pd_max: post divider maximum
 * @pd_even: post divider must be even above this value
 * @optimal_fb_div: resulting feedback divider
 * @optimal_vclk_div: resulting vclk post divider
 * @optimal_dclk_div: resulting dclk post divider
 *
 * Calculate dividers for UVD's UPLL (R6xx-SI, except APUs).
 * Returns zero on success, -EINVAL on error.
 */
int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
				  unsigned vclk, unsigned dclk,
				  unsigned vco_min, unsigned vco_max,
				  unsigned fb_factor, unsigned fb_mask,
				  unsigned pd_min, unsigned pd_max,
				  unsigned pd_even,
				  unsigned *optimal_fb_div,
				  unsigned *optimal_vclk_div,
				  unsigned *optimal_dclk_div)
{
	unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq;

	/* start off with something large */
	unsigned optimal_score = ~0;

	/* loop through vco from low to high */
	vco_min = max(max(vco_min, vclk), dclk);
	for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {

		uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
		unsigned vclk_div, dclk_div, score;

		do_div(fb_div, ref_freq);

		/* fb div out of range? */
		if (fb_div > fb_mask)
			break; /* it can only get worse */

		fb_div &= fb_mask;

		/* calc vclk divider with current vco freq */
		vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk,
							 pd_min, pd_even);
		if (vclk_div > pd_max)
			break; /* vco is too big, it has to stop */

		/* calc dclk divider with current vco freq */
		dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk,
							 pd_min, pd_even);
		if (dclk_div > pd_max)
			break; /* vco is too big, it has to stop */

		/* calc score with current vco freq */
		score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div);

		/* determine if this vco setting is better than current optimal settings */
		if (score < optimal_score) {
			*optimal_fb_div = fb_div;
			*optimal_vclk_div = vclk_div;
			*optimal_dclk_div = dclk_div;
			optimal_score = score;
			if (optimal_score == 0)
				break; /* it can't get better than this */
		}
	}

	/* did we find a valid setup? */
	if (optimal_score == ~0)
		return -EINVAL;

	return 0;
}

/**
 * radeon_uvd_send_upll_ctlreq - handshake the UPLL control request
 *
 * @rdev: radeon_device pointer
 * @cg_upll_func_cntl: offset of the CG_UPLL_FUNC_CNTL register
 *
 * Assert UPLL_CTLREQ and wait for the PLL to acknowledge with both
 * CTLACK bits before deasserting the request again.
 */
int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
				unsigned cg_upll_func_cntl)
{
	unsigned i;

	/* make sure UPLL_CTLREQ is deasserted */
	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

	mdelay(10);

	/* assert UPLL_CTLREQ */
	WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);

	/* wait for CTLACK and CTLACK2 to get asserted */
	for (i = 0; i < 100; ++i) {
		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
		if ((RREG32(cg_upll_func_cntl) & mask) == mask)
			break;
		mdelay(10);
	}

	/* deassert UPLL_CTLREQ */
	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

	if (i == 100) {
		DRM_ERROR("Timeout setting UVD clocks!\n");
		return -ETIMEDOUT;
	}

	return 0;
}