/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <deathsimple@vodafone.de>
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>

#include "radeon.h"
#include "radeon_ucode.h"
#include "r600d.h"

/* 1 second timeout */
#define UVD_IDLE_TIMEOUT_MS	1000

/* Firmware Names */
#define FIRMWARE_R600		"radeonkmsfw_R600_uvd"
#define FIRMWARE_RS780		"radeonkmsfw_RS780_uvd"
#define FIRMWARE_RV770		"radeonkmsfw_RV770_uvd"
#define FIRMWARE_RV710		"radeonkmsfw_RV710_uvd"
#define FIRMWARE_CYPRESS	"radeonkmsfw_CYPRESS_uvd"
#define FIRMWARE_SUMO		"radeonkmsfw_SUMO_uvd"
#define FIRMWARE_TAHITI		"radeonkmsfw_TAHITI_uvd"
#define FIRMWARE_BONAIRE_LEGACY	"radeonkmsfw_BONAIRE_uvd"
#define FIRMWARE_BONAIRE	"radeonkmsfw_bonaire_uvd"

MODULE_FIRMWARE(FIRMWARE_R600);
MODULE_FIRMWARE(FIRMWARE_RS780);
MODULE_FIRMWARE(FIRMWARE_RV770);
MODULE_FIRMWARE(FIRMWARE_RV710);
MODULE_FIRMWARE(FIRMWARE_CYPRESS);
MODULE_FIRMWARE(FIRMWARE_SUMO);
MODULE_FIRMWARE(FIRMWARE_TAHITI);
MODULE_FIRMWARE(FIRMWARE_BONAIRE_LEGACY);
MODULE_FIRMWARE(FIRMWARE_BONAIRE);

static void radeon_uvd_idle_work_handler(struct work_struct *work);
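
/**
 * radeon_uvd_init - load firmware and allocate the UVD VCPU buffer
 *
 * @rdev: radeon_device pointer
 *
 * Pick the UVD firmware image for the chip family, load it, and
 * allocate, pin and map the buffer object that holds the firmware
 * image, stack, heap and per-handle session areas.
 */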
int radeon_uvd_init(struct radeon_device *rdev)
{
	unsigned long bo_size;
	const char *fw_name = NULL, *legacy_fw_name = NULL;
	int i, r;

	INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler);

	switch (rdev->family) {
	case CHIP_RV610:
	case CHIP_RV630:
	case CHIP_RV670:
	case CHIP_RV620:
	case CHIP_RV635:
		legacy_fw_name = FIRMWARE_R600;
		break;

	case CHIP_RS780:
	case CHIP_RS880:
		legacy_fw_name = FIRMWARE_RS780;
		break;

	case CHIP_RV770:
		legacy_fw_name = FIRMWARE_RV770;
		break;

	case CHIP_RV710:
	case CHIP_RV730:
	case CHIP_RV740:
		legacy_fw_name = FIRMWARE_RV710;
		break;

	case CHIP_CYPRESS:
	case CHIP_HEMLOCK:
	case CHIP_JUNIPER:
	case CHIP_REDWOOD:
	case CHIP_CEDAR:
		legacy_fw_name = FIRMWARE_CYPRESS;
		break;

	case CHIP_SUMO:
	case CHIP_SUMO2:
	case CHIP_PALM:
	case CHIP_CAYMAN:
	case CHIP_BARTS:
	case CHIP_TURKS:
	case CHIP_CAICOS:
		legacy_fw_name = FIRMWARE_SUMO;
		break;

	case CHIP_TAHITI:
	case CHIP_VERDE:
	case CHIP_PITCAIRN:
	case CHIP_ARUBA:
	case CHIP_OLAND:
		legacy_fw_name = FIRMWARE_TAHITI;
		break;

	case CHIP_BONAIRE:
	case CHIP_KABINI:
	case CHIP_KAVERI:
	case CHIP_HAWAII:
	case CHIP_MULLINS:
		legacy_fw_name = FIRMWARE_BONAIRE_LEGACY;
		fw_name = FIRMWARE_BONAIRE;
		break;

	default:
		return -EINVAL;
	}

	rdev->uvd.fw_header_present = false;
	rdev->uvd.max_handles = RADEON_DEFAULT_UVD_HANDLES;
	if (fw_name) {
		/* Let's try to load the newer firmware first */
		r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev);
		if (r) {
			dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
				fw_name);
		} else {
			const struct common_firmware_header *hdr =
				(const struct common_firmware_header *)rdev->uvd_fw->data;
			unsigned version_major, version_minor, family_id;

			r = radeon_ucode_validate(rdev->uvd_fw);
			if (r)
				return r;

			rdev->uvd.fw_header_present = true;

			family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
			version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
			version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
			DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n",
				 version_major, version_minor, family_id);

			/*
			 * Limit the number of UVD handles depending on
			 * microcode major and minor versions.
			 */
			if ((version_major >= 0x01) && (version_minor >= 0x37))
				rdev->uvd.max_handles = RADEON_MAX_UVD_HANDLES;
		}
	}

	/*
	 * In case there is only legacy firmware, or we encounter an error
	 * while loading the new firmware, we fall back to loading the legacy
	 * firmware now.
	 */
	if (!fw_name || r) {
		r = request_firmware(&rdev->uvd_fw, legacy_fw_name, rdev->dev);
		if (r) {
			dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
				legacy_fw_name);
			return r;
		}
	}

	bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->datasize + 8) +
		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE +
		  RADEON_UVD_SESSION_SIZE * rdev->uvd.max_handles;
	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
			     RADEON_GEM_DOMAIN_VRAM, 0, NULL, &rdev->uvd.vcpu_bo);
	if (r) {
		dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
		return r;
	}

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (r) {
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r);
		return r;
	}

	r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->uvd.gpu_addr);
	if (r) {
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r);
		return r;
	}

	r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr);
	if (r) {
		dev_err(rdev->dev, "(%d) UVD map failed\n", r);
		return r;
	}

	radeon_bo_unreserve(rdev->uvd.vcpu_bo);

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		atomic_set(&rdev->uvd.handles[i], 0);
		rdev->uvd.filp[i] = NULL;
		rdev->uvd.img_size[i] = 0;
	}

	return 0;
}
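
/**
 * radeon_uvd_fini - free the UVD resources again
 *
 * @rdev: radeon_device pointer
 *
 * Unpin and free the VCPU buffer object, tear down the UVD ring and
 * release the firmware image.
 */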
void radeon_uvd_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->uvd.vcpu_bo == NULL)
		return;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (!r) {
		radeon_bo_kunmap(rdev->uvd.vcpu_bo);
		radeon_bo_unpin(rdev->uvd.vcpu_bo);
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	}

	radeon_bo_unref(&rdev->uvd.vcpu_bo);

	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX]);

	release_firmware(rdev->uvd_fw);
}

int radeon_uvd_suspend(struct radeon_device *rdev)
{
	int i, r;

	if (rdev->uvd.vcpu_bo == NULL)
		return 0;

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
		if (handle != 0) {
			struct radeon_fence *fence;

			radeon_uvd_note_usage(rdev);

			r = radeon_uvd_get_destroy_msg(rdev,
					R600_RING_TYPE_UVD_INDEX, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			radeon_fence_wait(fence, false);
			radeon_fence_unref(&fence);

			rdev->uvd.filp[i] = NULL;
			atomic_set(&rdev->uvd.handles[i], 0);
		}
	}

	return 0;
}

int radeon_uvd_resume(struct radeon_device *rdev)
{
	unsigned size;
	char *ptr;

	if (rdev->uvd.vcpu_bo == NULL)
		return -EINVAL;

	memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->datasize);

	size = radeon_bo_size(rdev->uvd.vcpu_bo);
	size -= rdev->uvd_fw->datasize;

	ptr = rdev->uvd.cpu_addr;
	ptr += rdev->uvd_fw->datasize;

	memset(ptr, 0, size);

	return 0;
}

void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo,
				       uint32_t allowed_domains)
{
	int i;

	for (i = 0; i < rbo->placement.num_placement; ++i) {
		rbo->placements[i].fpfn = 0 >> PAGE_SHIFT;
		rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
	}

	/* If it must be in VRAM it must be in the first segment as well */
	if (allowed_domains == RADEON_GEM_DOMAIN_VRAM)
		return;

	/* abort if we already have more than one placement */
	if (rbo->placement.num_placement > 1)
		return;

	/* add another 256MB segment */
	rbo->placements[1] = rbo->placements[0];
	rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
	rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
	rbo->placement.num_placement++;
	rbo->placement.num_busy_placement++;
}

void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
{
	int i, r;

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
		if (handle != 0 && rdev->uvd.filp[i] == filp) {
			struct radeon_fence *fence;

			radeon_uvd_note_usage(rdev);

			r = radeon_uvd_get_destroy_msg(rdev,
					R600_RING_TYPE_UVD_INDEX, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			radeon_fence_wait(fence, false);
			radeon_fence_unref(&fence);

			rdev->uvd.filp[i] = NULL;
			atomic_set(&rdev->uvd.handles[i], 0);
		}
	}
}
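
/**
 * radeon_uvd_cs_msg_decode - validate a UVD decode message
 *
 * @msg: pointer to the decode message
 * @buf_sizes: minimum buffer sizes, filled in on success
 *
 * Calculate the minimum DPB size from the stream type and frame
 * dimensions in the message and check it against the size the
 * message actually advertises.
 */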
static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
{
	unsigned stream_type = msg[4];
	unsigned width = msg[6];
	unsigned height = msg[7];
	unsigned dpb_size = msg[9];
	unsigned pitch = msg[28];

	unsigned width_in_mb = width / 16;
	unsigned height_in_mb = ALIGN(height / 16, 2);

	unsigned image_size, tmp, min_dpb_size;

	image_size = width * height;
	image_size += image_size / 2;
	image_size = ALIGN(image_size, 1024);

	switch (stream_type) {
	case 0: /* H264 */

		/* reference picture buffer */
		min_dpb_size = image_size * 17;

		/* macroblock context buffer */
		min_dpb_size += width_in_mb * height_in_mb * 17 * 192;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * height_in_mb * 32;
		break;

	case 1: /* VC1 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CONTEXT_BUFFER */
		min_dpb_size += width_in_mb * height_in_mb * 128;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * 64;

		/* DB surface buffer */
		min_dpb_size += width_in_mb * 128;

		/* BP */
		tmp = max(width_in_mb, height_in_mb);
		min_dpb_size += ALIGN(tmp * 7 * 16, 64);
		break;

	case 3: /* MPEG2 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;
		break;

	case 4: /* MPEG4 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CM */
		min_dpb_size += width_in_mb * height_in_mb * 64;

		/* IT surface buffer */
		min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
		break;

	default:
		DRM_ERROR("UVD codec not handled %d!\n", stream_type);
		return -EINVAL;
	}

	if (width > pitch) {
		DRM_ERROR("Invalid UVD decoding target pitch!\n");
		return -EINVAL;
	}

	if (dpb_size < min_dpb_size) {
		DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
			  dpb_size, min_dpb_size);
		return -EINVAL;
	}

	buf_sizes[0x1] = dpb_size;
	buf_sizes[0x2] = image_size;
	return 0;
}

static int radeon_uvd_validate_codec(struct radeon_cs_parser *p,
				     unsigned stream_type)
{
	switch (stream_type) {
	case 0: /* H264 */
	case 1: /* VC1 */
		/* always supported */
		return 0;

	case 3: /* MPEG2 */
	case 4: /* MPEG4 */
		/* only since UVD 3 */
		if (p->rdev->family >= CHIP_PALM)
			return 0;

		/* fall through */
	default:
		DRM_ERROR("UVD codec not supported by hardware %d!\n",
			  stream_type);
		return -EINVAL;
	}
}
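
/**
 * radeon_uvd_cs_msg - handle a UVD message from the command stream
 *
 * @p: parser context
 * @bo: buffer object the message lives in
 * @offset: offset of the message inside the buffer object
 * @buf_sizes: minimum buffer sizes, filled in for decode messages
 *
 * Map the message and handle it depending on its type: create
 * messages allocate a session handle, decode messages validate the
 * codec and the handle ownership, destroy messages free the handle
 * again.
 */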
static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
			     unsigned offset, unsigned buf_sizes[])
{
	int32_t *msg, msg_type, handle;
	unsigned img_size = 0;
	void *ptr;

	int i, r;

	if (offset & 0x3F) {
		DRM_ERROR("UVD messages must be 64 byte aligned!\n");
		return -EINVAL;
	}

	if (bo->tbo.sync_obj) {
		r = radeon_fence_wait(bo->tbo.sync_obj, false);
		if (r) {
			DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
			return r;
		}
	}

	r = radeon_bo_kmap(bo, &ptr);
	if (r) {
		DRM_ERROR("Failed mapping the UVD message (%d)!\n", r);
		return r;
	}

	msg = (uint32_t*)((uint8_t*)ptr + offset);

	msg_type = msg[1];
	handle = msg[2];

	if (handle == 0) {
		DRM_ERROR("Invalid UVD handle!\n");
		radeon_bo_kunmap(bo);
		return -EINVAL;
	}

	switch (msg_type) {
	case 0:
		/* it's a create msg, calc image size (width * height) */
		img_size = msg[7] * msg[8];

		r = radeon_uvd_validate_codec(p, msg[4]);
		radeon_bo_kunmap(bo);
		if (r)
			return r;

		/* try to alloc a new handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i) {
			if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
				DRM_ERROR("Handle 0x%x already in use!\n", handle);
				return -EINVAL;
			}

			if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
				p->rdev->uvd.filp[i] = p->filp;
				p->rdev->uvd.img_size[i] = img_size;
				return 0;
			}
		}

		DRM_ERROR("No more free UVD handles!\n");
		return -EINVAL;

	case 1:
		/* it's a decode msg, validate codec and calc buffer sizes */
		r = radeon_uvd_validate_codec(p, msg[4]);
		if (!r)
			r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
		radeon_bo_kunmap(bo);
		if (r)
			return r;

		/* validate the handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i) {
			if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
				if (p->rdev->uvd.filp[i] != p->filp) {
					DRM_ERROR("UVD handle collision detected!\n");
					return -EINVAL;
				}
				return 0;
			}
		}

		DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
		return -ENOENT;

	case 2:
		/* it's a destroy msg, free the handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i)
			atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0);
		radeon_bo_kunmap(bo);
		return 0;

	default:
		DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
		radeon_bo_kunmap(bo);
		return -EINVAL;
	}

	BUG();
	return -EINVAL;
}
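
/**
 * radeon_uvd_cs_reloc - resolve a relocation in a UVD command stream
 *
 * @p: parser context
 * @data0: index of the dword receiving the lower 32 bits of the address
 * @data1: index of the dword receiving the upper 32 bits of the address
 * @buf_sizes: minimum buffer sizes per command type
 * @has_msg_cmd: set to true once a message command has been seen
 *
 * Patch the GPU address of the referenced buffer into the IB and
 * check the buffer size and placement constraints for the command.
 */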
static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
			       int data0, int data1,
			       unsigned buf_sizes[], bool *has_msg_cmd)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_bo_list *reloc;
	unsigned idx, cmd, offset;
	uint64_t start, end;
	int r;

	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	offset = radeon_get_ib_value(p, data0);
	idx = radeon_get_ib_value(p, data1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d!\n",
			  idx, relocs_chunk->length_dw);
		return -EINVAL;
	}

	reloc = p->relocs_ptr[(idx / 4)];
	start = reloc->gpu_offset;
	end = start + radeon_bo_size(reloc->robj);
	start += offset;

	p->ib.ptr[data0] = start & 0xFFFFFFFF;
	p->ib.ptr[data1] = start >> 32;

	cmd = radeon_get_ib_value(p, p->idx) >> 1;

	if (cmd < 0x4) {
		if (end <= start) {
			DRM_ERROR("invalid reloc offset %X!\n", offset);
			return -EINVAL;
		}
		if ((end - start) < buf_sizes[cmd]) {
			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
				  (unsigned)(end - start), buf_sizes[cmd]);
			return -EINVAL;
		}

	} else if (cmd != 0x100) {
		DRM_ERROR("invalid UVD command %X!\n", cmd);
		return -EINVAL;
	}

	if ((start >> 28) != ((end - 1) >> 28)) {
		DRM_ERROR("reloc %lX-%lX crossing 256MB boundary!\n",
			  start, end);
		return -EINVAL;
	}

	/* TODO: is this still necessary on NI+ ? */
	if ((cmd == 0 || cmd == 0x3) &&
	    (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) {
		DRM_ERROR("msg/fb buffer %lX-%lX out of 256MB segment!\n",
			  start, end);
		return -EINVAL;
	}

	if (cmd == 0) {
		if (*has_msg_cmd) {
			DRM_ERROR("More than one message in a UVD-IB!\n");
			return -EINVAL;
		}
		*has_msg_cmd = true;
		r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes);
		if (r)
			return r;
	} else if (!*has_msg_cmd) {
		DRM_ERROR("Message needed before other commands are sent!\n");
		return -EINVAL;
	}

	return 0;
}

static int radeon_uvd_cs_reg(struct radeon_cs_parser *p,
			     struct radeon_cs_packet *pkt,
			     int *data0, int *data1,
			     unsigned buf_sizes[],
			     bool *has_msg_cmd)
{
	int i, r;

	p->idx++;
	for (i = 0; i <= pkt->count; ++i) {
		switch (pkt->reg + i*4) {
		case UVD_GPCOM_VCPU_DATA0:
			*data0 = p->idx;
			break;
		case UVD_GPCOM_VCPU_DATA1:
			*data1 = p->idx;
			break;
		case UVD_GPCOM_VCPU_CMD:
			r = radeon_uvd_cs_reloc(p, *data0, *data1,
						buf_sizes, has_msg_cmd);
			if (r)
				return r;
			break;
		case UVD_ENGINE_CNTL:
			break;
		default:
			DRM_ERROR("Invalid reg 0x%X!\n",
				  pkt->reg + i*4);
			return -EINVAL;
		}
		p->idx++;
	}
	return 0;
}

int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	int r, data0 = 0, data1 = 0;

	/* does the IB have a msg command? */
	bool has_msg_cmd = false;

	/* minimum buffer sizes */
	unsigned buf_sizes[] = {
		[0x00000000] = 2048,
		[0x00000001] = 32 * 1024 * 1024,
		[0x00000002] = 2048 * 1152 * 3,
		[0x00000003] = 2048,
	};

	if (p->chunks[p->chunk_ib_idx].length_dw % 16) {
		DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
			  p->chunks[p->chunk_ib_idx].length_dw);
		return -EINVAL;
	}

	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk!\n");
		return -EINVAL;
	}

	do {
		r = radeon_cs_packet_parse(p, &pkt, p->idx);
		if (r)
			return r;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1,
					      buf_sizes, &has_msg_cmd);
			if (r)
				return r;
			break;
		case RADEON_PACKET_TYPE2:
			p->idx += pkt.count + 2;
			break;
		default:
			DRM_ERROR("Unknown packet type %d!\n", pkt.type);
			return -EINVAL;
		}
	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);

	if (!has_msg_cmd) {
		DRM_ERROR("UVD-IBs need a msg command!\n");
		return -EINVAL;
	}

	return 0;
}
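
/**
 * radeon_uvd_send_msg - submit a UVD message to the VCPU
 *
 * @rdev: radeon_device pointer
 * @ring: ring index to submit on
 * @addr: GPU address of the message buffer
 * @fence: fence for the submission, returned to the caller
 *
 * Build a minimal IB pointing the VCPU at the message buffer and
 * schedule it on the given ring.
 */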
static int radeon_uvd_send_msg(struct radeon_device *rdev,
			       int ring, uint64_t addr,
			       struct radeon_fence **fence)
{
	struct radeon_ib ib;
	int i, r;

	r = radeon_ib_get(rdev, ring, &ib, NULL, 64);
	if (r)
		return r;

	ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0);
	ib.ptr[1] = addr;
	ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0);
	ib.ptr[3] = addr >> 32;
	ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0);
	ib.ptr[5] = 0;
	for (i = 6; i < 16; ++i)
		ib.ptr[i] = PACKET2(0);
	ib.length_dw = 16;

	r = radeon_ib_schedule(rdev, &ib, NULL, false);

	if (fence)
		*fence = radeon_fence_ref(ib.fence);

	radeon_ib_free(rdev, &ib);
	return r;
}

/*
 * multiple fence commands without any stream commands in between can
 * crash the vcpu so just try to emit a dummy create/destroy msg to
 * avoid this
 */
int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
			      uint32_t handle, struct radeon_fence **fence)
{
	/* we use the last page of the vcpu bo for the UVD message */
	uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
		RADEON_GPU_PAGE_SIZE;

	uint32_t *msg = (uint32_t*)((uint8_t*)rdev->uvd.cpu_addr + offs);
	uint64_t addr = rdev->uvd.gpu_addr + offs;

	int r, i;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
	if (r)
		return r;

	/* stitch together a UVD create msg */
	msg[0] = cpu_to_le32(0x00000de4);
	msg[1] = cpu_to_le32(0x00000000);
	msg[2] = cpu_to_le32(handle);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(0x00000000);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000000);
	msg[7] = cpu_to_le32(0x00000780);
	msg[8] = cpu_to_le32(0x00000440);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x01b37000);
	for (i = 11; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	return r;
}

int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
			       uint32_t handle, struct radeon_fence **fence)
{
	/* we use the last page of the vcpu bo for the UVD message */
	uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
		RADEON_GPU_PAGE_SIZE;

	uint32_t *msg = (uint32_t*)((uint8_t*)rdev->uvd.cpu_addr + offs);
	uint64_t addr = rdev->uvd.gpu_addr + offs;

	int r, i;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
	if (r)
		return r;

	/* stitch together a UVD destroy msg */
	msg[0] = cpu_to_le32(0x00000de4);
	msg[1] = cpu_to_le32(0x00000002);
	msg[2] = cpu_to_le32(handle);
	msg[3] = cpu_to_le32(0x00000000);
	for (i = 4; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	return r;
}

/**
 * radeon_uvd_count_handles - count number of open streams
 *
 * @rdev: radeon_device pointer
 * @sd: number of SD streams
 * @hd: number of HD streams
 *
 * Count the number of open SD/HD streams as a hint for power management
 */
static void radeon_uvd_count_handles(struct radeon_device *rdev,
				     unsigned *sd, unsigned *hd)
{
	unsigned i;

	*sd = 0;
	*hd = 0;

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		if (!atomic_read(&rdev->uvd.handles[i]))
			continue;

		if (rdev->uvd.img_size[i] >= 720*576)
			++(*hd);
		else
			++(*sd);
	}
}

static void radeon_uvd_idle_work_handler(struct work_struct *work)
{
	struct radeon_device *rdev =
		container_of(work, struct radeon_device, uvd.idle_work.work);

	if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) {
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd,
						 &rdev->pm.dpm.hd);
			radeon_dpm_enable_uvd(rdev, false);
		} else {
			radeon_set_uvd_clocks(rdev, 0, 0);
		}
	} else {
		schedule_delayed_work(&rdev->uvd.idle_work,
				      msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
	}
}
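
/**
 * radeon_uvd_note_usage - track UVD usage for power management
 *
 * @rdev: radeon_device pointer
 *
 * (Re)arm the idle work handler and, when UVD was idle before, raise
 * the UVD clocks or enable the UVD power state again.
 */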
void radeon_uvd_note_usage(struct radeon_device *rdev)
{
	bool streams_changed = false;
	bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work);
	set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work,
					    msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));

	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
		unsigned hd = 0, sd = 0;
		radeon_uvd_count_handles(rdev, &sd, &hd);
		if ((rdev->pm.dpm.sd != sd) ||
		    (rdev->pm.dpm.hd != hd)) {
			rdev->pm.dpm.sd = sd;
			rdev->pm.dpm.hd = hd;
			/* disable this for now */
			/*streams_changed = true;*/
		}
	}

	if (set_clocks || streams_changed) {
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			radeon_dpm_enable_uvd(rdev, true);
		} else {
			radeon_set_uvd_clocks(rdev, 53300, 40000);
		}
	}
}
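
/**
 * radeon_uvd_calc_upll_post_div - calculate a UPLL post divider
 *
 * @vco_freq: VCO frequency
 * @target_freq: wanted output frequency
 * @pd_min: post divider minimum
 * @pd_even: post divider must be even above this value
 *
 * Pick the smallest post divider that satisfies the constraints and
 * results in a frequency less than or equal to the target.
 */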
static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq,
					      unsigned target_freq,
					      unsigned pd_min,
					      unsigned pd_even)
{
	unsigned post_div = vco_freq / target_freq;

	/* adjust to post divider minimum value */
	if (post_div < pd_min)
		post_div = pd_min;

	/* we always need a frequency less than or equal the target */
	if ((vco_freq / post_div) > target_freq)
		post_div += 1;

	/* post dividers above a certain value must be even */
	if (post_div > pd_even && post_div % 2)
		post_div += 1;

	return post_div;
}

/**
 * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers
 *
 * @rdev: radeon_device pointer
 * @vclk: wanted VCLK
 * @dclk: wanted DCLK
 * @vco_min: minimum VCO frequency
 * @vco_max: maximum VCO frequency
 * @fb_factor: factor to multiply vco freq with
 * @fb_mask: limit and bitmask for feedback divider
 * @pd_min: post divider minimum
 * @pd_max: post divider maximum
 * @pd_even: post divider must be even above this value
 * @optimal_fb_div: resulting feedback divider
 * @optimal_vclk_div: resulting vclk post divider
 * @optimal_dclk_div: resulting dclk post divider
 *
 * Calculate dividers for the UVD's UPLL (R6xx-SI, except APUs).
 * Returns zero on success, -EINVAL on error.
 */
int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
				  unsigned vclk, unsigned dclk,
				  unsigned vco_min, unsigned vco_max,
				  unsigned fb_factor, unsigned fb_mask,
				  unsigned pd_min, unsigned pd_max,
				  unsigned pd_even,
				  unsigned *optimal_fb_div,
				  unsigned *optimal_vclk_div,
				  unsigned *optimal_dclk_div)
{
	unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq;

	/* start off with something large */
	unsigned optimal_score = ~0;

	/* loop through vco from low to high */
	vco_min = max(max(vco_min, vclk), dclk);
	for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {

		uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
		unsigned vclk_div, dclk_div, score;

		do_div(fb_div, ref_freq);

		/* fb div out of range? */
		if (fb_div > fb_mask)
			break; /* it can only get worse */

		fb_div &= fb_mask;

		/* calc vclk divider with current vco freq */
		vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk,
							 pd_min, pd_even);
		if (vclk_div > pd_max)
			break; /* vco is too big, it has to stop */

		/* calc dclk divider with current vco freq */
		dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk,
							 pd_min, pd_even);
		if (dclk_div > pd_max)
			break; /* vco is too big, it has to stop */

		/* calc score with current vco freq */
		score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div);

		/* determine if this vco setting is better than current optimal settings */
		if (score < optimal_score) {
			*optimal_fb_div = fb_div;
			*optimal_vclk_div = vclk_div;
			*optimal_dclk_div = dclk_div;
			optimal_score = score;
			if (optimal_score == 0)
				break; /* it can't get better than this */
		}
	}

	/* did we find a valid setup? */
	if (optimal_score == ~0)
		return -EINVAL;

	return 0;
}

int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
				unsigned cg_upll_func_cntl)
{
	unsigned i;

	/* make sure UPLL_CTLREQ is deasserted */
	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

	mdelay(10);

	/* assert UPLL_CTLREQ */
	WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);

	/* wait for CTLACK and CTLACK2 to get asserted */
	for (i = 0; i < 100; ++i) {
		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
		if ((RREG32(cg_upll_func_cntl) & mask) == mask)
			break;
		mdelay(10);
	}

	/* deassert UPLL_CTLREQ */
	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

	if (i == 100) {
		DRM_ERROR("Timeout setting UVD clocks!\n");
		return -ETIMEDOUT;
	}

	return 0;
}