/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <deathsimple@vodafone.de>
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>

#include "radeon.h"
#include "r600d.h"

/* 1 second timeout */
#define UVD_IDLE_TIMEOUT_MS	1000

/* Firmware Names */
#define FIRMWARE_R600		"radeonkmsfw_R600_uvd"
#define FIRMWARE_RS780		"radeonkmsfw_RS780_uvd"
#define FIRMWARE_RV770		"radeonkmsfw_RV770_uvd"
#define FIRMWARE_RV710		"radeonkmsfw_RV710_uvd"
#define FIRMWARE_CYPRESS	"radeonkmsfw_CYPRESS_uvd"
#define FIRMWARE_SUMO		"radeonkmsfw_SUMO_uvd"
#define FIRMWARE_TAHITI		"radeonkmsfw_TAHITI_uvd"
#define FIRMWARE_BONAIRE	"radeonkmsfw_BONAIRE_uvd"

MODULE_FIRMWARE(FIRMWARE_R600);
MODULE_FIRMWARE(FIRMWARE_RS780);
MODULE_FIRMWARE(FIRMWARE_RV770);
MODULE_FIRMWARE(FIRMWARE_RV710);
MODULE_FIRMWARE(FIRMWARE_CYPRESS);
MODULE_FIRMWARE(FIRMWARE_SUMO);
MODULE_FIRMWARE(FIRMWARE_TAHITI);
MODULE_FIRMWARE(FIRMWARE_BONAIRE);

static void radeon_uvd_idle_work_handler(struct work_struct *work);
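/**
 * radeon_uvd_init - initialize the UVD block
 *
 * @rdev: radeon_device pointer
 *
 * Load the chip specific UVD firmware, allocate, pin and map the VCPU
 * buffer object that holds the firmware image, stack and heap, and
 * clear the session handle bookkeeping.
 */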
int radeon_uvd_init(struct radeon_device *rdev)
{
	unsigned long bo_size;
	const char *fw_name;
	int i, r;

	INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler);

	switch (rdev->family) {
	case CHIP_RV610:
	case CHIP_RV630:
	case CHIP_RV670:
	case CHIP_RV620:
	case CHIP_RV635:
		fw_name = FIRMWARE_R600;
		break;

	case CHIP_RS780:
	case CHIP_RS880:
		fw_name = FIRMWARE_RS780;
		break;

	case CHIP_RV770:
		fw_name = FIRMWARE_RV770;
		break;

	case CHIP_RV710:
	case CHIP_RV730:
	case CHIP_RV740:
		fw_name = FIRMWARE_RV710;
		break;

	case CHIP_CYPRESS:
	case CHIP_HEMLOCK:
	case CHIP_JUNIPER:
	case CHIP_REDWOOD:
	case CHIP_CEDAR:
		fw_name = FIRMWARE_CYPRESS;
		break;

	case CHIP_SUMO:
	case CHIP_SUMO2:
	case CHIP_PALM:
	case CHIP_CAYMAN:
	case CHIP_BARTS:
	case CHIP_TURKS:
	case CHIP_CAICOS:
		fw_name = FIRMWARE_SUMO;
		break;

	case CHIP_TAHITI:
	case CHIP_VERDE:
	case CHIP_PITCAIRN:
	case CHIP_ARUBA:
	case CHIP_OLAND:
		fw_name = FIRMWARE_TAHITI;
		break;

	case CHIP_BONAIRE:
	case CHIP_KABINI:
	case CHIP_KAVERI:
	case CHIP_HAWAII:
	case CHIP_MULLINS:
		fw_name = FIRMWARE_BONAIRE;
		break;

	default:
		return -EINVAL;
	}

	r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev);
	if (r) {
		dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->datasize + 8) +
		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE +
		  RADEON_GPU_PAGE_SIZE;
	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
			     RADEON_GEM_DOMAIN_VRAM, 0, NULL, &rdev->uvd.vcpu_bo);
	if (r) {
		dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
		return r;
	}

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (r) {
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r);
		return r;
	}

	r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->uvd.gpu_addr);
	if (r) {
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r);
		return r;
	}

	r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr);
	if (r) {
		dev_err(rdev->dev, "(%d) UVD map failed\n", r);
		return r;
	}

	radeon_bo_unreserve(rdev->uvd.vcpu_bo);

	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
		atomic_set(&rdev->uvd.handles[i], 0);
		rdev->uvd.filp[i] = NULL;
		rdev->uvd.img_size[i] = 0;
	}

	return 0;
}

void radeon_uvd_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->uvd.vcpu_bo == NULL)
		return;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (!r) {
		radeon_bo_kunmap(rdev->uvd.vcpu_bo);
		radeon_bo_unpin(rdev->uvd.vcpu_bo);
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	}

	radeon_bo_unref(&rdev->uvd.vcpu_bo);

	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX]);

	release_firmware(rdev->uvd_fw);
}

int radeon_uvd_suspend(struct radeon_device *rdev)
{
	unsigned size;
	char *ptr;
	int i;

	if (rdev->uvd.vcpu_bo == NULL)
		return 0;

	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i)
		if (atomic_read(&rdev->uvd.handles[i]))
			break;

	if (i == RADEON_MAX_UVD_HANDLES)
		return 0;

	size = radeon_bo_size(rdev->uvd.vcpu_bo);
	size -= rdev->uvd_fw->datasize;

	ptr = rdev->uvd.cpu_addr;
	ptr += rdev->uvd_fw->datasize;

	rdev->uvd.saved_bo = kmalloc(size, M_DRM, M_WAITOK);
	memcpy(rdev->uvd.saved_bo, ptr, size);

	return 0;
}

int radeon_uvd_resume(struct radeon_device *rdev)
{
	unsigned size;
	char *ptr;

	if (rdev->uvd.vcpu_bo == NULL)
		return -EINVAL;

	memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->datasize);

	size = radeon_bo_size(rdev->uvd.vcpu_bo);
	size -= rdev->uvd_fw->datasize;

	ptr = rdev->uvd.cpu_addr;
	ptr += rdev->uvd_fw->datasize;

	if (rdev->uvd.saved_bo != NULL) {
		memcpy(ptr, rdev->uvd.saved_bo, size);
		kfree(rdev->uvd.saved_bo);
		rdev->uvd.saved_bo = NULL;
	} else
		memset(ptr, 0, size);

	return 0;
}
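/**
 * radeon_uvd_force_into_uvd_segment - restrict a BO to the UVD segment
 *
 * @rbo: buffer object to restrict
 * @allowed_domains: domains the caller allows for this BO
 *
 * Clamp all placements to the first 256MB window that UVD can address.
 * If the BO is not required to live in VRAM and only has a single
 * placement so far, offer the following 256MB window as a fallback.
 */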
void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo,
				       uint32_t allowed_domains)
{
	int i;

	for (i = 0; i < rbo->placement.num_placement; ++i) {
		rbo->placements[i].fpfn = 0 >> PAGE_SHIFT;
		rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
	}

	/* If it must be in VRAM it must be in the first segment as well */
	if (allowed_domains == RADEON_GEM_DOMAIN_VRAM)
		return;

	/* abort if we already have more than one placement */
	if (rbo->placement.num_placement > 1)
		return;

	/* add another 256MB segment */
	rbo->placements[1] = rbo->placements[0];
	rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
	rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
	rbo->placement.num_placement++;
	rbo->placement.num_busy_placement++;
}

void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
{
	int i, r;

	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
		uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
		if (handle != 0 && rdev->uvd.filp[i] == filp) {
			struct radeon_fence *fence;

			radeon_uvd_note_usage(rdev);

			r = radeon_uvd_get_destroy_msg(rdev,
				R600_RING_TYPE_UVD_INDEX, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			radeon_fence_wait(fence, false);
			radeon_fence_unref(&fence);

			rdev->uvd.filp[i] = NULL;
			atomic_set(&rdev->uvd.handles[i], 0);
		}
	}
}

static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
{
	unsigned stream_type = msg[4];
	unsigned width = msg[6];
	unsigned height = msg[7];
	unsigned dpb_size = msg[9];
	unsigned pitch = msg[28];

	unsigned width_in_mb = width / 16;
	unsigned height_in_mb = ALIGN(height / 16, 2);

	unsigned image_size, tmp, min_dpb_size;

	image_size = width * height;
	image_size += image_size / 2;
	image_size = ALIGN(image_size, 1024);

	switch (stream_type) {
	case 0: /* H264 */

		/* reference picture buffer */
		min_dpb_size = image_size * 17;

		/* macroblock context buffer */
		min_dpb_size += width_in_mb * height_in_mb * 17 * 192;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * height_in_mb * 32;
		break;

	case 1: /* VC1 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CONTEXT_BUFFER */
		min_dpb_size += width_in_mb * height_in_mb * 128;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * 64;

		/* DB surface buffer */
		min_dpb_size += width_in_mb * 128;

		/* BP */
		tmp = max(width_in_mb, height_in_mb);
		min_dpb_size += ALIGN(tmp * 7 * 16, 64);
		break;

	case 3: /* MPEG2 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;
		break;

	case 4: /* MPEG4 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CM */
		min_dpb_size += width_in_mb * height_in_mb * 64;

		/* IT surface buffer */
		min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
		break;

	default:
		DRM_ERROR("UVD codec not handled %d!\n", stream_type);
		return -EINVAL;
	}

	if (width > pitch) {
		DRM_ERROR("Invalid UVD decoding target pitch!\n");
		return -EINVAL;
	}

	if (dpb_size < min_dpb_size) {
		DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
			  dpb_size, min_dpb_size);
		return -EINVAL;
	}

	buf_sizes[0x1] = dpb_size;
	buf_sizes[0x2] = image_size;
	return 0;
}
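/*
 * Validate a UVD message: word 1 of the message is the type
 * (0 = create, 1 = decode, 2 = destroy) and word 2 the session
 * handle; decode messages additionally carry the stream parameters
 * that radeon_uvd_cs_msg_decode() checks above.
 */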
static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
			     unsigned offset, unsigned buf_sizes[])
{
	int32_t msg_type, handle;
	uint32_t *msg;
	unsigned img_size = 0;
	struct fence *f;
	void *ptr;

	int i, r;

	if (offset & 0x3F) {
		DRM_ERROR("UVD messages must be 64 byte aligned!\n");
		return -EINVAL;
	}

	f = reservation_object_get_excl(bo->tbo.resv);
	if (f) {
		r = radeon_fence_wait((struct radeon_fence *)f, false);
		if (r) {
			DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
			return r;
		}
	}

	r = radeon_bo_kmap(bo, &ptr);
	if (r) {
		DRM_ERROR("Failed mapping the UVD message (%d)!\n", r);
		return r;
	}

	msg = (uint32_t *)((uint8_t *)ptr + offset);

	msg_type = msg[1];
	handle = msg[2];

	if (handle == 0) {
		DRM_ERROR("Invalid UVD handle!\n");
		return -EINVAL;
	}

	if (msg_type == 1) {
		/* it's a decode msg, calc buffer sizes */
		r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
		/* calc image size (width * height) */
		img_size = msg[6] * msg[7];
		radeon_bo_kunmap(bo);
		if (r)
			return r;

	} else if (msg_type == 2) {
		/* it's a destroy msg, free the handle */
		for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i)
			atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0);
		radeon_bo_kunmap(bo);
		return 0;
	} else {
		/* it's a create msg, calc image size (width * height) */
		img_size = msg[7] * msg[8];
		radeon_bo_kunmap(bo);

		if (msg_type != 0) {
			DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
			return -EINVAL;
		}

		/* it's a create msg, no special handling needed */
	}

	/* create or decode, validate the handle */
	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
		if (atomic_read(&p->rdev->uvd.handles[i]) == handle)
			return 0;
	}

	/* handle not found, try to alloc a new one */
	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
		if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
			p->rdev->uvd.filp[i] = p->filp;
			p->rdev->uvd.img_size[i] = img_size;
			return 0;
		}
	}

	DRM_ERROR("No more free UVD handles!\n");
	return -EINVAL;
}
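/*
 * Patch a single relocation into the IB: data0/data1 index the dwords
 * holding the buffer offset and relocation index. The following command
 * dword selects the buffer type (judging by buf_sizes[] and the msg/fb
 * check below: 0 message, 1 DPB, 2 decoding target, 3 feedback buffer)
 * so the buffer can be checked against its minimum size.
 */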
static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
			       int data0, int data1,
			       unsigned buf_sizes[], bool *has_msg_cmd)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_reloc *reloc;
	unsigned idx, cmd, offset;
	uint64_t start, end;
	int r;

	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	offset = radeon_get_ib_value(p, data0);
	idx = radeon_get_ib_value(p, data1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d!\n",
			  idx, relocs_chunk->length_dw);
		return -EINVAL;
	}

	reloc = p->relocs_ptr[(idx / 4)];
	start = reloc->gpu_offset;
	end = start + radeon_bo_size(reloc->robj);
	start += offset;

	p->ib.ptr[data0] = start & 0xFFFFFFFF;
	p->ib.ptr[data1] = start >> 32;

	cmd = radeon_get_ib_value(p, p->idx) >> 1;

	if (cmd < 0x4) {
		if (end <= start) {
			DRM_ERROR("invalid reloc offset %X!\n", offset);
			return -EINVAL;
		}
		if ((end - start) < buf_sizes[cmd]) {
			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
				  (unsigned)(end - start), buf_sizes[cmd]);
			return -EINVAL;
		}

	} else if (cmd != 0x100) {
		DRM_ERROR("invalid UVD command %X!\n", cmd);
		return -EINVAL;
	}

	if ((start >> 28) != ((end - 1) >> 28)) {
		DRM_ERROR("reloc %lX-%lX crossing 256MB boundary!\n",
			  start, end);
		return -EINVAL;
	}

	/* TODO: is this still necessary on NI+? */
	if ((cmd == 0 || cmd == 0x3) &&
	    (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) {
		DRM_ERROR("msg/fb buffer %lX-%lX out of 256MB segment!\n",
			  start, end);
		return -EINVAL;
	}

	if (cmd == 0) {
		if (*has_msg_cmd) {
			DRM_ERROR("More than one message in a UVD-IB!\n");
			return -EINVAL;
		}
		*has_msg_cmd = true;
		r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes);
		if (r)
			return r;
	} else if (!*has_msg_cmd) {
		DRM_ERROR("Message needed before other commands are sent!\n");
		return -EINVAL;
	}

	return 0;
}

static int radeon_uvd_cs_reg(struct radeon_cs_parser *p,
			     struct radeon_cs_packet *pkt,
			     int *data0, int *data1,
			     unsigned buf_sizes[],
			     bool *has_msg_cmd)
{
	int i, r;

	p->idx++;
	for (i = 0; i <= pkt->count; ++i) {
		switch (pkt->reg + i*4) {
		case UVD_GPCOM_VCPU_DATA0:
			*data0 = p->idx;
			break;
		case UVD_GPCOM_VCPU_DATA1:
			*data1 = p->idx;
			break;
		case UVD_GPCOM_VCPU_CMD:
			r = radeon_uvd_cs_reloc(p, *data0, *data1,
						buf_sizes, has_msg_cmd);
			if (r)
				return r;
			break;
		case UVD_ENGINE_CNTL:
			break;
		default:
			DRM_ERROR("Invalid reg 0x%X!\n",
				  pkt->reg + i*4);
			return -EINVAL;
		}
		p->idx++;
	}
	return 0;
}

int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	int r, data0 = 0, data1 = 0;

	/* does the IB have a msg command */
	bool has_msg_cmd = false;

	/* minimum buffer sizes */
	unsigned buf_sizes[] = {
		[0x00000000]	=	2048,
		[0x00000001]	=	32 * 1024 * 1024,
		[0x00000002]	=	2048 * 1152 * 3,
		[0x00000003]	=	2048,
	};

	if (p->chunks[p->chunk_ib_idx].length_dw % 16) {
		DRM_ERROR("UVD IB length (%d) not 16-dword aligned!\n",
			  p->chunks[p->chunk_ib_idx].length_dw);
		return -EINVAL;
	}

	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk!\n");
		return -EINVAL;
	}

	do {
		r = radeon_cs_packet_parse(p, &pkt, p->idx);
		if (r)
			return r;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1,
					      buf_sizes, &has_msg_cmd);
			if (r)
				return r;
			break;
		case RADEON_PACKET_TYPE2:
			p->idx += pkt.count + 2;
			break;
		default:
			DRM_ERROR("Unknown packet type %d!\n", pkt.type);
			return -EINVAL;
		}
	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);

	if (!has_msg_cmd) {
		DRM_ERROR("UVD-IBs need a msg command!\n");
		return -EINVAL;
	}

	return 0;
}

static int radeon_uvd_send_msg(struct radeon_device *rdev,
			       int ring, uint64_t addr,
			       struct radeon_fence **fence)
{
	struct radeon_ib ib;
	int i, r;

	r = radeon_ib_get(rdev, ring, &ib, NULL, 64);
	if (r)
		return r;

	ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0);
	ib.ptr[1] = addr;
	ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0);
	ib.ptr[3] = addr >> 32;
	ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0);
	ib.ptr[5] = 0;
	for (i = 6; i < 16; ++i)
		ib.ptr[i] = PACKET2(0);
	ib.length_dw = 16;

	r = radeon_ib_schedule(rdev, &ib, NULL, false);

	if (fence)
		*fence = radeon_fence_ref(ib.fence);

	radeon_ib_free(rdev, &ib);
	return r;
}

/* multiple fence commands without any stream commands in between can
   crash the vcpu so just try to emit a dummy create/destroy msg to
   avoid this */
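/**
 * radeon_uvd_get_create_msg - emit a dummy create msg
 *
 * @rdev: radeon_device pointer
 * @ring: ring index to emit the msg on
 * @handle: session handle to create
 * @fence: optional fence for the caller to wait on
 *
 * Stitch a create message for @handle into the last page of the VCPU
 * buffer object and send it to the UVD block.
 */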
int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
			      uint32_t handle, struct radeon_fence **fence)
{
	/* we use the last page of the vcpu bo for the UVD message */
	uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
		RADEON_GPU_PAGE_SIZE;

	uint32_t *msg = (uint32_t *)((uint8_t *)rdev->uvd.cpu_addr + offs);
	uint64_t addr = rdev->uvd.gpu_addr + offs;

	int r, i;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
	if (r)
		return r;

	/* stitch together a UVD create msg */
	msg[0] = cpu_to_le32(0x00000de4);
	msg[1] = cpu_to_le32(0x00000000);
	msg[2] = cpu_to_le32(handle);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(0x00000000);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000000);
	msg[7] = cpu_to_le32(0x00000780);
	msg[8] = cpu_to_le32(0x00000440);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x01b37000);
	for (i = 11; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	return r;
}

int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
			       uint32_t handle, struct radeon_fence **fence)
{
	/* we use the last page of the vcpu bo for the UVD message */
	uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
		RADEON_GPU_PAGE_SIZE;

	uint32_t *msg = (uint32_t *)((uint8_t *)rdev->uvd.cpu_addr + offs);
	uint64_t addr = rdev->uvd.gpu_addr + offs;

	int r, i;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
	if (r)
		return r;

	/* stitch together a UVD destroy msg */
	msg[0] = cpu_to_le32(0x00000de4);
	msg[1] = cpu_to_le32(0x00000002);
	msg[2] = cpu_to_le32(handle);
	msg[3] = cpu_to_le32(0x00000000);
	for (i = 4; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	return r;
}

/**
 * radeon_uvd_count_handles - count number of open streams
 *
 * @rdev: radeon_device pointer
 * @sd: number of SD streams
 * @hd: number of HD streams
 *
 * Count the number of open SD/HD streams as a hint for power management
 */
static void radeon_uvd_count_handles(struct radeon_device *rdev,
				     unsigned *sd, unsigned *hd)
{
	unsigned i;

	*sd = 0;
	*hd = 0;

	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
		if (!atomic_read(&rdev->uvd.handles[i]))
			continue;

		if (rdev->uvd.img_size[i] >= 720*576)
			++(*hd);
		else
			++(*sd);
	}
}

static void radeon_uvd_idle_work_handler(struct work_struct *work)
{
	struct radeon_device *rdev =
		container_of(work, struct radeon_device, uvd.idle_work.work);

	if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) {
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd,
						 &rdev->pm.dpm.hd);
			radeon_dpm_enable_uvd(rdev, false);
		} else {
			radeon_set_uvd_clocks(rdev, 0, 0);
		}
	} else {
		schedule_delayed_work(&rdev->uvd.idle_work,
				      msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
	}
}
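/**
 * radeon_uvd_note_usage - note that the UVD block is in use
 *
 * @rdev: radeon_device pointer
 *
 * (Re)schedule the idle work and, if the block was idle, raise the
 * UVD clocks (or enable UVD DPM) before new work is submitted.
 */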
void radeon_uvd_note_usage(struct radeon_device *rdev)
{
	bool streams_changed = false;
	bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work);
	set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work,
					    msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));

	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
		unsigned hd = 0, sd = 0;
		radeon_uvd_count_handles(rdev, &sd, &hd);
		if ((rdev->pm.dpm.sd != sd) ||
		    (rdev->pm.dpm.hd != hd)) {
			rdev->pm.dpm.sd = sd;
			rdev->pm.dpm.hd = hd;
			/* disable this for now */
			/*streams_changed = true;*/
		}
	}

	if (set_clocks || streams_changed) {
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			radeon_dpm_enable_uvd(rdev, true);
		} else {
			radeon_set_uvd_clocks(rdev, 53300, 40000);
		}
	}
}

static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq,
					      unsigned target_freq,
					      unsigned pd_min,
					      unsigned pd_even)
{
	unsigned post_div = vco_freq / target_freq;

	/* adjust to post divider minimum value */
	if (post_div < pd_min)
		post_div = pd_min;

	/* we always need a frequency less than or equal the target */
	if ((vco_freq / post_div) > target_freq)
		post_div += 1;

	/* post dividers above a certain value must be even */
	if (post_div > pd_even && post_div % 2)
		post_div += 1;

	return post_div;
}
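/*
 * Worked example with purely illustrative numbers: vco_freq = 100000,
 * target_freq = 30000, pd_min = 2, pd_even = 4 gives 100000 / 30000 = 3;
 * since 100000 / 3 is still above the target the divider is bumped to 4,
 * yielding 25000 <= 30000 (and 4 already satisfies the even rule).
 */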
/**
 * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers
 *
 * @rdev: radeon_device pointer
 * @vclk: wanted VCLK
 * @dclk: wanted DCLK
 * @vco_min: minimum VCO frequency
 * @vco_max: maximum VCO frequency
 * @fb_factor: factor to multiply vco freq with
 * @fb_mask: limit and bitmask for feedback divider
 * @pd_min: post divider minimum
 * @pd_max: post divider maximum
 * @pd_even: post divider must be even above this value
 * @optimal_fb_div: resulting feedback divider
 * @optimal_vclk_div: resulting vclk post divider
 * @optimal_dclk_div: resulting dclk post divider
 *
 * Calculate dividers for UVD's UPLL (R6xx-SI, except APUs).
 * Returns zero on success, -EINVAL on error.
 */
int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
				  unsigned vclk, unsigned dclk,
				  unsigned vco_min, unsigned vco_max,
				  unsigned fb_factor, unsigned fb_mask,
				  unsigned pd_min, unsigned pd_max,
				  unsigned pd_even,
				  unsigned *optimal_fb_div,
				  unsigned *optimal_vclk_div,
				  unsigned *optimal_dclk_div)
{
	unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq;

	/* start off with something large */
	unsigned optimal_score = ~0;

	/* loop through vco from low to high */
	vco_min = max(max(vco_min, vclk), dclk);
	for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {

		uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
		unsigned vclk_div, dclk_div, score;

		do_div(fb_div, ref_freq);

		/* fb div out of range? */
		if (fb_div > fb_mask)
			break; /* it can only get worse */

		fb_div &= fb_mask;

		/* calc vclk divider with current vco freq */
		vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk,
							 pd_min, pd_even);
		if (vclk_div > pd_max)
			break; /* vco is too big, it has to stop */

		/* calc dclk divider with current vco freq */
		dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk,
							 pd_min, pd_even);
		if (dclk_div > pd_max)
			break; /* vco is too big, it has to stop */

		/* calc score with current vco freq */
		score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div);

		/* determine if this vco setting is better than current optimal settings */
		if (score < optimal_score) {
			*optimal_fb_div = fb_div;
			*optimal_vclk_div = vclk_div;
			*optimal_dclk_div = dclk_div;
			optimal_score = score;
			if (optimal_score == 0)
				break; /* it can't get better than this */
		}
	}

	/* did we find a valid setup? */
	if (optimal_score == ~0)
		return -EINVAL;

	return 0;
}

int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
				unsigned cg_upll_func_cntl)
{
	unsigned i;

	/* make sure UPLL_CTLREQ is deasserted */
	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

	mdelay(10);

	/* assert UPLL_CTLREQ */
	WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);

	/* wait for CTLACK and CTLACK2 to get asserted */
	for (i = 0; i < 100; ++i) {
		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
		if ((RREG32(cg_upll_func_cntl) & mask) == mask)
			break;
		mdelay(10);
	}

	/* deassert UPLL_CTLREQ */
	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

	if (i == 100) {
		DRM_ERROR("Timeout setting UVD clocks!\n");
		return -ETIMEDOUT;
	}

	return 0;
}