/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon.h"
#ifdef TRACE_TODO
#include "radeon_trace.h"
#endif

/*
 * GPUVM
 * GPUVM is similar to the legacy GART on older ASICs, however
 * rather than there being a single global GART table
 * for the entire GPU, there are multiple VM page tables active
 * at any given time.  The VM page tables can contain a mix of
 * VRAM pages and system memory pages, and the system memory pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID.  When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer.  VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * Cayman/Trinity support up to 8 active VMs at any given time;
 * SI supports 16.
 */
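
/*
 * A minimal sketch (not part of the driver; the helper names are
 * illustrative only) of how the code below decomposes a GPU virtual
 * address, assuming 4KB GPU pages and the radeon_vm_block_size split
 * used throughout this file.
 */
static inline unsigned radeon_vm_example_pde_idx(uint64_t gpu_addr)
{
	/* which page directory entry (i.e. which page table) */
	return (gpu_addr / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;
}

static inline unsigned radeon_vm_example_pte_idx(uint64_t gpu_addr)
{
	/* which entry within that page table */
	return (gpu_addr / RADEON_GPU_PAGE_SIZE) & (RADEON_VM_PTE_COUNT - 1);
}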

/**
 * radeon_vm_num_pdes - return the number of page directory entries
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the number of page directory entries (cayman+).
 */
static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
{
	return rdev->vm_manager.max_pfn >> radeon_vm_block_size;
}

/**
 * radeon_vm_directory_size - returns the size of the page directory in bytes
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the size of the page directory in bytes (cayman+).
 */
static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
{
	return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
}
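
/*
 * Worked example (illustrative numbers only): with max_pfn = 1 << 20
 * (4GB of 4KB pages) and radeon_vm_block_size = 9, there are
 * 1 << 11 = 2048 page directory entries, so the directory occupies
 * 2048 * 8 = 16KB once page aligned.
 */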

/**
 * radeon_vm_manager_init - init the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Init the vm manager (cayman+).
 * Returns 0 for success, error for failure.
 */
int radeon_vm_manager_init(struct radeon_device *rdev)
{
	int r;

	if (!rdev->vm_manager.enabled) {
		r = radeon_asic_vm_init(rdev);
		if (r)
			return r;

		rdev->vm_manager.enabled = true;
	}
	return 0;
}

/**
 * radeon_vm_manager_fini - tear down the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the VM manager (cayman+).
 */
void radeon_vm_manager_fini(struct radeon_device *rdev)
{
	int i;

	if (!rdev->vm_manager.enabled)
		return;

	for (i = 0; i < RADEON_NUM_VM; ++i)
		radeon_fence_unref(&rdev->vm_manager.active[i]);
	radeon_asic_vm_fini(rdev);
	rdev->vm_manager.enabled = false;
}

/**
 * radeon_vm_get_bos - add the vm BOs to a validation list
 *
 * @rdev: radeon_device pointer
 * @vm: vm providing the BOs
 * @head: head of validation list
 *
 * Add the page directory and page tables to the list of BOs to
 * validate for command submission (cayman+).
 */
struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
					  struct radeon_vm *vm,
					  struct list_head *head)
{
	struct radeon_cs_reloc *list;
	unsigned i, idx;

	list = drm_malloc_ab(vm->max_pde_used + 2,
			     sizeof(struct radeon_cs_reloc));
	if (!list)
		return NULL;

	/* add the vm page table to the list */
	list[0].gobj = NULL;
	list[0].robj = vm->page_directory;
	list[0].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
	list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
	list[0].tv.bo = &vm->page_directory->tbo;
	list[0].tiling_flags = 0;
	list[0].handle = 0;
	list_add(&list[0].tv.head, head);

	for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
		if (!vm->page_tables[i].bo)
			continue;

		list[idx].gobj = NULL;
		list[idx].robj = vm->page_tables[i].bo;
		list[idx].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
		list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
		list[idx].tv.bo = &list[idx].robj->tbo;
		list[idx].tiling_flags = 0;
		list[idx].handle = 0;
		list_add(&list[idx++].tv.head, head);
	}

	return list;
}

/**
 * radeon_vm_grab_id - allocate the next free VMID
 *
 * @rdev: radeon_device pointer
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 *
 * Allocate an id for the vm (cayman+).
 * Returns the fence we need to sync to (if any).
 *
 * Global and local mutex must be locked!
 */
struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
				       struct radeon_vm *vm, int ring)
{
	struct radeon_fence *best[RADEON_NUM_RINGS] = {};
	unsigned choices[2] = {};
	unsigned i;

	/* check if the id is still valid */
	if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id])
		return NULL;

	/* we definitely need to flush */
	radeon_fence_unref(&vm->last_flush);

	/* skip over VMID 0, since it is the system VM */
	for (i = 1; i < rdev->vm_manager.nvm; ++i) {
		struct radeon_fence *fence = rdev->vm_manager.active[i];

		if (fence == NULL) {
			/* found a free one */
			vm->id = i;
#ifdef TRACE_TODO
			trace_radeon_vm_grab_id(vm->id, ring);
#endif
			return NULL;
		}

		if (radeon_fence_is_earlier(fence, best[fence->ring])) {
			best[fence->ring] = fence;
			choices[fence->ring == ring ? 0 : 1] = i;
		}
	}
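
	/*
	 * choices[0] holds a VMID whose last-use fence is on the ring we
	 * are submitting to (cheap to reuse), choices[1] one owned by a
	 * different ring; prefer the former to avoid a cross-ring wait.
	 */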

	for (i = 0; i < 2; ++i) {
		if (choices[i]) {
			vm->id = choices[i];
#ifdef TRACE_TODO
			trace_radeon_vm_grab_id(vm->id, ring);
#endif
			return rdev->vm_manager.active[choices[i]];
		}
	}

	/* should never happen */
	BUG();
	return NULL;
}

/**
 * radeon_vm_flush - hardware flush the vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to flush
 * @ring: ring to use for flush
 *
 * Flush the vm (cayman+).
 *
 * Global and local mutex must be locked!
 */
void radeon_vm_flush(struct radeon_device *rdev,
		     struct radeon_vm *vm,
		     int ring)
{
	uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);

	/* if we can't remember our last VM flush then flush now! */
	/* XXX figure out why we have to flush all the time */
	if (!vm->last_flush || true || pd_addr != vm->pd_gpu_addr) {
#ifdef TRACE_TODO
		trace_radeon_vm_flush(pd_addr, ring, vm->id);
#endif
		vm->pd_gpu_addr = pd_addr;
		radeon_ring_vm_flush(rdev, ring, vm);
	}
}

/**
 * radeon_vm_fence - remember fence for vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to fence
 * @fence: fence to remember
 *
 * Fence the vm (cayman+).
 * Set the fence used to protect page table and id.
 *
 * Global and local mutex must be locked!
 */
void radeon_vm_fence(struct radeon_device *rdev,
		     struct radeon_vm *vm,
		     struct radeon_fence *fence)
{
	radeon_fence_unref(&vm->fence);
	vm->fence = radeon_fence_ref(fence);

	radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
	rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);

	radeon_fence_unref(&vm->last_id_use);
	vm->last_id_use = radeon_fence_ref(fence);

	/* we just flushed the VM, remember that */
	if (!vm->last_flush)
		vm->last_flush = radeon_fence_ref(fence);
}

/**
 * radeon_vm_bo_find - find the bo_va for a specific vm & bo
 *
 * @vm: requested vm
 * @bo: requested buffer object
 *
 * Find @bo inside the requested vm (cayman+).
 * Search inside @bo's vm list for the requested vm.
 * Returns the found bo_va or NULL if none is found.
 *
 * Object has to be reserved!
 */
struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
				       struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		if (bo_va->vm == vm) {
			return bo_va;
		}
	}
	return NULL;
}

/**
 * radeon_vm_bo_add - add a bo to a specific vm
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @bo: radeon buffer object
 *
 * Add @bo into the requested vm (cayman+).
 * Add @bo to the list of bos associated with the vm.
 * Returns the newly added bo_va or NULL for failure.
 *
 * Object has to be reserved!
 */
struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
				      struct radeon_vm *vm,
				      struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
	if (bo_va == NULL) {
		return NULL;
	}
	bo_va->vm = vm;
	bo_va->bo = bo;
	bo_va->soffset = 0;
	bo_va->eoffset = 0;
	bo_va->flags = 0;
	bo_va->addr = 0;
	bo_va->ref_count = 1;
	INIT_LIST_HEAD(&bo_va->bo_list);
	INIT_LIST_HEAD(&bo_va->vm_list);
	INIT_LIST_HEAD(&bo_va->vm_status);

	lockmgr(&vm->mutex, LK_EXCLUSIVE);
	list_add(&bo_va->vm_list, &vm->va);
	list_add_tail(&bo_va->bo_list, &bo->va);
	lockmgr(&vm->mutex, LK_RELEASE);

	return bo_va;
}

/**
 * radeon_vm_set_pages - helper to call the right asic function
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the right asic functions
 * to setup the page table using the DMA.
 */
static void radeon_vm_set_pages(struct radeon_device *rdev,
				struct radeon_ib *ib,
				uint64_t pe,
				uint64_t addr, unsigned count,
				uint32_t incr, uint32_t flags)
{
#ifdef TRACE_TODO
	trace_radeon_vm_set_page(pe, addr, count, incr, flags);
#endif

	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
		uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
		radeon_asic_vm_copy_pages(rdev, ib, pe, src, count);

	} else if ((flags & R600_PTE_SYSTEM) || (count < 3)) {
		radeon_asic_vm_write_pages(rdev, ib, pe, addr,
					   count, incr, flags);

	} else {
		radeon_asic_vm_set_pages(rdev, ib, pe, addr,
					 count, incr, flags);
	}
}

/**
 * radeon_vm_clear_bo - initially clear the page dir/table
 *
 * @rdev: radeon_device pointer
 * @bo: bo to clear
 *
 * Fill the page directory or page table with zero entries using the
 * DMA ring.  Returns 0 for success, error for failure.
 */
static int radeon_vm_clear_bo(struct radeon_device *rdev,
			      struct radeon_bo *bo)
{
	struct ttm_validate_buffer tv;
	struct ww_acquire_ctx ticket;
	struct list_head head;
	struct radeon_ib ib;
	unsigned entries;
	uint64_t addr;
	int r;

	memset(&tv, 0, sizeof(tv));
	tv.bo = &bo->tbo;

	INIT_LIST_HEAD(&head);
	list_add(&tv.head, &head);

	r = ttm_eu_reserve_buffers(&ticket, &head);
	if (r)
		return r;

	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
	if (r)
		goto error;

	addr = radeon_bo_gpu_offset(bo);
	entries = radeon_bo_size(bo) / 8;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256);
	if (r)
		goto error;

	ib.length_dw = 0;

	radeon_vm_set_pages(rdev, &ib, addr, 0, entries, 0, 0);
	radeon_asic_vm_pad_ib(rdev, &ib);
	WARN_ON(ib.length_dw > 64);

	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r)
		goto error;

	ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence);
	radeon_ib_free(rdev, &ib);

	return 0;

error:
	ttm_eu_backoff_reservation(&ticket, &head);
	return r;
}

/**
 * radeon_vm_bo_set_addr - set bos virtual address inside a vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: bo_va to store the address
 * @soffset: requested offset of the buffer in the VM address space
 * @flags: attributes of pages (read/write/valid/etc.)
 *
 * Set offset of @bo_va (cayman+).
 * Validate and set the offset requested within the vm address space.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved!
 */
int radeon_vm_bo_set_addr(struct radeon_device *rdev,
			  struct radeon_bo_va *bo_va,
			  uint64_t soffset,
			  uint32_t flags)
{
	uint64_t size = radeon_bo_size(bo_va->bo);
	uint64_t eoffset, last_offset = 0;
	struct radeon_vm *vm = bo_va->vm;
	struct radeon_bo_va *tmp;
	struct list_head *head;
	unsigned last_pfn, pt_idx;
	int r;

	if (soffset) {
		/* make sure the object fits at this offset */
		eoffset = soffset + size;
		if (soffset >= eoffset) {
			return -EINVAL;
		}

		last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
		if (last_pfn > rdev->vm_manager.max_pfn) {
			dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
				last_pfn, rdev->vm_manager.max_pfn);
			return -EINVAL;
		}

	} else {
		eoffset = last_pfn = 0;
	}

	lockmgr(&vm->mutex, LK_EXCLUSIVE);
	head = &vm->va;
	last_offset = 0;
	list_for_each_entry(tmp, &vm->va, vm_list) {
		if (bo_va == tmp) {
			/* skip over currently modified bo */
			continue;
		}

		if (soffset >= last_offset && eoffset <= tmp->soffset) {
			/* bo can be added before this one */
			break;
		}
		if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
			/* bo and tmp overlap, invalid offset */
			dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
				bo_va->bo, (unsigned)bo_va->soffset, tmp->bo,
				(unsigned)tmp->soffset, (unsigned)tmp->eoffset);
			lockmgr(&vm->mutex, LK_RELEASE);
			return -EINVAL;
		}
		last_offset = tmp->eoffset;
		head = &tmp->vm_list;
	}

	if (bo_va->soffset) {
		/* add a clone of the bo_va to clear the old address */
		tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
		if (!tmp) {
			lockmgr(&vm->mutex, LK_RELEASE);
			return -ENOMEM;
		}
		tmp->soffset = bo_va->soffset;
		tmp->eoffset = bo_va->eoffset;
		tmp->vm = vm;
		list_add(&tmp->vm_status, &vm->freed);
	}

	bo_va->soffset = soffset;
	bo_va->eoffset = eoffset;
	bo_va->flags = flags;
	bo_va->addr = 0;
	list_move(&bo_va->vm_list, head);

	soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;
	eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;

	BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));

	if (eoffset > vm->max_pde_used)
		vm->max_pde_used = eoffset;

	radeon_bo_unreserve(bo_va->bo);

	/* walk over the address space and allocate the page tables */
	for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) {
		struct radeon_bo *pt;

		if (vm->page_tables[pt_idx].bo)
			continue;

		/* drop mutex to allocate and clear page table */
		lockmgr(&vm->mutex, LK_RELEASE);

		r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8,
				     RADEON_GPU_PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, 0, NULL, &pt);
		if (r)
			return r;

		r = radeon_vm_clear_bo(rdev, pt);
		if (r) {
			radeon_bo_unref(&pt);
			radeon_bo_reserve(bo_va->bo, false);
			return r;
		}

		/* acquire mutex again */
		lockmgr(&vm->mutex, LK_EXCLUSIVE);
		if (vm->page_tables[pt_idx].bo) {
			/* someone else allocated the pt in the meantime */
			lockmgr(&vm->mutex, LK_RELEASE);
			radeon_bo_unref(&pt);
			lockmgr(&vm->mutex, LK_EXCLUSIVE);
			continue;
		}

		vm->page_tables[pt_idx].addr = 0;
		vm->page_tables[pt_idx].bo = pt;
	}

	lockmgr(&vm->mutex, LK_RELEASE);
	return radeon_bo_reserve(bo_va->bo, false);
}

/**
 * radeon_vm_map_gart - get the physical address of a gart page
 *
 * @rdev: radeon_device pointer
 * @addr: the unmapped addr
 *
 * Look up the physical address of the page that the pte resolves
 * to (cayman+).
 * Returns the physical address of the page.
 */
uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
{
	uint64_t result;

	/* page table offset */
	result = rdev->gart.pages_addr[addr >> PAGE_SHIFT];

	/*
	 * In case cpu page size != gpu page size, keep the in-page offset
	 * bits.  On this platform PAGE_MASK is (PAGE_SIZE - 1), the inverse
	 * of the Linux convention, so masking with PAGE_MASK here matches
	 * the Linux original's "addr & ~PAGE_MASK".
	 */
	result |= addr & (PAGE_MASK);

	return result;
}

/**
 * radeon_vm_page_flags - translate page flags to what the hw uses
 *
 * @flags: flags coming from userspace
 *
 * Translate the flags the userspace ABI uses to hw flags.
 */
static uint32_t radeon_vm_page_flags(uint32_t flags)
{
	uint32_t hw_flags = 0;

	hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
	hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
	hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
	if (flags & RADEON_VM_PAGE_SYSTEM) {
		hw_flags |= R600_PTE_SYSTEM;
		hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
	}
	return hw_flags;
}

/**
 * radeon_vm_update_page_directory - make sure that the page directory is valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Allocates new page tables if necessary
 * and updates the page directory (cayman+).
 * Returns 0 for success, error for failure.
 *
 * Global and local mutex must be locked!
 */
int radeon_vm_update_page_directory(struct radeon_device *rdev,
				    struct radeon_vm *vm)
{
	struct radeon_bo *pd = vm->page_directory;
	uint64_t pd_addr = radeon_bo_gpu_offset(pd);
	uint32_t incr = RADEON_VM_PTE_COUNT * 8;
	uint64_t last_pde = ~0, last_pt = ~0;
	unsigned count = 0, pt_idx, ndw;
	struct radeon_ib ib;
	int r;

	/* padding, etc. */
	ndw = 64;

	/* assume the worst case */
	ndw += vm->max_pde_used * 6;

	/* update too big for an IB */
	if (ndw > 0xfffff)
		return -ENOMEM;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
	if (r)
		return r;
	ib.length_dw = 0;

	/* walk over the address space and update the page directory */
	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
		struct radeon_bo *bo = vm->page_tables[pt_idx].bo;
		uint64_t pde, pt;

		if (bo == NULL)
			continue;

		pt = radeon_bo_gpu_offset(bo);
		if (vm->page_tables[pt_idx].addr == pt)
			continue;
		vm->page_tables[pt_idx].addr = pt;

		pde = pd_addr + pt_idx * 8;
		if (((last_pde + 8 * count) != pde) ||
		    ((last_pt + incr * count) != pt)) {

			if (count) {
				radeon_vm_set_pages(rdev, &ib, last_pde,
						    last_pt, count, incr,
						    R600_PTE_VALID);
			}

			count = 1;
			last_pde = pde;
			last_pt = pt;
		} else {
			++count;
		}
	}

	if (count)
		radeon_vm_set_pages(rdev, &ib, last_pde, last_pt, count,
				    incr, R600_PTE_VALID);

	if (ib.length_dw != 0) {
		radeon_asic_vm_pad_ib(rdev, &ib);
		radeon_semaphore_sync_to(ib.semaphore, pd->tbo.sync_obj);
		radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use);
		WARN_ON(ib.length_dw > ndw);
		r = radeon_ib_schedule(rdev, &ib, NULL, false);
		if (r) {
			radeon_ib_free(rdev, &ib);
			return r;
		}
		radeon_fence_unref(&vm->fence);
		vm->fence = radeon_fence_ref(ib.fence);
		radeon_fence_unref(&vm->last_flush);
	}
	radeon_ib_free(rdev, &ib);

	return 0;
}

/**
 * radeon_vm_frag_ptes - add fragment information to PTEs
 *
 * @rdev: radeon_device pointer
 * @ib: IB for the update
 * @pe_start: first PTE to handle
 * @pe_end: last PTE to handle
 * @addr: addr those PTEs should point to
 * @flags: hw mapping flags
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_frag_ptes(struct radeon_device *rdev,
				struct radeon_ib *ib,
				uint64_t pe_start, uint64_t pe_end,
				uint64_t addr, uint32_t flags)
{
	/**
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE.  When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)).  The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching.  This leads to large improvements in throughput when the
	 * TLB is under pressure.
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions.  The large fragment cache is significantly
	 * larger.  Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning virtual base address and
	 * allocation size to the fragment size.
	 */
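
	/*
	 * Worked example (illustrative): a 256KB fragment spans
	 * 256KB / 4KB = 64 PTEs, i.e. 64 * 8 = 0x200 bytes of PTE space,
	 * which is the Cayman frag_align below.  The 64KB fragments used
	 * on SI and newer give 16 * 8 = 0x80.
	 */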

	/* NI is optimized for 256KB fragments, SI and newer for 64KB */
	uint64_t frag_flags = rdev->family == CHIP_CAYMAN ?
			R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB;
	uint64_t frag_align = rdev->family == CHIP_CAYMAN ? 0x200 : 0x80;

	uint64_t frag_start = ALIGN(pe_start, frag_align);
	uint64_t frag_end = pe_end & ~(frag_align - 1);

	unsigned count;

	/* system pages are not physically contiguous */
	if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) ||
	    (frag_start >= frag_end)) {

		count = (pe_end - pe_start) / 8;
		radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
				    RADEON_GPU_PAGE_SIZE, flags);
		return;
	}

	/* handle the 4K area at the beginning */
	if (pe_start != frag_start) {
		count = (frag_start - pe_start) / 8;
		radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
				    RADEON_GPU_PAGE_SIZE, flags);
		addr += RADEON_GPU_PAGE_SIZE * count;
	}

	/* handle the area in the middle */
	count = (frag_end - frag_start) / 8;
	radeon_vm_set_pages(rdev, ib, frag_start, addr, count,
			    RADEON_GPU_PAGE_SIZE, flags | frag_flags);

	/* handle the 4K area at the end */
	if (frag_end != pe_end) {
		addr += RADEON_GPU_PAGE_SIZE * count;
		count = (pe_end - frag_end) / 8;
		radeon_vm_set_pages(rdev, ib, frag_end, addr, count,
				    RADEON_GPU_PAGE_SIZE, flags);
	}
}

/**
 * radeon_vm_update_ptes - make sure that page tables are valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @ib: indirect buffer to fill with commands
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end (cayman+).
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_update_ptes(struct radeon_device *rdev,
				  struct radeon_vm *vm,
				  struct radeon_ib *ib,
				  uint64_t start, uint64_t end,
				  uint64_t dst, uint32_t flags)
{
	uint64_t mask = RADEON_VM_PTE_COUNT - 1;
	uint64_t last_pte = ~0, last_dst = ~0;
	unsigned count = 0;
	uint64_t addr;

	start = start / RADEON_GPU_PAGE_SIZE;
	end = end / RADEON_GPU_PAGE_SIZE;

	/* walk over the address space and update the page tables */
	for (addr = start; addr < end; ) {
		uint64_t pt_idx = addr >> radeon_vm_block_size;
		struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
		unsigned nptes;
		uint64_t pte;

		radeon_semaphore_sync_to(ib->semaphore, pt->tbo.sync_obj);

		if ((addr & ~mask) == (end & ~mask))
			nptes = end - addr;
		else
			nptes = RADEON_VM_PTE_COUNT - (addr & mask);

		pte = radeon_bo_gpu_offset(pt);
		pte += (addr & mask) * 8;

		if ((last_pte + 8 * count) != pte) {

			if (count) {
				radeon_vm_frag_ptes(rdev, ib, last_pte,
						    last_pte + 8 * count,
						    last_dst, flags);
			}

			count = nptes;
			last_pte = pte;
			last_dst = dst;
		} else {
			count += nptes;
		}

		addr += nptes;
		dst += nptes * RADEON_GPU_PAGE_SIZE;
	}

	if (count) {
		radeon_vm_frag_ptes(rdev, ib, last_pte,
				    last_pte + 8 * count,
				    last_dst, flags);
	}
}

/**
 * radeon_vm_bo_update - map a bo into the vm page table
 *
 * @rdev: radeon_device pointer
 * @bo_va: bo_va to map into the vm
 * @mem: ttm mem
 *
 * Fill in the page table entries for the bo (cayman+).
 * Returns 0 for success, -EINVAL for failure.
 *
 * Object has to be reserved and mutex must be locked!
 */
int radeon_vm_bo_update(struct radeon_device *rdev,
			struct radeon_bo_va *bo_va,
			struct ttm_mem_reg *mem)
{
	struct radeon_vm *vm = bo_va->vm;
	struct radeon_ib ib;
	unsigned nptes, ncmds, ndw;
	uint64_t addr;
	uint32_t flags;
	int r;

	if (!bo_va->soffset) {
		dev_err(rdev->dev, "bo %p doesn't have a mapping in vm %p\n",
			bo_va->bo, vm);
		return -EINVAL;
	}

	list_del_init(&bo_va->vm_status);

	bo_va->flags &= ~RADEON_VM_PAGE_VALID;
	bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
	bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED;
	if (mem) {
		addr = mem->start << PAGE_SHIFT;
		if (mem->mem_type != TTM_PL_SYSTEM) {
			bo_va->flags |= RADEON_VM_PAGE_VALID;
		}
		if (mem->mem_type == TTM_PL_TT) {
			bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
			if (!(bo_va->bo->flags & (RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC)))
				bo_va->flags |= RADEON_VM_PAGE_SNOOPED;

		} else {
			addr += rdev->vm_manager.vram_base_offset;
		}
	} else {
		addr = 0;
	}

	if (addr == bo_va->addr)
		return 0;
	bo_va->addr = addr;

#ifdef TRACE_TODO
	trace_radeon_vm_bo_update(bo_va);
#endif

	nptes = (bo_va->eoffset - bo_va->soffset) / RADEON_GPU_PAGE_SIZE;

	/* reserve space for one command every (1 << BLOCK_SIZE) entries
	   or 2k dwords (whatever is smaller) */
	ncmds = (nptes >> min(radeon_vm_block_size, 11)) + 1;
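
	/*
	 * Example of the sizing above (illustrative): mapping 1MB with 4KB
	 * pages gives nptes = 256; with radeon_vm_block_size = 9 the
	 * heuristic reserves ncmds = (256 >> 9) + 1 = 1 command.
	 */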

	/* padding, etc. */
	ndw = 64;

	flags = radeon_vm_page_flags(bo_va->flags);
	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
		/* only copy commands needed */
		ndw += ncmds * 7;

	} else if (flags & R600_PTE_SYSTEM) {
		/* header for write data commands */
		ndw += ncmds * 4;

		/* body of write data command */
		ndw += nptes * 2;

	} else {
		/* set page commands needed */
		ndw += ncmds * 10;

		/* two extra commands for begin/end of fragment */
		ndw += 2 * 10;
	}

	/* update too big for an IB */
	if (ndw > 0xfffff)
		return -ENOMEM;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
	if (r)
		return r;
	ib.length_dw = 0;

	radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
			      addr, flags);

	radeon_asic_vm_pad_ib(rdev, &ib);
	WARN_ON(ib.length_dw > ndw);

	radeon_semaphore_sync_to(ib.semaphore, vm->fence);
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_ib_free(rdev, &ib);
		return r;
	}
	radeon_fence_unref(&vm->fence);
	vm->fence = radeon_fence_ref(ib.fence);
	radeon_ib_free(rdev, &ib);
	radeon_fence_unref(&vm->last_flush);

	return 0;
}

/**
 * radeon_vm_clear_freed - clear freed BOs in the PT
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Make sure all freed BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
 */
int radeon_vm_clear_freed(struct radeon_device *rdev,
			  struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	int r;

	list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
		r = radeon_vm_bo_update(rdev, bo_va, NULL);
		kfree(bo_va);
		if (r)
			return r;
	}
	return 0;
}

/**
 * radeon_vm_clear_invalids - clear invalidated BOs in the PT
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Make sure all invalidated BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
 */
int radeon_vm_clear_invalids(struct radeon_device *rdev,
			     struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	int r;

	list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, vm_status) {
		r = radeon_vm_bo_update(rdev, bo_va, NULL);
		if (r)
			return r;
	}
	return 0;
}

/**
 * radeon_vm_bo_rmv - remove a bo from a specific vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: requested bo_va
 *
 * Remove @bo_va->bo from the requested vm (cayman+).
 *
 * Object has to be reserved!
 */
void radeon_vm_bo_rmv(struct radeon_device *rdev,
		      struct radeon_bo_va *bo_va)
{
	struct radeon_vm *vm = bo_va->vm;

	list_del(&bo_va->bo_list);

	lockmgr(&vm->mutex, LK_EXCLUSIVE);
	list_del(&bo_va->vm_list);
	list_del(&bo_va->vm_status);

	if (bo_va->addr) {
		bo_va->bo = NULL;
		list_add(&bo_va->vm_status, &vm->freed);
	} else {
		kfree(bo_va);
	}

	lockmgr(&vm->mutex, LK_RELEASE);
}

/**
 * radeon_vm_bo_invalidate - mark the bo as invalid
 *
 * @rdev: radeon_device pointer
 * @bo: radeon buffer object
 *
 * Mark @bo as invalid (cayman+).
 */
void radeon_vm_bo_invalidate(struct radeon_device *rdev,
			     struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		if (bo_va->addr) {
			lockmgr(&bo_va->vm->mutex, LK_EXCLUSIVE);
			list_del(&bo_va->vm_status);
			list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
			lockmgr(&bo_va->vm->mutex, LK_RELEASE);
		}
	}
}

/**
 * radeon_vm_init - initialize a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Init @vm fields (cayman+).
 * Returns 0 for success, error for failure.
 */
int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
{
	const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE,
				   RADEON_VM_PTE_COUNT * 8);
	unsigned pd_size, pd_entries, pts_size;
	int r;

	vm->id = 0;
	vm->ib_bo_va = NULL;
	vm->fence = NULL;
	vm->last_flush = NULL;
	vm->last_id_use = NULL;
	lockinit(&vm->mutex, "rvmmtx", 0, LK_CANRECURSE);
	INIT_LIST_HEAD(&vm->va);
	INIT_LIST_HEAD(&vm->invalidated);
	INIT_LIST_HEAD(&vm->freed);

	pd_size = radeon_vm_directory_size(rdev);
	pd_entries = radeon_vm_num_pdes(rdev);

	/* allocate page table array */
	pts_size = pd_entries * sizeof(struct radeon_vm_pt);
	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
	if (vm->page_tables == NULL) {
		DRM_ERROR("Cannot allocate memory for page table array\n");
		return -ENOMEM;
	}

	r = radeon_bo_create(rdev, pd_size, align, true,
			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
			     &vm->page_directory);
	if (r)
		return r;

	r = radeon_vm_clear_bo(rdev, vm->page_directory);
	if (r) {
		radeon_bo_unref(&vm->page_directory);
		vm->page_directory = NULL;
		return r;
	}

	return 0;
}

/**
 * radeon_vm_fini - tear down a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Tear down @vm (cayman+).
 * Unbind the VM and remove all bos from the vm bo list.
 */
void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	int i, r;

	if (!list_empty(&vm->va)) {
		dev_err(rdev->dev, "still active bo inside vm\n");
	}
	list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
		list_del_init(&bo_va->vm_list);
		r = radeon_bo_reserve(bo_va->bo, false);
		if (!r) {
			list_del_init(&bo_va->bo_list);
			radeon_bo_unreserve(bo_va->bo);
			kfree(bo_va);
		}
	}
	list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status)
		kfree(bo_va);

	for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
		radeon_bo_unref(&vm->page_tables[i].bo);
	kfree(vm->page_tables);

	radeon_bo_unref(&vm->page_directory);

	radeon_fence_unref(&vm->fence);
	radeon_fence_unref(&vm->last_flush);
	radeon_fence_unref(&vm->last_id_use);

	lockuninit(&vm->mutex);
}
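
/*
 * Typical lifecycle of a VM and one mapping, as used by the rest of the
 * driver (a simplified sketch; error handling and the reservation rules
 * documented on each function above are omitted):
 *
 *	radeon_vm_init(rdev, vm);
 *	bo_va = radeon_vm_bo_add(rdev, vm, bo);
 *	radeon_vm_bo_set_addr(rdev, bo_va, soffset, flags);
 *	radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
 *	...
 *	radeon_vm_bo_rmv(rdev, bo_va);
 *	radeon_vm_fini(rdev, vm);
 */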