/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/dma-fence-array.h>
#include <linux/interval_tree_generic.h>
#include <linux/idr.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_gmc.h"

#include <linux/rbtree.h>
#include "amdgpu_vm.h"

/**
 * DOC: GPUVM
 *
 * GPUVM is similar to the legacy GART on older ASICs, however
 * rather than there being a single global GART table
 * for the entire GPU, there are multiple VM page tables active
 * at any given time. The VM page tables can contain a mix of
 * VRAM pages and system memory pages, and system memory pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID. When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer. VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * Cayman/Trinity support up to 8 active VMs at any given time;
 * SI supports 16.
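 *
 * On newer ASICs the translation can use several page directory levels
 * (PDB2/PDB1/PDB0) above the leaf page table level (PTB); how many
 * levels are in use and the block size per level are configured in
 * adev->vm_manager (see amdgpu_vm_level_shift() below).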
 */

#define START(node) ((node)->start)
#define LAST(node) ((node)->last)

#ifdef __linux__

INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
		     START, LAST, static, amdgpu_vm_it)
#else
static struct amdgpu_bo_va_mapping *
amdgpu_vm_it_iter_first(struct rb_root_cached *root, uint64_t start,
			uint64_t last)
{
	struct amdgpu_bo_va_mapping *node;
	struct rb_node *rb;

	for (rb = rb_first_cached(root); rb; rb = rb_next(rb)) {
		node = rb_entry(rb, typeof(*node), rb);
		if (LAST(node) >= start && START(node) <= last)
			return node;
	}
	return NULL;
}

static struct amdgpu_bo_va_mapping *
amdgpu_vm_it_iter_next(struct amdgpu_bo_va_mapping *node, uint64_t start,
		       uint64_t last)
{
	STUB();
	struct rb_node *rb = &node->rb;

	for (rb = rb_next(rb); rb; rb = rb_next(rb)) {
		node = rb_entry(rb, typeof(*node), rb);
		if (LAST(node) >= start && START(node) <= last)
			return node;
	}
	return NULL;
}

static void
amdgpu_vm_it_remove(struct amdgpu_bo_va_mapping *node,
		    struct rb_root_cached *root)
{
	rb_erase_cached(&node->rb, root);
}

static void
amdgpu_vm_it_insert(struct amdgpu_bo_va_mapping *node,
		    struct rb_root_cached *root)
{
	struct rb_node **iter = &root->rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct amdgpu_bo_va_mapping *iter_node;

	while (*iter) {
		parent = *iter;
		iter_node = rb_entry(*iter, struct amdgpu_bo_va_mapping, rb);

		if (node->start < iter_node->start)
			iter = &(*iter)->rb_left;
		else
			iter = &(*iter)->rb_right;
	}

	rb_link_node(&node->rb, parent, iter);
	rb_insert_color_cached(&node->rb, root, false);
}
#endif

#undef START
#undef LAST

/**
 * struct amdgpu_pte_update_params - Local structure
 *
 * Encapsulate some VM table update parameters to reduce
 * the number of function parameters
 *
 */
struct amdgpu_pte_update_params {

	/**
	 * @adev: amdgpu device we do this update for
	 */
	struct amdgpu_device *adev;

	/**
	 * @vm: optional amdgpu_vm we do this update for
	 */
	struct amdgpu_vm *vm;

	/**
	 * @src: address where to copy page table entries from
	 */
	uint64_t src;

	/**
	 * @ib: indirect buffer to fill with commands
	 */
	struct amdgpu_ib *ib;

	/**
	 * @func: Function which actually does the update
	 */
	void (*func)(struct amdgpu_pte_update_params *params,
		     struct amdgpu_bo *bo, uint64_t pe,
		     uint64_t addr, unsigned count, uint32_t incr,
		     uint64_t flags);
	/**
	 * @pages_addr:
	 *
	 * DMA addresses to use for mapping, used during VM update by CPU
	 */
	dma_addr_t *pages_addr;

	/**
	 * @kptr:
	 *
	 * Kernel pointer of PD/PT BO that needs to be updated,
	 * used during VM update by CPU
	 */
	void *kptr;
};

/**
 * struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback
 */
struct amdgpu_prt_cb {

	/**
	 * @adev: amdgpu device
	 */
	struct amdgpu_device *adev;

	/**
	 * @cb: callback
	 */
	struct dma_fence_cb cb;
};

/**
 * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
 *
 * @base: base structure for tracking BO usage in a VM
 * @vm: vm to which bo is to be added
 * @bo: amdgpu buffer object
 *
 * Initialize a bo_va_base structure and add it to the
 * appropriate lists
 *
 */
static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
				   struct amdgpu_vm *vm,
				   struct amdgpu_bo *bo)
{
	base->vm = vm;
	base->bo = bo;
	INIT_LIST_HEAD(&base->bo_list);
	INIT_LIST_HEAD(&base->vm_status);

	if (!bo)
		return;
	list_add_tail(&base->bo_list, &bo->va);

	if (bo->tbo.type == ttm_bo_type_kernel)
		list_move(&base->vm_status, &vm->relocated);

	if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
		return;

	if (bo->preferred_domains &
	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
		return;

	/*
	 * we checked all the prerequisites, but it looks like this per vm bo
	 * is currently evicted. add the bo to the evicted list to make sure it
	 * is validated on next vm use to avoid fault.
	 */
	list_move_tail(&base->vm_status, &vm->evicted);
	base->moved = true;
}

/**
 * amdgpu_vm_level_shift - return the addr shift for each level
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The number of bits the pfn needs to be right shifted for a level.
 */
static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
				      unsigned level)
{
	unsigned shift = 0xff;

	switch (level) {
	case AMDGPU_VM_PDB2:
	case AMDGPU_VM_PDB1:
	case AMDGPU_VM_PDB0:
		shift = 9 * (AMDGPU_VM_PDB0 - level) +
			adev->vm_manager.block_size;
		break;
	case AMDGPU_VM_PTB:
		shift = 0;
		break;
	default:
		dev_err(adev->dev, "the level%d isn't supported.\n", level);
	}

	return shift;
}

/**
 * amdgpu_vm_num_entries - return the number of entries in a PD/PT
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The number of entries in a page directory or page table.
 */
static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
				      unsigned level)
{
	unsigned shift = amdgpu_vm_level_shift(adev,
					       adev->vm_manager.root_level);

	if (level == adev->vm_manager.root_level)
		/* For the root directory */
		return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift;
	else if (level != AMDGPU_VM_PTB)
		/* Everything in between */
		return 512;
	else
		/* For the page tables on the leaves */
		return AMDGPU_VM_PTE_COUNT(adev);
}

/**
 * amdgpu_vm_bo_size - returns the size of the BOs in bytes
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The size of the BO for a page directory or page table in bytes.
 */
static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
{
	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
}

/**
 * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
 *
 * @vm: vm providing the BOs
 * @validated: head of validation list
 * @entry: entry to add
 *
 * Add the page directory to the list of BOs to
 * validate for command submission.
 */
void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
			 struct list_head *validated,
			 struct amdgpu_bo_list_entry *entry)
{
	entry->robj = vm->root.base.bo;
	entry->priority = 0;
	entry->tv.bo = &entry->robj->tbo;
	entry->tv.shared = true;
	entry->user_pages = NULL;
	list_add(&entry->tv.head, validated);
}

/**
 * amdgpu_vm_validate_pt_bos - validate the page table BOs
 *
 * @adev: amdgpu device pointer
 * @vm: vm providing the BOs
 * @validate: callback to do the validation
 * @param: parameter for the validation callback
 *
 * Validate the page table BOs on command submission if necessary.
 *
 * Returns:
 * Validation result.
 */
int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			      int (*validate)(void *p, struct amdgpu_bo *bo),
			      void *param)
{
	struct ttm_bo_global *glob = adev->mman.bdev.glob;
	struct amdgpu_vm_bo_base *bo_base, *tmp;
	int r = 0;

	list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
		struct amdgpu_bo *bo = bo_base->bo;

		if (bo->parent) {
			r = validate(param, bo);
			if (r)
				break;

			lockmgr(&glob->lru_lock, LK_EXCLUSIVE);
			ttm_bo_move_to_lru_tail(&bo->tbo);
			if (bo->shadow)
				ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
			lockmgr(&glob->lru_lock, LK_RELEASE);
		}

		if (bo->tbo.type != ttm_bo_type_kernel) {
			lockmgr(&vm->moved_lock, LK_EXCLUSIVE);
			list_move(&bo_base->vm_status, &vm->moved);
			lockmgr(&vm->moved_lock, LK_RELEASE);
		} else {
			list_move(&bo_base->vm_status, &vm->relocated);
		}
	}

	lockmgr(&glob->lru_lock, LK_EXCLUSIVE);
	list_for_each_entry(bo_base, &vm->idle, vm_status) {
		struct amdgpu_bo *bo = bo_base->bo;

		if (!bo->parent)
			continue;

		ttm_bo_move_to_lru_tail(&bo->tbo);
		if (bo->shadow)
			ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
	}
	lockmgr(&glob->lru_lock, LK_RELEASE);

	return r;
}

/**
 * amdgpu_vm_ready - check VM is ready for updates
 *
 * @vm: VM to check
 *
 * Check if all VM PDs/PTs are ready for updates
 *
 * Returns:
 * True if eviction list is empty.
 */
bool amdgpu_vm_ready(struct amdgpu_vm *vm)
{
	return list_empty(&vm->evicted);
}

/**
 * amdgpu_vm_clear_bo - initially clear the PDs/PTs
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to clear BO from
 * @bo: BO to clear
 * @level: level this BO is at
 * @pte_support_ats: indicate ATS support from PTE
 *
 * Root PD needs to be reserved when calling this.
 *
 * Returns:
 * 0 on success, errno otherwise.
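 *
 * (The BO is filled with empty entries, or with the default ATS value
 * for the part of the address space covered by ATS when
 * @pte_support_ats is set; see the ats_entries handling below.)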
 */
static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
			      struct amdgpu_vm *vm, struct amdgpu_bo *bo,
			      unsigned level, bool pte_support_ats)
{
	struct ttm_operation_ctx ctx = { true, false };
	struct dma_fence *fence = NULL;
	unsigned entries, ats_entries;
	struct amdgpu_ring *ring;
	struct amdgpu_job *job;
	uint64_t addr;
	int r;

	entries = amdgpu_bo_size(bo) / 8;

	if (pte_support_ats) {
		if (level == adev->vm_manager.root_level) {
			ats_entries = amdgpu_vm_level_shift(adev, level);
			ats_entries += AMDGPU_GPU_PAGE_SHIFT;
			ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
			ats_entries = min(ats_entries, entries);
			entries -= ats_entries;
		} else {
			ats_entries = entries;
			entries = 0;
		}
	} else {
		ats_entries = 0;
	}

	ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);

	r = reservation_object_reserve_shared(bo->tbo.resv);
	if (r)
		return r;

	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (r)
		goto error;

	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
	if (r)
		goto error;

	addr = amdgpu_bo_gpu_offset(bo);
	if (ats_entries) {
		uint64_t ats_value;

		ats_value = AMDGPU_PTE_DEFAULT_ATC;
		if (level != AMDGPU_VM_PTB)
			ats_value |= AMDGPU_PDE_PTE;

		amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
				      ats_entries, 0, ats_value);
		addr += ats_entries * 8;
	}

	if (entries)
		amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
				      entries, 0, 0);

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);

	WARN_ON(job->ibs[0].length_dw > 64);
	r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
			     AMDGPU_FENCE_OWNER_UNDEFINED, false);
	if (r)
		goto error_free;

	r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_UNDEFINED,
			      &fence);
	if (r)
		goto error_free;

	amdgpu_bo_fence(bo, fence, true);
	dma_fence_put(fence);

	if (bo->shadow)
		return amdgpu_vm_clear_bo(adev, vm, bo->shadow,
					  level, pte_support_ats);

	return 0;

error_free:
	amdgpu_job_free(job);

error:
	return r;
}

/**
 * amdgpu_vm_alloc_levels - allocate the PD/PT levels
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @parent: parent PT
 * @saddr: start of the address range
 * @eaddr: end of the address range
 * @level: VMPT level
 * @ats: indicate ATS support from PTE
 *
 * Make sure the page directories and page tables are allocated
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
				  struct amdgpu_vm *vm,
				  struct amdgpu_vm_pt *parent,
				  uint64_t saddr, uint64_t eaddr,
				  unsigned level, bool ats)
{
	unsigned shift = amdgpu_vm_level_shift(adev, level);
	unsigned pt_idx, from, to;
	u64 flags;
	int r;

	if (!parent->entries) {
		unsigned num_entries = amdgpu_vm_num_entries(adev, level);

		/* __GFP_ZERO already returns a zeroed array */
		parent->entries = kvmalloc_array(num_entries,
						 sizeof(struct amdgpu_vm_pt),
						 GFP_KERNEL | __GFP_ZERO);
		if (!parent->entries)
			return -ENOMEM;
	}

	from = saddr >> shift;
	to = eaddr >> shift;
	if (from >= amdgpu_vm_num_entries(adev, level) ||
	    to >= amdgpu_vm_num_entries(adev, level))
		return -EINVAL;

	++level;
	saddr = saddr & ((1 << shift) - 1);
	eaddr = eaddr & ((1 << shift) - 1);

	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
	if (vm->root.base.bo->shadow)
		flags |= AMDGPU_GEM_CREATE_SHADOW;
	if (vm->use_cpu_for_update)
		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	else
		flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;

	/* walk over the address space and allocate the page tables */
	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
		struct reservation_object *resv = vm->root.base.bo->tbo.resv;
		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
		struct amdgpu_bo *pt;

		if (!entry->base.bo) {
			struct amdgpu_bo_param bp;

			memset(&bp, 0, sizeof(bp));
			bp.size = amdgpu_vm_bo_size(adev, level);
			bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
			bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
			bp.flags = flags;
			bp.type = ttm_bo_type_kernel;
			bp.resv = resv;
			r = amdgpu_bo_create(adev, &bp, &pt);
			if (r)
				return r;

			r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
			if (r) {
				amdgpu_bo_unref(&pt->shadow);
				amdgpu_bo_unref(&pt);
				return r;
			}

			if (vm->use_cpu_for_update) {
				r = amdgpu_bo_kmap(pt, NULL);
				if (r) {
					amdgpu_bo_unref(&pt->shadow);
					amdgpu_bo_unref(&pt);
					return r;
				}
			}

			/* Keep a reference to the root directory to avoid
			 * freeing them up in the wrong order.
			 */
			pt->parent = amdgpu_bo_ref(parent->base.bo);

			amdgpu_vm_bo_base_init(&entry->base, vm, pt);
		}

		if (level < AMDGPU_VM_PTB) {
			uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
			uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
				((1 << shift) - 1);
			r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
						   sub_eaddr, level, ats);
			if (r)
				return r;
		}
	}

	return 0;
}

/**
 * amdgpu_vm_alloc_pts - Allocate page tables.
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to allocate page tables for
 * @saddr: Start address which needs to be allocated
 * @size: Size from start address we need.
 *
 * Make sure the page tables are allocated.
 *
 * Returns:
 * 0 on success, errno otherwise.
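 *
 * Both @saddr and @size are expected to be GPU-page aligned; unaligned
 * values are rejected with -EINVAL below.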
 */
int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
			struct amdgpu_vm *vm,
			uint64_t saddr, uint64_t size)
{
	uint64_t eaddr;
	bool ats = false;

	/* validate the parameters */
	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
		return -EINVAL;

	eaddr = saddr + size - 1;

	if (vm->pte_support_ats)
		ats = saddr < AMDGPU_VA_HOLE_START;

	saddr /= AMDGPU_GPU_PAGE_SIZE;
	eaddr /= AMDGPU_GPU_PAGE_SIZE;

	if (eaddr >= adev->vm_manager.max_pfn) {
		dev_err(adev->dev, "va above limit (0x%08lX >= 0x%08lX)\n",
			eaddr, adev->vm_manager.max_pfn);
		return -EINVAL;
	}

	return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
				      adev->vm_manager.root_level, ats);
}

/**
 * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
 *
 * @adev: amdgpu_device pointer
 */
void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
{
	const struct amdgpu_ip_block *ip_block;
	bool has_compute_vm_bug;
	struct amdgpu_ring *ring;
	int i;

	has_compute_vm_bug = false;

	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
	if (ip_block) {
		/* Compute has a VM bug for GFX version < 7.
		 * Compute has a VM bug for GFX 8 MEC firmware version < 673.
		 */
		if (ip_block->version->major <= 7)
			has_compute_vm_bug = true;
		else if (ip_block->version->major == 8)
			if (adev->gfx.mec_fw_version < 673)
				has_compute_vm_bug = true;
	}

	for (i = 0; i < adev->num_rings; i++) {
		ring = adev->rings[i];
		if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
			/* only compute rings */
			ring->has_compute_vm_bug = has_compute_vm_bug;
		else
			ring->has_compute_vm_bug = false;
	}
}

/**
 * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
 *
 * @ring: ring on which the job will be submitted
 * @job: job to submit
 *
 * Returns:
 * True if sync is needed.
 */
bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
				  struct amdgpu_job *job)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct amdgpu_vmid *id;
	bool gds_switch_needed;
	bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;

	if (job->vmid == 0)
		return false;
	id = &id_mgr->ids[job->vmid];
	gds_switch_needed = ring->funcs->emit_gds_switch && (
		id->gds_base != job->gds_base ||
		id->gds_size != job->gds_size ||
		id->gws_base != job->gws_base ||
		id->gws_size != job->gws_size ||
		id->oa_base != job->oa_base ||
		id->oa_size != job->oa_size);

	if (amdgpu_vmid_had_gpu_reset(adev, id))
		return true;

	return vm_flush_needed || gds_switch_needed;
}

/**
 * amdgpu_vm_flush - hardware flush the vm
 *
 * @ring: ring to use for flush
 * @job: related job
 * @need_pipe_sync: is pipe sync needed
 *
 * Emit a VM flush when it is necessary.
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
		id->gds_base != job->gds_base ||
		id->gds_size != job->gds_size ||
		id->gws_base != job->gws_base ||
		id->gws_size != job->gws_size ||
		id->oa_base != job->oa_base ||
		id->oa_size != job->oa_size);
	bool vm_flush_needed = job->vm_needs_flush;
	struct dma_fence *fence = NULL;
	bool pasid_mapping_needed = false;
	unsigned patch_offset = 0;
	int r;

	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
		gds_switch_needed = true;
		vm_flush_needed = true;
		pasid_mapping_needed = true;
	}

	mutex_lock(&id_mgr->lock);
	if (id->pasid != job->pasid || !id->pasid_mapping ||
	    !dma_fence_is_signaled(id->pasid_mapping))
		pasid_mapping_needed = true;
	mutex_unlock(&id_mgr->lock);

	gds_switch_needed &= !!ring->funcs->emit_gds_switch;
	vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
			   job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
	pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
				ring->funcs->emit_wreg;

	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
		return 0;

	if (ring->funcs->init_cond_exec)
		patch_offset = amdgpu_ring_init_cond_exec(ring);

	if (need_pipe_sync)
		amdgpu_ring_emit_pipeline_sync(ring);

	if (vm_flush_needed) {
		trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
		amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
	}

	if (pasid_mapping_needed)
		amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);

	if (vm_flush_needed || pasid_mapping_needed) {
		r = amdgpu_fence_emit(ring, &fence, 0);
		if (r)
			return r;
	}

	if (vm_flush_needed) {
		mutex_lock(&id_mgr->lock);
		dma_fence_put(id->last_flush);
		id->last_flush = dma_fence_get(fence);
		id->current_gpu_reset_count =
			atomic_read(&adev->gpu_reset_counter);
		mutex_unlock(&id_mgr->lock);
	}

	if (pasid_mapping_needed) {
		mutex_lock(&id_mgr->lock);
		id->pasid = job->pasid;
		dma_fence_put(id->pasid_mapping);
		id->pasid_mapping = dma_fence_get(fence);
		mutex_unlock(&id_mgr->lock);
	}
	dma_fence_put(fence);

	if (ring->funcs->emit_gds_switch && gds_switch_needed) {
		id->gds_base = job->gds_base;
		id->gds_size = job->gds_size;
		id->gws_base = job->gws_base;
		id->gws_size = job->gws_size;
		id->oa_base = job->oa_base;
		id->oa_size = job->oa_size;
		amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
					    job->gds_size, job->gws_base,
					    job->gws_size, job->oa_base,
					    job->oa_size);
	}

	if (ring->funcs->patch_cond_exec)
		amdgpu_ring_patch_cond_exec(ring, patch_offset);

	/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
	if (ring->funcs->emit_switch_buffer) {
		amdgpu_ring_emit_switch_buffer(ring);
		amdgpu_ring_emit_switch_buffer(ring);
	}
	return 0;
}

/**
 * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
 *
 * @vm: requested vm
 * @bo: requested buffer object
 *
 * Find @bo inside the requested vm.
 * Search inside the @bos vm list for the requested vm
 * Returns the found bo_va or NULL if none is found
 *
 * Object has to be reserved!
 *
 * Returns:
 * Found bo_va or NULL.
 */
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
				       struct amdgpu_bo *bo)
{
	struct amdgpu_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, base.bo_list) {
		if (bo_va->base.vm == vm) {
			return bo_va;
		}
	}
	return NULL;
}

/**
 * amdgpu_vm_do_set_ptes - helper to call the right asic function
 *
 * @params: see amdgpu_pte_update_params definition
 * @bo: PD/PT to update
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the right asic functions
 * to setup the page table using the DMA.
 */
static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
				  struct amdgpu_bo *bo,
				  uint64_t pe, uint64_t addr,
				  unsigned count, uint32_t incr,
				  uint64_t flags)
{
	pe += amdgpu_bo_gpu_offset(bo);
	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);

	if (count < 3) {
		amdgpu_vm_write_pte(params->adev, params->ib, pe,
				    addr | flags, count, incr);

	} else {
		amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr,
				      count, incr, flags);
	}
}

/**
 * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
 *
 * @params: see amdgpu_pte_update_params definition
 * @bo: PD/PT to update
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the DMA function to copy the PTEs.
 */
static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
				   struct amdgpu_bo *bo,
				   uint64_t pe, uint64_t addr,
				   unsigned count, uint32_t incr,
				   uint64_t flags)
{
	uint64_t src = (params->src + (addr >> 12) * 8);

	pe += amdgpu_bo_gpu_offset(bo);
	trace_amdgpu_vm_copy_ptes(pe, src, count);

	amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
}

/**
 * amdgpu_vm_map_gart - Resolve gart mapping of addr
 *
 * @pages_addr: optional DMA address to use for lookup
 * @addr: the unmapped addr
 *
 * Look up the physical address of the page that the pte resolves
 * to.
 *
 * Returns:
 * The pointer for the page table entry.
 */
static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
{
	uint64_t result;

	/* page table offset */
	result = pages_addr[addr >> PAGE_SHIFT];

	/* in case cpu page size != gpu page size */
	result |= addr & (~PAGE_MASK);

	result &= 0xFFFFFFFFFFFFF000ULL;

	return result;
}

/**
 * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
 *
 * @params: see amdgpu_pte_update_params definition
 * @bo: PD/PT to update
 * @pe: kmap addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Write count number of PT/PD entries directly.
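 * (This path is used when vm->use_cpu_for_update is set: the entries are
 * written through the kernel mapping of the PD/PT BO instead of via an IB.)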
 */
static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
				   struct amdgpu_bo *bo,
				   uint64_t pe, uint64_t addr,
				   unsigned count, uint32_t incr,
				   uint64_t flags)
{
	unsigned int i;
	uint64_t value;

	pe += (unsigned long)amdgpu_bo_kptr(bo);

	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);

	for (i = 0; i < count; i++) {
		value = params->pages_addr ?
			amdgpu_vm_map_gart(params->pages_addr, addr) :
			addr;
		amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
				       i, value, flags);
		addr += incr;
	}
}


/**
 * amdgpu_vm_wait_pd - Wait for PT BOs to be free.
 *
 * @adev: amdgpu_device pointer
 * @vm: related vm
 * @owner: fence owner
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			     void *owner)
{
	struct amdgpu_sync sync;
	int r;

	amdgpu_sync_create(&sync);
	amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false);
	r = amdgpu_sync_wait(&sync, true);
	amdgpu_sync_free(&sync);

	return r;
}

/*
 * amdgpu_vm_update_pde - update a single level in the hierarchy
 *
 * @params: parameters for the update
 * @vm: requested vm
 * @parent: parent directory
 * @entry: entry to update
 *
 * Makes sure the requested entry in parent is up to date.
 */
static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
				 struct amdgpu_vm *vm,
				 struct amdgpu_vm_pt *parent,
				 struct amdgpu_vm_pt *entry)
{
	struct amdgpu_bo *bo = parent->base.bo, *pbo;
	uint64_t pde, pt, flags;
	unsigned level;

	/* Don't update huge pages here */
	if (entry->huge)
		return;

	for (level = 0, pbo = bo->parent; pbo; ++level)
		pbo = pbo->parent;

	level += params->adev->vm_manager.root_level;
	pt = amdgpu_bo_gpu_offset(entry->base.bo);
	flags = AMDGPU_PTE_VALID;
	amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags);
	pde = (entry - parent->entries) * 8;
	if (bo->shadow)
		params->func(params, bo->shadow, pde, pt, 1, 0, flags);
	params->func(params, bo, pde, pt, 1, 0, flags);
}

/*
 * amdgpu_vm_invalidate_level - mark all PD levels as invalid
 *
 * @adev: amdgpu_device pointer
 * @vm: related vm
 * @parent: parent PD
 * @level: VMPT level
 *
 * Mark all PD levels as invalid after an error.
 */
static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
				       struct amdgpu_vm *vm,
				       struct amdgpu_vm_pt *parent,
				       unsigned level)
{
	unsigned pt_idx, num_entries;

	/*
	 * Recurse into the subdirectories. This recursion is harmless because
	 * we only have a maximum of 5 layers.
	 */
	num_entries = amdgpu_vm_num_entries(adev, level);
	for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];

		if (!entry->base.bo)
			continue;

		if (!entry->base.moved)
			list_move(&entry->base.vm_status, &vm->relocated);
		amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
	}
}

/*
 * amdgpu_vm_update_directories - make sure that all directories are valid
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Makes sure all directories are up to date.
 *
 * Returns:
 * 0 for success, error for failure.
 */
int amdgpu_vm_update_directories(struct amdgpu_device *adev,
				 struct amdgpu_vm *vm)
{
	struct amdgpu_pte_update_params params;
	struct amdgpu_job *job;
	unsigned ndw = 0;
	int r = 0;

	if (list_empty(&vm->relocated))
		return 0;

restart:
	memset(&params, 0, sizeof(params));
	params.adev = adev;

	if (vm->use_cpu_for_update) {
		struct amdgpu_vm_bo_base *bo_base;

		list_for_each_entry(bo_base, &vm->relocated, vm_status) {
			r = amdgpu_bo_kmap(bo_base->bo, NULL);
			if (unlikely(r))
				return r;
		}

		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
		if (unlikely(r))
			return r;

		params.func = amdgpu_vm_cpu_set_ptes;
	} else {
		ndw = 512 * 8;
		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
		if (r)
			return r;

		params.ib = &job->ibs[0];
		params.func = amdgpu_vm_do_set_ptes;
	}

	while (!list_empty(&vm->relocated)) {
		struct amdgpu_vm_bo_base *bo_base, *parent;
		struct amdgpu_vm_pt *pt, *entry;
		struct amdgpu_bo *bo;

		bo_base = list_first_entry(&vm->relocated,
					   struct amdgpu_vm_bo_base,
					   vm_status);
		bo_base->moved = false;
		list_del_init(&bo_base->vm_status);

		bo = bo_base->bo->parent;
		if (!bo)
			continue;

		parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
					  bo_list);
		pt = container_of(parent, struct amdgpu_vm_pt, base);
		entry = container_of(bo_base, struct amdgpu_vm_pt, base);

		amdgpu_vm_update_pde(&params, vm, pt, entry);

		if (!vm->use_cpu_for_update &&
		    (ndw - params.ib->length_dw) < 32)
			break;
	}

	if (vm->use_cpu_for_update) {
		/* Flush HDP */
		mb();
		amdgpu_asic_flush_hdp(adev, NULL);
	} else if (params.ib->length_dw == 0) {
		amdgpu_job_free(job);
	} else {
		struct amdgpu_bo *root = vm->root.base.bo;
		struct amdgpu_ring *ring;
		struct dma_fence *fence;

		ring = container_of(vm->entity.rq->sched, struct amdgpu_ring,
				    sched);

		amdgpu_ring_pad_ib(ring, params.ib);
		amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
				 AMDGPU_FENCE_OWNER_VM, false);
		WARN_ON(params.ib->length_dw > ndw);
		r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM,
				      &fence);
		if (r)
			goto error;

		amdgpu_bo_fence(root, fence, true);
		dma_fence_put(vm->last_update);
		vm->last_update = fence;
	}

	if (!list_empty(&vm->relocated))
		goto restart;

	return 0;

error:
	amdgpu_vm_invalidate_level(adev, vm, &vm->root,
				   adev->vm_manager.root_level);
	amdgpu_job_free(job);
	return r;
}

/**
 * amdgpu_vm_get_entry - find the entry for an address
 *
 * @p: see amdgpu_pte_update_params definition
 * @addr: virtual address in question
 * @entry: resulting entry or NULL
 * @parent: parent entry
 *
 * Find the vm_pt entry and its parent for the given address.
 */
void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
			 struct amdgpu_vm_pt **entry,
			 struct amdgpu_vm_pt **parent);
void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
			 struct amdgpu_vm_pt **entry,
			 struct amdgpu_vm_pt **parent)
{
	unsigned level = p->adev->vm_manager.root_level;

	*parent = NULL;
	*entry = &p->vm->root;
	while ((*entry)->entries) {
		unsigned shift = amdgpu_vm_level_shift(p->adev, level++);

		*parent = *entry;
		*entry = &(*entry)->entries[addr >> shift];
		addr &= (1ULL << shift) - 1;
	}

	if (level != AMDGPU_VM_PTB)
		*entry = NULL;
}

/**
 * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
 *
 * @p: see amdgpu_pte_update_params definition
 * @entry: vm_pt entry to check
 * @parent: parent entry
 * @nptes: number of PTEs updated with this operation
 * @dst: destination address where the PTEs should point to
 * @flags: access flags for the PTEs
 *
 * Check if we can update the PD with a huge page.
 */
static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
					struct amdgpu_vm_pt *entry,
					struct amdgpu_vm_pt *parent,
					unsigned nptes, uint64_t dst,
					uint64_t flags)
{
	uint64_t pde;

	/* In the case of a mixed PT the PDE must point to it */
	if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
	    nptes == AMDGPU_VM_PTE_COUNT(p->adev)) {
		/* Set the huge page flag to stop scanning at this PDE */
		flags |= AMDGPU_PDE_PTE;
	}

	if (!(flags & AMDGPU_PDE_PTE)) {
		if (entry->huge) {
			/* Add the entry to the relocated list to update it. */
			entry->huge = false;
			list_move(&entry->base.vm_status, &p->vm->relocated);
		}
		return;
	}

	entry->huge = true;
	amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);

	pde = (entry - parent->entries) * 8;
	if (parent->base.bo->shadow)
		p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags);
	p->func(p, parent->base.bo, pde, dst, 1, 0, flags);
}

/**
 * amdgpu_vm_update_ptes - make sure that page tables are valid
 *
 * @params: see amdgpu_pte_update_params definition
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to, the next dst inside the function
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end.
 *
 * Returns:
 * 0 for success, -EINVAL for failure.
 */
static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
				 uint64_t start, uint64_t end,
				 uint64_t dst, uint64_t flags)
{
	struct amdgpu_device *adev = params->adev;
	const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;

	uint64_t addr, pe_start;
	struct amdgpu_bo *pt;
	unsigned nptes;

	/* walk over the address space and update the page tables */
	for (addr = start; addr < end; addr += nptes,
	     dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
		struct amdgpu_vm_pt *entry, *parent;

		amdgpu_vm_get_entry(params, addr, &entry, &parent);
		if (!entry)
			return -ENOENT;

		if ((addr & ~mask) == (end & ~mask))
			nptes = end - addr;
		else
			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);

		amdgpu_vm_handle_huge_pages(params, entry, parent,
					    nptes, dst, flags);
		/* We don't need to update PTEs for huge pages */
		if (entry->huge)
			continue;

		pt = entry->base.bo;
		pe_start = (addr & mask) * 8;
		if (pt->shadow)
			params->func(params, pt->shadow, pe_start, dst, nptes,
				     AMDGPU_GPU_PAGE_SIZE, flags);
		params->func(params, pt, pe_start, dst, nptes,
			     AMDGPU_GPU_PAGE_SIZE, flags);
	}

	return 0;
}

/*
 * amdgpu_vm_frag_ptes - add fragment information to PTEs
 *
 * @params: see amdgpu_pte_update_params definition
 * @vm: requested vm
 * @start: first PTE to handle
 * @end: last PTE to handle
 * @dst: addr those PTEs should point to
 * @flags: hw mapping flags
 *
 * Returns:
 * 0 for success, -EINVAL for failure.
 */
static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
			       uint64_t start, uint64_t end,
			       uint64_t dst, uint64_t flags)
{
	/**
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE. When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching. This leads to large improvements in throughput when the
	 * TLB is under pressure.
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions. The large fragment cache is significantly
	 * larger. Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning virtual base address and
	 * allocation size to the fragment size.
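	 *
	 * For example, with a fragment_size of 9 one fragment covers
	 * 4KB << 9 = 2MB, so a 2MB aligned and physically contiguous
	 * mapping can be cached with a single fragment PTE (illustrative
	 * numbers; the actual fragment_size comes from adev->vm_manager).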
	 */
	unsigned max_frag = params->adev->vm_manager.fragment_size;
	int r;

	/* system pages are not physically contiguous */
	if (params->src || !(flags & AMDGPU_PTE_VALID))
		return amdgpu_vm_update_ptes(params, start, end, dst, flags);

	while (start != end) {
		uint64_t frag_flags, frag_end;
		unsigned frag;

		/* This intentionally wraps around if no bit is set */
		frag = min((unsigned)ffs(start) - 1,
			   (unsigned)fls64(end - start) - 1);
		if (frag >= max_frag) {
			frag_flags = AMDGPU_PTE_FRAG(max_frag);
			frag_end = end & ~((1ULL << max_frag) - 1);
		} else {
			frag_flags = AMDGPU_PTE_FRAG(frag);
			frag_end = start + (1 << frag);
		}

		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
					  flags | frag_flags);
		if (r)
			return r;

		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
		start = frag_end;
	}

	return 0;
}

/**
 * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
 *
 * @adev: amdgpu_device pointer
 * @exclusive: fence we need to sync to
 * @pages_addr: DMA addresses to use for mapping
 * @vm: requested vm
 * @start: start of mapped range
 * @last: last mapped entry
 * @flags: flags for the entries
 * @addr: addr to set the area to
 * @fence: optional resulting fence
 *
 * Fill in the page table entries between @start and @last.
 *
 * Returns:
 * 0 for success, -EINVAL for failure.
 */
static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
					struct dma_fence *exclusive,
					dma_addr_t *pages_addr,
					struct amdgpu_vm *vm,
					uint64_t start, uint64_t last,
					uint64_t flags, uint64_t addr,
					struct dma_fence **fence)
{
	struct amdgpu_ring *ring;
	void *owner = AMDGPU_FENCE_OWNER_VM;
	unsigned nptes, ncmds, ndw;
	struct amdgpu_job *job;
	struct amdgpu_pte_update_params params;
	struct dma_fence *f = NULL;
	int r;

	memset(&params, 0, sizeof(params));
	params.adev = adev;
	params.vm = vm;

	/* sync to everything on unmapping */
	if (!(flags & AMDGPU_PTE_VALID))
		owner = AMDGPU_FENCE_OWNER_UNDEFINED;

	if (vm->use_cpu_for_update) {
		/* params.src is used as flag to indicate system memory */
		if (pages_addr)
			params.src = ~0;

		/* Wait for PT BOs to be free. PTs share the same resv. object
		 * as the root PD BO
		 */
		r = amdgpu_vm_wait_pd(adev, vm, owner);
		if (unlikely(r))
			return r;

		params.func = amdgpu_vm_cpu_set_ptes;
		params.pages_addr = pages_addr;
		return amdgpu_vm_frag_ptes(&params, start, last + 1,
					   addr, flags);
	}

	ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);

	nptes = last - start + 1;

	/*
	 * reserve space for two commands every (1 << BLOCK_SIZE)
	 * entries or 2k dwords (whatever is smaller)
	 *
	 * The second command is for the shadow pagetables.
	 */
	if (vm->root.base.bo->shadow)
		ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
	else
		ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);

	/* padding, etc.
	 */
	ndw = 64;

	if (pages_addr) {
		/* copy commands needed */
		ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;

		/* and also PTEs */
		ndw += nptes * 2;

		params.func = amdgpu_vm_do_copy_ptes;

	} else {
		/* set page commands needed */
		ndw += ncmds * 10;

		/* extra commands for begin/end fragments */
		if (vm->root.base.bo->shadow)
			ndw += 2 * 10 * adev->vm_manager.fragment_size * 2;
		else
			ndw += 2 * 10 * adev->vm_manager.fragment_size;

		params.func = amdgpu_vm_do_set_ptes;
	}

	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
	if (r)
		return r;

	params.ib = &job->ibs[0];

	if (pages_addr) {
		uint64_t *pte;
		unsigned i;

		/* Put the PTEs at the end of the IB. */
		i = ndw - nptes * 2;
		pte = (uint64_t *)&(job->ibs->ptr[i]);
		params.src = job->ibs->gpu_addr + i * 4;

		for (i = 0; i < nptes; ++i) {
			pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i *
						    AMDGPU_GPU_PAGE_SIZE);
			pte[i] |= flags;
		}
		addr = 0;
	}

	r = amdgpu_sync_fence(adev, &job->sync, exclusive, false);
	if (r)
		goto error_free;

	r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
			     owner, false);
	if (r)
		goto error_free;

	r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
	if (r)
		goto error_free;

	r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
	if (r)
		goto error_free;

	amdgpu_ring_pad_ib(ring, params.ib);
	WARN_ON(params.ib->length_dw > ndw);
	r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM, &f);
	if (r)
		goto error_free;

	amdgpu_bo_fence(vm->root.base.bo, f, true);
	dma_fence_put(*fence);
	*fence = f;
	return 0;

error_free:
	amdgpu_job_free(job);
	return r;
}

/**
 * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
 *
 * @adev: amdgpu_device pointer
 * @exclusive: fence we need to sync to
 * @pages_addr: DMA addresses to use for mapping
 * @vm: requested vm
 * @mapping: mapped range and flags to use for the update
 * @flags: HW flags for the mapping
 * @nodes: array of drm_mm_nodes with the MC addresses
 * @fence: optional resulting fence
 *
 * Split the mapping into smaller chunks so that each update fits
 * into a SDMA IB.
 *
 * Returns:
 * 0 for success, -EINVAL for failure.
 */
static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
				      struct dma_fence *exclusive,
				      dma_addr_t *pages_addr,
				      struct amdgpu_vm *vm,
				      struct amdgpu_bo_va_mapping *mapping,
				      uint64_t flags,
				      struct drm_mm_node *nodes,
				      struct dma_fence **fence)
{
	unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size;
	uint64_t pfn, start = mapping->start;
	int r;

	/* normally, bo_va->flags only contains READABLE and WRITEABLE bits,
	 * but just in case we filter the flags here first
	 */
	if (!(mapping->flags & AMDGPU_PTE_READABLE))
		flags &= ~AMDGPU_PTE_READABLE;
	if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
		flags &= ~AMDGPU_PTE_WRITEABLE;

	flags &= ~AMDGPU_PTE_EXECUTABLE;
	flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;

	flags &= ~AMDGPU_PTE_MTYPE_MASK;
	flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK);

	if ((mapping->flags & AMDGPU_PTE_PRT) &&
	    (adev->asic_type >= CHIP_VEGA10)) {
		flags |= AMDGPU_PTE_PRT;
		flags &= ~AMDGPU_PTE_VALID;
	}

	trace_amdgpu_vm_bo_update(mapping);

	pfn = mapping->offset >> PAGE_SHIFT;
	if (nodes) {
		while (pfn >= nodes->size) {
			pfn -= nodes->size;
			++nodes;
		}
	}

	do {
		dma_addr_t *dma_addr = NULL;
		uint64_t max_entries;
		uint64_t addr, last;

		if (nodes) {
			addr = nodes->start << PAGE_SHIFT;
			max_entries = (nodes->size - pfn) *
				AMDGPU_GPU_PAGES_IN_CPU_PAGE;
		} else {
			addr = 0;
			max_entries = S64_MAX;
		}

		if (pages_addr) {
			uint64_t count;

			max_entries = min(max_entries, 16ull * 1024ull);
			for (count = 1;
			     count < max_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
			     ++count) {
				uint64_t idx = pfn + count;

				if (pages_addr[idx] !=
				    (pages_addr[idx - 1] + PAGE_SIZE))
					break;
			}

			if (count < min_linear_pages) {
				addr = pfn << PAGE_SHIFT;
				dma_addr = pages_addr;
			} else {
				addr = pages_addr[pfn];
				max_entries = count * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
			}

		} else if (flags & AMDGPU_PTE_VALID) {
			addr += adev->vm_manager.vram_base_offset;
			addr += pfn << PAGE_SHIFT;
		}

		last = min((uint64_t)mapping->last, start + max_entries - 1);
		r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm,
						start, last, flags, addr,
						fence);
		if (r)
			return r;

		pfn += (last - start + 1) / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
		if (nodes && nodes->size == pfn) {
			pfn = 0;
			++nodes;
		}
		start = last + 1;

	} while (unlikely(start != mapping->last + 1));

	return 0;
}

/**
 * amdgpu_vm_bo_update - update all BO mappings in the vm page table
 *
 * @adev: amdgpu_device pointer
 * @bo_va: requested BO and VM object
 * @clear: if true clear the entries
 *
 * Fill in the page table entries for @bo_va.
 *
 * Returns:
 * 0 for success, -EINVAL for failure.
 */
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
			struct amdgpu_bo_va *bo_va,
			bool clear)
{
	struct amdgpu_bo *bo = bo_va->base.bo;
	struct amdgpu_vm *vm = bo_va->base.vm;
	struct amdgpu_bo_va_mapping *mapping;
	dma_addr_t *pages_addr = NULL;
	struct ttm_mem_reg *mem;
	struct drm_mm_node *nodes;
	struct dma_fence *exclusive, **last_update;
	uint64_t flags;
	int r;

	if (clear || !bo) {
		mem = NULL;
		nodes = NULL;
		exclusive = NULL;
	} else {
		struct ttm_dma_tt *ttm;

		mem = &bo->tbo.mem;
		nodes = mem->mm_node;
		if (mem->mem_type == TTM_PL_TT) {
			ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
			pages_addr = ttm->dma_address;
		}
		exclusive = reservation_object_get_excl(bo->tbo.resv);
	}

	if (bo)
		flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
	else
		flags = 0x0;

	if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv))
		last_update = &vm->last_update;
	else
		last_update = &bo_va->last_pt_update;

	if (!clear && bo_va->base.moved) {
		bo_va->base.moved = false;
		list_splice_init(&bo_va->valids, &bo_va->invalids);

	} else if (bo_va->cleared != clear) {
		list_splice_init(&bo_va->valids, &bo_va->invalids);
	}

	list_for_each_entry(mapping, &bo_va->invalids, list) {
		r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
					       mapping, flags, nodes,
					       last_update);
		if (r)
			return r;
	}

	if (vm->use_cpu_for_update) {
		/* Flush HDP */
		mb();
		amdgpu_asic_flush_hdp(adev, NULL);
	}

	lockmgr(&vm->moved_lock, LK_EXCLUSIVE);
	list_del_init(&bo_va->base.vm_status);
	lockmgr(&vm->moved_lock, LK_RELEASE);

	/* If the BO is not in its preferred location add it back to
	 * the evicted list so that it gets validated again on the
	 * next command submission.
	 */
	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
		uint32_t mem_type = bo->tbo.mem.mem_type;

		if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
			list_add_tail(&bo_va->base.vm_status, &vm->evicted);
		else
			list_add(&bo_va->base.vm_status, &vm->idle);
	}

	list_splice_init(&bo_va->invalids, &bo_va->valids);
	bo_va->cleared = clear;

#if 0
	if (trace_amdgpu_vm_bo_mapping_enabled()) {
		list_for_each_entry(mapping, &bo_va->valids, list)
			trace_amdgpu_vm_bo_mapping(mapping);
	}
#endif

	return 0;
}

/**
 * amdgpu_vm_update_prt_state - update the global PRT state
 *
 * @adev: amdgpu_device pointer
 */
static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
{
	unsigned long flags;
	bool enable;

	spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
	enable = !!atomic_read(&adev->vm_manager.num_prt_users);
	adev->gmc.gmc_funcs->set_prt(adev, enable);
	spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
}

/**
 * amdgpu_vm_prt_get - add a PRT user
 *
 * @adev: amdgpu_device pointer
 */
static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
{
	if (!adev->gmc.gmc_funcs->set_prt)
		return;

	if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
		amdgpu_vm_update_prt_state(adev);
}

/**
 * amdgpu_vm_prt_put - drop a PRT user
 *
 * @adev: amdgpu_device pointer
 */
static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
{
	if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0)
		amdgpu_vm_update_prt_state(adev);
}

/**
 * amdgpu_vm_prt_cb - callback for updating the PRT status
 *
 * @fence: fence for the callback
 * @_cb: the callback function
 */
static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
{
	struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb);

	amdgpu_vm_prt_put(cb->adev);
	kfree(cb);
}

/**
 * amdgpu_vm_add_prt_cb - add callback for updating the PRT status
 *
 * @adev: amdgpu_device pointer
 * @fence: fence for the callback
 */
static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
				 struct dma_fence *fence)
{
	struct amdgpu_prt_cb *cb;

	if (!adev->gmc.gmc_funcs->set_prt)
		return;

	cb = kmalloc(sizeof(struct amdgpu_prt_cb), M_DRM, GFP_KERNEL);
	if (!cb) {
		/* Last resort when we are OOM */
		if (fence)
			dma_fence_wait(fence, false);

		amdgpu_vm_prt_put(adev);
	} else {
		cb->adev = adev;
		if (!fence || dma_fence_add_callback(fence, &cb->cb,
						     amdgpu_vm_prt_cb))
			amdgpu_vm_prt_cb(fence, &cb->cb);
	}
}

/**
 * amdgpu_vm_free_mapping - free a mapping
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @mapping: mapping to be freed
 * @fence: fence of the unmap operation
 *
 * Free a mapping and make sure we decrease the PRT usage count if applicable.
 */
static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
				   struct amdgpu_vm *vm,
				   struct amdgpu_bo_va_mapping *mapping,
				   struct dma_fence *fence)
{
	if (mapping->flags & AMDGPU_PTE_PRT)
		amdgpu_vm_add_prt_cb(adev, fence);
	kfree(mapping);
}

/**
 * amdgpu_vm_prt_fini - finish all prt mappings
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Register a cleanup callback to disable PRT support after VM dies.
 */
static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	struct reservation_object *resv = vm->root.base.bo->tbo.resv;
	struct dma_fence *excl, **shared;
	unsigned i, shared_count;
	int r;

	r = reservation_object_get_fences_rcu(resv, &excl,
					      &shared_count, &shared);
	if (r) {
		/* Not enough memory to grab the fence list, as last resort
		 * block for all the fences to complete.
		 */
		reservation_object_wait_timeout_rcu(resv, true, false,
						    MAX_SCHEDULE_TIMEOUT);
		return;
	}

	/* Add a callback for each fence in the reservation object */
	amdgpu_vm_prt_get(adev);
	amdgpu_vm_add_prt_cb(adev, excl);

	for (i = 0; i < shared_count; ++i) {
		amdgpu_vm_prt_get(adev);
		amdgpu_vm_add_prt_cb(adev, shared[i]);
	}

	kfree(shared);
}

/**
 * amdgpu_vm_clear_freed - clear freed BOs in the PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @fence: optional resulting fence (unchanged if no work needed to be done
 * or if an error occurred)
 *
 * Make sure all freed BOs are cleared in the PT.
 * PTs have to be reserved and mutex must be locked!
 *
 * Returns:
 * 0 for success.
 *
 */
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
			  struct amdgpu_vm *vm,
			  struct dma_fence **fence)
{
	struct amdgpu_bo_va_mapping *mapping;
	uint64_t init_pte_value = 0;
	struct dma_fence *f = NULL;
	int r;

	while (!list_empty(&vm->freed)) {
		mapping = list_first_entry(&vm->freed,
					   struct amdgpu_bo_va_mapping, list);
		list_del(&mapping->list);

		if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;

		r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
						mapping->start, mapping->last,
						init_pte_value, 0, &f);
		amdgpu_vm_free_mapping(adev, vm, mapping, f);
		if (r) {
			dma_fence_put(f);
			return r;
		}
	}

	if (fence && f) {
		dma_fence_put(*fence);
		*fence = f;
	} else {
		dma_fence_put(f);
	}

	return 0;

}

/**
 * amdgpu_vm_handle_moved - handle moved BOs in the PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Make sure all BOs which are moved are updated in the PTs.
 *
 * Returns:
 * 0 for success.
 *
 * PTs have to be reserved!
2011 */ 2012 int amdgpu_vm_handle_moved(struct amdgpu_device *adev, 2013 struct amdgpu_vm *vm) 2014 { 2015 struct amdgpu_bo_va *bo_va, *tmp; 2016 struct list_head moved; 2017 bool clear; 2018 int r; 2019 2020 INIT_LIST_HEAD(&moved); 2021 lockmgr(&vm->moved_lock, LK_EXCLUSIVE); 2022 list_splice_init(&vm->moved, &moved); 2023 lockmgr(&vm->moved_lock, LK_RELEASE); 2024 2025 list_for_each_entry_safe(bo_va, tmp, &moved, base.vm_status) { 2026 struct reservation_object *resv = bo_va->base.bo->tbo.resv; 2027 2028 /* Per VM BOs never need to be cleared in the page tables */ 2029 if (resv == vm->root.base.bo->tbo.resv) 2030 clear = false; 2031 /* Try to reserve the BO to avoid clearing its ptes */ 2032 else if (!amdgpu_vm_debug && reservation_object_trylock(resv)) 2033 clear = false; 2034 /* Somebody else is using the BO right now */ 2035 else 2036 clear = true; 2037 2038 r = amdgpu_vm_bo_update(adev, bo_va, clear); 2039 if (r) { 2040 lockmgr(&vm->moved_lock, LK_EXCLUSIVE); 2041 list_splice(&moved, &vm->moved); 2042 lockmgr(&vm->moved_lock, LK_RELEASE); 2043 return r; 2044 } 2045 2046 if (!clear && resv != vm->root.base.bo->tbo.resv) 2047 reservation_object_unlock(resv); 2048 2049 } 2050 2051 return 0; 2052 } 2053 2054 /** 2055 * amdgpu_vm_bo_add - add a bo to a specific vm 2056 * 2057 * @adev: amdgpu_device pointer 2058 * @vm: requested vm 2059 * @bo: amdgpu buffer object 2060 * 2061 * Add @bo into the requested vm. 2062 * Add @bo to the list of bos associated with the vm 2063 * 2064 * Returns: 2065 * Newly added bo_va or NULL for failure 2066 * 2067 * Object has to be reserved! 2068 */ 2069 struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, 2070 struct amdgpu_vm *vm, 2071 struct amdgpu_bo *bo) 2072 { 2073 struct amdgpu_bo_va *bo_va; 2074 2075 bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL); 2076 if (bo_va == NULL) { 2077 return NULL; 2078 } 2079 amdgpu_vm_bo_base_init(&bo_va->base, vm, bo); 2080 2081 bo_va->ref_count = 1; 2082 INIT_LIST_HEAD(&bo_va->valids); 2083 INIT_LIST_HEAD(&bo_va->invalids); 2084 2085 return bo_va; 2086 } 2087 2088 2089 /** 2090 * amdgpu_vm_bo_insert_map - insert a new mapping 2091 * 2092 * @adev: amdgpu_device pointer 2093 * @bo_va: bo_va to store the address 2094 * @mapping: the mapping to insert 2095 * 2096 * Insert a new mapping into all structures. 2097 */ 2098 static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, 2099 struct amdgpu_bo_va *bo_va, 2100 struct amdgpu_bo_va_mapping *mapping) 2101 { 2102 struct amdgpu_vm *vm = bo_va->base.vm; 2103 struct amdgpu_bo *bo = bo_va->base.bo; 2104 2105 mapping->bo_va = bo_va; 2106 list_add(&mapping->list, &bo_va->invalids); 2107 amdgpu_vm_it_insert(mapping, &vm->va); 2108 2109 if (mapping->flags & AMDGPU_PTE_PRT) 2110 amdgpu_vm_prt_get(adev); 2111 2112 if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv && 2113 !bo_va->base.moved) { 2114 lockmgr(&vm->moved_lock, LK_EXCLUSIVE); 2115 list_move(&bo_va->base.vm_status, &vm->moved); 2116 lockmgr(&vm->moved_lock, LK_RELEASE); 2117 } 2118 #if 0 2119 trace_amdgpu_vm_bo_map(bo_va, mapping); 2120 #endif 2121 } 2122 2123 /** 2124 * amdgpu_vm_bo_map - map bo inside a vm 2125 * 2126 * @adev: amdgpu_device pointer 2127 * @bo_va: bo_va to store the address 2128 * @saddr: where to map the BO 2129 * @offset: requested offset in the BO 2130 * @size: BO size in bytes 2131 * @flags: attributes of pages (read/write/valid/etc.) 2132 * 2133 * Add a mapping of the BO at the specified addr into the VM. 2134 * 2135 * Returns: 2136 * 0 for success, error for failure.
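 *
 * Minimal usage sketch (bo, vm and va_addr are placeholder names, the flag
 * choice is only an example, and the BO must already be reserved by the
 * caller as noted below):
 *
 *   bo_va = amdgpu_vm_bo_add(adev, vm, bo);
 *   r = amdgpu_vm_bo_map(adev, bo_va, va_addr, 0, amdgpu_bo_size(bo),
 *                        AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE);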
2137 * 2138 * Object has to be reserved and unreserved outside! 2139 */ 2140 int amdgpu_vm_bo_map(struct amdgpu_device *adev, 2141 struct amdgpu_bo_va *bo_va, 2142 uint64_t saddr, uint64_t offset, 2143 uint64_t size, uint64_t flags) 2144 { 2145 struct amdgpu_bo_va_mapping *mapping, *tmp; 2146 struct amdgpu_bo *bo = bo_va->base.bo; 2147 struct amdgpu_vm *vm = bo_va->base.vm; 2148 uint64_t eaddr; 2149 2150 /* validate the parameters */ 2151 if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || 2152 size == 0 || size & ~PAGE_MASK) 2153 return -EINVAL; 2154 2155 /* make sure object fits at this offset */ 2156 eaddr = saddr + size - 1; 2157 if (saddr >= eaddr || 2158 (bo && offset + size > amdgpu_bo_size(bo))) 2159 return -EINVAL; 2160 2161 saddr /= AMDGPU_GPU_PAGE_SIZE; 2162 eaddr /= AMDGPU_GPU_PAGE_SIZE; 2163 2164 tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); 2165 if (tmp) { 2166 /* bo and tmp overlap, invalid addr */ 2167 dev_err(adev->dev, "bo %p va 0x%010lx-0x%010lx conflict with " 2168 "0x%010lx-0x%010lx\n", bo, saddr, eaddr, 2169 tmp->start, tmp->last + 1); 2170 return -EINVAL; 2171 } 2172 2173 mapping = kmalloc(sizeof(*mapping), M_DRM, GFP_KERNEL); 2174 if (!mapping) 2175 return -ENOMEM; 2176 2177 mapping->start = saddr; 2178 mapping->last = eaddr; 2179 mapping->offset = offset; 2180 mapping->flags = flags; 2181 2182 amdgpu_vm_bo_insert_map(adev, bo_va, mapping); 2183 2184 return 0; 2185 } 2186 2187 /** 2188 * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings 2189 * 2190 * @adev: amdgpu_device pointer 2191 * @bo_va: bo_va to store the address 2192 * @saddr: where to map the BO 2193 * @offset: requested offset in the BO 2194 * @size: BO size in bytes 2195 * @flags: attributes of pages (read/write/valid/etc.) 2196 * 2197 * Add a mapping of the BO at the specified addr into the VM. Replace existing 2198 * mappings as we do so. 2199 * 2200 * Returns: 2201 * 0 for success, error for failure. 2202 * 2203 * Object has to be reserved and unreserved outside! 2204 */ 2205 int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, 2206 struct amdgpu_bo_va *bo_va, 2207 uint64_t saddr, uint64_t offset, 2208 uint64_t size, uint64_t flags) 2209 { 2210 struct amdgpu_bo_va_mapping *mapping; 2211 struct amdgpu_bo *bo = bo_va->base.bo; 2212 uint64_t eaddr; 2213 int r; 2214 2215 /* validate the parameters */ 2216 if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || 2217 size == 0 || size & ~PAGE_MASK) 2218 return -EINVAL; 2219 2220 /* make sure object fits at this offset */ 2221 eaddr = saddr + size - 1; 2222 if (saddr >= eaddr || 2223 (bo && offset + size > amdgpu_bo_size(bo))) 2224 return -EINVAL; 2225 2226 /* Allocate all the needed memory */ 2227 mapping = kmalloc(sizeof(*mapping), M_DRM, GFP_KERNEL); 2228 if (!mapping) 2229 return -ENOMEM; 2230 2231 r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size); 2232 if (r) { 2233 kfree(mapping); 2234 return r; 2235 } 2236 2237 saddr /= AMDGPU_GPU_PAGE_SIZE; 2238 eaddr /= AMDGPU_GPU_PAGE_SIZE; 2239 2240 mapping->start = saddr; 2241 mapping->last = eaddr; 2242 mapping->offset = offset; 2243 mapping->flags = flags; 2244 2245 amdgpu_vm_bo_insert_map(adev, bo_va, mapping); 2246 2247 return 0; 2248 } 2249 2250 /** 2251 * amdgpu_vm_bo_unmap - remove bo mapping from vm 2252 * 2253 * @adev: amdgpu_device pointer 2254 * @bo_va: bo_va to remove the address from 2255 * @saddr: where the BO is mapped 2256 * 2257 * Remove a mapping of the BO at the specified addr from the VM.
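 *
 * Note that @saddr must be the exact start address previously passed to
 * amdgpu_vm_bo_map() or amdgpu_vm_bo_replace_map(); this helper looks the
 * mapping up by its start address and does not split ranges (use
 * amdgpu_vm_bo_clear_mappings() for range-based removal).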
2258 * 2259 * Returns: 2260 * 0 for success, error for failure. 2261 * 2262 * Object has to be reserved and unreserved outside! 2263 */ 2264 int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, 2265 struct amdgpu_bo_va *bo_va, 2266 uint64_t saddr) 2267 { 2268 struct amdgpu_bo_va_mapping *mapping; 2269 struct amdgpu_vm *vm = bo_va->base.vm; 2270 bool valid = true; 2271 2272 saddr /= AMDGPU_GPU_PAGE_SIZE; 2273 2274 list_for_each_entry(mapping, &bo_va->valids, list) { 2275 if (mapping->start == saddr) 2276 break; 2277 } 2278 2279 if (&mapping->list == &bo_va->valids) { 2280 valid = false; 2281 2282 list_for_each_entry(mapping, &bo_va->invalids, list) { 2283 if (mapping->start == saddr) 2284 break; 2285 } 2286 2287 if (&mapping->list == &bo_va->invalids) 2288 return -ENOENT; 2289 } 2290 2291 list_del(&mapping->list); 2292 amdgpu_vm_it_remove(mapping, &vm->va); 2293 mapping->bo_va = NULL; 2294 trace_amdgpu_vm_bo_unmap(bo_va, mapping); 2295 2296 if (valid) 2297 list_add(&mapping->list, &vm->freed); 2298 else 2299 amdgpu_vm_free_mapping(adev, vm, mapping, 2300 bo_va->last_pt_update); 2301 2302 return 0; 2303 } 2304 2305 /** 2306 * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range 2307 * 2308 * @adev: amdgpu_device pointer 2309 * @vm: VM structure to use 2310 * @saddr: start of the range 2311 * @size: size of the range 2312 * 2313 * Remove all mappings in a range, split them as appropriate. 2314 * 2315 * Returns: 2316 * 0 for success, error for failure. 2317 */ 2318 int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, 2319 struct amdgpu_vm *vm, 2320 uint64_t saddr, uint64_t size) 2321 { 2322 struct amdgpu_bo_va_mapping *before, *after, *tmp, *next; 2323 DRM_LIST_HEAD(removed); 2324 uint64_t eaddr; 2325 2326 eaddr = saddr + size - 1; 2327 saddr /= AMDGPU_GPU_PAGE_SIZE; 2328 eaddr /= AMDGPU_GPU_PAGE_SIZE; 2329 2330 /* Allocate all the needed memory */ 2331 before = kzalloc(sizeof(*before), GFP_KERNEL); 2332 if (!before) 2333 return -ENOMEM; 2334 INIT_LIST_HEAD(&before->list); 2335 2336 after = kzalloc(sizeof(*after), GFP_KERNEL); 2337 if (!after) { 2338 kfree(before); 2339 return -ENOMEM; 2340 } 2341 INIT_LIST_HEAD(&after->list); 2342 2343 /* Now gather all removed mappings */ 2344 tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); 2345 while (tmp) { 2346 /* Remember mapping split at the start */ 2347 if (tmp->start < saddr) { 2348 before->start = tmp->start; 2349 before->last = saddr - 1; 2350 before->offset = tmp->offset; 2351 before->flags = tmp->flags; 2352 before->bo_va = tmp->bo_va; 2353 list_add(&before->list, &tmp->bo_va->invalids); 2354 } 2355 2356 /* Remember mapping split at the end */ 2357 if (tmp->last > eaddr) { 2358 after->start = eaddr + 1; 2359 after->last = tmp->last; 2360 after->offset = tmp->offset; 2361 after->offset += (after->start - tmp->start) << PAGE_SHIFT; 2362 after->flags = tmp->flags; 2363 after->bo_va = tmp->bo_va; 2364 list_add(&after->list, &tmp->bo_va->invalids); 2365 } 2366 2367 list_del(&tmp->list); 2368 list_add(&tmp->list, &removed); 2369 2370 tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr); 2371 } 2372 2373 /* And free them up */ 2374 list_for_each_entry_safe(tmp, next, &removed, list) { 2375 amdgpu_vm_it_remove(tmp, &vm->va); 2376 list_del(&tmp->list); 2377 2378 if (tmp->start < saddr) 2379 tmp->start = saddr; 2380 if (tmp->last > eaddr) 2381 tmp->last = eaddr; 2382 2383 tmp->bo_va = NULL; 2384 list_add(&tmp->list, &vm->freed); 2385 trace_amdgpu_vm_bo_unmap(NULL, tmp); 2386 } 2387 2388 /* Insert partial mapping before the range */ 
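/*
 * before/after were pre-allocated before the gather loop; a non-empty
 * list head means the loop actually recorded a split on that side of
 * the range, otherwise the unused placeholder is simply freed again.
 */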
2389 if (!list_empty(&before->list)) { 2390 amdgpu_vm_it_insert(before, &vm->va); 2391 if (before->flags & AMDGPU_PTE_PRT) 2392 amdgpu_vm_prt_get(adev); 2393 } else { 2394 kfree(before); 2395 } 2396 2397 /* Insert partial mapping after the range */ 2398 if (!list_empty(&after->list)) { 2399 amdgpu_vm_it_insert(after, &vm->va); 2400 if (after->flags & AMDGPU_PTE_PRT) 2401 amdgpu_vm_prt_get(adev); 2402 } else { 2403 kfree(after); 2404 } 2405 2406 return 0; 2407 } 2408 2409 /** 2410 * amdgpu_vm_bo_lookup_mapping - find mapping by address 2411 * 2412 * @vm: the requested VM 2413 * @addr: the address 2414 * 2415 * Find a mapping by its address. 2416 * 2417 * Returns: 2418 * The amdgpu_bo_va_mapping matching addr, or NULL 2419 * 2420 */ 2421 struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm, 2422 uint64_t addr) 2423 { 2424 return amdgpu_vm_it_iter_first(&vm->va, addr, addr); 2425 } 2426 2427 /** 2428 * amdgpu_vm_bo_trace_cs - trace all reserved mappings 2429 * 2430 * @vm: the requested vm 2431 * @ticket: CS ticket 2432 * 2433 * Trace all mappings of BOs reserved during a command submission. 2434 */ 2435 void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket) 2436 { 2437 #if 0 2438 struct amdgpu_bo_va_mapping *mapping; 2439 2440 if (!trace_amdgpu_vm_bo_cs_enabled()) 2441 return; 2442 2443 for (mapping = amdgpu_vm_it_iter_first(&vm->va, 0, U64_MAX); mapping; 2444 mapping = amdgpu_vm_it_iter_next(mapping, 0, U64_MAX)) { 2445 if (mapping->bo_va && mapping->bo_va->base.bo) { 2446 struct amdgpu_bo *bo; 2447 2448 bo = mapping->bo_va->base.bo; 2449 if (READ_ONCE(bo->tbo.resv->lock.ctx) != ticket) 2450 continue; 2451 } 2452 2453 trace_amdgpu_vm_bo_cs(mapping); 2454 } 2455 #endif 2456 } 2457 2458 /** 2459 * amdgpu_vm_bo_rmv - remove a bo from a specific vm 2460 * 2461 * @adev: amdgpu_device pointer 2462 * @bo_va: requested bo_va 2463 * 2464 * Remove @bo_va->bo from the requested vm. 2465 * 2466 * Object has to be reserved! 2467 */ 2468 void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, 2469 struct amdgpu_bo_va *bo_va) 2470 { 2471 struct amdgpu_bo_va_mapping *mapping, *next; 2472 struct amdgpu_vm *vm = bo_va->base.vm; 2473 2474 list_del(&bo_va->base.bo_list); 2475 2476 lockmgr(&vm->moved_lock, LK_EXCLUSIVE); 2477 list_del(&bo_va->base.vm_status); 2478 lockmgr(&vm->moved_lock, LK_RELEASE); 2479 2480 list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { 2481 list_del(&mapping->list); 2482 amdgpu_vm_it_remove(mapping, &vm->va); 2483 mapping->bo_va = NULL; 2484 trace_amdgpu_vm_bo_unmap(bo_va, mapping); 2485 list_add(&mapping->list, &vm->freed); 2486 } 2487 list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) { 2488 list_del(&mapping->list); 2489 amdgpu_vm_it_remove(mapping, &vm->va); 2490 amdgpu_vm_free_mapping(adev, vm, mapping, 2491 bo_va->last_pt_update); 2492 } 2493 2494 dma_fence_put(bo_va->last_pt_update); 2495 kfree(bo_va); 2496 } 2497 2498 /** 2499 * amdgpu_vm_bo_invalidate - mark the bo as invalid 2500 * 2501 * @adev: amdgpu_device pointer 2502 * @bo: amdgpu buffer object 2503 * @evicted: is the BO evicted 2504 * 2505 * Mark @bo as invalid.
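 *
 * Depending on @evicted and on whether the BO is a per-VM kernel object,
 * the matching amdgpu_vm_bo_base entries are moved to the VM's evicted,
 * relocated or moved list; the exact rules are in the body below.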
2506 */ 2507 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, 2508 struct amdgpu_bo *bo, bool evicted) 2509 { 2510 struct amdgpu_vm_bo_base *bo_base; 2511 2512 /* shadow bo doesn't have bo base, its validation needs its parent */ 2513 if (bo->parent && bo->parent->shadow == bo) 2514 bo = bo->parent; 2515 2516 list_for_each_entry(bo_base, &bo->va, bo_list) { 2517 struct amdgpu_vm *vm = bo_base->vm; 2518 bool was_moved = bo_base->moved; 2519 2520 bo_base->moved = true; 2521 if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { 2522 if (bo->tbo.type == ttm_bo_type_kernel) 2523 list_move(&bo_base->vm_status, &vm->evicted); 2524 else 2525 list_move_tail(&bo_base->vm_status, 2526 &vm->evicted); 2527 continue; 2528 } 2529 2530 if (was_moved) 2531 continue; 2532 2533 if (bo->tbo.type == ttm_bo_type_kernel) { 2534 list_move(&bo_base->vm_status, &vm->relocated); 2535 } else { 2536 lockmgr(&bo_base->vm->moved_lock, LK_EXCLUSIVE); 2537 list_move(&bo_base->vm_status, &vm->moved); 2538 lockmgr(&bo_base->vm->moved_lock, LK_RELEASE); 2539 } 2540 } 2541 } 2542 2543 /** 2544 * amdgpu_vm_get_block_size - calculate VM page table size as power of two 2545 * 2546 * @vm_size: VM size 2547 * 2548 * Returns: 2549 * VM page table as power of two 2550 */ 2551 static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size) 2552 { 2553 /* Total bits covered by PD + PTs */ 2554 unsigned bits = ilog2(vm_size) + 18; 2555 2556 /* Make sure the PD is 4K in size up to 8GB address space. 2557 Above that split equal between PD and PTs */ 2558 if (vm_size <= 8) 2559 return (bits - 9); 2560 else 2561 return ((bits + 3) / 2); 2562 } 2563 2564 /** 2565 * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size 2566 * 2567 * @adev: amdgpu_device pointer 2568 * @min_vm_size: the minimum vm size in GB if it's set auto 2569 * @fragment_size_default: Default PTE fragment size 2570 * @max_level: max VMPT level 2571 * @max_bits: max address space size in bits 2572 * 2573 */ 2574 void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, 2575 uint32_t fragment_size_default, unsigned max_level, 2576 unsigned max_bits) 2577 { 2578 unsigned int max_size = 1 << (max_bits - 30); 2579 unsigned int vm_size; 2580 uint64_t tmp; 2581 2582 /* adjust vm size first */ 2583 if (amdgpu_vm_size != -1) { 2584 vm_size = amdgpu_vm_size; 2585 if (vm_size > max_size) { 2586 dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n", 2587 amdgpu_vm_size, max_size); 2588 vm_size = max_size; 2589 } 2590 } else { 2591 struct sysinfo si; 2592 unsigned int phys_ram_gb; 2593 2594 /* Optimal VM size depends on the amount of physical 2595 * RAM available. Underlying requirements and 2596 * assumptions: 2597 * 2598 * - Need to map system memory and VRAM from all GPUs 2599 * - VRAM from other GPUs not known here 2600 * - Assume VRAM <= system memory 2601 * - On GFX8 and older, VM space can be segmented for 2602 * different MTYPEs 2603 * - Need to allow room for fragmentation, guard pages etc. 2604 * 2605 * This adds up to a rough guess of system memory x3. 2606 * Round up to power of two to maximize the available 2607 * VM size with the given page table size. 
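 *
 * Worked example with illustrative numbers: 12 GB of system RAM gives
 * phys_ram_gb = 12, 12 * 3 = 36 and roundup_pow_of_two(36) = 64, so the
 * VM size becomes 64 GB (unless clamped by min_vm_size or max_size),
 * i.e. max_pfn = 64 << 18 GPU pages of 4 KB each.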
2608 */ 2609 si_meminfo(&si); 2610 phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit + 2611 (1 << 30) - 1) >> 30; 2612 vm_size = roundup_pow_of_two( 2613 min(max(phys_ram_gb * 3, min_vm_size), max_size)); 2614 } 2615 2616 adev->vm_manager.max_pfn = (uint64_t)vm_size << 18; 2617 2618 tmp = roundup_pow_of_two(adev->vm_manager.max_pfn); 2619 if (amdgpu_vm_block_size != -1) 2620 tmp >>= amdgpu_vm_block_size - 9; 2621 tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1; 2622 adev->vm_manager.num_level = min(max_level, (unsigned)tmp); 2623 switch (adev->vm_manager.num_level) { 2624 case 3: 2625 adev->vm_manager.root_level = AMDGPU_VM_PDB2; 2626 break; 2627 case 2: 2628 adev->vm_manager.root_level = AMDGPU_VM_PDB1; 2629 break; 2630 case 1: 2631 adev->vm_manager.root_level = AMDGPU_VM_PDB0; 2632 break; 2633 default: 2634 dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n"); 2635 } 2636 /* block size depends on vm size and hw setup*/ 2637 if (amdgpu_vm_block_size != -1) 2638 adev->vm_manager.block_size = 2639 min((unsigned)amdgpu_vm_block_size, max_bits 2640 - AMDGPU_GPU_PAGE_SHIFT 2641 - 9 * adev->vm_manager.num_level); 2642 else if (adev->vm_manager.num_level > 1) 2643 adev->vm_manager.block_size = 9; 2644 else 2645 adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp); 2646 2647 if (amdgpu_vm_fragment_size == -1) 2648 adev->vm_manager.fragment_size = fragment_size_default; 2649 else 2650 adev->vm_manager.fragment_size = amdgpu_vm_fragment_size; 2651 2652 DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n", 2653 vm_size, adev->vm_manager.num_level + 1, 2654 adev->vm_manager.block_size, 2655 adev->vm_manager.fragment_size); 2656 } 2657 2658 /** 2659 * amdgpu_vm_init - initialize a vm instance 2660 * 2661 * @adev: amdgpu_device pointer 2662 * @vm: requested vm 2663 * @vm_context: Indicates if it GFX or Compute context 2664 * @pasid: Process address space identifier 2665 * 2666 * Init @vm fields. 2667 * 2668 * Returns: 2669 * 0 for success, error for failure. 
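 *
 * Typical usage (a sketch; the callers live outside this file): a GFX
 * userspace context is created with
 *
 *   r = amdgpu_vm_init(adev, vm, AMDGPU_VM_CONTEXT_GFX, pasid);
 *
 * and torn down again with amdgpu_vm_fini(); compute/KFD contexts pass
 * AMDGPU_VM_CONTEXT_COMPUTE instead.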
2670 */ 2671 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, 2672 int vm_context, unsigned int pasid) 2673 { 2674 struct amdgpu_bo_param bp; 2675 struct amdgpu_bo *root; 2676 const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, 2677 AMDGPU_VM_PTE_COUNT(adev) * 8); 2678 unsigned ring_instance; 2679 struct amdgpu_ring *ring; 2680 struct drm_sched_rq *rq; 2681 unsigned long size; 2682 uint64_t flags; 2683 int r, i; 2684 2685 vm->va = LINUX_RB_ROOT_CACHED; 2686 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) 2687 vm->reserved_vmid[i] = NULL; 2688 INIT_LIST_HEAD(&vm->evicted); 2689 INIT_LIST_HEAD(&vm->relocated); 2690 lockinit(&vm->moved_lock, "agvmml", 0, LK_CANRECURSE); 2691 INIT_LIST_HEAD(&vm->moved); 2692 INIT_LIST_HEAD(&vm->idle); 2693 INIT_LIST_HEAD(&vm->freed); 2694 2695 /* create scheduler entity for page table updates */ 2696 2697 ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring); 2698 ring_instance %= adev->vm_manager.vm_pte_num_rings; 2699 ring = adev->vm_manager.vm_pte_rings[ring_instance]; 2700 rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; 2701 r = drm_sched_entity_init(&vm->entity, &rq, 1, NULL); 2702 if (r) 2703 return r; 2704 2705 vm->pte_support_ats = false; 2706 2707 if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { 2708 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & 2709 AMDGPU_VM_USE_CPU_FOR_COMPUTE); 2710 2711 if (adev->asic_type == CHIP_RAVEN) 2712 vm->pte_support_ats = true; 2713 } else { 2714 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & 2715 AMDGPU_VM_USE_CPU_FOR_GFX); 2716 } 2717 DRM_DEBUG_DRIVER("VM update mode is %s\n", 2718 vm->use_cpu_for_update ? "CPU" : "SDMA"); 2719 WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)), 2720 "CPU update of VM recommended only for large BAR system\n"); 2721 vm->last_update = NULL; 2722 2723 flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; 2724 if (vm->use_cpu_for_update) 2725 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; 2726 else if (vm_context != AMDGPU_VM_CONTEXT_COMPUTE) 2727 flags |= AMDGPU_GEM_CREATE_SHADOW; 2728 2729 size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); 2730 memset(&bp, 0, sizeof(bp)); 2731 bp.size = size; 2732 bp.byte_align = align; 2733 bp.domain = AMDGPU_GEM_DOMAIN_VRAM; 2734 bp.flags = flags; 2735 bp.type = ttm_bo_type_kernel; 2736 bp.resv = NULL; 2737 r = amdgpu_bo_create(adev, &bp, &root); 2738 if (r) 2739 goto error_free_sched_entity; 2740 2741 r = amdgpu_bo_reserve(root, true); 2742 if (r) 2743 goto error_free_root; 2744 2745 r = amdgpu_vm_clear_bo(adev, vm, root, 2746 adev->vm_manager.root_level, 2747 vm->pte_support_ats); 2748 if (r) 2749 goto error_unreserve; 2750 2751 amdgpu_vm_bo_base_init(&vm->root.base, vm, root); 2752 amdgpu_bo_unreserve(vm->root.base.bo); 2753 2754 if (pasid) { 2755 unsigned long flags; 2756 2757 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); 2758 r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1, 2759 GFP_ATOMIC); 2760 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); 2761 if (r < 0) 2762 goto error_free_root; 2763 2764 vm->pasid = pasid; 2765 } 2766 2767 #if 0 2768 INIT_KFIFO(vm->faults); 2769 #endif 2770 vm->fault_credit = 16; 2771 2772 return 0; 2773 2774 error_unreserve: 2775 amdgpu_bo_unreserve(vm->root.base.bo); 2776 2777 error_free_root: 2778 amdgpu_bo_unref(&vm->root.base.bo->shadow); 2779 amdgpu_bo_unref(&vm->root.base.bo); 2780 vm->root.base.bo = NULL; 2781 2782 error_free_sched_entity: 2783 drm_sched_entity_destroy(&vm->entity); 
2784 2785 return r; 2786 } 2787 2788 /** 2789 * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM 2790 * 2791 * @adev: amdgpu_device pointer 2792 * @vm: requested vm 2793 * 2794 * This only works on GFX VMs that don't have any BOs added and no 2795 * page tables allocated yet. 2796 * 2797 * Changes the following VM parameters: 2798 * - use_cpu_for_update 2799 * - pte_supports_ats 2800 * - pasid (old PASID is released, because compute manages its own PASIDs) 2801 * 2802 * Reinitializes the page directory to reflect the changed ATS 2803 * setting. 2804 * 2805 * Returns: 2806 * 0 for success, -errno for errors. 2807 */ 2808 int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) 2809 { 2810 bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); 2811 int r; 2812 2813 r = amdgpu_bo_reserve(vm->root.base.bo, true); 2814 if (r) 2815 return r; 2816 2817 /* Sanity checks */ 2818 if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) { 2819 r = -EINVAL; 2820 goto error; 2821 } 2822 2823 /* Check if PD needs to be reinitialized and do it before 2824 * changing any other state, in case it fails. 2825 */ 2826 if (pte_support_ats != vm->pte_support_ats) { 2827 r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, 2828 adev->vm_manager.root_level, 2829 pte_support_ats); 2830 if (r) 2831 goto error; 2832 } 2833 2834 /* Update VM state */ 2835 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & 2836 AMDGPU_VM_USE_CPU_FOR_COMPUTE); 2837 vm->pte_support_ats = pte_support_ats; 2838 DRM_DEBUG_DRIVER("VM update mode is %s\n", 2839 vm->use_cpu_for_update ? "CPU" : "SDMA"); 2840 WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)), 2841 "CPU update of VM recommended only for large BAR system\n"); 2842 2843 if (vm->pasid) { 2844 unsigned long flags; 2845 2846 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); 2847 idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); 2848 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); 2849 2850 vm->pasid = 0; 2851 } 2852 2853 /* Free the shadow bo for compute VM */ 2854 amdgpu_bo_unref(&vm->root.base.bo->shadow); 2855 2856 error: 2857 amdgpu_bo_unreserve(vm->root.base.bo); 2858 return r; 2859 } 2860 2861 /** 2862 * amdgpu_vm_free_levels - free PD/PT levels 2863 * 2864 * @adev: amdgpu device structure 2865 * @parent: PD/PT starting level to free 2866 * @level: level of parent structure 2867 * 2868 * Free the page directory or page table level and all sub levels. 2869 */ 2870 static void amdgpu_vm_free_levels(struct amdgpu_device *adev, 2871 struct amdgpu_vm_pt *parent, 2872 unsigned level) 2873 { 2874 unsigned i, num_entries = amdgpu_vm_num_entries(adev, level); 2875 2876 if (parent->base.bo) { 2877 list_del(&parent->base.bo_list); 2878 list_del(&parent->base.vm_status); 2879 amdgpu_bo_unref(&parent->base.bo->shadow); 2880 amdgpu_bo_unref(&parent->base.bo); 2881 } 2882 2883 if (parent->entries) 2884 for (i = 0; i < num_entries; i++) 2885 amdgpu_vm_free_levels(adev, &parent->entries[i], 2886 level + 1); 2887 2888 kvfree(parent->entries); 2889 } 2890 2891 /** 2892 * amdgpu_vm_fini - tear down a vm instance 2893 * 2894 * @adev: amdgpu_device pointer 2895 * @vm: requested vm 2896 * 2897 * Tear down @vm. 
2898 * Unbind the VM and remove all bos from the vm bo list 2899 */ 2900 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) 2901 { 2902 struct amdgpu_bo_va_mapping *mapping, *tmp; 2903 bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt; 2904 struct amdgpu_bo *root; 2905 #if 0 2906 u64 fault; 2907 #endif 2908 int i, r; 2909 2910 amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); 2911 2912 #if 0 2913 /* Clear pending page faults from IH when the VM is destroyed */ 2914 while (kfifo_get(&vm->faults, &fault)) 2915 amdgpu_ih_clear_fault(adev, fault); 2916 #endif 2917 2918 if (vm->pasid) { 2919 unsigned long flags; 2920 2921 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); 2922 idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); 2923 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); 2924 } 2925 2926 drm_sched_entity_destroy(&vm->entity); 2927 2928 if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { 2929 dev_err(adev->dev, "still active bo inside vm\n"); 2930 } 2931 #ifndef __DragonFly__ 2932 rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, it.rb) { 2933 #else 2934 /* 2935 * DFly interval tree mock-up does not use RB trees, the RB iterator 2936 * can not be used. 2937 * This code is removing all entries so it is fairly easy to replace. 2938 */ 2939 while (vm->va.rb_leftmost) { 2940 mapping = container_of((void *)vm->va.rb_leftmost, struct amdgpu_bo_va_mapping, rb); 2941 #endif 2942 list_del(&mapping->list); 2943 amdgpu_vm_it_remove(mapping, &vm->va); 2944 kfree(mapping); 2945 } 2946 list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { 2947 if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { 2948 amdgpu_vm_prt_fini(adev, vm); 2949 prt_fini_needed = false; 2950 } 2951 2952 list_del(&mapping->list); 2953 amdgpu_vm_free_mapping(adev, vm, mapping, NULL); 2954 } 2955 2956 root = amdgpu_bo_ref(vm->root.base.bo); 2957 r = amdgpu_bo_reserve(root, true); 2958 if (r) { 2959 dev_err(adev->dev, "Leaking page tables because BO reservation failed\n"); 2960 } else { 2961 amdgpu_vm_free_levels(adev, &vm->root, 2962 adev->vm_manager.root_level); 2963 amdgpu_bo_unreserve(root); 2964 } 2965 amdgpu_bo_unref(&root); 2966 dma_fence_put(vm->last_update); 2967 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) 2968 amdgpu_vmid_free_reserved(adev, vm, i); 2969 } 2970 2971 /** 2972 * amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID 2973 * 2974 * @adev: amdgpu_device pointer 2975 * @pasid: PASID do identify the VM 2976 * 2977 * This function is expected to be called in interrupt context. 2978 * 2979 * Returns: 2980 * True if there was fault credit, false otherwise 2981 */ 2982 bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, 2983 unsigned int pasid) 2984 { 2985 struct amdgpu_vm *vm; 2986 2987 lockmgr(&adev->vm_manager.pasid_lock, LK_EXCLUSIVE); 2988 vm = idr_find(&adev->vm_manager.pasid_idr, pasid); 2989 if (!vm) { 2990 /* VM not found, can't track fault credit */ 2991 lockmgr(&adev->vm_manager.pasid_lock, LK_RELEASE); 2992 return true; 2993 } 2994 2995 /* No lock needed. 
only accessed by IRQ handler */ 2996 if (!vm->fault_credit) { 2997 /* Too many faults in this VM */ 2998 lockmgr(&adev->vm_manager.pasid_lock, LK_RELEASE); 2999 return false; 3000 } 3001 3002 vm->fault_credit--; 3003 lockmgr(&adev->vm_manager.pasid_lock, LK_RELEASE); 3004 return true; 3005 } 3006 3007 /** 3008 * amdgpu_vm_manager_init - init the VM manager 3009 * 3010 * @adev: amdgpu_device pointer 3011 * 3012 * Initialize the VM manager structures 3013 */ 3014 void amdgpu_vm_manager_init(struct amdgpu_device *adev) 3015 { 3016 unsigned i; 3017 3018 amdgpu_vmid_mgr_init(adev); 3019 3020 adev->vm_manager.fence_context = 3021 dma_fence_context_alloc(AMDGPU_MAX_RINGS); 3022 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) 3023 adev->vm_manager.seqno[i] = 0; 3024 3025 atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); 3026 lockinit(&adev->vm_manager.prt_lock, "agvmmprtl", 0, LK_CANRECURSE); 3027 atomic_set(&adev->vm_manager.num_prt_users, 0); 3028 3029 /* If not overridden by the user, by default, only in large BAR systems 3030 * Compute VM tables will be updated by CPU 3031 */ 3032 #ifdef CONFIG_X86_64 3033 if (amdgpu_vm_update_mode == -1) { 3034 if (amdgpu_gmc_vram_full_visible(&adev->gmc)) 3035 adev->vm_manager.vm_update_mode = 3036 AMDGPU_VM_USE_CPU_FOR_COMPUTE; 3037 else 3038 adev->vm_manager.vm_update_mode = 0; 3039 } else 3040 adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode; 3041 #else 3042 adev->vm_manager.vm_update_mode = 0; 3043 #endif 3044 3045 idr_init(&adev->vm_manager.pasid_idr); 3046 lockinit(&adev->vm_manager.pasid_lock, "agvmmpl", 0, LK_CANRECURSE); 3047 } 3048 3049 /** 3050 * amdgpu_vm_manager_fini - cleanup VM manager 3051 * 3052 * @adev: amdgpu_device pointer 3053 * 3054 * Cleanup the VM manager and free resources. 3055 */ 3056 void amdgpu_vm_manager_fini(struct amdgpu_device *adev) 3057 { 3058 #if 0 3059 WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr)); 3060 #endif 3061 idr_destroy(&adev->vm_manager.pasid_idr); 3062 3063 amdgpu_vmid_mgr_fini(adev); 3064 } 3065 3066 /** 3067 * amdgpu_vm_ioctl - Manages VMID reservation for vm hubs. 3068 * 3069 * @dev: drm device pointer 3070 * @data: drm_amdgpu_vm 3071 * @filp: drm file pointer 3072 * 3073 * Returns: 3074 * 0 for success, -errno for errors. 3075 */ 3076 int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) 3077 { 3078 union drm_amdgpu_vm *args = data; 3079 struct amdgpu_device *adev = dev->dev_private; 3080 struct amdgpu_fpriv *fpriv = filp->driver_priv; 3081 int r; 3082 3083 switch (args->in.op) { 3084 case AMDGPU_VM_OP_RESERVE_VMID: 3085 /* current, we only have requirement to reserve vmid from gfxhub */ 3086 r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB); 3087 if (r) 3088 return r; 3089 break; 3090 case AMDGPU_VM_OP_UNRESERVE_VMID: 3091 amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB); 3092 break; 3093 default: 3094 return -EINVAL; 3095 } 3096 3097 return 0; 3098 } 3099 3100 /** 3101 * amdgpu_vm_get_task_info - Extracts task info for a PASID. 3102 * 3103 * @dev: drm device pointer 3104 * @pasid: PASID identifier for VM 3105 * @task_info: task_info to fill. 
3106 */ 3107 void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, 3108 struct amdgpu_task_info *task_info) 3109 { 3110 struct amdgpu_vm *vm; 3111 unsigned long flags; 3112 3113 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); 3114 3115 vm = idr_find(&adev->vm_manager.pasid_idr, pasid); 3116 if (vm) 3117 *task_info = vm->task_info; 3118 3119 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); 3120 } 3121 3122 /** 3123 * amdgpu_vm_set_task_info - Sets VMs task info. 3124 * 3125 * @vm: vm for which to set the info 3126 */ 3127 void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) 3128 { 3129 kprintf("amdgpu_vm_set_task_info: not implemented\n"); 3130 #if 0 3131 if (!vm->task_info.pid) { 3132 vm->task_info.pid = current->pid; 3133 get_task_comm(vm->task_info.task_name, current); 3134 3135 if (current->group_leader->mm == current->mm) { 3136 vm->task_info.tgid = current->group_leader->pid; 3137 get_task_comm(vm->task_info.process_name, current->group_leader); 3138 } 3139 } 3140 #endif 3141 } 3142