/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <christian.koenig@amd.com>
 */

/**
 * DOC: MMU Notifier
 *
 * For coherent userptr handling we register an MMU notifier to inform the
 * driver about updates on the page tables of a process.
 *
 * When somebody tries to invalidate the page tables we block the update until
 * all operations on the pages in question are completed, then those pages are
 * marked as accessed and also as dirty if it wasn't a read only access.
 *
 * New command submissions using the userptrs in question are delayed until
 * all page table invalidations are completed and we once more see a coherent
 * process address space.
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/mmu_notifier.h>
#include <linux/interval_tree.h>
#include <drm/drmP.h>
#include <drm/drm.h>

#include "amdgpu.h"
#include "amdgpu_amdkfd.h"

/**
 * struct amdgpu_mn
 *
 * @adev: amdgpu device pointer
 * @mm: process address space
 * @mn: MMU notifier structure
 * @type: type of MMU notifier
 * @work: destruction work item
 * @node: hash table node to find structure by adev and mn
 * @lock: rw semaphore protecting the notifier nodes
 * @objects: interval tree containing amdgpu_mn_nodes
 * @read_lock: mutex for recursive locking of @lock
 * @recursion: depth of recursion
 *
 * Data for each amdgpu device and process address space.
 */
struct amdgpu_mn {
        /* constant after initialisation */
        struct amdgpu_device *adev;
        struct mm_struct *mm;
        struct mmu_notifier mn;
        enum amdgpu_mn_type type;

        /* only used on destruction */
        struct work_struct work;

        /* protected by adev->mn_lock */
        struct hlist_node node;

        /* objects protected by lock */
        struct rw_semaphore lock;
        struct rb_root_cached objects;
        struct mutex read_lock;
        atomic_t recursion;
};

/**
 * struct amdgpu_mn_node
 *
 * @it: interval node defining start-last of the affected address range
 * @bos: list of all BOs in the affected address range
 *
 * Manages all BOs which are affected by a certain range of address space.
 */
struct amdgpu_mn_node {
        struct interval_tree_node it;
        struct list_head bos;
};
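/*
 * Illustration (hypothetical numbers, not part of the driver): the interval
 * tree stores inclusive [start, last] ranges, so a 64KiB userptr BO mapped
 * at user address 0x100000 would be tracked as
 *
 *      node->it.start = 0x100000;
 *      node->it.last  = 0x100000 + SZ_64K - 1;
 *
 * i.e. the last byte is included in the interval. BOs with overlapping
 * ranges are merged into a single node, see amdgpu_mn_register() below.
 */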
/**
 * amdgpu_mn_destroy - destroy the MMU notifier
 *
 * @work: previously scheduled work item
 *
 * Lazily destroys the notifier from a work item.
 */
static void amdgpu_mn_destroy(struct work_struct *work)
{
        struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work);
        struct amdgpu_device *adev = amn->adev;
        struct amdgpu_mn_node *node, *next_node;
        struct amdgpu_bo *bo, *next_bo;

        mutex_lock(&adev->mn_lock);
        down_write(&amn->lock);
        hash_del(&amn->node);
        rbtree_postorder_for_each_entry_safe(node, next_node,
                                             &amn->objects.rb_root, it.rb) {
                list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
                        bo->mn = NULL;
                        list_del_init(&bo->mn_list);
                }
                kfree(node);
        }
        up_write(&amn->lock);
        mutex_unlock(&adev->mn_lock);
        mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
        kfree(amn);
}

/**
 * amdgpu_mn_release - callback to notify about mm destruction
 *
 * @mn: our notifier
 * @mm: the mm this callback is about
 *
 * Schedule a work item to lazily destroy our notifier.
 */
static void amdgpu_mn_release(struct mmu_notifier *mn,
                              struct mm_struct *mm)
{
        struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);

        INIT_WORK(&amn->work, amdgpu_mn_destroy);
        schedule_work(&amn->work);
}

/**
 * amdgpu_mn_lock - take the write side lock for this notifier
 *
 * @mn: our notifier
 */
void amdgpu_mn_lock(struct amdgpu_mn *mn)
{
        if (mn)
                down_write(&mn->lock);
}

/**
 * amdgpu_mn_unlock - drop the write side lock for this notifier
 *
 * @mn: our notifier
 */
void amdgpu_mn_unlock(struct amdgpu_mn *mn)
{
        if (mn)
                up_write(&mn->lock);
}

/**
 * amdgpu_mn_read_lock - take the read side lock for this notifier
 *
 * @amn: our notifier
 * @blockable: whether we are allowed to block
 *
 * Returns 0 on success, -EAGAIN if a non-blockable caller would have to
 * wait for @read_lock.
 */
static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
{
        if (blockable)
                mutex_lock(&amn->read_lock);
        else if (!mutex_trylock(&amn->read_lock))
                return -EAGAIN;

        /* only the first reader in a chain of notifier calls takes the rwsem */
        if (atomic_inc_return(&amn->recursion) == 1)
                down_read_non_owner(&amn->lock);
        mutex_unlock(&amn->read_lock);

        return 0;
}

/**
 * amdgpu_mn_read_unlock - drop the read side lock for this notifier
 *
 * @amn: our notifier
 */
static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
{
        if (atomic_dec_return(&amn->recursion) == 0)
                up_read_non_owner(&amn->lock);
}
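/*
 * Sketch of the recursion scheme above (made-up sequence, not a real trace):
 * overlapping invalidate_range_start/_end pairs share a single rwsem grab,
 * the first one takes it and the last one drops it:
 *
 *      range_start:  amdgpu_mn_read_lock()    recursion 0 -> 1, down_read
 *      range_start:  amdgpu_mn_read_lock()    recursion 1 -> 2
 *      range_end:    amdgpu_mn_read_unlock()  recursion 2 -> 1
 *      range_end:    amdgpu_mn_read_unlock()  recursion 1 -> 0, up_read
 *
 * The _non_owner() variants are needed because the final unlock may run in
 * a different task than the one which took the lock.
 */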
/**
 * amdgpu_mn_invalidate_node - unmap all BOs of a node
 *
 * @node: the node with the BOs to unmap
 * @start: start of address range affected
 * @end: end of address range affected
 *
 * Block for operations on BOs to finish and mark pages as accessed and
 * potentially dirty.
 */
static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
                                      unsigned long start,
                                      unsigned long end)
{
        struct amdgpu_bo *bo;
        long r;

        list_for_each_entry(bo, &node->bos, mn_list) {

                if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end))
                        continue;

                r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
                        true, false, MAX_SCHEDULE_TIMEOUT);
                if (r <= 0)
                        DRM_ERROR("(%ld) failed to wait for user bo\n", r);

                amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
        }
}

/**
 * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
 *
 * @mn: our notifier
 * @mm: the mm this callback is about
 * @start: start of updated range
 * @end: end of updated range
 * @blockable: whether we are allowed to block
 *
 * Block for operations on BOs to finish and mark pages as accessed and
 * potentially dirty.
 */
static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
                                                struct mm_struct *mm,
                                                unsigned long start,
                                                unsigned long end,
                                                bool blockable)
{
        struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
        struct interval_tree_node *it;

        /* notification is exclusive, but interval is inclusive */
        end -= 1;

        /* TODO we should be able to split locking for interval tree and
         * amdgpu_mn_invalidate_node
         */
        if (amdgpu_mn_read_lock(amn, blockable))
                return -EAGAIN;

        it = interval_tree_iter_first(&amn->objects, start, end);
        while (it) {
                struct amdgpu_mn_node *node;

                if (!blockable) {
                        amdgpu_mn_read_unlock(amn);
                        return -EAGAIN;
                }

                node = container_of(it, struct amdgpu_mn_node, it);
                it = interval_tree_iter_next(it, start, end);

                amdgpu_mn_invalidate_node(node, start, end);
        }

        return 0;
}
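/*
 * Worked example for the "end -= 1" adjustment above (hypothetical
 * addresses): the notifier reports a half-open [start, end) range, while
 * the interval tree works on inclusive last addresses. Invalidating two
 * 4KiB pages at 0x1000 arrives as
 *
 *      start = 0x1000, end = 0x3000    (exclusive end from the notifier)
 *
 * and is searched as
 *
 *      start = 0x1000, end = 0x2fff    (inclusive, after end -= 1)
 *
 * so a BO whose range starts exactly at 0x3000 is correctly left alone.
 */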
/**
 * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
 *
 * @mn: our notifier
 * @mm: the mm this callback is about
 * @start: start of updated range
 * @end: end of updated range
 * @blockable: whether we are allowed to block
 *
 * We temporarily evict all BOs between start and end. This
 * necessitates evicting all user-mode queues of the process. The BOs
 * are restored after amdgpu_mn_invalidate_range_end releases the lock.
 */
static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
                                                struct mm_struct *mm,
                                                unsigned long start,
                                                unsigned long end,
                                                bool blockable)
{
        struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
        struct interval_tree_node *it;

        /* notification is exclusive, but interval is inclusive */
        end -= 1;

        if (amdgpu_mn_read_lock(amn, blockable))
                return -EAGAIN;

        it = interval_tree_iter_first(&amn->objects, start, end);
        while (it) {
                struct amdgpu_mn_node *node;
                struct amdgpu_bo *bo;

                if (!blockable) {
                        amdgpu_mn_read_unlock(amn);
                        return -EAGAIN;
                }

                node = container_of(it, struct amdgpu_mn_node, it);
                it = interval_tree_iter_next(it, start, end);

                list_for_each_entry(bo, &node->bos, mn_list) {
                        struct kgd_mem *mem = bo->kfd_bo;

                        if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
                                                         start, end))
                                amdgpu_amdkfd_evict_userptr(mem, mm);
                }
        }

        return 0;
}

/**
 * amdgpu_mn_invalidate_range_end - callback to notify about mm change
 *
 * @mn: our notifier
 * @mm: the mm this callback is about
 * @start: start of updated range
 * @end: end of updated range
 *
 * Release the lock again to allow new command submissions.
 */
static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
                                           struct mm_struct *mm,
                                           unsigned long start,
                                           unsigned long end)
{
        struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);

        amdgpu_mn_read_unlock(amn);
}

static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
        [AMDGPU_MN_TYPE_GFX] = {
                .release = amdgpu_mn_release,
                .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
                .invalidate_range_end = amdgpu_mn_invalidate_range_end,
        },
        [AMDGPU_MN_TYPE_HSA] = {
                .release = amdgpu_mn_release,
                .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
                .invalidate_range_end = amdgpu_mn_invalidate_range_end,
        },
};

/* Low bits of any reasonable mm pointer will be unused due to struct
 * alignment. Use these bits to make a unique key from the mm pointer
 * and notifier type.
 */
#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
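/*
 * Worked example of the key construction (hypothetical pointer value,
 * assuming AMDGPU_MN_TYPE_GFX == 0 and AMDGPU_MN_TYPE_HSA == 1, as the
 * array indexing above suggests): a struct mm_struct allocation is at
 * least word aligned, so its low bits are free to encode the type:
 *
 *      mm = (struct mm_struct *)0xffff888123456700
 *      AMDGPU_MN_KEY(mm, AMDGPU_MN_TYPE_GFX) == 0xffff888123456700
 *      AMDGPU_MN_KEY(mm, AMDGPU_MN_TYPE_HSA) == 0xffff888123456701
 *
 * This way the same process can have one notifier context per type hashed
 * into adev->mn_hash without the keys colliding.
 */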
/**
 * amdgpu_mn_get - create notifier context
 *
 * @adev: amdgpu device pointer
 * @type: type of MMU notifier context
 *
 * Creates a notifier context for current->mm.
 * Returns the notifier context on success, an ERR_PTR otherwise.
 */
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
                                enum amdgpu_mn_type type)
{
        struct mm_struct *mm = current->mm;
        struct amdgpu_mn *amn;
        unsigned long key = AMDGPU_MN_KEY(mm, type);
        int r;

        mutex_lock(&adev->mn_lock);
        if (down_write_killable(&mm->mmap_sem)) {
                mutex_unlock(&adev->mn_lock);
                return ERR_PTR(-EINTR);
        }

        hash_for_each_possible(adev->mn_hash, amn, node, key)
                if (AMDGPU_MN_KEY(amn->mm, amn->type) == key)
                        goto release_locks;

        amn = kzalloc(sizeof(*amn), GFP_KERNEL);
        if (!amn) {
                amn = ERR_PTR(-ENOMEM);
                goto release_locks;
        }

        amn->adev = adev;
        amn->mm = mm;
        init_rwsem(&amn->lock);
        amn->type = type;
        amn->mn.ops = &amdgpu_mn_ops[type];
        amn->objects = RB_ROOT_CACHED;
        mutex_init(&amn->read_lock);
        atomic_set(&amn->recursion, 0);

        r = __mmu_notifier_register(&amn->mn, mm);
        if (r)
                goto free_amn;

        hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type));

release_locks:
        up_write(&mm->mmap_sem);
        mutex_unlock(&adev->mn_lock);

        return amn;

free_amn:
        up_write(&mm->mmap_sem);
        mutex_unlock(&adev->mn_lock);
        kfree(amn);

        return ERR_PTR(r);
}

/**
 * amdgpu_mn_register - register a BO for notifier updates
 *
 * @bo: amdgpu buffer object
 * @addr: userptr address we should monitor
 *
 * Registers an MMU notifier for the given BO at the specified address.
 * Returns 0 on success, -ERRNO if anything goes wrong.
 */
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
        unsigned long end = addr + amdgpu_bo_size(bo) - 1;
        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
        enum amdgpu_mn_type type =
                bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
        struct amdgpu_mn *amn;
        struct amdgpu_mn_node *node = NULL, *new_node;
        struct list_head bos;
        struct interval_tree_node *it;

        amn = amdgpu_mn_get(adev, type);
        if (IS_ERR(amn))
                return PTR_ERR(amn);

        new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
        if (!new_node)
                return -ENOMEM;

        INIT_LIST_HEAD(&bos);

        down_write(&amn->lock);

        /* merge all overlapping nodes into one covering node */
        while ((it = interval_tree_iter_first(&amn->objects, addr, end))) {
                kfree(node);
                node = container_of(it, struct amdgpu_mn_node, it);
                interval_tree_remove(&node->it, &amn->objects);
                addr = min(it->start, addr);
                end = max(it->last, end);
                list_splice(&node->bos, &bos);
        }

        if (!node)
                node = new_node;
        else
                kfree(new_node);

        bo->mn = amn;

        node->it.start = addr;
        node->it.last = end;
        INIT_LIST_HEAD(&node->bos);
        list_splice(&bos, &node->bos);
        list_add(&bo->mn_list, &node->bos);

        interval_tree_insert(&node->it, &amn->objects);

        up_write(&amn->lock);

        return 0;
}
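/*
 * Usage sketch (hypothetical caller, error handling trimmed): a userptr BO
 * would typically be registered right after its user address is known, for
 * example from a GEM userptr ioctl implementation:
 *
 *      static int example_userptr_setup(struct amdgpu_bo *bo, u64 addr)
 *      {
 *              int r;
 *
 *              r = amdgpu_mn_register(bo, addr);
 *              if (r)
 *                      return r;
 *
 *              return 0;
 *      }
 *
 * The matching amdgpu_mn_unregister() below must run before the BO is
 * freed, otherwise the interval tree would reference a dangling object.
 */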
/**
 * amdgpu_mn_unregister - unregister a BO for notifier updates
 *
 * @bo: amdgpu buffer object
 *
 * Remove any registration of MMU notifier updates from the buffer object.
 */
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
{
        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
        struct amdgpu_mn *amn;
        struct list_head *head;

        mutex_lock(&adev->mn_lock);

        amn = bo->mn;
        if (amn == NULL) {
                mutex_unlock(&adev->mn_lock);
                return;
        }

        down_write(&amn->lock);

        /* save the next list entry for later */
        head = bo->mn_list.next;

        bo->mn = NULL;
        list_del_init(&bo->mn_list);

        /* if the BO was the last entry, head is the now-empty node list */
        if (list_empty(head)) {
                struct amdgpu_mn_node *node;

                node = container_of(head, struct amdgpu_mn_node, bos);
                interval_tree_remove(&node->it, &amn->objects);
                kfree(node);
        }

        up_write(&amn->lock);
        mutex_unlock(&adev->mn_lock);
}
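/*
 * Usage sketch for the write side (hypothetical caller, simplified):
 * command submission can hold the notifier lock while it validates and
 * submits userptr BOs, so that a concurrent invalidate_range_start blocks
 * in amdgpu_mn_read_lock() until the submission is done:
 *
 *      struct amdgpu_mn *mn = bo->mn;
 *
 *      amdgpu_mn_lock(mn);
 *      r = example_submit_job(bo);
 *      amdgpu_mn_unlock(mn);
 *
 * Both helpers accept a NULL notifier, so callers do not have to special
 * case BOs that never had a userptr registered.
 */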