// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include <linux/interval_tree_generic.h>
#include <linux/sched/mm.h>

#include "i915_sw_fence.h"
#include "i915_vma_resource.h"
#include "i915_drv.h"
#include "intel_memory_region.h"

#include "gt/intel_gtt.h"

#ifdef __linux__
static struct kmem_cache *slab_vma_resources;
#else
static struct pool slab_vma_resources;
#endif

/**
 * DOC:
 * We use a per-vm interval tree to keep track of vma_resources
 * scheduled for unbind but not yet unbound. The tree is protected by
 * the vm mutex, and nodes are removed just after the unbind fence signals.
 * The removal takes the vm mutex from a kernel thread which we need to
 * keep in mind so that we don't grab the mutex and try to wait for all
 * pending unbinds to complete, because that will temporarily block many
 * of the workqueue threads, and people will get angry.
 *
 * We should consider using a single ordered fence per VM instead but that
 * requires ordering the unbinds and might introduce unnecessary waiting
 * for unrelated unbinds. The amount of code will probably be roughly the
 * same due to the simplicity of using the interval tree interface.
 *
 * Another drawback of this interval tree is that the complexity of insertion
 * and removal of fences increases as O(ln(pending_unbinds)) instead of
 * O(1) for a single fence without interval tree.
 */

#define VMA_RES_START(_node) ((_node)->start - (_node)->guard)
#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size + (_node)->guard - 1)
#ifdef __linux__
INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb,
		     u64, __subtree_last,
		     VMA_RES_START, VMA_RES_LAST, static, vma_res_itree);
#else
static struct i915_vma_resource *
vma_res_itree_iter_first(struct rb_root_cached *root, uint64_t start,
			 uint64_t last)
{
	struct i915_vma_resource *node;
	struct rb_node *rb;

	for (rb = rb_first_cached(root); rb; rb = rb_next(rb)) {
		node = rb_entry(rb, typeof(*node), rb);
		if (VMA_RES_LAST(node) >= start && VMA_RES_START(node) <= last)
			return node;
	}
	return NULL;
}

static struct i915_vma_resource *
vma_res_itree_iter_next(struct i915_vma_resource *node, uint64_t start,
			uint64_t last)
{
	struct rb_node *rb = &node->rb;

	for (rb = rb_next(rb); rb; rb = rb_next(rb)) {
		node = rb_entry(rb, typeof(*node), rb);
		if (VMA_RES_LAST(node) >= start && VMA_RES_START(node) <= last)
			return node;
	}
	return NULL;
}

static void
vma_res_itree_remove(struct i915_vma_resource *node,
		     struct rb_root_cached *root)
{
	rb_erase_cached(&node->rb, root);
}

static void
vma_res_itree_insert(struct i915_vma_resource *node,
		     struct rb_root_cached *root)
{
	struct rb_node **iter = &root->rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct i915_vma_resource *iter_node;

	while (*iter) {
		parent = *iter;
		iter_node = rb_entry(*iter, struct i915_vma_resource, rb);

		if (node->start < iter_node->start)
			iter = &(*iter)->rb_left;
		else
			iter = &(*iter)->rb_right;
	}

	rb_link_node(&node->rb, parent, iter);
	rb_insert_color_cached(&node->rb, root, false);
}
#endif
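
/*
 * Illustrative sketch (not part of the driver; assumes the caller holds
 * vm->mutex and "handle_pending_unbind()" is a hypothetical helper): how
 * the iterators above are used to walk all pending unbinds overlapping a
 * range, regardless of which of the two implementations is built:
 *
 *	struct i915_vma_resource *it;
 *
 *	for (it = vma_res_itree_iter_first(&vm->pending_unbind, start, last);
 *	     it;
 *	     it = vma_res_itree_iter_next(it, start, last))
 *		handle_pending_unbind(it);
 */
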
/* Callbacks for the unbind dma-fence. */

/**
 * i915_vma_resource_alloc - Allocate a vma resource
 *
 * Return: A pointer to a cleared struct i915_vma_resource or
 * a -ENOMEM error pointer if allocation fails.
 */
struct i915_vma_resource *i915_vma_resource_alloc(void)
{
#ifdef __linux__
	struct i915_vma_resource *vma_res =
		kmem_cache_zalloc(slab_vma_resources, GFP_KERNEL);
#else
	struct i915_vma_resource *vma_res =
		pool_get(&slab_vma_resources, PR_WAITOK | PR_ZERO);
#endif

	return vma_res ? vma_res : ERR_PTR(-ENOMEM);
}

/**
 * i915_vma_resource_free - Free a vma resource
 * @vma_res: The vma resource to free.
 */
void i915_vma_resource_free(struct i915_vma_resource *vma_res)
{
#ifdef __linux__
	if (vma_res)
		kmem_cache_free(slab_vma_resources, vma_res);
#else
	if (vma_res)
		pool_put(&slab_vma_resources, vma_res);
#endif
}

static const char *get_driver_name(struct dma_fence *fence)
{
	return "vma unbind fence";
}

static const char *get_timeline_name(struct dma_fence *fence)
{
	return "unbound";
}

static void unbind_fence_free_rcu(struct rcu_head *head)
{
	struct i915_vma_resource *vma_res =
		container_of(head, typeof(*vma_res), unbind_fence.rcu);

	i915_vma_resource_free(vma_res);
}

static void unbind_fence_release(struct dma_fence *fence)
{
	struct i915_vma_resource *vma_res =
		container_of(fence, typeof(*vma_res), unbind_fence);

	i915_sw_fence_fini(&vma_res->chain);

	call_rcu(&fence->rcu, unbind_fence_free_rcu);
}

static struct dma_fence_ops unbind_fence_ops = {
	.get_driver_name = get_driver_name,
	.get_timeline_name = get_timeline_name,
	.release = unbind_fence_release,
};

static void __i915_vma_resource_unhold(struct i915_vma_resource *vma_res)
{
	struct i915_address_space *vm;

	if (!refcount_dec_and_test(&vma_res->hold_count))
		return;

	dma_fence_signal(&vma_res->unbind_fence);

	vm = vma_res->vm;
	if (vma_res->wakeref)
		intel_runtime_pm_put(&vm->i915->runtime_pm, vma_res->wakeref);

	vma_res->vm = NULL;
	if (!RB_EMPTY_NODE(&vma_res->rb)) {
		mutex_lock(&vm->mutex);
		vma_res_itree_remove(vma_res, &vm->pending_unbind);
		mutex_unlock(&vm->mutex);
	}

	if (vma_res->bi.pages_rsgt)
		i915_refct_sgt_put(vma_res->bi.pages_rsgt);
}

/**
 * i915_vma_resource_unhold - Unhold the signaling of the vma resource unbind
 * fence.
 * @vma_res: The vma resource.
 * @lockdep_cookie: The lockdep cookie returned from i915_vma_resource_hold.
 *
 * The function may leave a dma_fence critical section.
 */
void i915_vma_resource_unhold(struct i915_vma_resource *vma_res,
			      bool lockdep_cookie)
{
	dma_fence_end_signalling(lockdep_cookie);

	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		unsigned long irq_flags;

		/* Inefficient open-coded might_lock_irqsave() */
		spin_lock_irqsave(&vma_res->lock, irq_flags);
		spin_unlock_irqrestore(&vma_res->lock, irq_flags);
	}

	__i915_vma_resource_unhold(vma_res);
}

/**
 * i915_vma_resource_hold - Hold the signaling of the vma resource unbind fence.
 * @vma_res: The vma resource.
 * @lockdep_cookie: Pointer to a bool serving as a lockdep cookie that should
 * be given as an argument to the pairing i915_vma_resource_unhold.
 *
 * If returning true, the function enters a dma_fence signalling critical
 * section if not in one already.
 *
 * Return: true if holding successful, false if not.
 */
bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
			    bool *lockdep_cookie)
{
	bool held = refcount_inc_not_zero(&vma_res->hold_count);

	if (held)
		*lockdep_cookie = dma_fence_begin_signalling();

	return held;
}
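
/*
 * Illustrative sketch (hypothetical caller, not part of this file;
 * "access_backing_store()" is a made-up helper): the expected pairing of
 * i915_vma_resource_hold() and i915_vma_resource_unhold() when the unbind
 * fence must be kept from signaling while the backing store is accessed:
 *
 *	bool lockdep_cookie;
 *
 *	if (i915_vma_resource_hold(vma_res, &lockdep_cookie)) {
 *		access_backing_store(vma_res);
 *		i915_vma_resource_unhold(vma_res, lockdep_cookie);
 *	}
 */
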
static void i915_vma_resource_unbind_work(struct work_struct *work)
{
	struct i915_vma_resource *vma_res =
		container_of(work, typeof(*vma_res), work);
	struct i915_address_space *vm = vma_res->vm;
	bool lockdep_cookie;

	lockdep_cookie = dma_fence_begin_signalling();
	if (likely(!vma_res->skip_pte_rewrite))
		vma_res->ops->unbind_vma(vm, vma_res);

	dma_fence_end_signalling(lockdep_cookie);
	__i915_vma_resource_unhold(vma_res);
	i915_vma_resource_put(vma_res);
}

static int
i915_vma_resource_fence_notify(struct i915_sw_fence *fence,
			       enum i915_sw_fence_notify state)
{
	struct i915_vma_resource *vma_res =
		container_of(fence, typeof(*vma_res), chain);
	struct dma_fence *unbind_fence =
		&vma_res->unbind_fence;

	switch (state) {
	case FENCE_COMPLETE:
		dma_fence_get(unbind_fence);
		if (vma_res->immediate_unbind) {
			i915_vma_resource_unbind_work(&vma_res->work);
		} else {
			INIT_WORK(&vma_res->work, i915_vma_resource_unbind_work);
			queue_work(system_unbound_wq, &vma_res->work);
		}
		break;
	case FENCE_FREE:
		i915_vma_resource_put(vma_res);
		break;
	}

	return NOTIFY_DONE;
}

/**
 * i915_vma_resource_unbind - Unbind a vma resource
 * @vma_res: The vma resource to unbind.
 * @tlb: pointer to vma->obj->mm.tlb associated with the resource
 * to be stored at vma_res->tlb. When not-NULL, it will be used
 * to do TLB cache invalidation before freeing a VMA resource.
 * Used only for async unbind.
 *
 * At this point this function does little more than publish a fence that
 * signals immediately unless signaling is held back.
 *
 * Return: A refcounted pointer to a dma-fence that signals when unbinding is
 * complete.
 */
struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res,
					   u32 *tlb)
{
	struct i915_address_space *vm = vma_res->vm;

	vma_res->tlb = tlb;

	/* Reference for the sw fence */
	i915_vma_resource_get(vma_res);

	/* Caller must already have a wakeref in this case. */
	if (vma_res->needs_wakeref)
		vma_res->wakeref = intel_runtime_pm_get_if_in_use(&vm->i915->runtime_pm);

	if (atomic_read(&vma_res->chain.pending) <= 1) {
		RB_CLEAR_NODE(&vma_res->rb);
		vma_res->immediate_unbind = 1;
	} else {
		vma_res_itree_insert(vma_res, &vma_res->vm->pending_unbind);
	}

	i915_sw_fence_commit(&vma_res->chain);

	return &vma_res->unbind_fence;
}
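
/*
 * Illustrative sketch (hypothetical caller, not part of this file): the
 * fence returned by i915_vma_resource_unbind() is refcounted, so the caller
 * owns a reference and is expected to drop it once done with the fence,
 * whether or not it waits:
 *
 *	struct dma_fence *fence;
 *
 *	fence = i915_vma_resource_unbind(vma_res, NULL);
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);
 */
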
/**
 * __i915_vma_resource_init - Initialize a vma resource.
 * @vma_res: The vma resource to initialize
 *
 * Initializes the private members of a vma resource.
 */
void __i915_vma_resource_init(struct i915_vma_resource *vma_res)
{
	mtx_init(&vma_res->lock, IPL_TTY);
	dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops,
		       &vma_res->lock, 0, 0);
	refcount_set(&vma_res->hold_count, 1);
	i915_sw_fence_init(&vma_res->chain, i915_vma_resource_fence_notify);
}

static void
i915_vma_resource_color_adjust_range(struct i915_address_space *vm,
				     u64 *start,
				     u64 *end)
{
	if (i915_vm_has_cache_coloring(vm)) {
		if (*start)
			*start -= I915_GTT_PAGE_SIZE;
		*end += I915_GTT_PAGE_SIZE;
	}
}

/**
 * i915_vma_resource_bind_dep_sync - Wait for / sync all unbinds touching a
 * certain vm range.
 * @vm: The vm to look at.
 * @offset: The range start.
 * @size: The range size.
 * @intr: Whether to wait interruptible.
 *
 * The function needs to be called with the vm lock held.
 *
 * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true
 */
int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm,
				    u64 offset,
				    u64 size,
				    bool intr)
{
	struct i915_vma_resource *node;
	u64 last = offset + size - 1;

	lockdep_assert_held(&vm->mutex);
	might_sleep();

	i915_vma_resource_color_adjust_range(vm, &offset, &last);
	node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
	while (node) {
		int ret = dma_fence_wait(&node->unbind_fence, intr);

		if (ret)
			return ret;

		node = vma_res_itree_iter_next(node, offset, last);
	}

	return 0;
}

/**
 * i915_vma_resource_bind_dep_sync_all - Wait for / sync all unbinds of a vm,
 * releasing the vm lock while waiting.
 * @vm: The vm to look at.
 *
 * The function may not be called with the vm lock held.
 * Typically this is called at vm destruction to finish any pending
 * unbind operations. The vm mutex is released while waiting to avoid
 * stalling kernel workqueues trying to grab the mutex.
 */
void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm)
{
	struct i915_vma_resource *node;
	struct dma_fence *fence;

	do {
		fence = NULL;
		mutex_lock(&vm->mutex);
		node = vma_res_itree_iter_first(&vm->pending_unbind, 0,
						U64_MAX);
		if (node)
			fence = dma_fence_get_rcu(&node->unbind_fence);
		mutex_unlock(&vm->mutex);

		if (fence) {
			/*
			 * The wait makes sure the node eventually removes
			 * itself from the tree.
			 */
			dma_fence_wait(fence, false);
			dma_fence_put(fence);
		}
	} while (node);
}

/**
 * i915_vma_resource_bind_dep_await - Have a struct i915_sw_fence await all
 * pending unbinds in a certain range of a vm.
 * @vm: The vm to look at.
 * @sw_fence: The struct i915_sw_fence that will be awaiting the unbinds.
 * @offset: The range start.
 * @size: The range size.
 * @intr: Whether to wait interruptible.
 * @gfp: Allocation mode for memory allocations.
 *
 * The function makes @sw_fence await all pending unbinds in a certain
 * vm range before calling the complete notifier. To be able to await
 * each individual unbind, the function needs to allocate memory using
 * the @gfp allocation mode. If that fails, the function will instead
 * wait for the unbind fence to signal, using @intr to judge whether to
 * wait interruptible or not. Note that @gfp should ideally be selected so
 * as to avoid any expensive memory allocation stalls and rather fail and
 * synchronize itself. For now the vm mutex is required when calling this
 * function, which means that @gfp can't call into direct reclaim. In reality
 * this means that during heavy memory pressure, we will sync in this
 * function.
 *
 * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true
 */
int i915_vma_resource_bind_dep_await(struct i915_address_space *vm,
				     struct i915_sw_fence *sw_fence,
				     u64 offset,
				     u64 size,
				     bool intr,
				     gfp_t gfp)
{
	struct i915_vma_resource *node;
	u64 last = offset + size - 1;

	lockdep_assert_held(&vm->mutex);
	might_alloc(gfp);
	might_sleep();

	i915_vma_resource_color_adjust_range(vm, &offset, &last);
	node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
	while (node) {
		int ret;

		ret = i915_sw_fence_await_dma_fence(sw_fence,
						    &node->unbind_fence,
						    0, gfp);
		if (ret < 0) {
			ret = dma_fence_wait(&node->unbind_fence, intr);
			if (ret)
				return ret;
		}

		node = vma_res_itree_iter_next(node, offset, last);
	}

	return 0;
}
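
/*
 * Illustrative sketch (hypothetical caller, not part of this file;
 * "work->chain" stands for some already-initialized struct i915_sw_fence):
 * queueing an async dependency on all pending unbinds in a range, with an
 * allocation mode that fails fast so the helper only falls back to a
 * blocking wait under memory pressure:
 *
 *	lockdep_assert_held(&vm->mutex);
 *	err = i915_vma_resource_bind_dep_await(vm, &work->chain, start, size,
 *					       true, GFP_NOWAIT | __GFP_NOWARN);
 */
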
void i915_vma_resource_module_exit(void)
{
#ifdef __linux__
	kmem_cache_destroy(slab_vma_resources);
#else
	pool_destroy(&slab_vma_resources);
#endif
}

int __init i915_vma_resource_module_init(void)
{
#ifdef __linux__
	slab_vma_resources = KMEM_CACHE(i915_vma_resource, SLAB_HWCACHE_ALIGN);
	if (!slab_vma_resources)
		return -ENOMEM;
#else
	pool_init(&slab_vma_resources, sizeof(struct i915_vma_resource),
	    0, IPL_NONE, 0, "svmar", NULL);
#endif

	return 0;
}
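
/*
 * Illustrative sketch (assumption, not part of this file): the init/exit
 * pair above is intended to be hooked into the driver's module init table
 * elsewhere, roughly along the lines of:
 *
 *	{ .init = i915_vma_resource_module_init,
 *	  .exit = i915_vma_resource_module_exit },
 *
 * so that the backing slab cache (or pool) exists before any vma resource
 * is allocated and is torn down on module unload.
 */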