// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include <linux/interval_tree_generic.h>
#include <linux/sched/mm.h>

#include "i915_sw_fence.h"
#include "i915_vma_resource.h"
#include "i915_drv.h"
#include "intel_memory_region.h"

#include "gt/intel_gtt.h"
#ifdef __linux__
static struct kmem_cache *slab_vma_resources;
#else
static struct pool slab_vma_resources;
#endif

/**
 * DOC:
 * We use a per-vm interval tree to keep track of vma_resources
 * scheduled for unbind but not yet unbound. The tree is protected by
 * the vm mutex, and nodes are removed just after the unbind fence signals.
 * The removal takes the vm mutex from a kernel thread which we need to
 * keep in mind so that we don't grab the mutex and try to wait for all
 * pending unbinds to complete, because that will temporarily block many
 * of the workqueue threads, and people will get angry.
 *
 * We should consider using a single ordered fence per VM instead but that
 * requires ordering the unbinds and might introduce unnecessary waiting
 * for unrelated unbinds. The amount of code will probably be roughly the
 * same due to the simplicity of the interval tree interface.
 *
 * Another drawback of this interval tree is that the complexity of insertion
 * and removal of fences increases as O(ln(pending_unbinds)) instead of
 * O(1) for a single fence without interval tree.
 */
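/*
 * A sketch of the safe waiting pattern implied above (and used by
 * i915_vma_resource_bind_dep_sync_all() below): take a reference on an
 * unbind fence while holding the vm mutex, but drop the mutex before
 * waiting, so that the workqueue threads can take it to remove their
 * signaled nodes:
 *
 *	struct i915_vma_resource *node;
 *	struct dma_fence *fence = NULL;
 *
 *	mutex_lock(&vm->mutex);
 *	node = vma_res_itree_iter_first(&vm->pending_unbind, 0, U64_MAX);
 *	if (node)
 *		fence = dma_fence_get_rcu(&node->unbind_fence);
 *	mutex_unlock(&vm->mutex);
 *
 *	if (fence) {
 *		dma_fence_wait(fence, false);
 *		dma_fence_put(fence);
 *	}
 */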
#define VMA_RES_START(_node) ((_node)->start - (_node)->guard)
#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size + (_node)->guard - 1)
#ifdef __linux__
INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb,
		     u64, __subtree_last,
		     VMA_RES_START, VMA_RES_LAST, static, vma_res_itree);
#else
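/*
 * Non-Linux fallback: hand-rolled equivalents of the functions that
 * INTERVAL_TREE_DEFINE() generates above. These walk the whole rbtree
 * instead of using augmented subtree bounds, so iteration is linear in
 * the number of pending unbinds rather than logarithmic.
 */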
static struct i915_vma_resource *
vma_res_itree_iter_first(struct rb_root_cached *root, uint64_t start,
			 uint64_t last)
{
	struct i915_vma_resource *node;
	struct rb_node *rb;

	for (rb = rb_first_cached(root); rb; rb = rb_next(rb)) {
		node = rb_entry(rb, typeof(*node), rb);
		if (VMA_RES_LAST(node) >= start && VMA_RES_START(node) <= last)
			return node;
	}
	return NULL;
}

static struct i915_vma_resource *
vma_res_itree_iter_next(struct i915_vma_resource *node, uint64_t start,
			uint64_t last)
{
	struct rb_node *rb = &node->rb;

	for (rb = rb_next(rb); rb; rb = rb_next(rb)) {
		node = rb_entry(rb, typeof(*node), rb);
		if (VMA_RES_LAST(node) >= start && VMA_RES_START(node) <= last)
			return node;
	}
	return NULL;
}

static void
vma_res_itree_remove(struct i915_vma_resource *node,
		     struct rb_root_cached *root)
{
	rb_erase_cached(&node->rb, root);
}

static void
vma_res_itree_insert(struct i915_vma_resource *node,
		     struct rb_root_cached *root)
{
	struct rb_node **iter = &root->rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct i915_vma_resource *iter_node;

	while (*iter) {
		parent = *iter;
		iter_node = rb_entry(*iter, struct i915_vma_resource, rb);

		if (node->start < iter_node->start)
			iter = &(*iter)->rb_left;
		else
			iter = &(*iter)->rb_right;
	}

	rb_link_node(&node->rb, parent, iter);
	rb_insert_color_cached(&node->rb, root, false);
}
#endif

/**
 * i915_vma_resource_alloc - Allocate a vma resource
 *
 * Return: A pointer to a cleared struct i915_vma_resource or
 * a -ENOMEM error pointer if allocation fails.
 */
struct i915_vma_resource *i915_vma_resource_alloc(void)
{
#ifdef __linux__
	struct i915_vma_resource *vma_res =
		kmem_cache_zalloc(slab_vma_resources, GFP_KERNEL);
#else
	struct i915_vma_resource *vma_res =
		pool_get(&slab_vma_resources, PR_WAITOK | PR_ZERO);
#endif

	return vma_res ? vma_res : ERR_PTR(-ENOMEM);
}

/**
 * i915_vma_resource_free - Free a vma resource
 * @vma_res: The vma resource to free.
 */
void i915_vma_resource_free(struct i915_vma_resource *vma_res)
{
#ifdef __linux__
	if (vma_res)
		kmem_cache_free(slab_vma_resources, vma_res);
#else
	if (vma_res)
		pool_put(&slab_vma_resources, vma_res);
#endif
}

/* Callbacks for the unbind dma-fence. */

static const char *get_driver_name(struct dma_fence *fence)
{
	return "vma unbind fence";
}

static const char *get_timeline_name(struct dma_fence *fence)
{
	return "unbound";
}

static void unbind_fence_free_rcu(struct rcu_head *head)
{
	struct i915_vma_resource *vma_res =
		container_of(head, typeof(*vma_res), unbind_fence.rcu);

	i915_vma_resource_free(vma_res);
}

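/*
 * dma-fences may be referenced under RCU (this file itself uses
 * dma_fence_get_rcu() in i915_vma_resource_bind_dep_sync_all()), so the
 * backing memory must stay valid for a full grace period after the last
 * reference is dropped; hence the free is deferred through call_rcu().
 */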
static void unbind_fence_release(struct dma_fence *fence)
{
	struct i915_vma_resource *vma_res =
		container_of(fence, typeof(*vma_res), unbind_fence);

	i915_sw_fence_fini(&vma_res->chain);

	call_rcu(&fence->rcu, unbind_fence_free_rcu);
}

static struct dma_fence_ops unbind_fence_ops = {
	.get_driver_name = get_driver_name,
	.get_timeline_name = get_timeline_name,
	.release = unbind_fence_release,
};

static void __i915_vma_resource_unhold(struct i915_vma_resource *vma_res)
{
	struct i915_address_space *vm;

	if (!refcount_dec_and_test(&vma_res->hold_count))
		return;

	dma_fence_signal(&vma_res->unbind_fence);

	vm = vma_res->vm;
	if (vma_res->wakeref)
		intel_runtime_pm_put(&vm->i915->runtime_pm, vma_res->wakeref);

	vma_res->vm = NULL;
	if (!RB_EMPTY_NODE(&vma_res->rb)) {
		mutex_lock(&vm->mutex);
		vma_res_itree_remove(vma_res, &vm->pending_unbind);
		mutex_unlock(&vm->mutex);
	}

	if (vma_res->bi.pages_rsgt)
		i915_refct_sgt_put(vma_res->bi.pages_rsgt);
}

/**
 * i915_vma_resource_unhold - Unhold the signaling of the vma resource unbind
 * fence.
 * @vma_res: The vma resource.
 * @lockdep_cookie: The lockdep cookie returned from i915_vma_resource_hold.
 *
 * The function may leave a dma_fence critical section.
 */
void i915_vma_resource_unhold(struct i915_vma_resource *vma_res,
			      bool lockdep_cookie)
{
	dma_fence_end_signalling(lockdep_cookie);

	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		unsigned long irq_flags;

		/* Inefficient open-coded might_lock_irqsave() */
		spin_lock_irqsave(&vma_res->lock, irq_flags);
		spin_unlock_irqrestore(&vma_res->lock, irq_flags);
	}

	__i915_vma_resource_unhold(vma_res);
}

/**
 * i915_vma_resource_hold - Hold the signaling of the vma resource unbind fence.
 * @vma_res: The vma resource.
 * @lockdep_cookie: Pointer to a bool serving as a lockdep cookie that should
 * be given as an argument to the pairing i915_vma_resource_unhold.
 *
 * If returning true, the function enters a dma_fence signalling critical
 * section if not in one already.
 *
 * Return: true if holding successful, false if not.
 */
bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
			    bool *lockdep_cookie)
{
	bool held = refcount_inc_not_zero(&vma_res->hold_count);

	if (held)
		*lockdep_cookie = dma_fence_begin_signalling();

	return held;
}
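
/*
 * Usage sketch (hypothetical caller): hold back signaling of the unbind
 * fence while touching the resource, then drop the hold again. The cookie
 * pairs the dma-fence signalling annotations for lockdep:
 *
 *	bool lockdep_cookie;
 *
 *	if (i915_vma_resource_hold(vma_res, &lockdep_cookie)) {
 *		... use vma_res, the unbind fence cannot signal yet ...
 *		i915_vma_resource_unhold(vma_res, lockdep_cookie);
 *	}
 */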

static void i915_vma_resource_unbind_work(struct work_struct *work)
{
	struct i915_vma_resource *vma_res =
		container_of(work, typeof(*vma_res), work);
	struct i915_address_space *vm = vma_res->vm;
	bool lockdep_cookie;

	lockdep_cookie = dma_fence_begin_signalling();
	if (likely(!vma_res->skip_pte_rewrite))
		vma_res->ops->unbind_vma(vm, vma_res);

	dma_fence_end_signalling(lockdep_cookie);
	__i915_vma_resource_unhold(vma_res);
	i915_vma_resource_put(vma_res);
}

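/*
 * Once every fence the unbind depends on has signaled (FENCE_COMPLETE),
 * the PTE teardown runs either inline for an immediate unbind or from a
 * worker on system_unbound_wq, with an extra reference held on the unbind
 * fence for the duration. FENCE_FREE drops the reference taken for the
 * sw fence in i915_vma_resource_unbind().
 */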
static int
i915_vma_resource_fence_notify(struct i915_sw_fence *fence,
			       enum i915_sw_fence_notify state)
{
	struct i915_vma_resource *vma_res =
		container_of(fence, typeof(*vma_res), chain);
	struct dma_fence *unbind_fence =
		&vma_res->unbind_fence;

	switch (state) {
	case FENCE_COMPLETE:
		dma_fence_get(unbind_fence);
		if (vma_res->immediate_unbind) {
			i915_vma_resource_unbind_work(&vma_res->work);
		} else {
			INIT_WORK(&vma_res->work, i915_vma_resource_unbind_work);
			queue_work(system_unbound_wq, &vma_res->work);
		}
		break;
	case FENCE_FREE:
		i915_vma_resource_put(vma_res);
		break;
	}

	return NOTIFY_DONE;
}

/**
 * i915_vma_resource_unbind - Unbind a vma resource
 * @vma_res: The vma resource to unbind.
 * @tlb: pointer to vma->obj->mm.tlb associated with the resource
 * to be stored at vma_res->tlb. When non-NULL, it will be used
 * to do TLB cache invalidation before freeing a VMA resource.
 * Used only for async unbind.
 *
 * At this point this function does little more than publish a fence that
 * signals immediately unless signaling is held back.
 *
 * Return: A refcounted pointer to a dma-fence that signals when unbinding is
 * complete.
 */
struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res,
					   u32 *tlb)
{
	struct i915_address_space *vm = vma_res->vm;

	vma_res->tlb = tlb;

	/* Reference for the sw fence */
	i915_vma_resource_get(vma_res);

	/* Caller must already have a wakeref in this case. */
	if (vma_res->needs_wakeref)
		vma_res->wakeref = intel_runtime_pm_get_if_in_use(&vm->i915->runtime_pm);

	if (atomic_read(&vma_res->chain.pending) <= 1) {
		RB_CLEAR_NODE(&vma_res->rb);
		vma_res->immediate_unbind = 1;
	} else {
		vma_res_itree_insert(vma_res, &vma_res->vm->pending_unbind);
	}

	i915_sw_fence_commit(&vma_res->chain);

	return &vma_res->unbind_fence;
}
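
/*
 * Caller sketch (hypothetical; the vm mutex must be held since the
 * resource may be inserted into the pending-unbind tree): publish the
 * unbind fence, use it, and drop the reference when done:
 *
 *	struct dma_fence *fence;
 *
 *	fence = i915_vma_resource_unbind(vma_res, &vma->obj->mm.tlb);
 *	... install fence in a dma_resv or wait for it to signal ...
 *	dma_fence_put(fence);
 */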

/**
 * __i915_vma_resource_init - Initialize a vma resource.
 * @vma_res: The vma resource to initialize
 *
 * Initializes the private members of a vma resource.
 */
void __i915_vma_resource_init(struct i915_vma_resource *vma_res)
{
	mtx_init(&vma_res->lock, IPL_TTY);
	dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops,
		       &vma_res->lock, 0, 0);
	refcount_set(&vma_res->hold_count, 1);
	i915_sw_fence_init(&vma_res->chain, i915_vma_resource_fence_notify);
}

static void
i915_vma_resource_color_adjust_range(struct i915_address_space *vm,
				     u64 *start,
				     u64 *end)
{
	if (i915_vm_has_cache_coloring(vm)) {
		if (*start)
			*start -= I915_GTT_PAGE_SIZE;
		*end += I915_GTT_PAGE_SIZE;
	}
}
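
/*
 * Worked example: with cache coloring enabled and I915_GTT_PAGE_SIZE
 * being 4K, a query range [0x2000, 0x4fff] is widened to
 * [0x1000, 0x5fff], so pending unbinds of the immediately adjacent
 * nodes (whose cache colors may conflict) are included as well.
 */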

/**
 * i915_vma_resource_bind_dep_sync - Wait for / sync all unbinds touching a
 * certain vm range.
 * @vm: The vm to look at.
 * @offset: The range start.
 * @size: The range size.
 * @intr: Whether to wait interruptibly.
 *
 * The function needs to be called with the vm lock held.
 *
 * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true
 */
int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm,
				    u64 offset,
				    u64 size,
				    bool intr)
{
	struct i915_vma_resource *node;
	u64 last = offset + size - 1;

	lockdep_assert_held(&vm->mutex);
	might_sleep();

	i915_vma_resource_color_adjust_range(vm, &offset, &last);
	node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
	while (node) {
		int ret = dma_fence_wait(&node->unbind_fence, intr);

		if (ret)
			return ret;

		node = vma_res_itree_iter_next(node, offset, last);
	}

	return 0;
}

/**
 * i915_vma_resource_bind_dep_sync_all - Wait for / sync all unbinds of a vm,
 * releasing the vm lock while waiting.
 * @vm: The vm to look at.
 *
 * The function may not be called with the vm lock held.
 * Typically this is called at vm destruction to finish any pending
 * unbind operations. The vm mutex is released while waiting to avoid
 * stalling kernel workqueues trying to grab the mutex.
 */
void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm)
{
	struct i915_vma_resource *node;
	struct dma_fence *fence;

	do {
		fence = NULL;
		mutex_lock(&vm->mutex);
		node = vma_res_itree_iter_first(&vm->pending_unbind, 0,
						U64_MAX);
		if (node)
			fence = dma_fence_get_rcu(&node->unbind_fence);
		mutex_unlock(&vm->mutex);

		if (fence) {
			/*
			 * The wait makes sure the node eventually removes
			 * itself from the tree.
			 */
			dma_fence_wait(fence, false);
			dma_fence_put(fence);
		}
	} while (node);
}

/**
 * i915_vma_resource_bind_dep_await - Have a struct i915_sw_fence await all
 * pending unbinds in a certain range of a vm.
 * @vm: The vm to look at.
 * @sw_fence: The struct i915_sw_fence that will be awaiting the unbinds.
 * @offset: The range start.
 * @size: The range size.
 * @intr: Whether to wait interruptibly.
 * @gfp: Allocation mode for memory allocations.
 *
 * The function makes @sw_fence await all pending unbinds in a certain
 * vm range before calling the complete notifier. To be able to await
 * each individual unbind, the function needs to allocate memory using
 * the @gfp allocation mode. If that fails, the function will instead
 * wait for the unbind fence to signal, using @intr to judge whether to
 * wait interruptibly or not. Note that @gfp should ideally be selected so
 * as to avoid any expensive memory allocation stalls and rather fail and
 * synchronize itself. For now the vm mutex is required when calling this
 * function, which means that @gfp can't call into direct reclaim. In reality
 * this means that during heavy memory pressure, we will sync in this
 * function.
 *
 * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true
 */
int i915_vma_resource_bind_dep_await(struct i915_address_space *vm,
				     struct i915_sw_fence *sw_fence,
				     u64 offset,
				     u64 size,
				     bool intr,
				     gfp_t gfp)
{
	struct i915_vma_resource *node;
	u64 last = offset + size - 1;

	lockdep_assert_held(&vm->mutex);
	might_alloc(gfp);
	might_sleep();

	i915_vma_resource_color_adjust_range(vm, &offset, &last);
	node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
	while (node) {
		int ret;

		ret = i915_sw_fence_await_dma_fence(sw_fence,
						    &node->unbind_fence,
						    0, gfp);
		if (ret < 0) {
			ret = dma_fence_wait(&node->unbind_fence, intr);
			if (ret)
				return ret;
		}

		node = vma_res_itree_iter_next(node, offset, last);
	}

	return 0;
}
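
/*
 * Caller sketch (hypothetical): make a bind's sw fence wait for any
 * overlapping pending unbinds, using a gfp mode that fails fast instead
 * of entering direct reclaim, as recommended above:
 *
 *	ret = i915_vma_resource_bind_dep_await(vm, &work->chain,
 *					       node->start, node->size,
 *					       true,
 *					       GFP_NOWAIT | __GFP_NOWARN);
 */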

void i915_vma_resource_module_exit(void)
{
#ifdef __linux__
	kmem_cache_destroy(slab_vma_resources);
#else
	pool_destroy(&slab_vma_resources);
#endif
}

int __init i915_vma_resource_module_init(void)
{
#ifdef __linux__
	slab_vma_resources = KMEM_CACHE(i915_vma_resource, SLAB_HWCACHE_ALIGN);
	if (!slab_vma_resources)
		return -ENOMEM;
#else
	pool_init(&slab_vma_resources, sizeof(struct i915_vma_resource),
	    0, IPL_NONE, 0, "svmar", NULL);
#endif

	return 0;
}