xref: /openbsd/sys/dev/pci/drm/i915/i915_vma_resource.c (revision f005ef32)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include <linux/interval_tree_generic.h>
#include <linux/sched/mm.h>

#include "i915_sw_fence.h"
#include "i915_vma_resource.h"
#include "i915_drv.h"
#include "intel_memory_region.h"

#include "gt/intel_gtt.h"

static struct pool slab_vma_resources;

/**
 * DOC:
 * We use a per-vm interval tree to keep track of vma_resources
 * scheduled for unbind but not yet unbound. The tree is protected by
 * the vm mutex, and nodes are removed just after the unbind fence signals.
 * The removal takes the vm mutex from a kernel thread, which we need to
 * keep in mind so that we don't grab the mutex and try to wait for all
 * pending unbinds to complete, because that will temporarily block many
 * of the workqueue threads, and people will get angry.
 *
 * We should consider using a single ordered fence per VM instead, but that
 * requires ordering the unbinds and might introduce unnecessary waiting
 * for unrelated unbinds. The amount of code will probably be roughly the
 * same due to the simplicity of the interval tree interface.
 *
 * Another drawback of this interval tree is that the complexity of
 * insertion and removal of fences increases as O(ln(pending_unbinds))
 * instead of O(1) for a single fence without the interval tree.
 */
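/*
 * The tree interval of a vma_resource spans the node itself plus the
 * guard areas on both sides, so range lookups also catch pending
 * unbinds whose guard pages overlap the queried range.
 */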
#define VMA_RES_START(_node) ((_node)->start - (_node)->guard)
#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size + (_node)->guard - 1)
#ifdef __linux__
INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb,
		     u64, __subtree_last,
		     VMA_RES_START, VMA_RES_LAST, static, vma_res_itree);
#else
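/*
 * OpenBSD has no INTERVAL_TREE_DEFINE(), so the vma_res_itree_*()
 * helpers below are open-coded on a plain rbtree keyed by start
 * address. Range lookups degrade to a linear in-order walk filtered
 * on overlap; presumably acceptable here since only pending unbinds
 * live in the tree.
 */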
static struct i915_vma_resource *
vma_res_itree_iter_first(struct rb_root_cached *root, uint64_t start,
    uint64_t last)
{
	struct i915_vma_resource *node;
	struct rb_node *rb;

	for (rb = rb_first_cached(root); rb; rb = rb_next(rb)) {
		node = rb_entry(rb, typeof(*node), rb);
		if (VMA_RES_LAST(node) >= start && VMA_RES_START(node) <= last)
			return node;
	}
	return NULL;
}

static struct i915_vma_resource *
vma_res_itree_iter_next(struct i915_vma_resource *node, uint64_t start,
    uint64_t last)
{
	struct rb_node *rb = &node->rb;

	for (rb = rb_next(rb); rb; rb = rb_next(rb)) {
		node = rb_entry(rb, typeof(*node), rb);
		if (VMA_RES_LAST(node) >= start && VMA_RES_START(node) <= last)
			return node;
	}
	return NULL;
}

static void
vma_res_itree_remove(struct i915_vma_resource *node,
    struct rb_root_cached *root)
{
	rb_erase_cached(&node->rb, root);
}

static void
vma_res_itree_insert(struct i915_vma_resource *node,
    struct rb_root_cached *root)
{
	struct rb_node **iter = &root->rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct i915_vma_resource *iter_node;

	while (*iter) {
		parent = *iter;
		iter_node = rb_entry(*iter, struct i915_vma_resource, rb);

		if (node->start < iter_node->start)
			iter = &(*iter)->rb_left;
		else
			iter = &(*iter)->rb_right;
	}

	rb_link_node(&node->rb, parent, iter);
	rb_insert_color_cached(&node->rb, root, false);
}
#endif

/* Callbacks for the unbind dma-fence. */

/**
 * i915_vma_resource_alloc - Allocate a vma resource
 *
 * Return: A pointer to a cleared struct i915_vma_resource or
 * a -ENOMEM error pointer if allocation fails.
 */
struct i915_vma_resource *i915_vma_resource_alloc(void)
{
#ifdef __linux__
	struct i915_vma_resource *vma_res =
		kmem_cache_zalloc(slab_vma_resources, GFP_KERNEL);
#else
	struct i915_vma_resource *vma_res =
		pool_get(&slab_vma_resources, PR_WAITOK | PR_ZERO);
#endif

	return vma_res ? vma_res : ERR_PTR(-ENOMEM);
}

/**
 * i915_vma_resource_free - Free a vma resource
 * @vma_res: The vma resource to free.
 */
void i915_vma_resource_free(struct i915_vma_resource *vma_res)
{
#ifdef __linux__
	if (vma_res)
		kmem_cache_free(slab_vma_resources, vma_res);
#else
	if (vma_res)
		pool_put(&slab_vma_resources, vma_res);
#endif
}

static const char *get_driver_name(struct dma_fence *fence)
{
	return "vma unbind fence";
}

static const char *get_timeline_name(struct dma_fence *fence)
{
	return "unbound";
}

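/*
 * The unbind dma-fence is embedded in the vma_resource, so dropping the
 * last fence reference frees the whole resource. Freeing is deferred
 * through an RCU grace period because dma-fences may still be looked up
 * under rcu_read_lock().
 */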
static void unbind_fence_free_rcu(struct rcu_head *head)
{
	struct i915_vma_resource *vma_res =
		container_of(head, typeof(*vma_res), unbind_fence.rcu);

	i915_vma_resource_free(vma_res);
}

static void unbind_fence_release(struct dma_fence *fence)
{
	struct i915_vma_resource *vma_res =
		container_of(fence, typeof(*vma_res), unbind_fence);

	i915_sw_fence_fini(&vma_res->chain);

	call_rcu(&fence->rcu, unbind_fence_free_rcu);
}

static struct dma_fence_ops unbind_fence_ops = {
	.get_driver_name = get_driver_name,
	.get_timeline_name = get_timeline_name,
	.release = unbind_fence_release,
};

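/*
 * Drop a hold reference. When the last hold goes away: signal the unbind
 * fence, release the runtime-pm wakeref taken for the unbind (if any),
 * remove the resource from the vm's pending_unbind tree and drop the
 * reference on the backing refcounted sg-table.
 */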
static void __i915_vma_resource_unhold(struct i915_vma_resource *vma_res)
{
	struct i915_address_space *vm;

	if (!refcount_dec_and_test(&vma_res->hold_count))
		return;

	dma_fence_signal(&vma_res->unbind_fence);

	vm = vma_res->vm;
	if (vma_res->wakeref)
		intel_runtime_pm_put(&vm->i915->runtime_pm, vma_res->wakeref);

	vma_res->vm = NULL;
	if (!RB_EMPTY_NODE(&vma_res->rb)) {
		mutex_lock(&vm->mutex);
		vma_res_itree_remove(vma_res, &vm->pending_unbind);
		mutex_unlock(&vm->mutex);
	}

	if (vma_res->bi.pages_rsgt)
		i915_refct_sgt_put(vma_res->bi.pages_rsgt);
}

/**
 * i915_vma_resource_unhold - Unhold the signaling of the vma resource unbind
 * fence.
 * @vma_res: The vma resource.
 * @lockdep_cookie: The lockdep cookie returned from i915_vma_resource_hold.
 *
 * The function may leave a dma_fence critical section.
 */
void i915_vma_resource_unhold(struct i915_vma_resource *vma_res,
			      bool lockdep_cookie)
{
	dma_fence_end_signalling(lockdep_cookie);

	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		unsigned long irq_flags;

		/* Inefficient open-coded might_lock_irqsave() */
		spin_lock_irqsave(&vma_res->lock, irq_flags);
		spin_unlock_irqrestore(&vma_res->lock, irq_flags);
	}

	__i915_vma_resource_unhold(vma_res);
}

/**
 * i915_vma_resource_hold - Hold the signaling of the vma resource unbind fence.
 * @vma_res: The vma resource.
 * @lockdep_cookie: Pointer to a bool serving as a lockdep cookie that should
 * be given as an argument to the pairing i915_vma_resource_unhold.
 *
 * If returning true, the function enters a dma_fence signalling critical
 * section if not in one already.
 *
 * Return: true if holding successful, false if not.
 */
bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
			    bool *lockdep_cookie)
{
	bool held = refcount_inc_not_zero(&vma_res->hold_count);

	if (held)
		*lockdep_cookie = dma_fence_begin_signalling();

	return held;
}

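/*
 * Deferred-unbind worker: run the vm's unbind_vma callback inside a
 * dma-fence signalling critical section (unless skip_pte_rewrite asks
 * us to leave the PTEs alone), then release the hold and the sw-fence
 * reference on the resource.
 */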
static void i915_vma_resource_unbind_work(struct work_struct *work)
{
	struct i915_vma_resource *vma_res =
		container_of(work, typeof(*vma_res), work);
	struct i915_address_space *vm = vma_res->vm;
	bool lockdep_cookie;

	lockdep_cookie = dma_fence_begin_signalling();
	if (likely(!vma_res->skip_pte_rewrite))
		vma_res->ops->unbind_vma(vm, vma_res);

	dma_fence_end_signalling(lockdep_cookie);
	__i915_vma_resource_unhold(vma_res);
	i915_vma_resource_put(vma_res);
}

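/*
 * Notify callback for the sw fence chaining the unbind after its
 * dependencies: on FENCE_COMPLETE either run the unbind synchronously
 * (immediate_unbind) or queue it on the unbound workqueue; on FENCE_FREE
 * drop the reference the sw fence held on the resource.
 */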
static int
i915_vma_resource_fence_notify(struct i915_sw_fence *fence,
			       enum i915_sw_fence_notify state)
{
	struct i915_vma_resource *vma_res =
		container_of(fence, typeof(*vma_res), chain);
	struct dma_fence *unbind_fence =
		&vma_res->unbind_fence;

	switch (state) {
	case FENCE_COMPLETE:
		dma_fence_get(unbind_fence);
		if (vma_res->immediate_unbind) {
			i915_vma_resource_unbind_work(&vma_res->work);
		} else {
			INIT_WORK(&vma_res->work, i915_vma_resource_unbind_work);
			queue_work(system_unbound_wq, &vma_res->work);
		}
		break;
	case FENCE_FREE:
		i915_vma_resource_put(vma_res);
		break;
	}

	return NOTIFY_DONE;
}

/**
 * i915_vma_resource_unbind - Unbind a vma resource
 * @vma_res: The vma resource to unbind.
 * @tlb: pointer to vma->obj->mm.tlb associated with the resource
 *	 to be stored at vma_res->tlb. When non-NULL, it will be used
 *	 to do TLB cache invalidation before freeing a VMA resource.
 *	 Used only for async unbind.
 *
 * At this point this function does little more than publish a fence that
 * signals immediately unless signaling is held back.
 *
 * Return: A refcounted pointer to a dma-fence that signals when unbinding is
 * complete.
 */
struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res,
					   u32 *tlb)
{
	struct i915_address_space *vm = vma_res->vm;

	vma_res->tlb = tlb;

	/* Reference for the sw fence */
	i915_vma_resource_get(vma_res);

	/* Caller must already have a wakeref in this case. */
	if (vma_res->needs_wakeref)
		vma_res->wakeref = intel_runtime_pm_get_if_in_use(&vm->i915->runtime_pm);

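	/*
	 * If the sw fence has no outstanding dependencies, the unbind runs
	 * synchronously from the commit below and the resource never needs
	 * tracking; otherwise insert it into the pending_unbind tree until
	 * the deferred worker has signaled the unbind fence.
	 */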
	if (atomic_read(&vma_res->chain.pending) <= 1) {
		RB_CLEAR_NODE(&vma_res->rb);
		vma_res->immediate_unbind = 1;
	} else {
		vma_res_itree_insert(vma_res, &vma_res->vm->pending_unbind);
	}

	i915_sw_fence_commit(&vma_res->chain);

	return &vma_res->unbind_fence;
}

/**
 * __i915_vma_resource_init - Initialize a vma resource.
 * @vma_res: The vma resource to initialize
 *
 * Initializes the private members of a vma resource.
 */
void __i915_vma_resource_init(struct i915_vma_resource *vma_res)
{
	mtx_init(&vma_res->lock, IPL_TTY);
	dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops,
		       &vma_res->lock, 0, 0);
	refcount_set(&vma_res->hold_count, 1);
	i915_sw_fence_init(&vma_res->chain, i915_vma_resource_fence_notify);
}

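/*
 * When the vm uses cache coloring, widen the range by one GTT page on
 * each side so that pending unbinds of the immediately neighbouring
 * nodes are included as well.
 */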
static void
i915_vma_resource_color_adjust_range(struct i915_address_space *vm,
				     u64 *start,
				     u64 *end)
{
	if (i915_vm_has_cache_coloring(vm)) {
		if (*start)
			*start -= I915_GTT_PAGE_SIZE;
		*end += I915_GTT_PAGE_SIZE;
	}
}

/**
 * i915_vma_resource_bind_dep_sync - Wait for / sync all unbinds touching a
 * certain vm range.
 * @vm: The vm to look at.
 * @offset: The range start.
 * @size: The range size.
 * @intr: Whether to wait interruptible.
 *
 * The function needs to be called with the vm lock held.
 *
 * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true
 */
int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm,
				    u64 offset,
				    u64 size,
				    bool intr)
{
	struct i915_vma_resource *node;
	u64 last = offset + size - 1;

	lockdep_assert_held(&vm->mutex);
	might_sleep();

	i915_vma_resource_color_adjust_range(vm, &offset, &last);
	node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
	while (node) {
		int ret = dma_fence_wait(&node->unbind_fence, intr);

		if (ret)
			return ret;

		node = vma_res_itree_iter_next(node, offset, last);
	}

	return 0;
}

/**
 * i915_vma_resource_bind_dep_sync_all - Wait for / sync all unbinds of a vm,
 * releasing the vm lock while waiting.
 * @vm: The vm to look at.
 *
 * The function may not be called with the vm lock held.
 * Typically this is called at vm destruction to finish any pending
 * unbind operations. The vm mutex is released while waiting to avoid
 * stalling kernel workqueues trying to grab the mutex.
 */
void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm)
{
	struct i915_vma_resource *node;
	struct dma_fence *fence;

	do {
		fence = NULL;
		mutex_lock(&vm->mutex);
		node = vma_res_itree_iter_first(&vm->pending_unbind, 0,
						U64_MAX);
		if (node)
			fence = dma_fence_get_rcu(&node->unbind_fence);
		mutex_unlock(&vm->mutex);

		if (fence) {
			/*
			 * The wait makes sure the node eventually removes
			 * itself from the tree.
			 */
			dma_fence_wait(fence, false);
			dma_fence_put(fence);
		}
	} while (node);
}

/**
 * i915_vma_resource_bind_dep_await - Have a struct i915_sw_fence await all
 * pending unbinds in a certain range of a vm.
 * @vm: The vm to look at.
 * @sw_fence: The struct i915_sw_fence that will be awaiting the unbinds.
 * @offset: The range start.
 * @size: The range size.
 * @intr: Whether to wait interruptible.
 * @gfp: Allocation mode for memory allocations.
 *
 * The function makes @sw_fence await all pending unbinds in a certain
 * vm range before calling the complete notifier. To be able to await
 * each individual unbind, the function needs to allocate memory using
 * the @gfp allocation mode. If that fails, the function will instead
 * wait for the unbind fence to signal, using @intr to judge whether to
 * wait interruptible or not. Note that @gfp should ideally be selected so
 * as to avoid any expensive memory allocation stalls and rather fail and
 * synchronize itself. For now the vm mutex is required when calling this
 * function, which means that @gfp can't call into direct reclaim. In reality
 * this means that during heavy memory pressure, we will sync in this
 * function.
 *
 * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true
 */
int i915_vma_resource_bind_dep_await(struct i915_address_space *vm,
				     struct i915_sw_fence *sw_fence,
				     u64 offset,
				     u64 size,
				     bool intr,
				     gfp_t gfp)
{
	struct i915_vma_resource *node;
	u64 last = offset + size - 1;

	lockdep_assert_held(&vm->mutex);
	might_alloc(gfp);
	might_sleep();

	i915_vma_resource_color_adjust_range(vm, &offset, &last);
	node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
	while (node) {
		int ret;

		ret = i915_sw_fence_await_dma_fence(sw_fence,
						    &node->unbind_fence,
						    0, gfp);
		if (ret < 0) {
			ret = dma_fence_wait(&node->unbind_fence, intr);
			if (ret)
				return ret;
		}

		node = vma_res_itree_iter_next(node, offset, last);
	}

	return 0;
}

void i915_vma_resource_module_exit(void)
{
#ifdef __linux__
	kmem_cache_destroy(slab_vma_resources);
#else
	pool_destroy(&slab_vma_resources);
#endif
}

int __init i915_vma_resource_module_init(void)
{
#ifdef __linux__
	slab_vma_resources = KMEM_CACHE(i915_vma_resource, SLAB_HWCACHE_ALIGN);
	if (!slab_vma_resources)
		return -ENOMEM;
#else
	pool_init(&slab_vma_resources, sizeof(struct i915_vma_resource),
	    0, IPL_NONE, 0, "svmar", NULL);
#endif

	return 0;
}