xref: /dragonfly/sys/dev/drm/i915/i915_gem_userptr.c (revision 0fe46dc6)
1 /*
2  * Copyright © 2012-2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include <drm/drmP.h>
26 #include <drm/i915_drm.h>
27 #include "i915_drv.h"
28 #include "i915_trace.h"
29 #include "intel_drv.h"
30 
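/*
 * One i915_mm_struct is kept per client mm_struct: it is shared (via the
 * kref) by every userptr object created by that process and is torn down
 * by a worker once the last reference is dropped, see
 * __i915_mm_struct_free() below.
 */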
31 struct i915_mm_struct {
32 	struct mm_struct *mm;
33 	struct drm_device *dev;
34 	struct i915_mmu_notifier *mn;
35 	struct hlist_node node;
36 	struct kref kref;
37 	struct work_struct work;
38 };
39 
40 #if defined(CONFIG_MMU_NOTIFIER)
41 #include <linux/interval_tree.h>
42 
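/*
 * One notifier per mm.  Objects are normally tracked in an interval tree
 * keyed on their userptr range so invalidations can be looked up quickly;
 * if a client creates overlapping ranges, has_linear is set and the plain
 * linear list is walked instead (see i915_mmu_notifier_add()).
 */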
43 struct i915_mmu_notifier {
44 	spinlock_t lock;
45 	struct hlist_node node;
46 	struct mmu_notifier mn;
47 	struct rb_root objects;
48 	struct list_head linear;
49 	bool has_linear;
50 };
51 
52 struct i915_mmu_object {
53 	struct i915_mmu_notifier *mn;
54 	struct interval_tree_node it;
55 	struct list_head link;
56 	struct drm_i915_gem_object *obj;
57 	struct work_struct work;
58 	bool active;
59 	bool is_linear;
60 };
61 
62 static void __cancel_userptr__worker(struct work_struct *work)
63 {
64 	struct i915_mmu_object *mo = container_of(work, typeof(*mo), work);
65 	struct drm_i915_gem_object *obj = mo->obj;
66 	struct drm_device *dev = obj->base.dev;
67 
68 	mutex_lock(&dev->struct_mutex);
69 	/* Cancel any active worker and force us to re-evaluate gup */
70 	obj->userptr.work = NULL;
71 
72 	if (obj->pages != NULL) {
73 		struct drm_i915_private *dev_priv = to_i915(dev);
74 		struct i915_vma *vma, *tmp;
75 		bool was_interruptible;
76 
77 		was_interruptible = dev_priv->mm.interruptible;
78 		dev_priv->mm.interruptible = false;
79 
80 		list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) {
81 			int ret = i915_vma_unbind(vma);
82 			WARN_ON(ret && ret != -EIO);
83 		}
84 		WARN_ON(i915_gem_object_put_pages(obj));
85 
86 		dev_priv->mm.interruptible = was_interruptible;
87 	}
88 
89 	drm_gem_object_unreference(&obj->base);
90 	mutex_unlock(&dev->struct_mutex);
91 }
92 
93 static unsigned long cancel_userptr(struct i915_mmu_object *mo)
94 {
95 	unsigned long end = mo->obj->userptr.ptr + mo->obj->base.size;
96 
97 	/* The mmu_object is released late when destroying the
98 	 * GEM object so it is entirely possible to gain a
99 	 * reference on an object in the process of being freed
100 	 * since our serialisation is via the spinlock and not
101 	 * the struct_mutex - and consequently use it after it
102 	 * is freed and then double free it.
103 	 */
104 	if (mo->active && kref_get_unless_zero(&mo->obj->base.refcount)) {
105 		schedule_work(&mo->work);
106 		/* only schedule one work packet to avoid the refleak */
107 		mo->active = false;
108 	}
109 
110 	return end;
111 }
112 
113 static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
114 						       struct mm_struct *mm,
115 						       unsigned long start,
116 						       unsigned long end)
117 {
118 	struct i915_mmu_notifier *mn =
119 		container_of(_mn, struct i915_mmu_notifier, mn);
120 	struct i915_mmu_object *mo;
121 
122 	/* interval ranges are inclusive, but invalidate range is exclusive */
123 	end--;
124 
125 	spin_lock(&mn->lock);
126 	if (mn->has_linear) {
127 		list_for_each_entry(mo, &mn->linear, link) {
128 			if (mo->it.last < start || mo->it.start > end)
129 				continue;
130 
131 			cancel_userptr(mo);
132 		}
133 	} else {
134 		struct interval_tree_node *it;
135 
136 		it = interval_tree_iter_first(&mn->objects, start, end);
137 		while (it) {
138 			mo = container_of(it, struct i915_mmu_object, it);
139 			start = cancel_userptr(mo);
140 			it = interval_tree_iter_next(it, start, end);
141 		}
142 	}
143 	spin_unlock(&mn->lock);
144 }
145 
146 static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
147 	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
148 };
149 
150 static struct i915_mmu_notifier *
151 i915_mmu_notifier_create(struct mm_struct *mm)
152 {
153 	struct i915_mmu_notifier *mn;
154 	int ret;
155 
156 	mn = kmalloc(sizeof(*mn), GFP_KERNEL);
157 	if (mn == NULL)
158 		return ERR_PTR(-ENOMEM);
159 
160 	spin_lock_init(&mn->lock);
161 	mn->mn.ops = &i915_gem_userptr_notifier;
162 	mn->objects = RB_ROOT;
163 	INIT_LIST_HEAD(&mn->linear);
164 	mn->has_linear = false;
165 
166 	 /* Protected by mmap_sem (write-lock) */
167 	ret = __mmu_notifier_register(&mn->mn, mm);
168 	if (ret) {
169 		kfree(mn);
170 		return ERR_PTR(ret);
171 	}
172 
173 	return mn;
174 }
175 
176 static int
177 i915_mmu_notifier_add(struct drm_device *dev,
178 		      struct i915_mmu_notifier *mn,
179 		      struct i915_mmu_object *mo)
180 {
181 	struct interval_tree_node *it;
182 	int ret = 0;
183 
184 	/* By this point we have already done a lot of expensive setup that
185 	 * we do not want to repeat just because the caller (e.g. X) has a
186 	 * signal pending (and partly because of that expensive setup, X
187 	 * using an interval timer is likely to get stuck in an EINTR loop).
188 	 */
189 	mutex_lock(&dev->struct_mutex);
190 
191 	/* Make sure we drop the final active reference (and thereby
192 	 * remove the objects from the interval tree) before we do
193 	 * the check for overlapping objects.
194 	 */
195 	i915_gem_retire_requests(dev);
196 
197 	spin_lock(&mn->lock);
198 	it = interval_tree_iter_first(&mn->objects,
199 				      mo->it.start, mo->it.last);
200 	if (it) {
201 		struct drm_i915_gem_object *obj;
202 
203 		/* We only need to check the first object in the range as it
204 		 * either has cancelled gup work queued and we need to
205 		 * return back to the user to give time for the gup-workers
206 		 * to flush their object references upon which the object will
207 		 * be removed from the interval-tree, or the range is
208 		 * still in use by another client and the overlap is invalid.
209 		 *
210 		 * If we do have an overlap, we cannot use the interval tree
211 		 * for fast range invalidation.
212 		 */
213 
214 		obj = container_of(it, struct i915_mmu_object, it)->obj;
215 		if (!obj->userptr.workers)
216 			mn->has_linear = mo->is_linear = true;
217 		else
218 			ret = -EAGAIN;
219 	} else
220 		interval_tree_insert(&mo->it, &mn->objects);
221 
222 	if (ret == 0)
223 		list_add(&mo->link, &mn->linear);
224 
225 	spin_unlock(&mn->lock);
226 	mutex_unlock(&dev->struct_mutex);
227 
228 	return ret;
229 }
230 
231 static bool i915_mmu_notifier_has_linear(struct i915_mmu_notifier *mn)
232 {
233 	struct i915_mmu_object *mo;
234 
235 	list_for_each_entry(mo, &mn->linear, link)
236 		if (mo->is_linear)
237 			return true;
238 
239 	return false;
240 }
241 
242 static void
243 i915_mmu_notifier_del(struct i915_mmu_notifier *mn,
244 		      struct i915_mmu_object *mo)
245 {
246 	spin_lock(&mn->lock);
247 	list_del(&mo->link);
248 	if (mo->is_linear)
249 		mn->has_linear = i915_mmu_notifier_has_linear(mn);
250 	else
251 		interval_tree_remove(&mo->it, &mn->objects);
252 	spin_unlock(&mn->lock);
253 }
254 
255 static void
256 i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
257 {
258 	struct i915_mmu_object *mo;
259 
260 	mo = obj->userptr.mmu_object;
261 	if (mo == NULL)
262 		return;
263 
264 	i915_mmu_notifier_del(mo->mn, mo);
265 	kfree(mo);
266 
267 	obj->userptr.mmu_object = NULL;
268 }
269 
270 static struct i915_mmu_notifier *
271 i915_mmu_notifier_find(struct i915_mm_struct *mm)
272 {
273 	struct i915_mmu_notifier *mn;
274 
275 	mn = mm->mn;
276 	if (mn)
277 		return mn;
278 
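	/* Slow path: __mmu_notifier_register() must be called with mmap_sem
	 * held for write, and mm_lock serialises creation.  Re-check mm->mn
	 * under the locks so that only one notifier is ever created per mm.
	 */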
279 	down_write(&mm->mm->mmap_sem);
280 	mutex_lock(&to_i915(mm->dev)->mm_lock);
281 	if ((mn = mm->mn) == NULL) {
282 		mn = i915_mmu_notifier_create(mm->mm);
283 		if (!IS_ERR(mn))
284 			mm->mn = mn;
285 	}
286 	mutex_unlock(&to_i915(mm->dev)->mm_lock);
287 	up_write(&mm->mm->mmap_sem);
288 
289 	return mn;
290 }
291 
292 static int
293 i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
294 				    unsigned flags)
295 {
296 	struct i915_mmu_notifier *mn;
297 	struct i915_mmu_object *mo;
298 	int ret;
299 
300 	if (flags & I915_USERPTR_UNSYNCHRONIZED)
301 		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
302 
303 	if (WARN_ON(obj->userptr.mm == NULL))
304 		return -EINVAL;
305 
306 	mn = i915_mmu_notifier_find(obj->userptr.mm);
307 	if (IS_ERR(mn))
308 		return PTR_ERR(mn);
309 
310 	mo = kzalloc(sizeof(*mo), GFP_KERNEL);
311 	if (mo == NULL)
312 		return -ENOMEM;
313 
314 	mo->mn = mn;
315 	mo->it.start = obj->userptr.ptr;
316 	mo->it.last = mo->it.start + obj->base.size - 1;
317 	mo->obj = obj;
318 	INIT_WORK(&mo->work, __cancel_userptr__worker);
319 
320 	ret = i915_mmu_notifier_add(obj->base.dev, mn, mo);
321 	if (ret) {
322 		kfree(mo);
323 		return ret;
324 	}
325 
326 	obj->userptr.mmu_object = mo;
327 	return 0;
328 }
329 
330 static void
331 i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
332 		       struct mm_struct *mm)
333 {
334 	if (mn == NULL)
335 		return;
336 
337 	mmu_notifier_unregister(&mn->mn, mm);
338 	kfree(mn);
339 }
340 
341 #else
342 
343 #if 0
344 static void
345 i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
346 {
347 }
348 
349 static int
350 i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
351 				    unsigned flags)
352 {
353 	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
354 		return -ENODEV;
355 
356 	if (!capable(CAP_SYS_ADMIN))
357 		return -EPERM;
358 
359 	return 0;
360 }
361 
362 static void
363 i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
364 		       struct mm_struct *mm)
365 {
366 }
367 #endif
368 
369 #endif
370 
371 #if 0
372 static struct i915_mm_struct *
373 __i915_mm_struct_find(struct drm_i915_private *dev_priv, struct mm_struct *real)
374 {
375 	struct i915_mm_struct *mm;
376 
377 	/* Protected by dev_priv->mm_lock */
378 	hash_for_each_possible(dev_priv->mm_structs, mm, node, (unsigned long)real)
379 		if (mm->mm == real)
380 			return mm;
381 
382 	return NULL;
383 }
384 
385 static int
386 i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj)
387 {
388 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
389 	struct i915_mm_struct *mm;
390 	int ret = 0;
391 
392 	/* During release of the GEM object we hold the struct_mutex. This
393 	 * precludes us from calling mmput() at that time as that may be
394 	 * the last reference and so call exit_mmap(). exit_mmap() will
395 	 * attempt to reap the vma, and if we were holding a GTT mmap
396 	 * would then call drm_gem_vm_close() and attempt to reacquire
397 	 * the struct mutex. So in order to avoid that recursion, we have
398 	 * to defer releasing the mm reference until after we drop the
399 	 * struct_mutex, i.e. we need to schedule a worker to do the clean
400 	 * up.
401 	 */
402 	mutex_lock(&dev_priv->mm_lock);
403 	mm = __i915_mm_struct_find(dev_priv, current->mm);
404 	if (mm == NULL) {
405 		mm = kmalloc(sizeof(*mm), GFP_KERNEL);
406 		if (mm == NULL) {
407 			ret = -ENOMEM;
408 			goto out;
409 		}
410 
411 		kref_init(&mm->kref);
412 		mm->dev = obj->base.dev;
413 
414 		mm->mm = current->mm;
415 		atomic_inc(&current->mm->mm_count);
416 
417 		mm->mn = NULL;
418 
419 		/* Protected by dev_priv->mm_lock */
420 		hash_add(dev_priv->mm_structs,
421 			 &mm->node, (unsigned long)mm->mm);
422 	} else
423 		kref_get(&mm->kref);
424 
425 	obj->userptr.mm = mm;
426 out:
427 	mutex_unlock(&dev_priv->mm_lock);
428 	return ret;
429 }
430 
431 static void
432 __i915_mm_struct_free__worker(struct work_struct *work)
433 {
434 	struct i915_mm_struct *mm = container_of(work, typeof(*mm), work);
435 	i915_mmu_notifier_free(mm->mn, mm->mm);
436 	mmdrop(mm->mm);
437 	kfree(mm);
438 }
439 
440 static void
441 __i915_mm_struct_free(struct kref *kref)
442 {
443 	struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref);
444 
445 	/* Protected by dev_priv->mm_lock */
446 	hash_del(&mm->node);
447 	mutex_unlock(&to_i915(mm->dev)->mm_lock);
448 
449 	INIT_WORK(&mm->work, __i915_mm_struct_free__worker);
450 	schedule_work(&mm->work);
451 }
452 
453 static void
454 i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj)
455 {
456 	if (obj->userptr.mm == NULL)
457 		return;
458 
459 	kref_put_mutex(&obj->userptr.mm->kref,
460 		       __i915_mm_struct_free,
461 		       &to_i915(obj->base.dev)->mm_lock);
462 	obj->userptr.mm = NULL;
463 }
464 #endif
465 
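/*
 * State handed to the deferred get_user_pages() worker: the object whose
 * backing store is being populated and the task whose address space the
 * pages are pinned from.
 */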
466 struct get_pages_work {
467 	struct work_struct work;
468 	struct drm_i915_gem_object *obj;
469 	struct task_struct *task;
470 };
471 
472 #if IS_ENABLED(CONFIG_SWIOTLB)
473 #define swiotlb_active() swiotlb_nr_tbl()
474 #else
475 #define swiotlb_active() 0
476 #endif
477 
478 #if 0
479 static int
480 st_set_pages(struct sg_table **st, struct page **pvec, int num_pages)
481 {
482 	struct scatterlist *sg;
483 	int ret, n;
484 
485 	*st = kmalloc(sizeof(**st), GFP_KERNEL);
486 	if (*st == NULL)
487 		return -ENOMEM;
488 
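	/* With swiotlb active we keep one page per sg entry; the larger
	 * coalesced segments built by sg_alloc_table_from_pages() may be
	 * too big for the bounce buffers to remap.
	 */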
489 	if (swiotlb_active()) {
490 		ret = sg_alloc_table(*st, num_pages, GFP_KERNEL);
491 		if (ret)
492 			goto err;
493 
494 		for_each_sg((*st)->sgl, sg, num_pages, n)
495 			sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
496 	} else {
497 		ret = sg_alloc_table_from_pages(*st, pvec, num_pages,
498 						0, num_pages << PAGE_SHIFT,
499 						GFP_KERNEL);
500 		if (ret)
501 			goto err;
502 	}
503 
504 	return 0;
505 
506 err:
507 	kfree(*st);
508 	*st = NULL;
509 	return ret;
510 }
511 
512 static int
513 __i915_gem_userptr_set_pages(struct drm_i915_gem_object *obj,
514 			     struct page **pvec, int num_pages)
515 {
516 	int ret;
517 
518 	ret = st_set_pages(&obj->pages, pvec, num_pages);
519 	if (ret)
520 		return ret;
521 
522 	ret = i915_gem_gtt_prepare_object(obj);
523 	if (ret) {
524 		sg_free_table(obj->pages);
525 		kfree(obj->pages);
526 		obj->pages = NULL;
527 	}
528 
529 	return ret;
530 }
531 
532 static int
533 __i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
534 			      bool value)
535 {
536 	int ret = 0;
537 
538 	/* During mm_invalidate_range we need to cancel any userptr that
539 	 * overlaps the range being invalidated. Doing so requires the
540 	 * struct_mutex, and that risks recursion. In order to cause
541 	 * recursion, the user must alias the userptr address space with
542 	 * a GTT mmapping (possible with a MAP_FIXED) - then when we have
543 	 * to invalidate that mmapping, mm_invalidate_range is called with
544 	 * the userptr address *and* the struct_mutex held.  To prevent that
545 	 * we set a flag under the i915_mmu_notifier spinlock to indicate
546 	 * whether this object is valid.
547 	 */
548 #if defined(CONFIG_MMU_NOTIFIER)
549 	if (obj->userptr.mmu_object == NULL)
550 		return 0;
551 
552 	spin_lock(&obj->userptr.mmu_object->mn->lock);
553 	/* In order to serialise get_pages with an outstanding
554 	 * cancel_userptr, we must drop the struct_mutex and try again.
555 	 */
556 	if (!value || !work_pending(&obj->userptr.mmu_object->work))
557 		obj->userptr.mmu_object->active = value;
558 	else
559 		ret = -EAGAIN;
560 	spin_unlock(&obj->userptr.mmu_object->mn->lock);
561 #endif
562 
563 	return ret;
564 }
565 
566 static void
567 __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
568 {
569 	struct get_pages_work *work = container_of(_work, typeof(*work), work);
570 	struct drm_i915_gem_object *obj = work->obj;
571 	struct drm_device *dev = obj->base.dev;
572 	const int npages = obj->base.size >> PAGE_SHIFT;
573 	struct page **pvec;
574 	int pinned, ret;
575 
576 	ret = -ENOMEM;
577 	pinned = 0;
578 
579 	pvec = kmalloc(npages*sizeof(struct page *),
580 		       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
581 	if (pvec == NULL)
582 		pvec = drm_malloc_ab(npages, sizeof(struct page *));
583 	if (pvec != NULL) {
584 		struct mm_struct *mm = obj->userptr.mm->mm;
585 
586 		down_read(&mm->mmap_sem);
587 		while (pinned < npages) {
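		/* get_user_pages() may pin fewer pages than requested, so
		 * loop until the whole range is pinned or an error is
		 * returned.
		 */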
588 			ret = get_user_pages(work->task, mm,
589 					     obj->userptr.ptr + pinned * PAGE_SIZE,
590 					     npages - pinned,
591 					     !obj->userptr.read_only, 0,
592 					     pvec + pinned, NULL);
593 			if (ret < 0)
594 				break;
595 
596 			pinned += ret;
597 		}
598 		up_read(&mm->mmap_sem);
599 	}
600 
601 	mutex_lock(&dev->struct_mutex);
602 	if (obj->userptr.work == &work->work) {
603 		if (pinned == npages) {
604 			ret = __i915_gem_userptr_set_pages(obj, pvec, npages);
605 			if (ret == 0) {
606 				list_add_tail(&obj->global_list,
607 					      &to_i915(dev)->mm.unbound_list);
608 				obj->get_page.sg = obj->pages->sgl;
609 				obj->get_page.last = 0;
610 				pinned = 0;
611 			}
612 		}
613 		obj->userptr.work = ERR_PTR(ret);
614 		if (ret)
615 			__i915_gem_userptr_set_active(obj, false);
616 	}
617 
618 	obj->userptr.workers--;
619 	drm_gem_object_unreference(&obj->base);
620 	mutex_unlock(&dev->struct_mutex);
621 
622 	release_pages(pvec, pinned, 0);
623 	drm_free_large(pvec);
624 
625 	put_task_struct(work->task);
626 	kfree(work);
627 }
628 
629 static int
630 __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
631 				      bool *active)
632 {
633 	struct get_pages_work *work;
634 
635 	/* Spawn a worker so that we can acquire the
636 	 * user pages without holding our mutex. Access
637 	 * to the user pages requires mmap_sem, and we have
638 	 * a strict lock ordering of mmap_sem, struct_mutex -
639 	 * we already hold struct_mutex here and so cannot
640 	 * call gup without encountering a lock inversion.
641 	 *
642 	 * Userspace will keep on repeating the operation
643 	 * (thanks to EAGAIN) until either we hit the fast
644 	 * path or the worker completes. If the worker is
645 	 * cancelled or superseded, the task is still run
646 	 * but the results ignored. (This leads to
647 	 * complications that we may have a stray object
648 	 * refcount that we need to be wary of when
649 	 * checking for existing objects during creation.)
650 	 * If the worker encounters an error, it reports
651 	 * that error back to this function through
652 	 * obj->userptr.work = ERR_PTR.
653 	 */
654 	if (obj->userptr.workers >= I915_GEM_USERPTR_MAX_WORKERS)
655 		return -EAGAIN;
656 
657 	work = kmalloc(sizeof(*work), GFP_KERNEL);
658 	if (work == NULL)
659 		return -ENOMEM;
660 
661 	obj->userptr.work = &work->work;
662 	obj->userptr.workers++;
663 
664 	work->obj = obj;
665 	drm_gem_object_reference(&obj->base);
666 
667 	work->task = current;
668 	get_task_struct(work->task);
669 
670 	INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
671 	schedule_work(&work->work);
672 
673 	*active = true;
674 	return -EAGAIN;
675 }
676 
677 static int
678 i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
679 {
680 	const int num_pages = obj->base.size >> PAGE_SHIFT;
681 	struct page **pvec;
682 	int pinned, ret;
683 	bool active;
684 
685 	/* If userspace should engineer that these pages are replaced in
686 	 * the vma between us binding this page into the GTT and completion
687 	 * of rendering... Their loss. If they change the mapping of their
688 	 * pages they need to create a new bo to point to the new vma.
689 	 *
690 	 * However, that still leaves open the possibility of the vma
691 	 * being copied upon fork. Which falls under the same userspace
692 	 * synchronisation issue as a regular bo, except that this time
693 	 * the process may not be expecting that a particular piece of
694 	 * memory is tied to the GPU.
695 	 *
696 	 * Fortunately, we can hook into the mmu_notifier in order to
697 	 * discard the page references prior to anything nasty happening
698 	 * to the vma (discard or cloning) which should prevent the more
699 	 * egregious cases from causing harm.
700 	 */
701 	if (IS_ERR(obj->userptr.work)) {
702 		/* active flag will have been dropped already by the worker */
703 		ret = PTR_ERR(obj->userptr.work);
704 		obj->userptr.work = NULL;
705 		return ret;
706 	}
707 	if (obj->userptr.work)
708 		/* active flag should still be held for the pending work */
709 		return -EAGAIN;
710 
711 	/* Let the mmu-notifier know that we have begun and need cancellation */
712 	ret = __i915_gem_userptr_set_active(obj, true);
713 	if (ret)
714 		return ret;
715 
716 	pvec = NULL;
717 	pinned = 0;
718 	if (obj->userptr.mm->mm == current->mm) {
719 		pvec = kmalloc(num_pages*sizeof(struct page *),
720 			       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
721 		if (pvec == NULL) {
722 			pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
723 			if (pvec == NULL) {
724 				__i915_gem_userptr_set_active(obj, false);
725 				return -ENOMEM;
726 			}
727 		}
728 
729 		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
730 					       !obj->userptr.read_only, pvec);
731 	}
732 
733 	active = false;
734 	if (pinned < 0)
735 		ret = pinned, pinned = 0;
736 	else if (pinned < num_pages)
737 		ret = __i915_gem_userptr_get_pages_schedule(obj, &active);
738 	else
739 		ret = __i915_gem_userptr_set_pages(obj, pvec, num_pages);
740 	if (ret) {
741 		__i915_gem_userptr_set_active(obj, active);
742 		release_pages(pvec, pinned, 0);
743 	}
744 	drm_free_large(pvec);
745 	return ret;
746 }
747 
748 static void
749 i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
750 {
751 	struct sg_page_iter sg_iter;
752 
753 	BUG_ON(obj->userptr.work != NULL);
754 	__i915_gem_userptr_set_active(obj, false);
755 
756 	if (obj->madv != I915_MADV_WILLNEED)
757 		obj->dirty = 0;
758 
759 	i915_gem_gtt_finish_object(obj);
760 
761 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
762 		struct page *page = sg_page_iter_page(&sg_iter);
763 
764 		if (obj->dirty)
765 			set_page_dirty(page);
766 
767 		mark_page_accessed(page);
768 		page_cache_release(page);
769 	}
770 	obj->dirty = 0;
771 
772 	sg_free_table(obj->pages);
773 	kfree(obj->pages);
774 }
775 
776 static void
777 i915_gem_userptr_release(struct drm_i915_gem_object *obj)
778 {
779 	i915_gem_userptr_release__mmu_notifier(obj);
780 	i915_gem_userptr_release__mm_struct(obj);
781 }
782 
783 static int
784 i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
785 {
786 	if (obj->userptr.mmu_object)
787 		return 0;
788 
789 	return i915_gem_userptr_init__mmu_notifier(obj, 0);
790 }
791 
792 static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
793 	.dmabuf_export = i915_gem_userptr_dmabuf_export,
794 	.get_pages = i915_gem_userptr_get_pages,
795 	.put_pages = i915_gem_userptr_put_pages,
796 	.release = i915_gem_userptr_release,
797 };
798 
799 /**
800  * Creates a new mm object that wraps some normal memory from the process
801  * context - user memory.
802  *
803  * We impose several restrictions upon the memory being mapped
804  * into the GPU.
805  * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
806  * 2. It must be normal system memory, not a pointer into another map of IO
807  *    space (e.g. it must not be a GTT mmapping of another object).
808  * 3. We only allow a bo as large as we could in theory map into the GTT,
809  *    that is we limit the size to the total size of the GTT.
810  * 4. The bo is marked as being snoopable. The backing pages are left
811  *    accessible directly by the CPU, but reads and writes by the GPU may
812  *    incur the cost of a snoop (unless you have an LLC architecture).
813  *
814  * Synchronisation between multiple users and the GPU is left to userspace
815  * through the normal set-domain-ioctl. The kernel will enforce that the
816  * GPU relinquishes the VMA before it is returned back to the system
817  * i.e. upon free(), munmap() or process termination. However, the userspace
818  * malloc() library may not immediately relinquish the VMA after free() and
819  * instead reuse it whilst the GPU is still reading and writing to the VMA.
820  * Caveat emptor.
821  *
822  * Also note, that the object created here is not currently a "first class"
823  * object, in that several ioctls are banned. These are the CPU access
824  * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
825  * direct access via your pointer rather than use those ioctls. Another
826  * restriction is that we do not allow userptr surfaces to be pinned to the
827  * hardware and so we reject any attempt to create a framebuffer out of a
828  * userptr.
829  *
830  * If you think this is a good interface to use to pass GPU memory between
831  * drivers, please use dma-buf instead. In fact, wherever possible use
832  * dma-buf instead.
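 *
 * As a rough illustration (not taken from this file; drmIoctl() is the
 * usual libdrm wrapper and the buffer is assumed to be page-aligned),
 * userspace would do something like:
 *
 *	struct drm_i915_gem_userptr arg = {
 *		.user_ptr = (uintptr_t)ptr,
 *		.user_size = size,
 *		.flags = 0,
 *	};
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg) == 0)
 *		bo_handle = arg.handle;	/* use like any other GEM handle */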
833  */
834 int
835 i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
836 {
837 	struct drm_i915_gem_userptr *args = data;
838 	struct drm_i915_gem_object *obj;
839 	int ret;
840 	u32 handle;
841 
842 	if (args->flags & ~(I915_USERPTR_READ_ONLY |
843 			    I915_USERPTR_UNSYNCHRONIZED))
844 		return -EINVAL;
845 
846 	if (offset_in_page(args->user_ptr | args->user_size))
847 		return -EINVAL;
848 
849 	if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE,
850 		       (char __user *)(unsigned long)args->user_ptr, args->user_size))
851 		return -EFAULT;
852 
853 	if (args->flags & I915_USERPTR_READ_ONLY) {
854 		/* On almost all of the current hw, we cannot tell the GPU that a
855 		 * page is readonly, so this is just a placeholder in the uAPI.
856 		 */
857 		return -ENODEV;
858 	}
859 
860 	obj = i915_gem_object_alloc(dev);
861 	if (obj == NULL)
862 		return -ENOMEM;
863 
864 	drm_gem_private_object_init(dev, &obj->base, args->user_size);
865 	i915_gem_object_init(obj, &i915_gem_userptr_ops);
866 	obj->cache_level = I915_CACHE_LLC;
867 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
868 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
869 
870 	obj->userptr.ptr = args->user_ptr;
871 	obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
872 
873 	/* And keep a pointer to the current->mm for resolving the user pages
874 	 * at binding. This means that we need to hook into the mmu_notifier
875 	 * in order to detect if the mmu is destroyed.
876 	 */
877 	ret = i915_gem_userptr_init__mm_struct(obj);
878 	if (ret == 0)
879 		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
880 	if (ret == 0)
881 		ret = drm_gem_handle_create(file, &obj->base, &handle);
882 
883 	/* drop reference from allocate - handle holds it now */
884 	drm_gem_object_unreference_unlocked(&obj->base);
885 	if (ret)
886 		return ret;
887 
888 	args->handle = handle;
889 	return 0;
890 }
891 #endif
892 
893 int
894 i915_gem_init_userptr(struct drm_device *dev)
895 {
896 	struct drm_i915_private *dev_priv = to_i915(dev);
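	/* DragonFly uses lockinit() here where the Linux original calls
	 * mutex_init() on mm_lock; the mm_structs hash that lock protects
	 * is currently compiled out (#if 0) in this port.
	 */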
897 	lockinit(&dev_priv->mm_lock, "i915dmm", 0, LK_CANRECURSE);
898 #if 0
899 	hash_init(dev_priv->mm_structs);
900 #endif
901 	return 0;
902 }
903