xref: /dragonfly/sys/dev/drm/i915/i915_gem_userptr.c (revision 279dd846)
1 /*
2  * Copyright © 2012-2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include <drm/drmP.h>
26 #include <drm/i915_drm.h>
27 #include "i915_drv.h"
28 #include "i915_trace.h"
29 #include "intel_drv.h"
30 
31 #if defined(CONFIG_MMU_NOTIFIER)
32 #include <linux/interval_tree.h>
33 
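/*
 * Per-mm bookkeeping for userptr objects: one notifier is registered
 * against each mm_struct and tracks every userptr object created from
 * that address space, either in an interval tree keyed by the user
 * virtual range or, when ranges overlap, on the linear fallback list.
 */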
34 struct i915_mmu_notifier {
35 	spinlock_t lock;
36 	struct hlist_node node;
37 	struct mmu_notifier mn;
38 	struct rb_root objects;
39 	struct list_head linear;
40 	struct drm_device *dev;
41 	struct mm_struct *mm;
42 	struct work_struct work;
43 	unsigned long count;
44 	unsigned long serial;
45 	bool has_linear;
46 };
47 
48 struct i915_mmu_object {
49 	struct i915_mmu_notifier *mmu;
50 	struct interval_tree_node it;
51 	struct list_head link;
52 	struct drm_i915_gem_object *obj;
53 	bool is_linear;
54 };
55 
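/*
 * Drop the GPU's hold on a userptr object: cancel any pending
 * get-user-pages worker, unbind all of its VMAs and release its backing
 * pages under struct_mutex, then drop the reference taken by the caller.
 * Returns the end of the object's user virtual range so the invalidate
 * loop can continue from there.
 */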
56 static unsigned long cancel_userptr(struct drm_i915_gem_object *obj)
57 {
58 	struct drm_device *dev = obj->base.dev;
59 	unsigned long end;
60 
61 	mutex_lock(&dev->struct_mutex);
62 	/* Cancel any active worker and force us to re-evaluate gup */
63 	obj->userptr.work = NULL;
64 
65 	if (obj->pages != NULL) {
66 		struct drm_i915_private *dev_priv = to_i915(dev);
67 		struct i915_vma *vma, *tmp;
68 		bool was_interruptible;
69 
70 		was_interruptible = dev_priv->mm.interruptible;
71 		dev_priv->mm.interruptible = false;
72 
73 		list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) {
74 			int ret = i915_vma_unbind(vma);
75 			WARN_ON(ret && ret != -EIO);
76 		}
77 		WARN_ON(i915_gem_object_put_pages(obj));
78 
79 		dev_priv->mm.interruptible = was_interruptible;
80 	}
81 
82 	end = obj->userptr.ptr + obj->base.size;
83 
84 	drm_gem_object_unreference(&obj->base);
85 	mutex_unlock(&dev->struct_mutex);
86 
87 	return end;
88 }
89 
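/*
 * Invalidate every object on the linear list that overlaps [start, end].
 * mn->lock is dropped around cancel_userptr(), so if the list changes
 * underneath us (detected via the serial number) the walk is restarted.
 */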
90 static void *invalidate_range__linear(struct i915_mmu_notifier *mn,
91 				      struct mm_struct *mm,
92 				      unsigned long start,
93 				      unsigned long end)
94 {
95 	struct i915_mmu_object *mmu;
96 	unsigned long serial;
97 
98 restart:
99 	serial = mn->serial;
100 	list_for_each_entry(mmu, &mn->linear, link) {
101 		struct drm_i915_gem_object *obj;
102 
103 		if (mmu->it.last < start || mmu->it.start > end)
104 			continue;
105 
106 		obj = mmu->obj;
107 		drm_gem_object_reference(&obj->base);
108 		spin_unlock(&mn->lock);
109 
110 		cancel_userptr(obj);
111 
112 		spin_lock(&mn->lock);
113 		if (serial != mn->serial)
114 			goto restart;
115 	}
116 
117 	return NULL;
118 }
119 
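/*
 * mmu_notifier callback: a range of the process address space is about
 * to be invalidated (unmapped, remapped, etc.), so cancel every userptr
 * object overlapping it before the CPU page tables change.
 */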
120 static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
121 						       struct mm_struct *mm,
122 						       unsigned long start,
123 						       unsigned long end)
124 {
125 	struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn);
126 	struct interval_tree_node *it = NULL;
127 	unsigned long next = start;
128 	unsigned long serial = 0;
129 
130 	end--; /* interval ranges are inclusive, but invalidate range is exclusive */
131 	while (next < end) {
132 		struct drm_i915_gem_object *obj = NULL;
133 
134 		spin_lock(&mn->lock);
135 		if (mn->has_linear)
136 			it = invalidate_range__linear(mn, mm, start, end);
137 		else if (serial == mn->serial)
138 			it = interval_tree_iter_next(it, next, end);
139 		else
140 			it = interval_tree_iter_first(&mn->objects, start, end);
141 		if (it != NULL) {
142 			obj = container_of(it, struct i915_mmu_object, it)->obj;
143 			drm_gem_object_reference(&obj->base);
144 			serial = mn->serial;
145 		}
146 		spin_unlock(&mn->lock);
147 		if (obj == NULL)
148 			return;
149 
150 		next = cancel_userptr(obj);
151 	}
152 }
153 
154 static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
155 	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
156 };
157 
158 static struct i915_mmu_notifier *
159 __i915_mmu_notifier_lookup(struct drm_device *dev, struct mm_struct *mm)
160 {
161 	struct drm_i915_private *dev_priv = to_i915(dev);
162 	struct i915_mmu_notifier *mmu;
163 
164 	/* Protected by dev->struct_mutex */
165 	hash_for_each_possible(dev_priv->mmu_notifiers, mmu, node, (unsigned long)mm)
166 		if (mmu->mm == mm)
167 			return mmu;
168 
169 	return NULL;
170 }
171 
172 static struct i915_mmu_notifier *
173 i915_mmu_notifier_get(struct drm_device *dev, struct mm_struct *mm)
174 {
175 	struct drm_i915_private *dev_priv = to_i915(dev);
176 	struct i915_mmu_notifier *mmu;
177 	int ret;
178 
179 	lockdep_assert_held(&dev->struct_mutex);
180 
181 	mmu = __i915_mmu_notifier_lookup(dev, mm);
182 	if (mmu)
183 		return mmu;
184 
185 	mmu = kmalloc(sizeof(*mmu), GFP_KERNEL);
186 	if (mmu == NULL)
187 		return ERR_PTR(-ENOMEM);
188 
189 	spin_lock_init(&mmu->lock);
190 	mmu->dev = dev;
191 	mmu->mn.ops = &i915_gem_userptr_notifier;
192 	mmu->mm = mm;
193 	mmu->objects = RB_ROOT;
194 	mmu->count = 0;
195 	mmu->serial = 1;
196 	INIT_LIST_HEAD(&mmu->linear);
197 	mmu->has_linear = false;
198 
199 	/* Protected by mmap_sem (write-lock) */
200 	ret = __mmu_notifier_register(&mmu->mn, mm);
201 	if (ret) {
202 		kfree(mmu);
203 		return ERR_PTR(ret);
204 	}
205 
206 	/* Protected by dev->struct_mutex */
207 	hash_add(dev_priv->mmu_notifiers, &mmu->node, (unsigned long)mm);
208 	return mmu;
209 }
210 
211 static void
212 __i915_mmu_notifier_destroy_worker(struct work_struct *work)
213 {
214 	struct i915_mmu_notifier *mmu = container_of(work, typeof(*mmu), work);
215 	mmu_notifier_unregister(&mmu->mn, mmu->mm);
216 	kfree(mmu);
217 }
218 
219 static void
220 __i915_mmu_notifier_destroy(struct i915_mmu_notifier *mmu)
221 {
222 	lockdep_assert_held(&mmu->dev->struct_mutex);
223 
224 	/* Protected by dev->struct_mutex */
225 	hash_del(&mmu->node);
226 
227 	/* Our lock ordering is: mmap_sem, mmu_notifier_srcu, struct_mutex.
228 	 * We enter the function holding struct_mutex, therefore we need
229 	 * to drop our mutex prior to calling mmu_notifier_unregister in
230 	 * order to prevent lock inversion (and system-wide deadlock)
231 	 * between the mmap_sem and struct_mutex. Hence we defer the
232 	 * unregistration to a workqueue where we hold no locks.
233 	 */
234 	INIT_WORK(&mmu->work, __i915_mmu_notifier_destroy_worker);
235 	schedule_work(&mmu->work);
236 }
237 
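/* Bump the serial used to detect concurrent updates of the tree/list;
 * 0 is skipped so it can serve as the "not yet sampled" value in the
 * invalidation loop.
 */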
238 static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mmu)
239 {
240 	if (++mmu->serial == 0)
241 		mmu->serial = 1;
242 }
243 
244 static bool i915_mmu_notifier_has_linear(struct i915_mmu_notifier *mmu)
245 {
246 	struct i915_mmu_object *mn;
247 
248 	list_for_each_entry(mn, &mmu->linear, link)
249 		if (mn->is_linear)
250 			return true;
251 
252 	return false;
253 }
254 
255 static void
256 i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
257 		      struct i915_mmu_object *mn)
258 {
259 	lockdep_assert_held(&mmu->dev->struct_mutex);
260 
261 	spin_lock(&mmu->lock);
262 	list_del(&mn->link);
263 	if (mn->is_linear)
264 		mmu->has_linear = i915_mmu_notifier_has_linear(mmu);
265 	else
266 		interval_tree_remove(&mn->it, &mmu->objects);
267 	__i915_mmu_notifier_update_serial(mmu);
268 	spin_unlock(&mmu->lock);
269 
270 	/* Protected against _add() by dev->struct_mutex */
271 	if (--mmu->count == 0)
272 		__i915_mmu_notifier_destroy(mmu);
273 }
274 
275 static int
276 i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
277 		      struct i915_mmu_object *mn)
278 {
279 	struct interval_tree_node *it;
280 	int ret;
281 
282 	ret = i915_mutex_lock_interruptible(mmu->dev);
283 	if (ret)
284 		return ret;
285 
286 	/* Make sure we drop the final active reference (and thereby
287 	 * remove the objects from the interval tree) before we do
288 	 * the check for overlapping objects.
289 	 */
290 	i915_gem_retire_requests(mmu->dev);
291 
292 	spin_lock(&mmu->lock);
293 	it = interval_tree_iter_first(&mmu->objects,
294 				      mn->it.start, mn->it.last);
295 	if (it) {
296 		struct drm_i915_gem_object *obj;
297 
298 		/* We only need to check the first object in the range as it
299 		 * either has cancelled gup work queued and we need to
300 		 * return to the user to give time for the gup-workers
301 		 * to flush their object references upon which the object will
302 		 * be removed from the interval-tree, or the range is
303 		 * still in use by another client and the overlap is invalid.
304 		 *
305 		 * If we do have an overlap, we cannot use the interval tree
306 		 * for fast range invalidation.
307 		 */
308 
309 		obj = container_of(it, struct i915_mmu_object, it)->obj;
310 		if (!obj->userptr.workers)
311 			mmu->has_linear = mn->is_linear = true;
312 		else
313 			ret = -EAGAIN;
314 	} else
315 		interval_tree_insert(&mn->it, &mmu->objects);
316 
317 	if (ret == 0) {
318 		list_add(&mn->link, &mmu->linear);
319 		__i915_mmu_notifier_update_serial(mmu);
320 	}
321 	spin_unlock(&mmu->lock);
322 	mutex_unlock(&mmu->dev->struct_mutex);
323 
324 	return ret;
325 }
326 
327 static void
328 i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
329 {
330 	struct i915_mmu_object *mn;
331 
332 	mn = obj->userptr.mn;
333 	if (mn == NULL)
334 		return;
335 
336 	i915_mmu_notifier_del(mn->mmu, mn);
337 	obj->userptr.mn = NULL;
338 }
339 
340 static int
341 i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
342 				    unsigned flags)
343 {
344 	struct i915_mmu_notifier *mmu;
345 	struct i915_mmu_object *mn;
346 	int ret;
347 
348 	if (flags & I915_USERPTR_UNSYNCHRONIZED)
349 		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
350 
351 	down_write(&obj->userptr.mm->mmap_sem);
352 	ret = i915_mutex_lock_interruptible(obj->base.dev);
353 	if (ret == 0) {
354 		mmu = i915_mmu_notifier_get(obj->base.dev, obj->userptr.mm);
355 		if (!IS_ERR(mmu))
356 			mmu->count++; /* preemptive add to act as a refcount */
357 		else
358 			ret = PTR_ERR(mmu);
359 		mutex_unlock(&obj->base.dev->struct_mutex);
360 	}
361 	up_write(&obj->userptr.mm->mmap_sem);
362 	if (ret)
363 		return ret;
364 
365 	mn = kzalloc(sizeof(*mn), GFP_KERNEL);
366 	if (mn == NULL) {
367 		ret = -ENOMEM;
368 		goto destroy_mmu;
369 	}
370 
371 	mn->mmu = mmu;
372 	mn->it.start = obj->userptr.ptr;
373 	mn->it.last = mn->it.start + obj->base.size - 1;
374 	mn->obj = obj;
375 
376 	ret = i915_mmu_notifier_add(mmu, mn);
377 	if (ret)
378 		goto free_mn;
379 
380 	obj->userptr.mn = mn;
381 	return 0;
382 
383 free_mn:
384 	kfree(mn);
385 destroy_mmu:
386 	mutex_lock(&obj->base.dev->struct_mutex);
387 	if (--mmu->count == 0)
388 		__i915_mmu_notifier_destroy(mmu);
389 	mutex_unlock(&obj->base.dev->struct_mutex);
390 	return ret;
391 }
392 
393 #else
394 
395 #if 0
396 static void
397 i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
398 {
399 }
400 
401 static int
402 i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
403 				    unsigned flags)
404 {
405 	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
406 		return -ENODEV;
407 
408 
409 	return 0;
410 }
411 #endif
412 #endif
413 
414 struct get_pages_work {
415 	struct work_struct work;
416 	struct drm_i915_gem_object *obj;
417 	struct task_struct *task;
418 };
419 
420 
421 #if IS_ENABLED(CONFIG_SWIOTLB)
422 #define swiotlb_active() swiotlb_nr_tbl()
423 #else
424 #define swiotlb_active() 0
425 #endif
426 
427 #if 0
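/*
 * Wrap the pinned page array in a sg_table. When swiotlb is active each
 * page is kept as its own segment rather than coalesced, presumably to
 * keep every segment within what the bounce buffers can handle.
 */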
428 static int
429 st_set_pages(struct sg_table **st, struct vm_page **pvec, int num_pages)
430 {
431 	struct scatterlist *sg;
432 	int ret, n;
433 
434 	*st = kmalloc(sizeof(**st), M_DRM, M_WAITOK);
435 	if (*st == NULL)
436 		return -ENOMEM;
437 
438 	if (swiotlb_active()) {
439 		ret = sg_alloc_table(*st, num_pages, GFP_KERNEL);
440 		if (ret)
441 			goto err;
442 
443 		for_each_sg((*st)->sgl, sg, num_pages, n)
444 			sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
445 	} else {
446 		ret = sg_alloc_table_from_pages(*st, pvec, num_pages,
447 						0, num_pages << PAGE_SHIFT,
448 						GFP_KERNEL);
449 		if (ret)
450 			goto err;
451 	}
452 
453 	return 0;
454 
455 err:
456 	kfree(*st);
457 	*st = NULL;
458 	return ret;
459 }
460 
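/*
 * Slow path, run from a workqueue: pin the user pages with
 * get_user_pages() under mmap_sem (which must not be taken while holding
 * struct_mutex), then retake struct_mutex to publish the sg_table,
 * unless the work has been cancelled or superseded in the meantime, in
 * which case the pinned pages are simply released. Any error is parked
 * in obj->userptr.work for i915_gem_userptr_get_pages() to report.
 */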
461 static void
462 __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
463 {
464 	struct get_pages_work *work = container_of(_work, typeof(*work), work);
465 	struct drm_i915_gem_object *obj = work->obj;
466 	struct drm_device *dev = obj->base.dev;
467 	const int num_pages = obj->base.size >> PAGE_SHIFT;
468 	struct page **pvec;
469 	int pinned, ret;
470 
471 	ret = -ENOMEM;
472 	pinned = 0;
473 
474 	pvec = kmalloc(num_pages*sizeof(struct page *),
475 		       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
476 	if (pvec == NULL)
477 		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
478 	if (pvec != NULL) {
479 		struct mm_struct *mm = obj->userptr.mm;
480 
481 		down_read(&mm->mmap_sem);
482 		while (pinned < num_pages) {
483 			ret = get_user_pages(work->task, mm,
484 					     obj->userptr.ptr + pinned * PAGE_SIZE,
485 					     num_pages - pinned,
486 					     !obj->userptr.read_only, 0,
487 					     pvec + pinned, NULL);
488 			if (ret < 0)
489 				break;
490 
491 			pinned += ret;
492 		}
493 		up_read(&mm->mmap_sem);
494 	}
495 
496 	mutex_lock(&dev->struct_mutex);
497 	if (obj->userptr.work != &work->work) {
498 		ret = 0;
499 	} else if (pinned == num_pages) {
500 		ret = st_set_pages(&obj->pages, pvec, num_pages);
501 		if (ret == 0) {
502 			list_add_tail(&obj->global_list, &to_i915(dev)->mm.unbound_list);
503 			pinned = 0;
504 		}
505 	}
506 
507 	obj->userptr.work = ERR_PTR(ret);
508 	obj->userptr.workers--;
509 	drm_gem_object_unreference(&obj->base);
510 	mutex_unlock(&dev->struct_mutex);
511 
512 	release_pages(pvec, pinned, 0);
513 	drm_free_large(pvec);
514 
515 	put_task_struct(work->task);
516 	kfree(work);
517 }
518 
519 static int
520 i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
521 {
522 	const int num_pages = obj->base.size >> PAGE_SHIFT;
523 	struct page **pvec;
524 	int pinned, ret;
525 
526 	/* If userspace should engineer that these pages are replaced in
527 	 * the vma between us binding these pages into the GTT and completion
528 	 * of rendering... Their loss. If they change the mapping of their
529 	 * pages they need to create a new bo to point to the new vma.
530 	 *
531 	 * However, that still leaves open the possibility of the vma
532 	 * being copied upon fork. Which falls under the same userspace
533 	 * synchronisation issue as a regular bo, except that this time
534 	 * the process may not be expecting that a particular piece of
535 	 * memory is tied to the GPU.
536 	 *
537 	 * Fortunately, we can hook into the mmu_notifier in order to
538 	 * discard the page references prior to anything nasty happening
539 	 * to the vma (discard or cloning) which should prevent the more
540 	 * egregious cases from causing harm.
541 	 */
542 
543 	pvec = NULL;
544 	pinned = 0;
545 	if (obj->userptr.mm == current->mm) {
546 		pvec = kmalloc(num_pages*sizeof(struct page *),
547 			       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
548 		if (pvec == NULL) {
549 			pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
550 			if (pvec == NULL)
551 				return -ENOMEM;
552 		}
553 
554 		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
555 					       !obj->userptr.read_only, pvec);
556 	}
557 	if (pinned < num_pages) {
558 		if (pinned < 0) {
559 			ret = pinned;
560 			pinned = 0;
561 		} else {
562 			/* Spawn a worker so that we can acquire the
563 			 * user pages without holding our mutex. Access
564 			 * to the user pages requires mmap_sem, and we have
565 			 * a strict lock ordering of mmap_sem, struct_mutex -
566 			 * we already hold struct_mutex here and so cannot
567 			 * call gup without encountering a lock inversion.
568 			 *
569 			 * Userspace will keep on repeating the operation
570 			 * (thanks to EAGAIN) until either we hit the fast
571 			 * path or the worker completes. If the worker is
572 			 * cancelled or superseded, the task is still run
573 			 * but the results ignored. (This leads to
574 			 * complications that we may have a stray object
575 			 * refcount that we need to be wary of when
576 			 * checking for existing objects during creation.)
577 			 * If the worker encounters an error, it reports
578 			 * that error back to this function through
579 			 * obj->userptr.work = ERR_PTR.
580 			 */
581 			ret = -EAGAIN;
582 			if (obj->userptr.work == NULL &&
583 			    obj->userptr.workers < I915_GEM_USERPTR_MAX_WORKERS) {
584 				struct get_pages_work *work;
585 
586 				work = kmalloc(sizeof(*work), GFP_KERNEL);
587 				if (work != NULL) {
588 					obj->userptr.work = &work->work;
589 					obj->userptr.workers++;
590 
591 					work->obj = obj;
592 					drm_gem_object_reference(&obj->base);
593 
594 					work->task = current;
595 					get_task_struct(work->task);
596 
597 					INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
598 					schedule_work(&work->work);
599 				} else
600 					ret = -ENOMEM;
601 			} else {
602 				if (IS_ERR(obj->userptr.work)) {
603 					ret = PTR_ERR(obj->userptr.work);
604 					obj->userptr.work = NULL;
605 				}
606 			}
607 		}
608 	} else {
609 		ret = st_set_pages(&obj->pages, pvec, num_pages);
610 		if (ret == 0) {
611 			obj->userptr.work = NULL;
612 			pinned = 0;
613 		}
614 	}
615 
616 	release_pages(pvec, pinned, 0);
617 	drm_free_large(pvec);
618 	return ret;
619 }
620 
621 static void
622 i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
623 {
624 	struct scatterlist *sg;
625 	int i;
626 
627 	BUG_ON(obj->userptr.work != NULL);
628 
629 	if (obj->madv != I915_MADV_WILLNEED)
630 		obj->dirty = 0;
631 
632 	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
633 		struct page *page = sg_page(sg);
634 
635 		if (obj->dirty)
636 			set_page_dirty(page);
637 
638 		mark_page_accessed(page);
639 		page_cache_release(page);
640 	}
641 	obj->dirty = 0;
642 
643 	sg_free_table(obj->pages);
644 	kfree(obj->pages);
645 }
646 
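/*
 * Release the pinned user pages: write back anything the GPU dirtied
 * (skipped if the object is no longer marked MADV_WILLNEED), drop each
 * page reference and free the sg_table.
 */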
647 static void
648 i915_gem_userptr_release(struct drm_i915_gem_object *obj)
649 {
650 	i915_gem_userptr_release__mmu_notifier(obj);
651 
652 	if (obj->userptr.mm) {
653 		mmput(obj->userptr.mm);
654 		obj->userptr.mm = NULL;
655 	}
656 }
657 
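/*
 * Before a userptr object may be exported via dma-buf, make sure it is
 * tracked by an mmu_notifier, even if it was created with
 * I915_USERPTR_UNSYNCHRONIZED, so the exported pages cannot be unmapped
 * behind the importer's back.
 */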
658 static int
659 i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
660 {
661 	if (obj->userptr.mn)
662 		return 0;
663 
664 	return i915_gem_userptr_init__mmu_notifier(obj, 0);
665 }
666 
667 static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
668 	.dmabuf_export = i915_gem_userptr_dmabuf_export,
669 	.get_pages = i915_gem_userptr_get_pages,
670 	.put_pages = i915_gem_userptr_put_pages,
671 	.release = i915_gem_userptr_release,
672 };
673 
674 /**
675  * Creates a new mm object that wraps some normal memory from the process
676  * context - user memory.
677  *
678  * We impose several restrictions upon the memory being mapped
679  * into the GPU.
680  * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
681  * 2. It must be normal system memory, not a pointer into another map of IO
682  *    space (e.g. it must not be a GTT mmapping of another object).
683  * 3. We only allow a bo as large as we could in theory map into the GTT,
684  *    that is we limit the size to the total size of the GTT.
685  * 4. The bo is marked as being snoopable. The backing pages are left
686  *    accessible directly by the CPU, but reads and writes by the GPU may
687  *    incur the cost of a snoop (unless you have an LLC architecture).
688  *
689  * Synchronisation between multiple users and the GPU is left to userspace
690  * through the normal set-domain-ioctl. The kernel will enforce that the
691  * GPU relinquishes the VMA before it is returned back to the system
692  * GPU relinquishes the VMA before it is returned to the system
693  * malloc() library may not immediately relinquish the VMA after free() and
694  * instead reuse it whilst the GPU is still reading and writing to the VMA.
695  * Caveat emptor.
696  *
697  * Also note that the object created here is not currently a "first class"
698  * object, in that several ioctls are banned. These are the CPU access
699  * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
700  * direct access via your pointer rather than use those ioctls.
701  *
702  * If you think this is a good interface to use to pass GPU memory between
703  * drivers, please use dma-buf instead. In fact, wherever possible use
704  * dma-buf instead.
705  */
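/*
 * Illustrative userspace sketch, assuming libdrm's drmIoctl() and the
 * DRM_IOCTL_I915_GEM_USERPTR request from i915_drm.h; use_gem_handle()
 * stands in for whatever consumes the returned handle:
 *
 *	struct drm_i915_gem_userptr arg = {
 *		.user_ptr = (uintptr_t)ptr,	// page-aligned
 *		.user_size = size,		// page-aligned, <= GTT size
 *		.flags = 0,
 *	};
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg) == 0)
 *		use_gem_handle(arg.handle);
 */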
706 int
707 i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
708 {
709 	struct drm_i915_private *dev_priv = dev->dev_private;
710 	struct drm_i915_gem_userptr *args = data;
711 	struct drm_i915_gem_object *obj;
712 	int ret;
713 	u32 handle;
714 
715 	if (args->flags & ~(I915_USERPTR_READ_ONLY |
716 			    I915_USERPTR_UNSYNCHRONIZED))
717 		return -EINVAL;
718 
719 	if (offset_in_page(args->user_ptr | args->user_size))
720 		return -EINVAL;
721 
722 	if (args->user_size > dev_priv->gtt.base.total)
723 		return -E2BIG;
724 
725 	if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE,
726 		       (char __user *)(unsigned long)args->user_ptr, args->user_size))
727 		return -EFAULT;
728 
729 	if (args->flags & I915_USERPTR_READ_ONLY) {
730 		/* On almost all of the current hw, we cannot tell the GPU that a
731 		 * page is readonly, so this is just a placeholder in the uAPI.
732 		 */
733 		return -ENODEV;
734 	}
735 
736 	/* Allocate the new object */
737 	obj = i915_gem_object_alloc(dev);
738 	if (obj == NULL)
739 		return -ENOMEM;
740 
741 	drm_gem_private_object_init(dev, &obj->base, args->user_size);
742 	i915_gem_object_init(obj, &i915_gem_userptr_ops);
743 	obj->cache_level = I915_CACHE_LLC;
744 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
745 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
746 
747 	obj->userptr.ptr = args->user_ptr;
748 	obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
749 
750 	/* And keep a pointer to the current->mm for resolving the user pages
751 	 * at binding. This means that we need to hook into the mmu_notifier
752 	 * in order to detect if the mmu is destroyed.
753 	 */
754 	ret = -ENOMEM;
755 	if ((obj->userptr.mm = get_task_mm(current)))
756 		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
757 	if (ret == 0)
758 		ret = drm_gem_handle_create(file, &obj->base, &handle);
759 
760 	/* drop reference from allocate - handle holds it now */
761 	drm_gem_object_unreference_unlocked(&obj->base);
762 	if (ret)
763 		return ret;
764 
765 	args->handle = handle;
766 	return 0;
767 }
768 #endif
769 
770 int
771 i915_gem_init_userptr(struct drm_device *dev)
772 {
773 #if defined(CONFIG_MMU_NOTIFIER)
774 	struct drm_i915_private *dev_priv = to_i915(dev);
775 	hash_init(dev_priv->mmu_notifiers);
776 #endif
777 	return 0;
778 }
779