xref: /dragonfly/sys/dev/drm/i915/i915_gem.c (revision 297046af)
1 /*
2  * Copyright © 2008-2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27 
28 #include <drm/drmP.h>
29 #include <drm/drm_vma_manager.h>
30 #include <drm/i915_drm.h>
31 #include "i915_drv.h"
32 #include "i915_gem_dmabuf.h"
33 #include "i915_vgpu.h"
34 #include "i915_trace.h"
35 #include "intel_drv.h"
36 #include "intel_frontbuffer.h"
37 #include "intel_mocs.h"
38 #include <linux/reservation.h>
39 #include <linux/shmem_fs.h>
40 #include <linux/slab.h>
41 #include <linux/swap.h>
42 #include <linux/pci.h>
43 #include <linux/dma-buf.h>
44 
45 #include <sys/mman.h>
46 #include <vm/vm_map.h>
47 #include <vm/vm_param.h>
48 
49 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
50 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
51 
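/*
 * The CPU cache is coherent with GPU access when the platform has an LLC or
 * the object uses a caching mode other than I915_CACHE_NONE; otherwise the
 * data must be clflushed around GPU access.
 */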
52 static bool cpu_cache_is_coherent(struct drm_device *dev,
53 				  enum i915_cache_level level)
54 {
55 	return HAS_LLC(dev) || level != I915_CACHE_NONE;
56 }
57 
58 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
59 {
60 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
61 		return false;
62 
63 	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
64 		return true;
65 
66 	return obj->pin_display;
67 }
68 
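/*
 * Reserve a scratch node below ggtt.mappable_end so that the pread/pwrite
 * fallback paths can access an object through the aperture one page at a
 * time when the object itself cannot be pinned into mappable space.
 */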
69 static int
70 insert_mappable_node(struct drm_i915_private *i915,
71                      struct drm_mm_node *node, u32 size)
72 {
73 	memset(node, 0, sizeof(*node));
74 	return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node,
75 						   size, 0, 0, 0,
76 						   i915->ggtt.mappable_end,
77 						   DRM_MM_SEARCH_DEFAULT,
78 						   DRM_MM_CREATE_DEFAULT);
79 }
80 
81 static void
82 remove_mappable_node(struct drm_mm_node *node)
83 {
84 	drm_mm_remove_node(node);
85 }
86 
87 /* some bookkeeping */
88 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
89 				  size_t size)
90 {
91 	lockmgr(&dev_priv->mm.object_stat_lock, LK_EXCLUSIVE);
92 	dev_priv->mm.object_count++;
93 	dev_priv->mm.object_memory += size;
94 	lockmgr(&dev_priv->mm.object_stat_lock, LK_RELEASE);
95 }
96 
97 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
98 				     size_t size)
99 {
100 	lockmgr(&dev_priv->mm.object_stat_lock, LK_EXCLUSIVE);
101 	dev_priv->mm.object_count--;
102 	dev_priv->mm.object_memory -= size;
103 	lockmgr(&dev_priv->mm.object_stat_lock, LK_RELEASE);
104 }
105 
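/*
 * Block (interruptibly, for at most 10 seconds) while a GPU reset is in
 * progress. Returns 0 once the reset has completed and -EIO on timeout.
 */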
106 static int
107 i915_gem_wait_for_error(struct i915_gpu_error *error)
108 {
109 	int ret;
110 
111 	if (!i915_reset_in_progress(error))
112 		return 0;
113 
114 	/*
115 	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
116 	 * userspace. If it takes that long something really bad is going on and
117 	 * we should simply try to bail out and fail as gracefully as possible.
118 	 */
119 	ret = wait_event_interruptible_timeout(error->reset_queue,
120 					       !i915_reset_in_progress(error),
121 					       10*HZ);
122 	if (ret == 0) {
123 		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
124 		return -EIO;
125 	} else if (ret < 0) {
126 		return ret;
127 	} else {
128 		return 0;
129 	}
130 }
131 
132 int i915_mutex_lock_interruptible(struct drm_device *dev)
133 {
134 	struct drm_i915_private *dev_priv = to_i915(dev);
135 	int ret;
136 
137 	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
138 	if (ret)
139 		return ret;
140 
141 	ret = mutex_lock_interruptible(&dev->struct_mutex);
142 	if (ret)
143 		return ret;
144 
145 	return 0;
146 }
147 
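/**
 * Reports the total size of the global GTT and how much of it is not
 * occupied by pinned vmas.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */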
148 int
149 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
150 			    struct drm_file *file)
151 {
152 	struct drm_i915_private *dev_priv = to_i915(dev);
153 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
154 	struct drm_i915_gem_get_aperture *args = data;
155 	struct i915_vma *vma;
156 	size_t pinned;
157 
158 	pinned = 0;
159 	mutex_lock(&dev->struct_mutex);
160 	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
161 		if (i915_vma_is_pinned(vma))
162 			pinned += vma->node.size;
163 	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
164 		if (i915_vma_is_pinned(vma))
165 			pinned += vma->node.size;
166 	mutex_unlock(&dev->struct_mutex);
167 
168 	args->aper_size = ggtt->base.total;
169 	args->aper_available_size = args->aper_size - pinned;
170 
171 	return 0;
172 }
173 
174 #if 0
175 static int
176 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
177 {
178 	struct address_space *mapping = obj->base.filp->f_mapping;
179 	char *vaddr = obj->phys_handle->vaddr;
180 	struct sg_table *st;
181 	struct scatterlist *sg;
182 	int i;
183 
184 	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
185 		return -EINVAL;
186 
187 	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
188 		struct page *page;
189 		char *src;
190 
191 		page = shmem_read_mapping_page(mapping, i);
192 		if (IS_ERR(page))
193 			return PTR_ERR(page);
194 
195 		src = kmap_atomic(page);
196 		memcpy(vaddr, src, PAGE_SIZE);
197 		drm_clflush_virt_range(vaddr, PAGE_SIZE);
198 		kunmap_atomic(src);
199 
200 		put_page(page);
201 		vaddr += PAGE_SIZE;
202 	}
203 
204 	i915_gem_chipset_flush(to_i915(obj->base.dev));
205 
206 	st = kmalloc(sizeof(*st), GFP_KERNEL);
207 	if (st == NULL)
208 		return -ENOMEM;
209 
210 	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
211 		kfree(st);
212 		return -ENOMEM;
213 	}
214 
215 	sg = st->sgl;
216 	sg->offset = 0;
217 	sg->length = obj->base.size;
218 
219 	sg_dma_address(sg) = obj->phys_handle->busaddr;
220 	sg_dma_len(sg) = obj->base.size;
221 
222 	obj->pages = st;
223 	return 0;
224 }
225 
226 static void
227 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
228 {
229 	int ret;
230 
231 	BUG_ON(obj->madv == __I915_MADV_PURGED);
232 
233 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
234 	if (WARN_ON(ret)) {
235 		/* In the event of a disaster, abandon all caches and
236 		 * hope for the best.
237 		 */
238 		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
239 	}
240 
241 	if (obj->madv == I915_MADV_DONTNEED)
242 		obj->dirty = 0;
243 
244 	if (obj->dirty) {
245 		struct address_space *mapping = obj->base.filp->f_mapping;
246 		char *vaddr = obj->phys_handle->vaddr;
247 		int i;
248 
249 		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
250 			struct page *page;
251 			char *dst;
252 
253 			page = shmem_read_mapping_page(mapping, i);
254 			if (IS_ERR(page))
255 				continue;
256 
257 			dst = kmap_atomic(page);
258 			drm_clflush_virt_range(vaddr, PAGE_SIZE);
259 			memcpy(dst, vaddr, PAGE_SIZE);
260 			kunmap_atomic(dst);
261 
262 			set_page_dirty(page);
263 			if (obj->madv == I915_MADV_WILLNEED)
264 				mark_page_accessed(page);
265 			put_page(page);
266 			vaddr += PAGE_SIZE;
267 		}
268 		obj->dirty = 0;
269 	}
270 
271 	sg_free_table(obj->pages);
272 	kfree(obj->pages);
273 }
274 
275 static void
276 i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
277 {
278 	drm_pci_free(obj->base.dev, obj->phys_handle);
279 }
280 #endif
281 
282 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
283 #if 0
284 	.get_pages = i915_gem_object_get_pages_phys,
285 	.put_pages = i915_gem_object_put_pages_phys,
286 	.release = i915_gem_object_release_phys,
287 #endif
288 };
289 
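/*
 * Unbind every vma belonging to the object. Outstanding rendering is waited
 * upon and requests are retired first so that even closed-but-still-active
 * bindings can be torn down.
 */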
290 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
291 {
292 	struct i915_vma *vma;
293 	LINUX_LIST_HEAD(still_in_list);
294 	int ret;
295 
296 	lockdep_assert_held(&obj->base.dev->struct_mutex);
297 
298 	/* Closed vma are removed from the obj->vma_list - but they may
299 	 * still have an active binding on the object. To remove those we
300 	 * must wait for all rendering to the object to complete (as unbinding
301 	 * must anyway), and retire the requests.
302 	 */
303 	ret = i915_gem_object_wait_rendering(obj, false);
304 	if (ret)
305 		return ret;
306 
307 	i915_gem_retire_requests(to_i915(obj->base.dev));
308 
309 	while ((vma = list_first_entry_or_null(&obj->vma_list,
310 					       struct i915_vma,
311 					       obj_link))) {
312 		list_move_tail(&vma->obj_link, &still_in_list);
313 		ret = i915_vma_unbind(vma);
314 		if (ret)
315 			break;
316 	}
317 	list_splice(&still_in_list, &obj->vma_list);
318 
319 	return ret;
320 }
321 
322 /**
323  * Ensures that all rendering to the object has completed and the object is
324  * safe to unbind from the GTT or access from the CPU.
325  * @obj: i915 gem object
326  * @readonly: waiting for just read access or read-write access
327  */
328 int
329 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
330 			       bool readonly)
331 {
332 	struct reservation_object *resv;
333 	struct i915_gem_active *active;
334 	unsigned long active_mask;
335 	int idx;
336 
337 	lockdep_assert_held(&obj->base.dev->struct_mutex);
338 
339 	if (!readonly) {
340 		active = obj->last_read;
341 		active_mask = i915_gem_object_get_active(obj);
342 	} else {
343 		active_mask = 1;
344 		active = &obj->last_write;
345 	}
346 
347 	for_each_active(active_mask, idx) {
348 		int ret;
349 
350 		ret = i915_gem_active_wait(&active[idx],
351 					   &obj->base.dev->struct_mutex);
352 		if (ret)
353 			return ret;
354 	}
355 
356 	resv = i915_gem_object_get_dmabuf_resv(obj);
357 	if (resv) {
358 		long err;
359 
360 		err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
361 							  MAX_SCHEDULE_TIMEOUT);
362 		if (err < 0)
363 			return err;
364 	}
365 
366 	return 0;
367 }
368 
369 /* A nonblocking variant of the above wait. Must be called prior to
370  * acquiring the mutex for the object, as the object state may change
371  * during this call. A reference must be held by the caller for the object.
372  */
373 static __must_check int
374 __unsafe_wait_rendering(struct drm_i915_gem_object *obj,
375 			struct intel_rps_client *rps,
376 			bool readonly)
377 {
378 	struct i915_gem_active *active;
379 	unsigned long active_mask;
380 	int idx;
381 
382 	active_mask = __I915_BO_ACTIVE(obj);
383 	if (!active_mask)
384 		return 0;
385 
386 	if (!readonly) {
387 		active = obj->last_read;
388 	} else {
389 		active_mask = 1;
390 		active = &obj->last_write;
391 	}
392 
393 	for_each_active(active_mask, idx) {
394 		int ret;
395 
396 		ret = i915_gem_active_wait_unlocked(&active[idx],
397 						    I915_WAIT_INTERRUPTIBLE,
398 						    NULL, rps);
399 		if (ret)
400 			return ret;
401 	}
402 
403 	return 0;
404 }
405 
406 static struct intel_rps_client *to_rps_client(struct drm_file *file)
407 {
408 	struct drm_i915_file_private *fpriv = file->driver_priv;
409 
410 	return &fpriv->rps;
411 }
412 
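/*
 * Replace the object's shmem backing with a physically contiguous DMA
 * allocation (drm_pci_alloc): existing GTT bindings and pages are released
 * and the phys object ops take over page management.
 */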
413 int
414 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
415 			    int align)
416 {
417 	drm_dma_handle_t *phys;
418 	int ret;
419 
420 	if (obj->phys_handle) {
421 		if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
422 			return -EBUSY;
423 
424 		return 0;
425 	}
426 
427 	if (obj->madv != I915_MADV_WILLNEED)
428 		return -EFAULT;
429 
430 	if (obj->base.filp == NULL)
431 		return -EINVAL;
432 
433 	ret = i915_gem_object_unbind(obj);
434 	if (ret)
435 		return ret;
436 
437 	ret = i915_gem_object_put_pages(obj);
438 	if (ret)
439 		return ret;
440 
441 	/* create a new object */
442 	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
443 	if (!phys)
444 		return -ENOMEM;
445 
446 	obj->phys_handle = phys;
447 	obj->ops = &i915_gem_phys_ops;
448 
449 	return i915_gem_object_get_pages(obj);
450 }
451 
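/*
 * pwrite into a phys object: copy straight into the contiguous backing
 * store, dropping struct_mutex if the non-faulting copy cannot complete,
 * then clflush the range and flush the chipset so the GPU sees the write.
 */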
452 static int
453 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
454 		     struct drm_i915_gem_pwrite *args,
455 		     struct drm_file *file_priv)
456 {
457 	struct drm_device *dev = obj->base.dev;
458 	void *vaddr = obj->phys_handle->vaddr + args->offset;
459 	char __user *user_data = u64_to_user_ptr(args->data_ptr);
460 	int ret = 0;
461 
462 	/* We manually control the domain here and pretend that it
463 	 * remains coherent, i.e. in the GTT domain, like shmem_pwrite.
464 	 */
465 	ret = i915_gem_object_wait_rendering(obj, false);
466 	if (ret)
467 		return ret;
468 
469 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
470 	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
471 		unsigned long unwritten;
472 
473 		/* The physical object once assigned is fixed for the lifetime
474 		 * of the obj, so we can safely drop the lock and continue
475 		 * to access vaddr.
476 		 */
477 		mutex_unlock(&dev->struct_mutex);
478 		unwritten = copy_from_user(vaddr, user_data, args->size);
479 		mutex_lock(&dev->struct_mutex);
480 		if (unwritten) {
481 			ret = -EFAULT;
482 			goto out;
483 		}
484 	}
485 
486 	drm_clflush_virt_range(vaddr, args->size);
487 	i915_gem_chipset_flush(to_i915(dev));
488 
489 out:
490 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
491 	return ret;
492 }
493 
494 void *i915_gem_object_alloc(struct drm_device *dev)
495 {
496 	return kzalloc(sizeof(struct drm_i915_gem_object), GFP_KERNEL);
497 }
498 
499 void i915_gem_object_free(struct drm_i915_gem_object *obj)
500 {
501 	kfree(obj);
502 }
503 
504 static int
505 i915_gem_create(struct drm_file *file,
506 		struct drm_device *dev,
507 		uint64_t size,
508 		uint32_t *handle_p)
509 {
510 	struct drm_i915_gem_object *obj;
511 	int ret;
512 	u32 handle;
513 
514 	size = roundup(size, PAGE_SIZE);
515 	if (size == 0)
516 		return -EINVAL;
517 
518 	/* Allocate the new object */
519 	obj = i915_gem_object_create(dev, size);
520 	if (IS_ERR(obj))
521 		return PTR_ERR(obj);
522 
523 	ret = drm_gem_handle_create(file, &obj->base, &handle);
524 	/* drop reference from allocate - handle holds it now */
525 	i915_gem_object_put_unlocked(obj);
526 	if (ret)
527 		return ret;
528 
529 	*handle_p = handle;
530 	return 0;
531 }
532 
533 int
534 i915_gem_dumb_create(struct drm_file *file,
535 		     struct drm_device *dev,
536 		     struct drm_mode_create_dumb *args)
537 {
538 	/* have to work out size/pitch and return them */
539 	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
540 	args->size = args->pitch * args->height;
541 	return i915_gem_create(file, dev,
542 			       args->size, &args->handle);
543 }
544 
545 /**
546  * Creates a new mm object and returns a handle to it.
547  * @dev: drm device pointer
548  * @data: ioctl data blob
549  * @file: drm file pointer
550  */
551 int
552 i915_gem_create_ioctl(struct drm_device *dev, void *data,
553 		      struct drm_file *file)
554 {
555 	struct drm_i915_gem_create *args = data;
556 
557 	return i915_gem_create(file, dev,
558 			       args->size, &args->handle);
559 }
560 
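/*
 * Copy helpers for bit-17-swizzled objects: data is copied one 64-byte
 * cacheline at a time, with bit 6 of the GPU offset flipped (offset ^ 64)
 * to undo the channel swizzling applied by the hardware.
 */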
561 static inline int
562 __copy_to_user_swizzled(char __user *cpu_vaddr,
563 			const char *gpu_vaddr, int gpu_offset,
564 			int length)
565 {
566 	int ret, cpu_offset = 0;
567 
568 	while (length > 0) {
569 		int cacheline_end = ALIGN(gpu_offset + 1, 64);
570 		int this_length = min(cacheline_end - gpu_offset, length);
571 		int swizzled_gpu_offset = gpu_offset ^ 64;
572 
573 		ret = __copy_to_user(cpu_vaddr + cpu_offset,
574 				     gpu_vaddr + swizzled_gpu_offset,
575 				     this_length);
576 		if (ret)
577 			return ret + length;
578 
579 		cpu_offset += this_length;
580 		gpu_offset += this_length;
581 		length -= this_length;
582 	}
583 
584 	return 0;
585 }
586 
587 static inline int
588 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
589 			  const char __user *cpu_vaddr,
590 			  int length)
591 {
592 	int ret, cpu_offset = 0;
593 
594 	while (length > 0) {
595 		int cacheline_end = ALIGN(gpu_offset + 1, 64);
596 		int this_length = min(cacheline_end - gpu_offset, length);
597 		int swizzled_gpu_offset = gpu_offset ^ 64;
598 
599 		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
600 				       cpu_vaddr + cpu_offset,
601 				       this_length);
602 		if (ret)
603 			return ret + length;
604 
605 		cpu_offset += this_length;
606 		gpu_offset += this_length;
607 		length -= this_length;
608 	}
609 
610 	return 0;
611 }
612 
613 /*
614  * Pins the specified object's pages and synchronizes the object with
615  * GPU accesses. Sets needs_clflush to non-zero if the caller should
616  * flush the object from the CPU cache.
617  */
618 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
619 				    unsigned int *needs_clflush)
620 {
621 	int ret;
622 
623 	*needs_clflush = 0;
624 
625 	if (!i915_gem_object_has_struct_page(obj))
626 		return -ENODEV;
627 
628 	ret = i915_gem_object_wait_rendering(obj, true);
629 	if (ret)
630 		return ret;
631 
632 	ret = i915_gem_object_get_pages(obj);
633 	if (ret)
634 		return ret;
635 
636 	i915_gem_object_pin_pages(obj);
637 
638 	i915_gem_object_flush_gtt_write_domain(obj);
639 
640 	/* If we're not in the cpu read domain, set ourselves into the gtt
641 	 * read domain and manually flush cachelines (if required). This
642 	 * optimizes for the case when the gpu will dirty the data
643 	 * again anyway before the next pread happens.
644 	 */
645 	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
646 		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
647 							obj->cache_level);
648 
649 	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
650 		ret = i915_gem_object_set_to_cpu_domain(obj, false);
651 		if (ret)
652 			goto err_unpin;
653 
654 		*needs_clflush = 0;
655 	}
656 
657 	/* return with the pages pinned */
658 	return 0;
659 
660 err_unpin:
661 	i915_gem_object_unpin_pages(obj);
662 	return ret;
663 }
664 
665 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
666 				     unsigned int *needs_clflush)
667 {
668 	int ret;
669 
670 	*needs_clflush = 0;
671 	if (!i915_gem_object_has_struct_page(obj))
672 		return -ENODEV;
673 
674 	ret = i915_gem_object_wait_rendering(obj, false);
675 	if (ret)
676 		return ret;
677 
678 	ret = i915_gem_object_get_pages(obj);
679 	if (ret)
680 		return ret;
681 
682 	i915_gem_object_pin_pages(obj);
683 
684 	i915_gem_object_flush_gtt_write_domain(obj);
685 
686 	/* If we're not in the cpu write domain, set ourselves into the
687 	 * gtt write domain and manually flush cachelines (as required).
688 	 * This optimizes for the case when the gpu will use the data
689 	 * right away and we therefore have to clflush anyway.
690 	 */
691 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
692 		*needs_clflush |= cpu_write_needs_clflush(obj) << 1;
693 
694 	/* Same trick applies to invalidate partially written cachelines read
695 	 * before writing.
696 	 */
697 	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
698 		*needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
699 							 obj->cache_level);
700 
701 	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
702 		ret = i915_gem_object_set_to_cpu_domain(obj, true);
703 		if (ret)
704 			goto err_unpin;
705 
706 		*needs_clflush = 0;
707 	}
708 
709 	if ((*needs_clflush & CLFLUSH_AFTER) == 0)
710 		obj->cache_dirty = true;
711 
712 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
713 	obj->dirty = 1;
714 	/* return with the pages pinned */
715 	return 0;
716 
717 err_unpin:
718 	i915_gem_object_unpin_pages(obj);
719 	return ret;
720 }
721 
722 /* Per-page copy function for the shmem pread fastpath.
723  * Flushes invalid cachelines before reading the target if
724  * needs_clflush is set. */
725 static int
726 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
727 		 char __user *user_data,
728 		 bool page_do_bit17_swizzling, bool needs_clflush)
729 {
730 	char *vaddr;
731 	int ret;
732 
733 	if (unlikely(page_do_bit17_swizzling))
734 		return -EINVAL;
735 
736 	vaddr = kmap_atomic(page);
737 	if (needs_clflush)
738 		drm_clflush_virt_range(vaddr + shmem_page_offset,
739 				       page_length);
740 	ret = __copy_to_user_inatomic(user_data,
741 				      vaddr + shmem_page_offset,
742 				      page_length);
743 	kunmap_atomic(vaddr);
744 
745 	return ret ? -EFAULT : 0;
746 }
747 
748 static void
749 shmem_clflush_swizzled_range(char *addr, unsigned long length,
750 			     bool swizzled)
751 {
752 	if (unlikely(swizzled)) {
753 		unsigned long start = (unsigned long) addr;
754 		unsigned long end = (unsigned long) addr + length;
755 
756 		/* For swizzling simply ensure that we always flush both
757 		 * channels. Lame, but simple and it works. Swizzled
758 		 * pwrite/pread is far from a hotpath - current userspace
759 		 * doesn't use it at all. */
760 		start = round_down(start, 128);
761 		end = round_up(end, 128);
762 
763 		drm_clflush_virt_range((void *)start, end - start);
764 	} else {
765 		drm_clflush_virt_range(addr, length);
766 	}
767 
768 }
769 
770 /* Only difference to the fast-path function is that this can handle bit17
771  * and uses non-atomic copy and kmap functions. */
772 static int
773 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
774 		 char __user *user_data,
775 		 bool page_do_bit17_swizzling, bool needs_clflush)
776 {
777 	char *vaddr;
778 	int ret;
779 
780 	vaddr = kmap(page);
781 	if (needs_clflush)
782 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
783 					     page_length,
784 					     page_do_bit17_swizzling);
785 
786 	if (page_do_bit17_swizzling)
787 		ret = __copy_to_user_swizzled(user_data,
788 					      vaddr, shmem_page_offset,
789 					      page_length);
790 	else
791 		ret = __copy_to_user(user_data,
792 				     vaddr + shmem_page_offset,
793 				     page_length);
794 	kunmap(page);
795 
796 	return ret ? -EFAULT : 0;
797 }
798 
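/*
 * Map a single aperture page write-combined and copy to or from user memory
 * with the regular, faultable copy routines; callers drop struct_mutex
 * around this. Returns the number of bytes left uncopied.
 */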
799 static inline unsigned long
800 slow_user_access(struct io_mapping *mapping,
801 		 uint64_t page_base, int page_offset,
802 		 char __user *user_data,
803 		 unsigned long length, bool pwrite)
804 {
805 	void __iomem *ioaddr;
806 	void *vaddr;
807 	uint64_t unwritten;
808 
809 	ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
810 	/* We can use the cpu mem copy function because this is X86. */
811 	vaddr = (void __force *)ioaddr + page_offset;
812 	if (pwrite)
813 		unwritten = __copy_from_user(vaddr, user_data, length);
814 	else
815 		unwritten = __copy_to_user(user_data, vaddr, length);
816 
817 	io_mapping_unmap(ioaddr);
818 	return unwritten;
819 }
820 
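/*
 * pread through the GTT aperture, used when the shmem path is unavailable
 * or faults: pin the whole object into mappable space if possible, otherwise
 * feed its pages through a single scratch GGTT page one at a time.
 */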
821 static int
822 i915_gem_gtt_pread(struct drm_device *dev,
823 		   struct drm_i915_gem_object *obj, uint64_t size,
824 		   uint64_t data_offset, uint64_t data_ptr)
825 {
826 	struct drm_i915_private *dev_priv = to_i915(dev);
827 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
828 	struct i915_vma *vma;
829 	struct drm_mm_node node;
830 	char __user *user_data;
831 	uint64_t remain;
832 	uint64_t offset;
833 	int ret;
834 
835 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
836 	if (!IS_ERR(vma)) {
837 		node.start = i915_ggtt_offset(vma);
838 		node.allocated = false;
839 		ret = i915_vma_put_fence(vma);
840 		if (ret) {
841 			i915_vma_unpin(vma);
842 			vma = ERR_PTR(ret);
843 		}
844 	}
845 	if (IS_ERR(vma)) {
846 		ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE);
847 		if (ret)
848 			goto out;
849 
850 		ret = i915_gem_object_get_pages(obj);
851 		if (ret) {
852 			remove_mappable_node(&node);
853 			goto out;
854 		}
855 
856 		i915_gem_object_pin_pages(obj);
857 	}
858 
859 	ret = i915_gem_object_set_to_gtt_domain(obj, false);
860 	if (ret)
861 		goto out_unpin;
862 
863 	user_data = u64_to_user_ptr(data_ptr);
864 	remain = size;
865 	offset = data_offset;
866 
867 	mutex_unlock(&dev->struct_mutex);
868 	if (likely(!i915.prefault_disable)) {
869 		ret = fault_in_pages_writeable(user_data, remain);
870 		if (ret) {
871 			mutex_lock(&dev->struct_mutex);
872 			goto out_unpin;
873 		}
874 	}
875 
876 	while (remain > 0) {
877 		/* Operation in this page
878 		 *
879 		 * page_base = page offset within aperture
880 		 * page_offset = offset within page
881 		 * page_length = bytes to copy for this page
882 		 */
883 		u32 page_base = node.start;
884 		unsigned page_offset = offset_in_page(offset);
885 		unsigned page_length = PAGE_SIZE - page_offset;
886 		page_length = remain < page_length ? remain : page_length;
887 		if (node.allocated) {
888 			wmb();
889 			ggtt->base.insert_page(&ggtt->base,
890 					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
891 					       node.start,
892 					       I915_CACHE_NONE, 0);
893 			wmb();
894 		} else {
895 			page_base += offset & LINUX_PAGE_MASK;
896 		}
897 		/* This is a slow read/write as it tries to read from
898 		 * and write to user memory, which may result in page
899 		 * faults, and so we cannot perform this under struct_mutex.
900 		 */
901 		if (slow_user_access(&ggtt->mappable, page_base,
902 				     page_offset, user_data,
903 				     page_length, false)) {
904 			ret = -EFAULT;
905 			break;
906 		}
907 
908 		remain -= page_length;
909 		user_data += page_length;
910 		offset += page_length;
911 	}
912 
913 	mutex_lock(&dev->struct_mutex);
914 	if (ret == 0 && (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
915 		/* The user has modified the object whilst we tried
916 		 * reading from it, and we now have no idea what domain
917 		 * the pages should be in. As we have just been touching
918 		 * them directly, flush everything back to the GTT
919 		 * domain.
920 		 */
921 		ret = i915_gem_object_set_to_gtt_domain(obj, false);
922 	}
923 
924 out_unpin:
925 	if (node.allocated) {
926 		wmb();
927 		ggtt->base.clear_range(&ggtt->base,
928 				       node.start, node.size);
929 		i915_gem_object_unpin_pages(obj);
930 		remove_mappable_node(&node);
931 	} else {
932 		i915_vma_unpin(vma);
933 	}
934 out:
935 	return ret;
936 }
937 
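/*
 * pread from the object's shmem pages: try a non-faulting kmap_atomic copy
 * per page and fall back to the sleeping slow path (dropping struct_mutex)
 * when the user buffer must be faulted in or the page is bit-17 swizzled.
 */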
938 static int
939 i915_gem_shmem_pread(struct drm_device *dev,
940 		     struct drm_i915_gem_object *obj,
941 		     struct drm_i915_gem_pread *args,
942 		     struct drm_file *file)
943 {
944 	char __user *user_data;
945 	ssize_t remain;
946 	loff_t offset;
947 	int shmem_page_offset, page_length, ret = 0;
948 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
949 	int prefaulted = 0;
950 	int needs_clflush = 0;
951 	struct sg_page_iter sg_iter;
952 
953 	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
954 	if (ret)
955 		return ret;
956 
957 	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
958 	user_data = u64_to_user_ptr(args->data_ptr);
959 	offset = args->offset;
960 	remain = args->size;
961 
962 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
963 			 offset >> PAGE_SHIFT) {
964 		struct page *page = sg_page_iter_page(&sg_iter);
965 
966 		if (remain <= 0)
967 			break;
968 
969 		/* Operation in this page
970 		 *
971 		 * shmem_page_offset = offset within page in shmem file
972 		 * page_length = bytes to copy for this page
973 		 */
974 		shmem_page_offset = offset_in_page(offset);
975 		page_length = remain;
976 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
977 			page_length = PAGE_SIZE - shmem_page_offset;
978 
979 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
980 			(page_to_phys(page) & (1 << 17)) != 0;
981 
982 		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
983 				       user_data, page_do_bit17_swizzling,
984 				       needs_clflush);
985 		if (ret == 0)
986 			goto next_page;
987 
988 		mutex_unlock(&dev->struct_mutex);
989 
990 		if (likely(!i915.prefault_disable) && !prefaulted) {
991 			ret = fault_in_pages_writeable(user_data, remain);
992 			/* Userspace is tricking us, but we've already clobbered
993 			 * its pages with the prefault and promised to write the
994 			 * data up to the first fault. Hence ignore any errors
995 			 * and just continue. */
996 			(void)ret;
997 			prefaulted = 1;
998 		}
999 
1000 		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
1001 				       user_data, page_do_bit17_swizzling,
1002 				       needs_clflush);
1003 
1004 		mutex_lock(&dev->struct_mutex);
1005 
1006 		if (ret)
1007 			goto out;
1008 
1009 next_page:
1010 		remain -= page_length;
1011 		user_data += page_length;
1012 		offset += page_length;
1013 	}
1014 
1015 out:
1016 	i915_gem_obj_finish_shmem_access(obj);
1017 
1018 	return ret;
1019 }
1020 
1021 /**
1022  * Reads data from the object referenced by handle.
1023  * @dev: drm device pointer
1024  * @data: ioctl data blob
1025  * @file: drm file pointer
1026  *
1027  * On error, the contents of *data are undefined.
1028  */
1029 int
1030 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
1031 		     struct drm_file *file)
1032 {
1033 	struct drm_i915_gem_pread *args = data;
1034 	struct drm_i915_gem_object *obj;
1035 	int ret = 0;
1036 
1037 	if (args->size == 0)
1038 		return 0;
1039 
1040 #if 0
1041 	if (!access_ok(VERIFY_WRITE,
1042 		       u64_to_user_ptr(args->data_ptr),
1043 		       args->size))
1044 		return -EFAULT;
1045 #endif
1046 
1047 	obj = i915_gem_object_lookup(file, args->handle);
1048 	if (!obj)
1049 		return -ENOENT;
1050 
1051 	/* Bounds check source.  */
1052 	if (args->offset > obj->base.size ||
1053 	    args->size > obj->base.size - args->offset) {
1054 		ret = -EINVAL;
1055 		goto err;
1056 	}
1057 
1058 	trace_i915_gem_object_pread(obj, args->offset, args->size);
1059 
1060 	ret = __unsafe_wait_rendering(obj, to_rps_client(file), true);
1061 	if (ret)
1062 		goto err;
1063 
1064 	ret = i915_mutex_lock_interruptible(dev);
1065 	if (ret)
1066 		goto err;
1067 
1068 	ret = i915_gem_shmem_pread(dev, obj, args, file);
1069 
1070 	/* pread for non shmem backed objects */
1071 	if (ret == -EFAULT || ret == -ENODEV) {
1072 		intel_runtime_pm_get(to_i915(dev));
1073 		ret = i915_gem_gtt_pread(dev, obj, args->size,
1074 					args->offset, args->data_ptr);
1075 		intel_runtime_pm_put(to_i915(dev));
1076 	}
1077 
1078 	i915_gem_object_put(obj);
1079 	mutex_unlock(&dev->struct_mutex);
1080 
1081 	return ret;
1082 
1083 err:
1084 	i915_gem_object_put_unlocked(obj);
1085 	return ret;
1086 }
1087 
1088 /* This is the fast write path which cannot handle
1089  * page faults in the source data
1090  */
1091 
1092 static inline int
1093 fast_user_write(struct io_mapping *mapping,
1094 		loff_t page_base, int page_offset,
1095 		char __user *user_data,
1096 		int length)
1097 {
1098 	void __iomem *vaddr_atomic;
1099 	void *vaddr;
1100 	unsigned long unwritten;
1101 
1102 	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
1103 	/* We can use the cpu mem copy function because this is X86. */
1104 	vaddr = (void __force *)vaddr_atomic + page_offset;
1105 	unwritten = __copy_from_user_inatomic_nocache(vaddr,
1106 						      user_data, length);
1107 	io_mapping_unmap_atomic(vaddr_atomic);
1108 	return unwritten;
1109 }
1110 
1111 /**
1112  * This is the fast pwrite path, where we copy the data directly from the
1113  * user into the GTT, uncached.
1114  * @i915: i915 device private data
1115  * @obj: i915 gem object
1116  * @args: pwrite arguments structure
1117  * @file: drm file pointer
1118  */
1119 static int
1120 i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
1121 			 struct drm_i915_gem_object *obj,
1122 			 struct drm_i915_gem_pwrite *args,
1123 			 struct drm_file *file)
1124 {
1125 	struct i915_ggtt *ggtt = &i915->ggtt;
1126 	struct drm_device *dev = obj->base.dev;
1127 	struct i915_vma *vma;
1128 	struct drm_mm_node node;
1129 	uint64_t remain, offset;
1130 	char __user *user_data;
1131 	int ret;
1132 	bool hit_slow_path = false;
1133 
1134 	if (i915_gem_object_is_tiled(obj))
1135 		return -EFAULT;
1136 
1137 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1138 				       PIN_MAPPABLE | PIN_NONBLOCK);
1139 	if (!IS_ERR(vma)) {
1140 		node.start = i915_ggtt_offset(vma);
1141 		node.allocated = false;
1142 		ret = i915_vma_put_fence(vma);
1143 		if (ret) {
1144 			i915_vma_unpin(vma);
1145 			vma = ERR_PTR(ret);
1146 		}
1147 	}
1148 	if (IS_ERR(vma)) {
1149 		ret = insert_mappable_node(i915, &node, PAGE_SIZE);
1150 		if (ret)
1151 			goto out;
1152 
1153 		ret = i915_gem_object_get_pages(obj);
1154 		if (ret) {
1155 			remove_mappable_node(&node);
1156 			goto out;
1157 		}
1158 
1159 		i915_gem_object_pin_pages(obj);
1160 	}
1161 
1162 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
1163 	if (ret)
1164 		goto out_unpin;
1165 
1166 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1167 	obj->dirty = true;
1168 
1169 	user_data = u64_to_user_ptr(args->data_ptr);
1170 	offset = args->offset;
1171 	remain = args->size;
1172 	while (remain) {
1173 		/* Operation in this page
1174 		 *
1175 		 * page_base = page offset within aperture
1176 		 * page_offset = offset within page
1177 		 * page_length = bytes to copy for this page
1178 		 */
1179 		u32 page_base = node.start;
1180 		unsigned page_offset = offset_in_page(offset);
1181 		unsigned page_length = PAGE_SIZE - page_offset;
1182 		page_length = remain < page_length ? remain : page_length;
1183 		if (node.allocated) {
1184 			wmb(); /* flush the write before we modify the GGTT */
1185 			ggtt->base.insert_page(&ggtt->base,
1186 					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1187 					       node.start, I915_CACHE_NONE, 0);
1188 			wmb(); /* flush modifications to the GGTT (insert_page) */
1189 		} else {
1190 			page_base += offset & LINUX_PAGE_MASK;
1191 		}
1192 		/* If we get a fault while copying data, then (presumably) our
1193 		 * source page isn't available.  Return the error and we'll
1194 		 * retry in the slow path.
1195 		 * If the object is non-shmem backed, we retry again with the
1196 		 * path that handles page fault.
1197 		 */
1198 		if (fast_user_write(&ggtt->mappable, page_base,
1199 				    page_offset, user_data, page_length)) {
1200 			hit_slow_path = true;
1201 			mutex_unlock(&dev->struct_mutex);
1202 			if (slow_user_access(&ggtt->mappable,
1203 					     page_base,
1204 					     page_offset, user_data,
1205 					     page_length, true)) {
1206 				ret = -EFAULT;
1207 				mutex_lock(&dev->struct_mutex);
1208 				goto out_flush;
1209 			}
1210 
1211 			mutex_lock(&dev->struct_mutex);
1212 		}
1213 
1214 		remain -= page_length;
1215 		user_data += page_length;
1216 		offset += page_length;
1217 	}
1218 
1219 out_flush:
1220 	if (hit_slow_path) {
1221 		if (ret == 0 &&
1222 		    (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
1223 			/* The user has modified the object whilst we tried
1224 			 * reading from it, and we now have no idea what domain
1225 			 * the pages should be in. As we have just been touching
1226 			 * them directly, flush everything back to the GTT
1227 			 * domain.
1228 			 */
1229 			ret = i915_gem_object_set_to_gtt_domain(obj, false);
1230 		}
1231 	}
1232 
1233 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
1234 out_unpin:
1235 	if (node.allocated) {
1236 		wmb();
1237 		ggtt->base.clear_range(&ggtt->base,
1238 				       node.start, node.size);
1239 		i915_gem_object_unpin_pages(obj);
1240 		remove_mappable_node(&node);
1241 	} else {
1242 		i915_vma_unpin(vma);
1243 	}
1244 out:
1245 	return ret;
1246 }
1247 
1248 /* Per-page copy function for the shmem pwrite fastpath.
1249  * Flushes invalid cachelines before writing to the target if
1250  * needs_clflush_before is set and flushes out any written cachelines after
1251  * writing if needs_clflush is set. */
1252 static int
1253 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
1254 		  char __user *user_data,
1255 		  bool page_do_bit17_swizzling,
1256 		  bool needs_clflush_before,
1257 		  bool needs_clflush_after)
1258 {
1259 	char *vaddr;
1260 	int ret;
1261 
1262 	if (unlikely(page_do_bit17_swizzling))
1263 		return -EINVAL;
1264 
1265 	vaddr = kmap_atomic(page);
1266 	if (needs_clflush_before)
1267 		drm_clflush_virt_range(vaddr + shmem_page_offset,
1268 				       page_length);
1269 	ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
1270 					user_data, page_length);
1271 	if (needs_clflush_after)
1272 		drm_clflush_virt_range(vaddr + shmem_page_offset,
1273 				       page_length);
1274 	kunmap_atomic(vaddr);
1275 
1276 	return ret ? -EFAULT : 0;
1277 }
1278 
1279 /* Only difference to the fast-path function is that this can handle bit17
1280  * and uses non-atomic copy and kmap functions. */
1281 static int
1282 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
1283 		  char __user *user_data,
1284 		  bool page_do_bit17_swizzling,
1285 		  bool needs_clflush_before,
1286 		  bool needs_clflush_after)
1287 {
1288 	char *vaddr;
1289 	int ret;
1290 
1291 	vaddr = kmap(page);
1292 	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
1293 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
1294 					     page_length,
1295 					     page_do_bit17_swizzling);
1296 	if (page_do_bit17_swizzling)
1297 		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
1298 						user_data,
1299 						page_length);
1300 	else
1301 		ret = __copy_from_user(vaddr + shmem_page_offset,
1302 				       user_data,
1303 				       page_length);
1304 	if (needs_clflush_after)
1305 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
1306 					     page_length,
1307 					     page_do_bit17_swizzling);
1308 	kunmap(page);
1309 
1310 	return ret ? -EFAULT : 0;
1311 }
1312 
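/*
 * pwrite into the object's shmem pages; same fast/slow structure as the
 * shmem pread, with additional clflushes before partial-cacheline writes
 * and after the copy when CLFLUSH_AFTER was requested.
 */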
1313 static int
1314 i915_gem_shmem_pwrite(struct drm_device *dev,
1315 		      struct drm_i915_gem_object *obj,
1316 		      struct drm_i915_gem_pwrite *args,
1317 		      struct drm_file *file)
1318 {
1319 	ssize_t remain;
1320 	loff_t offset;
1321 	char __user *user_data;
1322 	int shmem_page_offset, page_length, ret = 0;
1323 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
1324 	int hit_slowpath = 0;
1325 	unsigned int needs_clflush;
1326 	struct sg_page_iter sg_iter;
1327 
1328 	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
1329 	if (ret)
1330 		return ret;
1331 
1332 	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
1333 	user_data = u64_to_user_ptr(args->data_ptr);
1334 	offset = args->offset;
1335 	remain = args->size;
1336 
1337 	VM_OBJECT_LOCK(obj->base.filp);
1338 	vm_object_pip_add(obj->base.filp, 1);
1339 
1340 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
1341 			 offset >> PAGE_SHIFT) {
1342 		struct page *page = sg_page_iter_page(&sg_iter);
1343 		int partial_cacheline_write;
1344 
1345 		if (remain <= 0)
1346 			break;
1347 
1348 		/* Operation in this page
1349 		 *
1350 		 * shmem_page_offset = offset within page in shmem file
1351 		 * page_length = bytes to copy for this page
1352 		 */
1353 		shmem_page_offset = offset_in_page(offset);
1354 
1355 		page_length = remain;
1356 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
1357 			page_length = PAGE_SIZE - shmem_page_offset;
1358 
1359 		/* If we don't overwrite a cacheline completely we need to be
1360 		 * careful to have up-to-date data by first clflushing. Don't
1361 		 * overcomplicate things and flush the entire page. */
1362 		partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE &&
1363 			((shmem_page_offset | page_length)
1364 				& (boot_cpu_data.x86_clflush_size - 1));
1365 
1366 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
1367 			(page_to_phys(page) & (1 << 17)) != 0;
1368 
1369 		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1370 					user_data, page_do_bit17_swizzling,
1371 					partial_cacheline_write,
1372 					needs_clflush & CLFLUSH_AFTER);
1373 		if (ret == 0)
1374 			goto next_page;
1375 
1376 		hit_slowpath = 1;
1377 		mutex_unlock(&dev->struct_mutex);
1378 		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1379 					user_data, page_do_bit17_swizzling,
1380 					partial_cacheline_write,
1381 					needs_clflush & CLFLUSH_AFTER);
1382 
1383 		mutex_lock(&dev->struct_mutex);
1384 
1385 		if (ret)
1386 			goto out;
1387 
1388 next_page:
1389 		remain -= page_length;
1390 		user_data += page_length;
1391 		offset += page_length;
1392 	}
1393 
1394 out:
1395 	vm_object_pip_wakeup(obj->base.filp);
1396 	VM_OBJECT_UNLOCK(obj->base.filp);
1397 	i915_gem_obj_finish_shmem_access(obj);
1398 
1399 	if (hit_slowpath) {
1400 		/*
1401 		 * Fixup: Flush cpu caches in case we didn't flush the dirty
1402 		 * cachelines in-line while writing and the object moved
1403 		 * out of the cpu write domain while we've dropped the lock.
1404 		 */
1405 		if (!(needs_clflush & CLFLUSH_AFTER) &&
1406 		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1407 			if (i915_gem_clflush_object(obj, obj->pin_display))
1408 				needs_clflush |= CLFLUSH_AFTER;
1409 		}
1410 	}
1411 
1412 	if (needs_clflush & CLFLUSH_AFTER)
1413 		i915_gem_chipset_flush(to_i915(dev));
1414 
1415 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
1416 	return ret;
1417 }
1418 
1419 /**
1420  * Writes data to the object referenced by handle.
1421  * @dev: drm device
1422  * @data: ioctl data blob
1423  * @file: drm file
1424  *
1425  * On error, the contents of the buffer that were to be modified are undefined.
1426  */
1427 int
1428 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1429 		      struct drm_file *file)
1430 {
1431 	struct drm_i915_private *dev_priv = to_i915(dev);
1432 	struct drm_i915_gem_pwrite *args = data;
1433 	struct drm_i915_gem_object *obj;
1434 	int ret;
1435 
1436 	if (args->size == 0)
1437 		return 0;
1438 
1439 #if 0
1440 	if (!access_ok(VERIFY_READ,
1441 		       u64_to_user_ptr(args->data_ptr),
1442 		       args->size))
1443 		return -EFAULT;
1444 #endif
1445 
1446 	if (likely(!i915.prefault_disable)) {
1447 		ret = fault_in_pages_readable(u64_to_user_ptr(args->data_ptr),
1448 						   args->size);
1449 		if (ret)
1450 			return -EFAULT;
1451 	}
1452 
1453 	obj = i915_gem_object_lookup(file, args->handle);
1454 	if (!obj)
1455 		return -ENOENT;
1456 
1457 	/* Bounds check destination. */
1458 	if (args->offset > obj->base.size ||
1459 	    args->size > obj->base.size - args->offset) {
1460 		ret = -EINVAL;
1461 		goto err;
1462 	}
1463 
1464 	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1465 
1466 	ret = __unsafe_wait_rendering(obj, to_rps_client(file), false);
1467 	if (ret)
1468 		goto err;
1469 
1470 	intel_runtime_pm_get(dev_priv);
1471 
1472 	ret = i915_mutex_lock_interruptible(dev);
1473 	if (ret)
1474 		goto err_rpm;
1475 
1476 	ret = -EFAULT;
1477 	/* We can only do the GTT pwrite on untiled buffers, as otherwise
1478 	 * it would end up going through the fenced access, and we'll get
1479 	 * different detiling behavior between reading and writing.
1480 	 * pread/pwrite currently are reading and writing from the CPU
1481 	 * perspective, requiring manual detiling by the client.
1482 	 */
1483 	if (!i915_gem_object_has_struct_page(obj) ||
1484 	    cpu_write_needs_clflush(obj)) {
1485 		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
1486 		/* Note that the gtt paths might fail with non-page-backed user
1487 		 * pointers (e.g. gtt mappings when moving data between
1488 		 * textures). Fall back to the shmem path in that case. */
1489 	}
1490 
1491 	if (ret == -EFAULT || ret == -ENOSPC) {
1492 		if (obj->phys_handle)
1493 			ret = i915_gem_phys_pwrite(obj, args, file);
1494 		else
1495 			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
1496 	}
1497 
1498 	i915_gem_object_put(obj);
1499 	mutex_unlock(&dev->struct_mutex);
1500 	intel_runtime_pm_put(dev_priv);
1501 
1502 	return ret;
1503 
1504 err_rpm:
1505 	intel_runtime_pm_put(dev_priv);
1506 err:
1507 	i915_gem_object_put_unlocked(obj);
1508 	return ret;
1509 }
1510 
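/*
 * Pick the frontbuffer-tracking origin for a write: GTT-domain writes use
 * the object's frontbuffer_ggtt_origin, everything else is reported as a
 * CPU write.
 */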
1511 static inline enum fb_op_origin
1512 write_origin(struct drm_i915_gem_object *obj, unsigned domain)
1513 {
1514 	return (domain == I915_GEM_DOMAIN_GTT ?
1515 		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
1516 }
1517 
1518 /**
1519  * Called when user space prepares to use an object with the CPU, either
1520  * through the mmap ioctl's mapping or a GTT mapping.
1521  * @dev: drm device
1522  * @data: ioctl data blob
1523  * @file: drm file
1524  */
1525 int
1526 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1527 			  struct drm_file *file)
1528 {
1529 	struct drm_i915_gem_set_domain *args = data;
1530 	struct drm_i915_gem_object *obj;
1531 	uint32_t read_domains = args->read_domains;
1532 	uint32_t write_domain = args->write_domain;
1533 	int ret;
1534 
1535 	/* Only handle setting domains to types used by the CPU. */
1536 	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
1537 		return -EINVAL;
1538 
1539 	/* Having something in the write domain implies it's in the read
1540 	 * domain, and only that read domain.  Enforce that in the request.
1541 	 */
1542 	if (write_domain != 0 && read_domains != write_domain)
1543 		return -EINVAL;
1544 
1545 	obj = i915_gem_object_lookup(file, args->handle);
1546 	if (!obj)
1547 		return -ENOENT;
1548 
1549 	/* Try to flush the object off the GPU without holding the lock.
1550 	 * We will repeat the flush holding the lock in the normal manner
1551 	 * to catch cases where we are gazumped.
1552 	 */
1553 	ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain);
1554 	if (ret)
1555 		goto err;
1556 
1557 	ret = i915_mutex_lock_interruptible(dev);
1558 	if (ret)
1559 		goto err;
1560 
1561 	if (read_domains & I915_GEM_DOMAIN_GTT)
1562 		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1563 	else
1564 		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1565 
1566 	if (write_domain != 0)
1567 		intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
1568 
1569 	i915_gem_object_put(obj);
1570 	mutex_unlock(&dev->struct_mutex);
1571 	return ret;
1572 
1573 err:
1574 	i915_gem_object_put_unlocked(obj);
1575 	return ret;
1576 }
1577 
1578 /**
1579  * Called when user space has done writes to this buffer
1580  * @dev: drm device
1581  * @data: ioctl data blob
1582  * @file: drm file
1583  */
1584 int
1585 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1586 			 struct drm_file *file)
1587 {
1588 	struct drm_i915_gem_sw_finish *args = data;
1589 	struct drm_i915_gem_object *obj;
1590 	int err = 0;
1591 
1592 	obj = i915_gem_object_lookup(file, args->handle);
1593 	if (!obj)
1594 		return -ENOENT;
1595 
1596 	/* Pinned buffers may be scanout, so flush the cache */
1597 	if (READ_ONCE(obj->pin_display)) {
1598 		err = i915_mutex_lock_interruptible(dev);
1599 		if (!err) {
1600 			i915_gem_object_flush_cpu_write_domain(obj);
1601 			mutex_unlock(&dev->struct_mutex);
1602 		}
1603 	}
1604 
1605 	i915_gem_object_put_unlocked(obj);
1606 	return err;
1607 }
1608 
1609 /**
1610  * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1611  *			 it is mapped to.
1612  * @dev: drm device
1613  * @data: ioctl data blob
1614  * @file: drm file
1615  *
1616  * While the mapping holds a reference on the contents of the object, it doesn't
1617  * imply a ref on the object itself.
1618  *
1619  * IMPORTANT:
1620  *
1621  * DRM driver writers who look at this function as an example of how to do GEM
1622  * mmap support, please don't implement mmap support like this. The modern way
1623  * to implement DRM mmap support is with an mmap offset ioctl (like
1624  * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1625  * That way debug tooling like valgrind will understand what's going on; hiding
1626  * the mmap call in a driver-private ioctl will break that. The i915 driver only
1627  * does cpu mmaps this way because we didn't know better.
1628  */
1629 int
1630 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1631 		    struct drm_file *file)
1632 {
1633 	struct drm_i915_gem_mmap *args = data;
1634 	struct drm_i915_gem_object *obj;
1635 	unsigned long addr;
1636 
1637 	struct proc *p = curproc;
1638 	vm_map_t map = &p->p_vmspace->vm_map;
1639 	vm_size_t size;
1640 	int error = 0, rv;
1641 
1642 	if (args->flags & ~(I915_MMAP_WC))
1643 		return -EINVAL;
1644 
1645 #if 0
1646 	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1647 		return -ENODEV;
1648 #endif
1649 
1650 	obj = i915_gem_object_lookup(file, args->handle);
1651 	if (!obj)
1652 		return -ENOENT;
1653 
1654 	/* prime objects have no backing filp to GEM mmap
1655 	 * pages from.
1656 	 */
1657 	if (!obj->base.filp) {
1658 		i915_gem_object_put_unlocked(obj);
1659 		return -EINVAL;
1660 	}
1661 
1662 	if (args->size == 0)
1663 		goto out;
1664 
1665 	size = round_page(args->size);
1666 	if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
1667 		error = -ENOMEM;
1668 		goto out;
1669 	}
1670 
1671 	/*
1672 	 * Call hint to ensure that NULL is not returned as a valid address
1673 	 * and to reduce vm_map traversals. XXX causes instability, use a
1674 	 * fixed low address as the start point instead to avoid the NULL
1675 	 * return issue.
1676 	 */
1677 	addr = PAGE_SIZE;
1678 
1679 	/*
1680 	 * Use 256KB alignment.  It is unclear why this matters for a
1681 	 * virtual address, but it appears to fix a number of application/X
1682 	 * crashes, and kms console switching is much faster.
1683 	 */
1684 	vm_object_hold(obj->base.filp);
1685 	vm_object_reference_locked(obj->base.filp);
1686 	vm_object_drop(obj->base.filp);
1687 
1688 	/* Something goes wrong here: fails to mmap 4096 */
1689 	rv = vm_map_find(map, obj->base.filp, NULL,
1690 			 args->offset, &addr, args->size,
1691 			 256 * 1024, /* align */
1692 			 TRUE, /* fitit */
1693 			 VM_MAPTYPE_NORMAL, VM_SUBSYS_DRM_GEM,
1694 			 VM_PROT_READ | VM_PROT_WRITE, /* prot */
1695 			 VM_PROT_READ | VM_PROT_WRITE, /* max */
1696 			 MAP_SHARED /* cow */);
1697 	if (rv != KERN_SUCCESS) {
1698 		vm_object_deallocate(obj->base.filp);
1699 		error = -vm_mmap_to_errno(rv);
1700 	} else {
1701 		args->addr_ptr = (uint64_t)addr;
1702 	}
1703 
1704 	if (args->flags & I915_MMAP_WC) {	/* I915_PARAM_MMAP_VERSION */
1705 #if 0
1706 		struct mm_struct *mm = current->mm;
1707 		struct vm_area_struct *vma;
1708 
1709 		if (down_write_killable(&mm->mmap_sem)) {
1710 			i915_gem_object_put_unlocked(obj);
1711 			return -EINTR;
1712 		}
1713 		vma = find_vma(mm, addr);
1714 		if (vma)
1715 			vma->vm_page_prot =
1716 				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1717 		else
1718 			addr = -ENOMEM;
1719 		up_write(&mm->mmap_sem);
1720 #endif
1721 
1722 		/* This may race, but that's ok, it only gets set */
1723 		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
1724 	}
1725 
1726 out:
1727 	i915_gem_object_put_unlocked(obj);
1728 	if (error != 0)
1729 		return error;
1730 
1731 	args->addr_ptr = (uint64_t) addr;
1732 
1733 	return 0;
1734 }
1735 
1736 #if 0
1737 static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
1738 {
1739 	u64 size;
1740 
1741 	size = i915_gem_object_get_stride(obj);
1742 	size *= i915_gem_object_get_tiling(obj) == I915_TILING_Y ? 32 : 8;
1743 
1744 	return size >> PAGE_SHIFT;
1745 }
1746 #endif
1747 
1748 /**
1749  * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
1750  *
1751  * A history of the GTT mmap interface:
1752  *
1753  * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
1754  *     be aligned and suitable for fencing, and still fit into the available
1755  *     mappable space left by the pinned display objects. A classic problem
1756  *     we called the page-fault-of-doom where we would ping-pong between
1757  *     two objects that could not fit inside the GTT and so the memcpy
1758  *     would page one object in at the expense of the other between every
1759  *     single byte.
1760  *
1761  * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
1762  *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
1763  *     object is too large for the available space (or simply too large
1764  *     for the mappable aperture!), a view is created instead and faulted
1765  *     into userspace. (This view is aligned and sized appropriately for
1766  *     fenced access.)
1767  *
1768  * Restrictions:
1769  *
1770  *  * snoopable objects cannot be accessed via the GTT. It can cause machine
1771  *    hangs on some architectures, corruption on others. An attempt to service
1772  *    a GTT page fault from a snoopable object will generate a SIGBUS.
1773  *
1774  *  * the object must be able to fit into RAM (physical memory, though not
1775  *    limited to the mappable aperture).
1776  *
1777  *
1778  * Caveats:
1779  *
1780  *  * a new GTT page fault will synchronize rendering from the GPU and flush
1781  *    all data to system memory. Subsequent access will not be synchronized.
1782  *
1783  *  * all mappings are revoked on runtime device suspend.
1784  *
1785  *  * there are only 8, 16 or 32 fence registers to share between all users
1786  *    (older machines require a fence register for display and blitter access
1787  *    as well). Contention of the fence registers will cause the previous users
1788  *    to be unmapped and any new access will generate new page faults.
1789  *
1790  *  * running out of memory while servicing a fault may generate a SIGBUS,
1791  *    rather than the expected SIGSEGV.
1792  */
1793 int i915_gem_mmap_gtt_version(void)
1794 {
1795 	return 1;
1796 }
1797 
1798 /**
1799  * i915_gem_fault - fault a page into the GTT
1800  *
1801  * vm_obj is locked on entry and expected to be locked on return.
1802  *
1803  * The vm_pager has placemarked the object with an anonymous memory page
1804  * which we must replace atomically to avoid races against concurrent faults
1805  * on the same page.  XXX we currently are unable to do this atomically.
1806  *
1807  * If we are to return an error we should not touch the anonymous page,
1808  * the caller will deallocate it.
1809  *
1810  * XXX Most GEM calls appear to be interruptible, but we can't hard loop
1811  * in that case.  Release all resources and wait 1 tick before retrying.
1812  * This is a huge problem which needs to be fixed by getting rid of most
1813  * of the interruptibility.  The Linux code does not retry but does appear
1814  * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level
1815  * to be able to retry.
1816  *
1817  * --
1818  * @vma: VMA in question
1819  * @vmf: fault info
1820  *
1821  * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1822  * from userspace.  The fault handler takes care of binding the object to
1823  * the GTT (if needed), allocating and programming a fence register (again,
1824  * only if needed based on whether the old reg is still valid or the object
1825  * is tiled) and inserting a new PTE into the faulting process.
1826  *
1827  * Note that the faulting process may involve evicting existing objects
1828  * from the GTT and/or fence registers to make room.  So performance may
1829  * suffer if the GTT working set is large or there are few fence registers
1830  * left.
1831  *
1832  * The current feature set supported by i915_gem_fault() and thus GTT mmaps
1833  * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
1834  * vm_obj is locked on entry and expected to be locked on return.  The VM
1835  * pager has placed an anonymous memory page at (obj,offset) which we have
1836  * to replace.
1837  */
1838 int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres)
1839 {
1840 #define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
1841 	struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle);
1842 	struct drm_device *dev = obj->base.dev;
1843 	struct drm_i915_private *dev_priv = to_i915(dev);
1844 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
1845 	bool write = !!(prot & VM_PROT_WRITE);
1846 	struct i915_vma *vma;
1847 	unsigned long page_offset;
1848 	vm_page_t m;
1849 	unsigned int flags;
1850 	int ret;
1851 
1852 	/* We don't use vmf->pgoff since that has the fake offset */
1853 	page_offset = (unsigned long)offset;
1854 
1855 	/*
1856 	 * vm_fault() has supplied us with a busied page placeholding
1857 	 * the operation.  This presents a lock order reversal issue
1858 	 * against i915_gem_release_mmap() for our device mutex.
1859 	 *
1860 	 * Deal with the problem by getting rid of the placeholder now,
1861 	 * and then dealing with the potential for a new placeholder when
1862 	 * we try to insert later.
1863 	 */
1864 	if (*mres != NULL) {
1865 		m = *mres;
1866 		*mres = NULL;
1867 		if ((m->busy_count & PBUSY_LOCKED) == 0)
1868 			kprintf("i915_gem_fault: Page was not busy\n");
1869 		else
1870 			vm_page_remove(m);
1871 		vm_page_free(m);
1872 	}
1873 
1874 	m = NULL;
1875 
1876 retry:
1877 	trace_i915_gem_object_fault(obj, page_offset, true, write);
1878 
1879 	/* Try to flush the object off the GPU first without holding the lock.
1880 	 * Upon acquiring the lock, we will perform our sanity checks and then
1881 	 * repeat the flush holding the lock in the normal manner to catch cases
1882 	 * where we are gazumped.
1883 	 */
1884 	ret = __unsafe_wait_rendering(obj, NULL, !write);
1885 	if (ret)
1886 		goto err;
1887 
1888 	intel_runtime_pm_get(dev_priv);
1889 
1890 	ret = i915_mutex_lock_interruptible(dev);
1891 	if (ret)
1892 		goto err_rpm;
1893 
1894 	/* Access to snoopable pages through the GTT is incoherent. */
1895 	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
1896 		ret = -EFAULT;
1897 		goto err_unlock;
1898 	}
1899 
1900 	/* If the object is smaller than a couple of partial vma, it is
1901 	 * not worth only creating a single partial vma - we may as well
1902 	 * clear enough space for the full object.
1903 	 */
1904 	flags = PIN_MAPPABLE;
1905 	if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT)
1906 		flags |= PIN_NONBLOCK | PIN_NONFAULT;
1907 
1908 	/* Now pin it into the GTT as needed */
1909 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
1910 #if 0
1911 	if (IS_ERR(vma)) {
1912 		struct i915_ggtt_view view;
1913 		unsigned int chunk_size;
1914 
1915 		/* Use a partial view if it is bigger than available space */
1916 		chunk_size = MIN_CHUNK_PAGES;
1917 		if (i915_gem_object_is_tiled(obj))
1918 			chunk_size = max(chunk_size, tile_row_pages(obj));
1919 
1920 		memset(&view, 0, sizeof(view));
1921 		view.type = I915_GGTT_VIEW_PARTIAL;
1922 		view.params.partial.offset = rounddown(page_offset, chunk_size);
1923 		view.params.partial.size =
1924 			min_t(unsigned int, chunk_size,
1925 			      vma_pages(area) - view.params.partial.offset);
1926 
1927 		/* If the partial covers the entire object, just create a
1928 		 * normal VMA.
1929 		 */
1930 		if (chunk_size >= obj->base.size >> PAGE_SHIFT)
1931 			view.type = I915_GGTT_VIEW_NORMAL;
1932 
1933 		/* Userspace is now writing through an untracked VMA, abandon
1934 		 * all hope that the hardware is able to track future writes.
1935 		 */
1936 		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
1937 
1938 		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
1939 	}
1940 #endif
1941 	if (IS_ERR(vma)) {
1942 		ret = PTR_ERR(vma);
1943 		goto err_unlock;
1944 	}
1945 
1946 	ret = i915_gem_object_set_to_gtt_domain(obj, write);
1947 	if (ret)
1948 		goto err_unpin;
1949 
1950 	ret = i915_vma_get_fence(vma);
1951 	if (ret)
1952 		goto err_unpin;
1953 
1954 	/*
1955 	 * START FREEBSD MAGIC
1956 	 *
1957 	 * Add a pip count to avoid destruction and certain other
1958 	 * complex operations (such as collapses?) while unlocked.
1959 	 */
1960 	vm_object_pip_add(vm_obj, 1);
1961 
1962 	ret = 0;
1963 	m = NULL;
1964 
1965 	/*
1966 	 * Since the object lock was dropped, another thread might have
1967 	 * faulted on the same GTT address and instantiated the mapping.
1968 	 * Recheck.
1969 	 */
1970 	m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
1971 	if (m != NULL) {
1972 		/*
1973 		 * Try to busy the page, retry on failure (non-zero ret).
1974 		 */
1975 		if (vm_page_busy_try(m, false)) {
1976 			kprintf("i915_gem_fault: BUSY\n");
1977 			ret = -EINTR;
1978 			goto err_unpin;	/* release the vma pin taken above before retrying */
1979 		}
1980 		goto have_page;
1981 	}
1982 	/* END FREEBSD MAGIC */
1983 
1984 	obj->fault_mappable = true;
1985 
1986 	/* Finally, remap it using the new GTT offset */
1987 	m = vm_phys_fictitious_to_vm_page(ggtt->mappable_base +
1988 			i915_ggtt_offset(vma) + offset);
1989 	if (m == NULL) {
1990 		ret = -EFAULT;
1991 		goto err_unpin;
1992 	}
1993 	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m));
1994 	KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));
1995 
1996 	/*
1997 	 * Try to busy the page.  Fails on non-zero return.
1998 	 */
1999 	if (vm_page_busy_try(m, false)) {
2000 		kprintf("i915_gem_fault: BUSY(2)\n");
2001 		ret = -EINTR;
2002 		goto err_unpin;
2003 	}
2004 	m->valid = VM_PAGE_BITS_ALL;
2005 
2006 #if 1
2007 	/*
2008 	 * This should always work since we already checked via a lookup
2009 	 * above.
2010 	 */
2011 	if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset)) == FALSE) {
2012 		kprintf("i915_gem_fault: obj %p idx %jd already has a page\n",
2013 			vm_obj,
2014 			OFF_TO_IDX(offset));
2015 		vm_page_wakeup(m);
2016 		ret = -EINTR;
2017 		goto err_unpin;
2018 	}
2019 #else
2020 	/* NOT COMPILED ATM */
2021 	if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
2022 		if (!obj->fault_mappable) {
2023 			unsigned long size =
2024 				min_t(unsigned long,
2025 				      area->vm_end - area->vm_start,
2026 				      obj->base.size) >> PAGE_SHIFT;
2027 			unsigned long base = area->vm_start;
2028 			int i;
2029 
2030 			for (i = 0; i < size; i++) {
2031 				ret = vm_insert_pfn(area,
2032 						    base + i * PAGE_SIZE,
2033 						    pfn + i);
2034 				if (ret)
2035 					break;
2036 			}
2037 
2038 		} else
2039 			ret = vm_insert_pfn(area,
2040 					    (unsigned long)vmf->virtual_address,
2041 					    pfn + page_offset);
2042 	} else {
2043 		/* Overriding existing pages in partial view does not cause
2044 		 * us any trouble as TLBs are still valid because the fault
2045 		 * is due to userspace losing part of the mapping or never
2046 		 * having accessed it before (at this partial's range).
2047 		 */
2048 		const struct i915_ggtt_view *view = &vma->ggtt_view;
2049 		unsigned long base = area->vm_start +
2050 			(view->params.partial.offset << PAGE_SHIFT);
2051 		unsigned int i;
2052 
2053 		for (i = 0; i < view->params.partial.size; i++) {
2054 			ret = vm_insert_pfn(area,
2055 					    base + i * PAGE_SIZE,
2056 					    pfn + i);
2057 			if (ret)
2058 				break;
2059 		}
2060 	}
2061 
2062 	obj->fault_mappable = true;
2063 #endif
2064 
2065 have_page:
2066 	*mres = m;
2067 
2068 	__i915_vma_unpin(vma);
2069 	mutex_unlock(&dev->struct_mutex);
2070 	ret = VM_PAGER_OK;
2071 	goto done;
2072 
2073 	/*
2074 	 * ALTERNATIVE ERROR RETURN.
2075 	 *
2076 	 * OBJECT EXPECTED TO BE LOCKED.
2077 	 */
2078 err_unpin:
2079 	__i915_vma_unpin(vma);
2080 err_unlock:
2081 	mutex_unlock(&dev->struct_mutex);
2082 err_rpm:
2083 	intel_runtime_pm_put(dev_priv);
2084 err:
2085 	switch (ret) {
2086 	case -EIO:
2087 		/*
2088 		 * We eat errors when the gpu is terminally wedged to avoid
2089 		 * userspace unduly crashing (gl has no provisions for mmaps to
2090 		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
2091 		 * and so needs to be reported.
2092 		 */
2093 		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
2094 //			ret = VM_FAULT_SIGBUS;
2095 			break;
2096 		}		/* fall through */
2097 	case -EAGAIN:
2098 		/*
2099 		 * EAGAIN means the gpu is hung and we'll wait for the error
2100 		 * handler to reset everything when re-faulting in
2101 		 * i915_mutex_lock_interruptible.
2102 		 */
2103 	case -ERESTARTSYS:
2104 	case -EINTR:
2105 		VM_OBJECT_UNLOCK(vm_obj);
2106 		int dummy;
2107 		tsleep(&dummy, 0, "delay", 1); /* XXX */
2108 		VM_OBJECT_LOCK(vm_obj);
2109 		goto retry;
2110 	default:
2111 		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
2112 		ret = VM_PAGER_ERROR;
2113 		break;
2114 	}
2115 
2116 done:
2117 	vm_object_pip_wakeup(vm_obj);
2118 
2119 	return ret;
2120 }
2121 
2122 /**
2123  * i915_gem_release_mmap - remove physical page mappings
2124  * @obj: obj in question
2125  *
2126  * Preserve the reservation of the mmapping with the DRM core code, but
2127  * relinquish ownership of the pages back to the system.
2128  *
2129  * It is vital that we remove the page mapping if we have mapped a tiled
2130  * object through the GTT and then lose the fence register due to
2131  * resource pressure. Similarly if the object has been moved out of the
2132  * aperture, than pages mapped into userspace must be revoked. Removing the
2133  * aperture, then the pages mapped into userspace must be revoked. Removing the
2134  * fixup by i915_gem_fault().
2135  */
2136 void
2137 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
2138 {
2139 	vm_object_t devobj;
2140 	vm_page_t m;
2141 	int i, page_count;
2142 
2143 	/* Serialisation between user GTT access and our code depends upon
2144 	 * revoking the CPU's PTE whilst the mutex is held. The next user
2145 	 * pagefault then has to wait until we release the mutex.
2146 	 */
2147 	lockdep_assert_held(&obj->base.dev->struct_mutex);
2148 
2149 	if (!obj->fault_mappable)
2150 		return;
2151 
2152 	devobj = cdev_pager_lookup(obj);
2153 	if (devobj != NULL) {
2154 		page_count = OFF_TO_IDX(obj->base.size);
2155 
2156 		VM_OBJECT_LOCK(devobj);
2157 		for (i = 0; i < page_count; i++) {
2158 			m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm");
2159 			if (m == NULL)
2160 				continue;
2161 			cdev_pager_free_page(devobj, m);
2162 		}
2163 		VM_OBJECT_UNLOCK(devobj);
2164 		vm_object_deallocate(devobj);
2165 	}
2166 
2167 	/* Ensure that the CPU's PTEs are revoked and there are no outstanding
2168 	 * memory transactions from userspace before we return. The TLB
2169 	 * flushing implied by changing the PTEs above *should* be
2170 	 * sufficient; an extra barrier here just provides us with a bit
2171 	 * of paranoid documentation about our requirement to serialise
2172 	 * memory writes before touching registers / GSM.
2173 	 */
2174 	wmb();
2175 
2176 	obj->fault_mappable = false;
2177 }
2178 
2179 void
2180 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
2181 {
2182 	struct drm_i915_gem_object *obj;
2183 
2184 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
2185 		i915_gem_release_mmap(obj);
2186 }
2187 
2188 /**
2189  * i915_gem_get_ggtt_size - return required global GTT size for an object
2190  * @dev_priv: i915 device
2191  * @size: object size
2192  * @tiling_mode: tiling mode
2193  *
2194  * Return the required global GTT size for an object, taking into account
2195  * potential fence register mapping.
2196  */
2197 u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv,
2198 			   u64 size, int tiling_mode)
2199 {
2200 	u64 ggtt_size;
2201 
2202 	GEM_BUG_ON(size == 0);
2203 
2204 	if (INTEL_GEN(dev_priv) >= 4 ||
2205 	    tiling_mode == I915_TILING_NONE)
2206 		return size;
2207 
2208 	/* Previous chips need a power-of-two fence region when tiling */
2209 	if (IS_GEN3(dev_priv))
2210 		ggtt_size = 1024*1024;
2211 	else
2212 		ggtt_size = 512*1024;
2213 
2214 	while (ggtt_size < size)
2215 		ggtt_size <<= 1;
2216 
2217 	return ggtt_size;
2218 }
2219 
2220 /**
2221  * i915_gem_get_ggtt_alignment - return required global GTT alignment
2222  * @dev_priv: i915 device
2223  * @size: object size
2224  * @tiling_mode: tiling mode
2225  * @fenced: is fenced alignment required or not
2226  *
2227  * Return the required global GTT alignment for an object, taking into account
2228  * potential fence register mapping.
2229  */
2230 u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
2231 				int tiling_mode, bool fenced)
2232 {
2233 	GEM_BUG_ON(size == 0);
2234 
2235 	/*
2236 	 * Minimum alignment is 4k (GTT page size), but might be greater
2237 	 * if a fence register is needed for the object.
2238 	 */
2239 	if (INTEL_GEN(dev_priv) >= 4 || (!fenced && IS_G33(dev_priv)) ||
2240 	    tiling_mode == I915_TILING_NONE)
2241 		return 4096;
2242 
2243 	/*
2244 	 * Previous chips need to be aligned to the size of the smallest
2245 	 * fence register that can contain the object.
2246 	 */
2247 	return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode);
2248 }
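
/*
 * Worked example (editorial note): on gen3, a 1.5 MiB X-tiled object needs a
 * power-of-two fence region, so i915_gem_get_ggtt_size() rounds 1.5 MiB up to
 * 2 MiB and i915_gem_get_ggtt_alignment() then demands 2 MiB alignment for a
 * fenceable mapping.  On gen4+, or for untiled objects, the helpers simply
 * return the object size and 4 KiB respectively.
 */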
2249 
2250 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2251 {
2252 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2253 	int err;
2254 
2255 	err = drm_gem_create_mmap_offset(&obj->base);
2256 	if (!err)
2257 		return 0;
2258 
2259 	/* We can idle the GPU locklessly to flush stale objects, but in order
2260 	 * to claim that space for ourselves, we need to take the big
2261 	 * struct_mutex to free the requests+objects and allocate our slot.
2262 	 */
2263 	err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
2264 	if (err)
2265 		return err;
2266 
2267 	err = i915_mutex_lock_interruptible(&dev_priv->drm);
2268 	if (!err) {
2269 		i915_gem_retire_requests(dev_priv);
2270 		err = drm_gem_create_mmap_offset(&obj->base);
2271 		mutex_unlock(&dev_priv->drm.struct_mutex);
2272 	}
2273 
2274 	return err;
2275 }
2276 
2277 #if 0
2278 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2279 {
2280 	drm_gem_free_mmap_offset(&obj->base);
2281 }
2282 #endif
2283 
2284 int
2285 i915_gem_mmap_gtt(struct drm_file *file,
2286 		  struct drm_device *dev,
2287 		  uint32_t handle,
2288 		  uint64_t *offset)
2289 {
2290 	struct drm_i915_gem_object *obj;
2291 	int ret;
2292 
2293 	obj = i915_gem_object_lookup(file, handle);
2294 	if (!obj)
2295 		return -ENOENT;
2296 
2297 	ret = i915_gem_object_create_mmap_offset(obj);
2298 	if (ret == 0)
2299 		*offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) |
2300 		    DRM_GEM_MAPPING_KEY;
2301 
2302 	i915_gem_object_put_unlocked(obj);
2303 	return ret;
2304 }
2305 
2306 /**
2307  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2308  * @dev: DRM device
2309  * @data: GTT mapping ioctl data
2310  * @file: GEM object info
2311  *
2312  * Simply returns the fake offset to userspace so it can mmap it.
2313  * The mmap call will end up in drm_gem_mmap(), which will set things
2314  * up so we can get faults in the handler above.
2315  *
2316  * The fault handler will take care of binding the object into the GTT
2317  * (since it may have been evicted to make room for something), allocating
2318  * a fence register, and mapping the appropriate aperture address into
2319  * userspace.
2320  */
2321 int
2322 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2323 			struct drm_file *file)
2324 {
2325 	struct drm_i915_gem_mmap_gtt *args = data;
2326 
2327 	return i915_gem_mmap_gtt(file, dev, args->handle, (uint64_t *)&args->offset);
2328 }
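
/*
 * Illustrative sketch (editorial, not compiled): the userspace half of the
 * fake-offset dance described above.  Not part of the driver; assumes
 * libdrm's drmIoctl() and the standard drm_i915_gem_mmap_gtt layout.
 */
#if 0
static void *map_bo_through_gtt(int drm_fd, uint32_t handle, size_t size)
{
	struct drm_i915_gem_mmap_gtt arg;
	void *ptr;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;

	/* Ask the kernel for the fake offset of this object... */
	if (drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
		return NULL;

	/* ...and mmap it; faults are then serviced by i915_gem_fault(). */
	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   drm_fd, arg.offset);
	return (ptr == MAP_FAILED) ? NULL : ptr;
}
#endif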
2329 
2330 /* Immediately discard the backing storage */
2331 static void
2332 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2333 {
2334 	vm_object_t vm_obj = obj->base.filp;
2335 
2336 	if (obj->base.filp == NULL)
2337 		return;
2338 
2339 	VM_OBJECT_LOCK(vm_obj);
2340 	vm_object_page_remove(vm_obj, 0, 0, false);
2341 	VM_OBJECT_UNLOCK(vm_obj);
2342 
2343 	/* Our goal here is to return as much of the memory as
2344 	 * is possible back to the system as we are called from OOM.
2345 	 * To do this we must instruct the shmfs to drop all of its
2346 	 * backing pages, *now*.
2347 	 */
2348 #if 0
2349 	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
2350 #endif
2351 	obj->madv = __I915_MADV_PURGED;
2352 }
2353 
2354 /* Try to discard unwanted pages */
2355 static void
2356 i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
2357 {
2358 #if 0
2359 	struct address_space *mapping;
2360 #endif
2361 
2362 	switch (obj->madv) {
2363 	case I915_MADV_DONTNEED:
2364 		i915_gem_object_truncate(obj);	/* fall through */
2365 	case __I915_MADV_PURGED:
2366 		return;
2367 	}
2368 
2369 	if (obj->base.filp == NULL)
2370 		return;
2371 
2372 #if 0
2373 	mapping = file_inode(obj->base.filp)->i_mapping,
2374 #endif
2375 	invalidate_mapping_pages(obj->base.filp, 0, (loff_t)-1);
2376 }
2377 
2378 static void
2379 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2380 {
2381 	struct sgt_iter sgt_iter;
2382 	struct page *page;
2383 	int ret;
2384 
2385 	BUG_ON(obj->madv == __I915_MADV_PURGED);
2386 
2387 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
2388 	if (WARN_ON(ret)) {
2389 		/* In the event of a disaster, abandon all caches and
2390 		 * hope for the best.
2391 		 */
2392 		i915_gem_clflush_object(obj, true);
2393 		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2394 	}
2395 
2396 	i915_gem_gtt_finish_object(obj);
2397 
2398 	if (i915_gem_object_needs_bit17_swizzle(obj))
2399 		i915_gem_object_save_bit_17_swizzle(obj);
2400 
2401 	if (obj->madv == I915_MADV_DONTNEED)
2402 		obj->dirty = 0;
2403 
2404 	for_each_sgt_page(page, sgt_iter, obj->pages) {
2405 		if (obj->dirty)
2406 			set_page_dirty(page);
2407 
2408 		if (obj->madv == I915_MADV_WILLNEED)
2409 			mark_page_accessed(page);
2410 
2411 		vm_page_busy_wait((struct vm_page *)page, FALSE, "i915gem");
2412 		vm_page_unwire((struct vm_page *)page, 1);
2413 		vm_page_wakeup((struct vm_page *)page);
2414 	}
2415 	obj->dirty = 0;
2416 
2417 	sg_free_table(obj->pages);
2418 	kfree(obj->pages);
2419 }
2420 
2421 int
2422 i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2423 {
2424 	const struct drm_i915_gem_object_ops *ops = obj->ops;
2425 
2426 	if (obj->pages == NULL)
2427 		return 0;
2428 
2429 	if (obj->pages_pin_count)
2430 		return -EBUSY;
2431 
2432 	GEM_BUG_ON(obj->bind_count);
2433 
2434 	/* ->put_pages might need to allocate memory for the bit17 swizzle
2435 	 * array, hence protect them from being reaped by removing them from gtt
2436 	 * lists early. */
2437 	list_del(&obj->global_list);
2438 
2439 	if (obj->mapping) {
2440 		void *ptr;
2441 
2442 		ptr = ptr_mask_bits(obj->mapping);
2443 		if (is_vmalloc_addr(ptr))
2444 			vunmap(ptr);
2445 		else
2446 			kunmap(kmap_to_page(ptr));
2447 
2448 		obj->mapping = NULL;
2449 	}
2450 
2451 	ops->put_pages(obj);
2452 	obj->pages = NULL;
2453 
2454 	i915_gem_object_invalidate(obj);
2455 
2456 	return 0;
2457 }
2458 
2459 static unsigned long swiotlb_max_size(void)
2460 {
2461 #if IS_ENABLED(CONFIG_SWIOTLB)
2462 	return rounddown(swiotlb_nr_tbl() << IO_TLB_SHIFT, PAGE_SIZE);
2463 #else
2464 	return 0;
2465 #endif
2466 }
2467 
2468 static int
2469 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2470 {
2471 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2472 	int page_count, i;
2473 	vm_object_t vm_obj;
2474 	struct sg_table *st;
2475 	struct scatterlist *sg;
2476 	struct sgt_iter sgt_iter;
2477 	struct page *page;
2478 	unsigned long last_pfn = 0;	/* suppress gcc warning */
2479 	unsigned long max_segment;
2480 	int ret;
2481 
2482 	/* Assert that the object is not currently in any GPU domain. As it
2483 	 * wasn't in the GTT, there shouldn't be any way it could have been in
2484 	 * a GPU cache
2485 	 */
2486 	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2487 	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2488 
2489 	max_segment = swiotlb_max_size();
2490 	if (!max_segment)
2491 		max_segment = obj->base.size;
2492 
2493 	st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL);
2494 	if (st == NULL)
2495 		return -ENOMEM;
2496 
2497 	page_count = obj->base.size / PAGE_SIZE;
2498 	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2499 		kfree(st);
2500 		return -ENOMEM;
2501 	}
2502 
2503 	/* Get the list of pages out of our struct file.  They'll be pinned
2504 	 * at this point until we release them.
2505 	 *
2506 	 * Fail silently without starting the shrinker
2507 	 */
2508 	vm_obj = obj->base.filp;
2509 	VM_OBJECT_LOCK(vm_obj);
2510 	sg = st->sgl;
2511 	st->nents = 0;
2512 	for (i = 0; i < page_count; i++) {
2513 		page = shmem_read_mapping_page(vm_obj, i);
2514 		if (IS_ERR(page)) {
2515 			i915_gem_shrink(dev_priv,
2516 					page_count,
2517 					I915_SHRINK_BOUND |
2518 					I915_SHRINK_UNBOUND |
2519 					I915_SHRINK_PURGEABLE);
2520 			page = shmem_read_mapping_page(vm_obj, i);
2521 		}
2522 		if (IS_ERR(page)) {
2523 			/* We've tried hard to allocate the memory by reaping
2524 			 * our own buffer, now let the real VM do its job and
2525 			 * go down in flames if truly OOM.
2526 			 */
2527 			page = shmem_read_mapping_page(vm_obj, i);
2528 			if (IS_ERR(page)) {
2529 				ret = PTR_ERR(page);
2530 				goto err_pages;
2531 			}
2532 		}
2533 		if (!i ||
2534 		    sg->length >= max_segment ||
2535 		    page_to_pfn(page) != last_pfn + 1) {
2536 			if (i)
2537 				sg = sg_next(sg);
2538 			st->nents++;
2539 			sg_set_page(sg, page, PAGE_SIZE, 0);
2540 		} else {
2541 			sg->length += PAGE_SIZE;
2542 		}
2543 		last_pfn = page_to_pfn(page);
2544 
2545 		/* XXX Linux checks the i965g/gm DMA32 workaround here; that check is not carried over. */
2546 	}
2547 	if (sg) /* loop terminated early; short sg table */
2548 		sg_mark_end(sg);
2549 	obj->pages = st;
2550 	VM_OBJECT_UNLOCK(vm_obj);
2551 
2552 	ret = i915_gem_gtt_prepare_object(obj);
2553 	if (ret)
2554 		goto err_pages;
2555 
2556 	if (i915_gem_object_needs_bit17_swizzle(obj))
2557 		i915_gem_object_do_bit_17_swizzle(obj);
2558 
2559 	if (i915_gem_object_is_tiled(obj) &&
2560 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
2561 		i915_gem_object_pin_pages(obj);
2562 
2563 	return 0;
2564 
2565 err_pages:
2566 	sg_mark_end(sg);
2567 	for_each_sgt_page(page, sgt_iter, st)
2568 	{
2569 		struct vm_page *vmp = (struct vm_page *)page;
2570 		vm_page_busy_wait(vmp, FALSE, "i915gem");
2571 		vm_page_unwire(vmp, 0);
2572 		vm_page_wakeup(vmp);
2573 	}
2574 	VM_OBJECT_UNLOCK(vm_obj);
2575 	sg_free_table(st);
2576 	kfree(st);
2577 
2578 	/* shmemfs first checks if there is enough memory to allocate the page
2579 	 * and reports ENOSPC should there be insufficient, along with the usual
2580 	 * ENOMEM for a genuine allocation failure.
2581 	 *
2582 	 * We use ENOSPC in our driver to mean that we have run out of aperture
2583 	 * space and so want to translate the error from shmemfs back to our
2584 	 * usual understanding of ENOMEM.
2585 	 */
2586 	if (ret == -ENOSPC)
2587 		ret = -ENOMEM;
2588 
2589 	return ret;
2590 }
2591 
2592 /* Ensure that the associated pages are gathered from the backing storage
2593  * and pinned into our object. i915_gem_object_get_pages() may be called
2594  * multiple times before they are released by a single call to
2595  * i915_gem_object_put_pages() - once the pages are no longer referenced
2596  * either as a result of memory pressure (reaping pages under the shrinker)
2597  * or as the object is itself released.
2598  */
2599 int
2600 i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2601 {
2602 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2603 	const struct drm_i915_gem_object_ops *ops = obj->ops;
2604 	int ret;
2605 
2606 	if (obj->pages)
2607 		return 0;
2608 
2609 	if (obj->madv != I915_MADV_WILLNEED) {
2610 		DRM_DEBUG("Attempting to obtain a purgeable object\n");
2611 		return -EFAULT;
2612 	}
2613 
2614 	BUG_ON(obj->pages_pin_count);
2615 
2616 	ret = ops->get_pages(obj);
2617 	if (ret)
2618 		return ret;
2619 
2620 	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
2621 
2622 	obj->get_page.sg = obj->pages->sgl;
2623 	obj->get_page.last = 0;
2624 
2625 	return 0;
2626 }
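
/*
 * Pairing sketch (editorial, not compiled): callers that need the backing
 * pages to stay resident bracket their access with a pages pin; the pages
 * are only released again once every pin has been dropped.  struct_mutex is
 * assumed to be held.
 */
#if 0
	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;
	i915_gem_object_pin_pages(obj);

	/* ... walk obj->pages / use obj->get_page ... */

	i915_gem_object_unpin_pages(obj);

	/* Later, e.g. under memory pressure, once pages_pin_count is zero: */
	i915_gem_object_put_pages(obj);
#endif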
2627 
2628 /* The 'mapping' part of i915_gem_object_pin_map() below */
2629 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2630 				 enum i915_map_type type)
2631 {
2632 	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2633 	struct sg_table *sgt = obj->pages;
2634 	struct sgt_iter sgt_iter;
2635 	struct page *page;
2636 	struct page *stack_pages[32];
2637 	struct page **pages = stack_pages;
2638 	unsigned long i = 0;
2639 	pgprot_t pgprot;
2640 	void *addr;
2641 
2642 	/* A single page can always be kmapped */
2643 	if (n_pages == 1 && type == I915_MAP_WB)
2644 		return kmap(sg_page(sgt->sgl));
2645 
2646 	if (n_pages > ARRAY_SIZE(stack_pages)) {
2647 		/* Too big for stack -- allocate temporary array instead */
2648 		pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
2649 		if (!pages)
2650 			return NULL;
2651 	}
2652 
2653 	for_each_sgt_page(page, sgt_iter, sgt)
2654 		pages[i++] = page;
2655 
2656 	/* Check that we have the expected number of pages */
2657 	GEM_BUG_ON(i != n_pages);
2658 
2659 	switch (type) {
2660 	case I915_MAP_WB:
2661 		pgprot = PAGE_KERNEL;
2662 		break;
2663 	case I915_MAP_WC:
2664 		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
2665 		break;
2666 	}
2667 	addr = vmap(pages, n_pages, 0, pgprot);
2668 
2669 	if (pages != stack_pages)
2670 		drm_free_large(pages);
2671 
2672 	return addr;
2673 }
2674 
2675 /* get, pin, and map the pages of the object into kernel space */
2676 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2677 			      enum i915_map_type type)
2678 {
2679 	enum i915_map_type has_type;
2680 	bool pinned;
2681 	void *ptr;
2682 	int ret;
2683 
2684 	lockdep_assert_held(&obj->base.dev->struct_mutex);
2685 	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
2686 
2687 	ret = i915_gem_object_get_pages(obj);
2688 	if (ret)
2689 		return ERR_PTR(ret);
2690 
2691 	i915_gem_object_pin_pages(obj);
2692 	pinned = obj->pages_pin_count > 1;
2693 
2694 	ptr = ptr_unpack_bits(obj->mapping, has_type);
2695 	if (ptr && has_type != type) {
2696 		if (pinned) {
2697 			ret = -EBUSY;
2698 			goto err;
2699 		}
2700 
2701 		if (is_vmalloc_addr(ptr))
2702 			vunmap(ptr);
2703 		else
2704 			kunmap(kmap_to_page(ptr));
2705 
2706 		ptr = obj->mapping = NULL;
2707 	}
2708 
2709 	if (!ptr) {
2710 		ptr = i915_gem_object_map(obj, type);
2711 		if (!ptr) {
2712 			ret = -ENOMEM;
2713 			goto err;
2714 		}
2715 
2716 		obj->mapping = ptr_pack_bits(ptr, type);
2717 	}
2718 
2719 	return ptr;
2720 
2721 err:
2722 	i915_gem_object_unpin_pages(obj);
2723 	return ERR_PTR(ret);
2724 }
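
/*
 * Usage sketch (editorial, not compiled): a typical pin_map/unpin_map pair
 * under struct_mutex, assuming the i915_gem_object_unpin_map() helper from
 * i915_drv.h.
 */
#if 0
	struct drm_device *dev = obj->base.dev;
	void *vaddr;

	mutex_lock(&dev->struct_mutex);
	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		mutex_unlock(&dev->struct_mutex);
		return PTR_ERR(vaddr);
	}

	memset(vaddr, 0, obj->base.size);	/* CPU access through the mapping */

	i915_gem_object_unpin_map(obj);		/* drops the pages pin taken above */
	mutex_unlock(&dev->struct_mutex);
#endif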
2725 
2726 static void
2727 i915_gem_object_retire__write(struct i915_gem_active *active,
2728 			      struct drm_i915_gem_request *request)
2729 {
2730 	struct drm_i915_gem_object *obj =
2731 		container_of(active, struct drm_i915_gem_object, last_write);
2732 
2733 	intel_fb_obj_flush(obj, true, ORIGIN_CS);
2734 }
2735 
2736 static void
2737 i915_gem_object_retire__read(struct i915_gem_active *active,
2738 			     struct drm_i915_gem_request *request)
2739 {
2740 	int idx = request->engine->id;
2741 	struct drm_i915_gem_object *obj =
2742 		container_of(active, struct drm_i915_gem_object, last_read[idx]);
2743 
2744 	GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx));
2745 
2746 	i915_gem_object_clear_active(obj, idx);
2747 	if (i915_gem_object_is_active(obj))
2748 		return;
2749 
2750 	/* Bump our place on the bound list to keep it roughly in LRU order
2751 	 * so that we don't steal from recently used but inactive objects
2752 	 * (unless we are forced to ofc!)
2753 	 */
2754 	if (obj->bind_count)
2755 		list_move_tail(&obj->global_list,
2756 			       &request->i915->mm.bound_list);
2757 
2758 	i915_gem_object_put(obj);
2759 }
2760 
2761 static bool i915_context_is_banned(const struct i915_gem_context *ctx)
2762 {
2763 	unsigned long elapsed;
2764 
2765 	if (ctx->hang_stats.banned)
2766 		return true;
2767 
2768 	elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
2769 	if (ctx->hang_stats.ban_period_seconds &&
2770 	    elapsed <= ctx->hang_stats.ban_period_seconds) {
2771 		DRM_DEBUG("context hanging too fast, banning!\n");
2772 		return true;
2773 	}
2774 
2775 	return false;
2776 }
2777 
2778 static void i915_set_reset_status(struct i915_gem_context *ctx,
2779 				  const bool guilty)
2780 {
2781 	struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
2782 
2783 	if (guilty) {
2784 		hs->banned = i915_context_is_banned(ctx);
2785 		hs->batch_active++;
2786 		hs->guilty_ts = get_seconds();
2787 	} else {
2788 		hs->batch_pending++;
2789 	}
2790 }
2791 
2792 struct drm_i915_gem_request *
2793 i915_gem_find_active_request(struct intel_engine_cs *engine)
2794 {
2795 	struct drm_i915_gem_request *request;
2796 
2797 	/* We are called by the error capture and reset at a random
2798 	 * point in time. In particular, note that neither is crucially
2799 	 * ordered with an interrupt. After a hang, the GPU is dead and we
2800 	 * assume that no more writes can happen (we waited long enough for
2801 	 * all writes that were in transaction to be flushed) - adding an
2802 	 * extra delay for a recent interrupt is pointless. Hence, we do
2803 	 * not need an engine->irq_seqno_barrier() before the seqno reads.
2804 	 */
2805 	list_for_each_entry(request, &engine->request_list, link) {
2806 		if (i915_gem_request_completed(request))
2807 			continue;
2808 
2809 		if (!i915_sw_fence_done(&request->submit))
2810 			break;
2811 
2812 		return request;
2813 	}
2814 
2815 	return NULL;
2816 }
2817 
2818 static void reset_request(struct drm_i915_gem_request *request)
2819 {
2820 	void *vaddr = request->ring->vaddr;
2821 	u32 head;
2822 
2823 	/* As this request likely depends on state from the lost
2824 	 * context, clear out all the user operations leaving the
2825 	 * breadcrumb at the end (so we get the fence notifications).
2826 	 */
2827 	head = request->head;
2828 	if (request->postfix < head) {
2829 		memset(vaddr + head, 0, request->ring->size - head);
2830 		head = 0;
2831 	}
2832 	memset(vaddr + head, 0, request->postfix - head);
2833 }
2834 
2835 static void i915_gem_reset_engine(struct intel_engine_cs *engine)
2836 {
2837 	struct drm_i915_gem_request *request;
2838 	struct i915_gem_context *incomplete_ctx;
2839 	bool ring_hung;
2840 
2841 	if (engine->irq_seqno_barrier)
2842 		engine->irq_seqno_barrier(engine);
2843 
2844 	request = i915_gem_find_active_request(engine);
2845 	if (!request)
2846 		return;
2847 
2848 	ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
2849 	if (engine->hangcheck.seqno != intel_engine_get_seqno(engine))
2850 		ring_hung = false;
2851 
2852 	i915_set_reset_status(request->ctx, ring_hung);
2853 	if (!ring_hung)
2854 		return;
2855 
2856 	DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
2857 			 engine->name, request->fence.seqno);
2858 
2859 	/* Setup the CS to resume from the breadcrumb of the hung request */
2860 	engine->reset_hw(engine, request);
2861 
2862 	/* Users of the default context do not rely on logical state
2863 	 * preserved between batches. They have to emit full state on
2864 	 * every batch and so it is safe to execute queued requests following
2865 	 * the hang.
2866 	 *
2867 	 * Other contexts preserve state, now corrupt. We want to skip all
2868 	 * queued requests that reference the corrupt context.
2869 	 */
2870 	incomplete_ctx = request->ctx;
2871 	if (i915_gem_context_is_default(incomplete_ctx))
2872 		return;
2873 
2874 	list_for_each_entry_continue(request, &engine->request_list, link)
2875 		if (request->ctx == incomplete_ctx)
2876 			reset_request(request);
2877 }
2878 
2879 void i915_gem_reset(struct drm_i915_private *dev_priv)
2880 {
2881 	struct intel_engine_cs *engine;
2882 	enum intel_engine_id id;
2883 
2884 	i915_gem_retire_requests(dev_priv);
2885 
2886 	for_each_engine(engine, dev_priv, id)
2887 		i915_gem_reset_engine(engine);
2888 
2889 	i915_gem_restore_fences(&dev_priv->drm);
2890 
2891 	if (dev_priv->gt.awake) {
2892 		intel_sanitize_gt_powersave(dev_priv);
2893 		intel_enable_gt_powersave(dev_priv);
2894 		if (INTEL_GEN(dev_priv) >= 6)
2895 			gen6_rps_busy(dev_priv);
2896 	}
2897 }
2898 
2899 static void nop_submit_request(struct drm_i915_gem_request *request)
2900 {
2901 }
2902 
2903 static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
2904 {
2905 	engine->submit_request = nop_submit_request;
2906 
2907 	/* Mark all pending requests as complete so that any concurrent
2908 	 * (lockless) lookup doesn't try and wait upon the request as we
2909 	 * reset it.
2910 	 */
2911 	intel_engine_init_seqno(engine, engine->last_submitted_seqno);
2912 
2913 	/*
2914 	 * Clear the execlists queue up before freeing the requests, as those
2915 	 * are the ones that keep the context and ringbuffer backing objects
2916 	 * pinned in place.
2917 	 */
2918 
2919 	if (i915.enable_execlists) {
2920 		lockmgr(&engine->execlist_lock, LK_EXCLUSIVE);
2921 		INIT_LIST_HEAD(&engine->execlist_queue);
2922 		i915_gem_request_put(engine->execlist_port[0].request);
2923 		i915_gem_request_put(engine->execlist_port[1].request);
2924 		memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
2925 		lockmgr(&engine->execlist_lock, LK_RELEASE);
2926 	}
2927 
2928 	engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
2929 }
2930 
2931 void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
2932 {
2933 	struct intel_engine_cs *engine;
2934 	enum intel_engine_id id;
2935 
2936 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
2937 	set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
2938 
2939 	i915_gem_context_lost(dev_priv);
2940 	for_each_engine(engine, dev_priv, id)
2941 		i915_gem_cleanup_engine(engine);
2942 	mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
2943 
2944 	i915_gem_retire_requests(dev_priv);
2945 }
2946 
2947 static void
2948 i915_gem_retire_work_handler(struct work_struct *work)
2949 {
2950 	struct drm_i915_private *dev_priv =
2951 		container_of(work, typeof(*dev_priv), gt.retire_work.work);
2952 	struct drm_device *dev = &dev_priv->drm;
2953 
2954 	/* Come back later if the device is busy... */
2955 	if (mutex_trylock(&dev->struct_mutex)) {
2956 		i915_gem_retire_requests(dev_priv);
2957 		mutex_unlock(&dev->struct_mutex);
2958 	}
2959 
2960 	/* Keep the retire handler running until we are finally idle.
2961 	 * We do not need to do this test under locking as in the worst-case
2962 	 * we queue the retire worker once too often.
2963 	 */
2964 	if (READ_ONCE(dev_priv->gt.awake)) {
2965 		i915_queue_hangcheck(dev_priv);
2966 		queue_delayed_work(dev_priv->wq,
2967 				   &dev_priv->gt.retire_work,
2968 				   round_jiffies_up_relative(HZ));
2969 	}
2970 }
2971 
2972 static void
2973 i915_gem_idle_work_handler(struct work_struct *work)
2974 {
2975 	struct drm_i915_private *dev_priv =
2976 		container_of(work, typeof(*dev_priv), gt.idle_work.work);
2977 	struct drm_device *dev = &dev_priv->drm;
2978 	struct intel_engine_cs *engine;
2979 	enum intel_engine_id id;
2980 	bool rearm_hangcheck;
2981 
2982 	if (!READ_ONCE(dev_priv->gt.awake))
2983 		return;
2984 
2985 	if (READ_ONCE(dev_priv->gt.active_engines))
2986 		return;
2987 
2988 	rearm_hangcheck =
2989 		cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
2990 
2991 	if (!mutex_trylock(&dev->struct_mutex)) {
2992 		/* Currently busy, come back later */
2993 		mod_delayed_work(dev_priv->wq,
2994 				 &dev_priv->gt.idle_work,
2995 				 msecs_to_jiffies(50));
2996 		goto out_rearm;
2997 	}
2998 
2999 	if (dev_priv->gt.active_engines)
3000 		goto out_unlock;
3001 
3002 	for_each_engine(engine, dev_priv, id)
3003 		i915_gem_batch_pool_fini(&engine->batch_pool);
3004 
3005 	GEM_BUG_ON(!dev_priv->gt.awake);
3006 	dev_priv->gt.awake = false;
3007 	rearm_hangcheck = false;
3008 
3009 	if (INTEL_GEN(dev_priv) >= 6)
3010 		gen6_rps_idle(dev_priv);
3011 	intel_runtime_pm_put(dev_priv);
3012 out_unlock:
3013 	mutex_unlock(&dev->struct_mutex);
3014 
3015 out_rearm:
3016 	if (rearm_hangcheck) {
3017 		GEM_BUG_ON(!dev_priv->gt.awake);
3018 		i915_queue_hangcheck(dev_priv);
3019 	}
3020 }
3021 
3022 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
3023 {
3024 	struct drm_i915_gem_object *obj = to_intel_bo(gem);
3025 	struct drm_i915_file_private *fpriv = file->driver_priv;
3026 	struct i915_vma *vma, *vn;
3027 
3028 	mutex_lock(&obj->base.dev->struct_mutex);
3029 	list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
3030 		if (vma->vm->file == fpriv)
3031 			i915_vma_close(vma);
3032 	mutex_unlock(&obj->base.dev->struct_mutex);
3033 }
3034 
3035 /**
3036  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3037  * @dev: drm device pointer
3038  * @data: ioctl data blob
3039  * @file: drm file pointer
3040  *
3041  * Returns 0 if successful, else an error is returned with the remaining time in
3042  * the timeout parameter.
3043  *  -ETIME: object is still busy after timeout
3044  *  -ERESTARTSYS: signal interrupted the wait
3045  *  -ENOENT: object doesn't exist
3046  * Also possible, but rare:
3047  *  -EAGAIN: GPU wedged
3048  *  -ENOMEM: damn
3049  *  -ENODEV: Internal IRQ fail
3050  *  -E?: The add request failed
3051  *
3052  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3053  * non-zero timeout parameter the wait ioctl will wait for the given number of
3054  * nanoseconds on an object becoming unbusy. Since the wait itself does so
3055  * without holding struct_mutex the object may become re-busied before this
3056  * function completes. A similar but shorter race condition exists in the busy
3057  * ioctl.
3058  */
3059 int
3060 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3061 {
3062 	struct drm_i915_gem_wait *args = data;
3063 	struct intel_rps_client *rps = to_rps_client(file);
3064 	struct drm_i915_gem_object *obj;
3065 	unsigned long active;
3066 	int idx, ret = 0;
3067 
3068 	if (args->flags != 0)
3069 		return -EINVAL;
3070 
3071 	obj = i915_gem_object_lookup(file, args->bo_handle);
3072 	if (!obj)
3073 		return -ENOENT;
3074 
3075 	active = __I915_BO_ACTIVE(obj);
3076 	for_each_active(active, idx) {
3077 		s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL;
3078 		ret = i915_gem_active_wait_unlocked(&obj->last_read[idx],
3079 						    I915_WAIT_INTERRUPTIBLE,
3080 						    timeout, rps);
3081 		if (ret)
3082 			break;
3083 	}
3084 
3085 	i915_gem_object_put_unlocked(obj);
3086 	return ret;
3087 }
3088 
3089 static void __i915_vma_iounmap(struct i915_vma *vma)
3090 {
3091 	GEM_BUG_ON(i915_vma_is_pinned(vma));
3092 
3093 	if (vma->iomap == NULL)
3094 		return;
3095 
3096 	io_mapping_unmap(vma->iomap);
3097 	vma->iomap = NULL;
3098 }
3099 
3100 int i915_vma_unbind(struct i915_vma *vma)
3101 {
3102 	struct drm_i915_gem_object *obj = vma->obj;
3103 	unsigned long active;
3104 	int ret;
3105 
3106 	/* First wait upon any activity as retiring the request may
3107 	 * have side-effects such as unpinning or even unbinding this vma.
3108 	 */
3109 	active = i915_vma_get_active(vma);
3110 	if (active) {
3111 		int idx;
3112 
3113 		/* When a closed VMA is retired, it is unbound - eek.
3114 		 * In order to prevent it from being recursively closed,
3115 		 * take a pin on the vma so that the second unbind is
3116 		 * aborted.
3117 		 */
3118 		__i915_vma_pin(vma);
3119 
3120 		for_each_active(active, idx) {
3121 			ret = i915_gem_active_retire(&vma->last_read[idx],
3122 						   &vma->vm->dev->struct_mutex);
3123 			if (ret)
3124 				break;
3125 		}
3126 
3127 		__i915_vma_unpin(vma);
3128 		if (ret)
3129 			return ret;
3130 
3131 		GEM_BUG_ON(i915_vma_is_active(vma));
3132 	}
3133 
3134 	if (i915_vma_is_pinned(vma))
3135 		return -EBUSY;
3136 
3137 	if (!drm_mm_node_allocated(&vma->node))
3138 		goto destroy;
3139 
3140 	GEM_BUG_ON(obj->bind_count == 0);
3141 	GEM_BUG_ON(!obj->pages);
3142 
3143 	if (i915_vma_is_map_and_fenceable(vma)) {
3144 		/* release the fence reg _after_ flushing */
3145 		ret = i915_vma_put_fence(vma);
3146 		if (ret)
3147 			return ret;
3148 
3149 		/* Force a pagefault for domain tracking on next user access */
3150 		i915_gem_release_mmap(obj);
3151 
3152 		__i915_vma_iounmap(vma);
3153 		vma->flags &= ~I915_VMA_CAN_FENCE;
3154 	}
3155 
3156 	if (likely(!vma->vm->closed)) {
3157 		trace_i915_vma_unbind(vma);
3158 		vma->vm->unbind_vma(vma);
3159 	}
3160 	vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
3161 
3162 	drm_mm_remove_node(&vma->node);
3163 	list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
3164 
3165 	if (vma->pages != obj->pages) {
3166 		GEM_BUG_ON(!vma->pages);
3167 		sg_free_table(vma->pages);
3168 		kfree(vma->pages);
3169 	}
3170 	vma->pages = NULL;
3171 
3172 	/* Since the unbound list is global, only move to that list if
3173 	 * no more VMAs exist. */
3174 	if (--obj->bind_count == 0)
3175 		list_move_tail(&obj->global_list,
3176 			       &to_i915(obj->base.dev)->mm.unbound_list);
3177 
3178 	/* And finally now the object is completely decoupled from this vma,
3179 	 * we can drop its hold on the backing storage and allow it to be
3180 	 * reaped by the shrinker.
3181 	 */
3182 	i915_gem_object_unpin_pages(obj);
3183 
3184 destroy:
3185 	if (unlikely(i915_vma_is_closed(vma)))
3186 		i915_vma_destroy(vma);
3187 
3188 	return 0;
3189 }
3190 
3191 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
3192 			   unsigned int flags)
3193 {
3194 	struct intel_engine_cs *engine;
3195 	enum intel_engine_id id;
3196 	int ret;
3197 
3198 	for_each_engine(engine, dev_priv, id) {
3199 		if (engine->last_context == NULL)
3200 			continue;
3201 
3202 		ret = intel_engine_idle(engine, flags);
3203 		if (ret)
3204 			return ret;
3205 	}
3206 
3207 	return 0;
3208 }
3209 
3210 static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
3211 				     unsigned long cache_level)
3212 {
3213 	struct drm_mm_node *gtt_space = &vma->node;
3214 	struct drm_mm_node *other;
3215 
3216 	/*
3217 	 * On some machines we have to be careful when putting differing types
3218 	 * of snoopable memory together to avoid the prefetcher crossing memory
3219 	 * domains and dying. During vm initialisation, we decide whether or not
3220 	 * these constraints apply and set the drm_mm.color_adjust
3221 	 * appropriately.
3222 	 */
3223 	if (vma->vm->mm.color_adjust == NULL)
3224 		return true;
3225 
3226 	if (!drm_mm_node_allocated(gtt_space))
3227 		return true;
3228 
3229 	if (list_empty(&gtt_space->node_list))
3230 		return true;
3231 
3232 	other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3233 	if (other->allocated && !other->hole_follows && other->color != cache_level)
3234 		return false;
3235 
3236 	other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3237 	if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3238 		return false;
3239 
3240 	return true;
3241 }
3242 
3243 /**
3244  * i915_vma_insert - finds a slot for the vma in its address space
3245  * @vma: the vma
3246  * @size: requested size in bytes (can be larger than the VMA)
3247  * @alignment: required alignment
3248  * @flags: mask of PIN_* flags to use
3249  *
3250  * First we try to allocate some free space that meets the requirements for
3251  * the VMA. Failing that, if the flags permit, it will evict an old VMA,
3252  * preferably the oldest idle entry to make room for the new VMA.
3253  *
3254  * Returns:
3255  * 0 on success, negative error code otherwise.
3256  */
3257 static int
3258 i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
3259 {
3260 	struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
3261 	struct drm_i915_gem_object *obj = vma->obj;
3262 	u64 start, end;
3263 	int ret;
3264 
3265 	GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
3266 	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
3267 
3268 	size = max(size, vma->size);
3269 	if (flags & PIN_MAPPABLE)
3270 		size = i915_gem_get_ggtt_size(dev_priv, size,
3271 					      i915_gem_object_get_tiling(obj));
3272 
3273 	alignment = max(max(alignment, vma->display_alignment),
3274 			i915_gem_get_ggtt_alignment(dev_priv, size,
3275 						    i915_gem_object_get_tiling(obj),
3276 						    flags & PIN_MAPPABLE));
3277 
3278 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3279 
3280 	end = vma->vm->total;
3281 	if (flags & PIN_MAPPABLE)
3282 		end = min_t(u64, end, dev_priv->ggtt.mappable_end);
3283 	if (flags & PIN_ZONE_4G)
3284 		end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
3285 
3286 	/* If binding the object/GGTT view requires more space than the entire
3287 	 * aperture has, reject it early before evicting everything in a vain
3288 	 * attempt to find space.
3289 	 */
3290 	if (size > end) {
3291 		DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
3292 			  size, obj->base.size,
3293 			  flags & PIN_MAPPABLE ? "mappable" : "total",
3294 			  end);
3295 		return -E2BIG;
3296 	}
3297 
3298 	ret = i915_gem_object_get_pages(obj);
3299 	if (ret)
3300 		return ret;
3301 
3302 	i915_gem_object_pin_pages(obj);
3303 
3304 	if (flags & PIN_OFFSET_FIXED) {
3305 		u64 offset = flags & PIN_OFFSET_MASK;
3306 		if (offset & (alignment - 1) || offset > end - size) {
3307 			ret = -EINVAL;
3308 			goto err_unpin;
3309 		}
3310 
3311 		vma->node.start = offset;
3312 		vma->node.size = size;
3313 		vma->node.color = obj->cache_level;
3314 		ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
3315 		if (ret) {
3316 			ret = i915_gem_evict_for_vma(vma);
3317 			if (ret == 0)
3318 				ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
3319 			if (ret)
3320 				goto err_unpin;
3321 		}
3322 	} else {
3323 		u32 search_flag, alloc_flag;
3324 
3325 		if (flags & PIN_HIGH) {
3326 			search_flag = DRM_MM_SEARCH_BELOW;
3327 			alloc_flag = DRM_MM_CREATE_TOP;
3328 		} else {
3329 			search_flag = DRM_MM_SEARCH_DEFAULT;
3330 			alloc_flag = DRM_MM_CREATE_DEFAULT;
3331 		}
3332 
3333 		/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
3334 		 * so we know that we always have a minimum alignment of 4096.
3335 		 * The drm_mm range manager is optimised to return results
3336 		 * with zero alignment, so where possible use the optimal
3337 		 * path.
3338 		 */
3339 		if (alignment <= 4096)
3340 			alignment = 0;
3341 
3342 search_free:
3343 		ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
3344 							  &vma->node,
3345 							  size, alignment,
3346 							  obj->cache_level,
3347 							  start, end,
3348 							  search_flag,
3349 							  alloc_flag);
3350 		if (ret) {
3351 			ret = i915_gem_evict_something(vma->vm, size, alignment,
3352 						       obj->cache_level,
3353 						       start, end,
3354 						       flags);
3355 			if (ret == 0)
3356 				goto search_free;
3357 
3358 			goto err_unpin;
3359 		}
3360 
3361 		GEM_BUG_ON(vma->node.start < start);
3362 		GEM_BUG_ON(vma->node.start + vma->node.size > end);
3363 	}
3364 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
3365 
3366 	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
3367 	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3368 	obj->bind_count++;
3369 
3370 	return 0;
3371 
3372 err_unpin:
3373 	i915_gem_object_unpin_pages(obj);
3374 	return ret;
3375 }
3376 
3377 bool
3378 i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3379 			bool force)
3380 {
3381 	/* If we don't have a page list set up, then we're not pinned
3382 	 * to GPU, and we can ignore the cache flush because it'll happen
3383 	 * again at bind time.
3384 	 */
3385 	if (obj->pages == NULL)
3386 		return false;
3387 
3388 	/*
3389 	 * Stolen memory is always coherent with the GPU as it is explicitly
3390 	 * marked as wc by the system, or the system is cache-coherent.
3391 	 */
3392 	if (obj->stolen || obj->phys_handle)
3393 		return false;
3394 
3395 	/* If the GPU is snooping the contents of the CPU cache,
3396 	 * we do not need to manually clear the CPU cache lines.  However,
3397 	 * the caches are only snooped when the render cache is
3398 	 * flushed/invalidated.  As we always have to emit invalidations
3399 	 * and flushes when moving into and out of the RENDER domain, correct
3400 	 * snooping behaviour occurs naturally as the result of our domain
3401 	 * tracking.
3402 	 */
3403 	if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3404 		obj->cache_dirty = true;
3405 		return false;
3406 	}
3407 
3408 	trace_i915_gem_object_clflush(obj);
3409 	drm_clflush_sg(obj->pages);
3410 	obj->cache_dirty = false;
3411 
3412 	return true;
3413 }
3414 
3415 /** Flushes the GTT write domain for the object if it's dirty. */
3416 static void
3417 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3418 {
3419 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3420 
3421 	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3422 		return;
3423 
3424 	/* No actual flushing is required for the GTT write domain.  Writes
3425 	 * to it "immediately" go to main memory as far as we know, so there's
3426 	 * no chipset flush.  It also doesn't land in render cache.
3427 	 *
3428 	 * However, we do have to enforce the order so that all writes through
3429 	 * the GTT land before any writes to the device, such as updates to
3430 	 * the GATT itself.
3431 	 *
3432 	 * We also have to wait a bit for the writes to land from the GTT.
3433 	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
3434 	 * timing. This issue has only been observed when switching quickly
3435 	 * between GTT writes and CPU reads from inside the kernel on recent hw,
3436 	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
3437 	 * system agents we cannot reproduce this behaviour).
3438 	 */
3439 	wmb();
3440 	if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
3441 		POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
3442 
3443 	intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT));
3444 
3445 	obj->base.write_domain = 0;
3446 	trace_i915_gem_object_change_domain(obj,
3447 					    obj->base.read_domains,
3448 					    I915_GEM_DOMAIN_GTT);
3449 }
3450 
3451 /** Flushes the CPU write domain for the object if it's dirty. */
3452 static void
3453 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
3454 {
3455 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
3456 		return;
3457 
3458 	if (i915_gem_clflush_object(obj, obj->pin_display))
3459 		i915_gem_chipset_flush(to_i915(obj->base.dev));
3460 
3461 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
3462 
3463 	obj->base.write_domain = 0;
3464 	trace_i915_gem_object_change_domain(obj,
3465 					    obj->base.read_domains,
3466 					    I915_GEM_DOMAIN_CPU);
3467 }
3468 
3469 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
3470 {
3471 	struct i915_vma *vma;
3472 
3473 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
3474 		if (!i915_vma_is_ggtt(vma))
3475 			continue;
3476 
3477 		if (i915_vma_is_active(vma))
3478 			continue;
3479 
3480 		if (!drm_mm_node_allocated(&vma->node))
3481 			continue;
3482 
3483 		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3484 	}
3485 }
3486 
3487 /**
3488  * Moves a single object to the GTT read, and possibly write domain.
3489  * @obj: object to act on
3490  * @write: ask for write access or read only
3491  *
3492  * This function returns when the move is complete, including waiting on
3493  * flushes to occur.
3494  */
3495 int
3496 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3497 {
3498 	uint32_t old_write_domain, old_read_domains;
3499 	int ret;
3500 
3501 	ret = i915_gem_object_wait_rendering(obj, !write);
3502 	if (ret)
3503 		return ret;
3504 
3505 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3506 		return 0;
3507 
3508 	/* Flush and acquire obj->pages so that we are coherent through
3509 	 * direct access in memory with previous cached writes through
3510 	 * shmemfs and that our cache domain tracking remains valid.
3511 	 * For example, if the obj->filp was moved to swap without us
3512 	 * being notified and releasing the pages, we would mistakenly
3513 	 * continue to assume that the obj remained out of the CPU cached
3514 	 * domain.
3515 	 */
3516 	ret = i915_gem_object_get_pages(obj);
3517 	if (ret)
3518 		return ret;
3519 
3520 	i915_gem_object_flush_cpu_write_domain(obj);
3521 
3522 	/* Serialise direct access to this object with the barriers for
3523 	 * coherent writes from the GPU, by effectively invalidating the
3524 	 * GTT domain upon first access.
3525 	 */
3526 	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3527 		mb();
3528 
3529 	old_write_domain = obj->base.write_domain;
3530 	old_read_domains = obj->base.read_domains;
3531 
3532 	/* It should now be out of any other write domains, and we can update
3533 	 * the domain values for our changes.
3534 	 */
3535 	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3536 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3537 	if (write) {
3538 		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3539 		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3540 		obj->dirty = 1;
3541 	}
3542 
3543 	trace_i915_gem_object_change_domain(obj,
3544 					    old_read_domains,
3545 					    old_write_domain);
3546 
3547 	/* And bump the LRU for this access */
3548 	i915_gem_object_bump_inactive_ggtt(obj);
3549 
3550 	return 0;
3551 }
3552 
3553 /**
3554  * Changes the cache-level of an object across all VMA.
3555  * @obj: object to act on
3556  * @cache_level: new cache level to set for the object
3557  *
3558  * After this function returns, the object will be in the new cache-level
3559  * across all GTT and the contents of the backing storage will be coherent,
3560  * with respect to the new cache-level. In order to keep the backing storage
3561  * coherent for all users, we only allow a single cache level to be set
3562  * globally on the object and prevent it from being changed whilst the
3563  * hardware is reading from the object. That is if the object is currently
3564  * on the scanout it will be set to uncached (or equivalent display
3565  * cache coherency) and all non-MOCS GPU access will also be uncached so
3566  * that all direct access to the scanout remains coherent.
3567  */
3568 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3569 				    enum i915_cache_level cache_level)
3570 {
3571 	struct i915_vma *vma;
3572 	int ret = 0;
3573 
3574 	if (obj->cache_level == cache_level)
3575 		goto out;
3576 
3577 	/* Inspect the list of currently bound VMA and unbind any that would
3578 	 * be invalid given the new cache-level. This is principally to
3579 	 * catch the issue of the CS prefetch crossing page boundaries and
3580 	 * reading an invalid PTE on older architectures.
3581 	 */
3582 restart:
3583 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
3584 		if (!drm_mm_node_allocated(&vma->node))
3585 			continue;
3586 
3587 		if (i915_vma_is_pinned(vma)) {
3588 			DRM_DEBUG("can not change the cache level of pinned objects\n");
3589 			return -EBUSY;
3590 		}
3591 
3592 		if (i915_gem_valid_gtt_space(vma, cache_level))
3593 			continue;
3594 
3595 		ret = i915_vma_unbind(vma);
3596 		if (ret)
3597 			return ret;
3598 
3599 		/* As unbinding may affect other elements in the
3600 		 * obj->vma_list (due to side-effects from retiring
3601 		 * an active vma), play safe and restart the iterator.
3602 		 */
3603 		goto restart;
3604 	}
3605 
3606 	/* We can reuse the existing drm_mm nodes but need to change the
3607 	 * cache-level on the PTE. We could simply unbind them all and
3608 	 * rebind with the correct cache-level on next use. However since
3609 	 * we already have a valid slot, dma mapping, pages etc, we may as well
3610 	 * rewrite the PTE in the belief that doing so tramples upon less
3611 	 * state and so involves less work.
3612 	 */
3613 	if (obj->bind_count) {
3614 		/* Before we change the PTE, the GPU must not be accessing it.
3615 		 * If we wait upon the object, we know that all the bound
3616 		 * VMA are no longer active.
3617 		 */
3618 		ret = i915_gem_object_wait_rendering(obj, false);
3619 		if (ret)
3620 			return ret;
3621 
3622 		if (!HAS_LLC(obj->base.dev) && cache_level != I915_CACHE_NONE) {
3623 			/* Access to snoopable pages through the GTT is
3624 			 * incoherent and on some machines causes a hard
3625 			 * lockup. Relinquish the CPU mmapping to force
3626 			 * userspace to refault in the pages and we can
3627 			 * then double check if the GTT mapping is still
3628 			 * valid for that pointer access.
3629 			 */
3630 			i915_gem_release_mmap(obj);
3631 
3632 			/* As we no longer need a fence for GTT access,
3633 			 * we can relinquish it now (and so prevent having
3634 			 * to steal a fence from someone else on the next
3635 			 * fence request). Note GPU activity would have
3636 			 * dropped the fence as all snoopable access is
3637 			 * supposed to be linear.
3638 			 */
3639 			list_for_each_entry(vma, &obj->vma_list, obj_link) {
3640 				ret = i915_vma_put_fence(vma);
3641 				if (ret)
3642 					return ret;
3643 			}
3644 		} else {
3645 			/* We either have incoherent backing store and
3646 			 * so no GTT access or the architecture is fully
3647 			 * coherent. In such cases, existing GTT mmaps
3648 			 * ignore the cache bit in the PTE and we can
3649 			 * rewrite it without confusing the GPU or having
3650 			 * to force userspace to fault back in its mmaps.
3651 			 */
3652 		}
3653 
3654 		list_for_each_entry(vma, &obj->vma_list, obj_link) {
3655 			if (!drm_mm_node_allocated(&vma->node))
3656 				continue;
3657 
3658 			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3659 			if (ret)
3660 				return ret;
3661 		}
3662 	}
3663 
3664 	list_for_each_entry(vma, &obj->vma_list, obj_link)
3665 		vma->node.color = cache_level;
3666 	obj->cache_level = cache_level;
3667 
3668 out:
3669 	/* Flush the dirty CPU caches to the backing storage so that the
3670 	 * object is now coherent at its new cache level (with respect
3671 	 * to the access domain).
3672 	 */
3673 	if (obj->cache_dirty && cpu_write_needs_clflush(obj)) {
3674 		if (i915_gem_clflush_object(obj, true))
3675 			i915_gem_chipset_flush(to_i915(obj->base.dev));
3676 	}
3677 
3678 	return 0;
3679 }
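/* Illustrative usage sketch (comment only, not compiled): callers change the
 * cache level under dev->struct_mutex, e.g.
 *
 *	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
 *	if (ret)
 *		return ret;
 *
 * which is the same pattern the set_caching ioctl and the display-plane
 * pinning below follow.
 */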
3680 
3681 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3682 			       struct drm_file *file)
3683 {
3684 	struct drm_i915_gem_caching *args = data;
3685 	struct drm_i915_gem_object *obj;
3686 
3687 	obj = i915_gem_object_lookup(file, args->handle);
3688 	if (!obj)
3689 		return -ENOENT;
3690 
3691 	switch (obj->cache_level) {
3692 	case I915_CACHE_LLC:
3693 	case I915_CACHE_L3_LLC:
3694 		args->caching = I915_CACHING_CACHED;
3695 		break;
3696 
3697 	case I915_CACHE_WT:
3698 		args->caching = I915_CACHING_DISPLAY;
3699 		break;
3700 
3701 	default:
3702 		args->caching = I915_CACHING_NONE;
3703 		break;
3704 	}
3705 
3706 	i915_gem_object_put_unlocked(obj);
3707 	return 0;
3708 }
3709 
3710 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3711 			       struct drm_file *file)
3712 {
3713 	struct drm_i915_private *dev_priv = to_i915(dev);
3714 	struct drm_i915_gem_caching *args = data;
3715 	struct drm_i915_gem_object *obj;
3716 	enum i915_cache_level level;
3717 	int ret;
3718 
3719 	switch (args->caching) {
3720 	case I915_CACHING_NONE:
3721 		level = I915_CACHE_NONE;
3722 		break;
3723 	case I915_CACHING_CACHED:
3724 		/*
3725 		 * Due to a HW issue on BXT A stepping, GPU stores via a
3726 		 * snooped mapping may leave stale data in a corresponding CPU
3727 		 * cacheline, whereas normally such cachelines would get
3728 		 * invalidated.
3729 		 */
3730 		if (!HAS_LLC(dev) && !HAS_SNOOP(dev))
3731 			return -ENODEV;
3732 
3733 		level = I915_CACHE_LLC;
3734 		break;
3735 	case I915_CACHING_DISPLAY:
3736 		level = HAS_WT(dev_priv) ? I915_CACHE_WT : I915_CACHE_NONE;
3737 		break;
3738 	default:
3739 		return -EINVAL;
3740 	}
3741 
3742 	intel_runtime_pm_get(dev_priv);
3743 
3744 	ret = i915_mutex_lock_interruptible(dev);
3745 	if (ret)
3746 		goto rpm_put;
3747 
3748 	obj = i915_gem_object_lookup(file, args->handle);
3749 	if (!obj) {
3750 		ret = -ENOENT;
3751 		goto unlock;
3752 	}
3753 
3754 	ret = i915_gem_object_set_cache_level(obj, level);
3755 
3756 	i915_gem_object_put(obj);
3757 unlock:
3758 	mutex_unlock(&dev->struct_mutex);
3759 rpm_put:
3760 	intel_runtime_pm_put(dev_priv);
3761 
3762 	return ret;
3763 }
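/* Userspace-side sketch (illustrative only, assuming the libdrm drmIoctl()
 * helper): requesting snooped/LLC caching for a buffer handle looks roughly
 * like
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 *
 * with -ENODEV reported on platforms that have neither LLC nor snooping.
 */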
3764 
3765 /*
3766  * Prepare buffer for display plane (scanout, cursors, etc).
3767  * Can be called from an uninterruptible phase (modesetting) and allows
3768  * any flushes to be pipelined (for pageflips).
3769  */
3770 struct i915_vma *
3771 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3772 				     u32 alignment,
3773 				     const struct i915_ggtt_view *view)
3774 {
3775 	struct i915_vma *vma;
3776 	u32 old_read_domains, old_write_domain;
3777 	int ret;
3778 
3779 	/* Mark the pin_display early so that we account for the
3780 	 * display coherency whilst setting up the cache domains.
3781 	 */
3782 	obj->pin_display++;
3783 
3784 	/* The display engine is not coherent with the LLC cache on gen6.  As
3785 	 * a result, we make sure that the pinning that is about to occur is
3786 	 * done with uncached PTEs. This is the lowest common denominator for all
3787 	 * chipsets.
3788 	 *
3789 	 * However for gen6+, we could do better by using the GFDT bit instead
3790 	 * of uncaching, which would allow us to flush all the LLC-cached data
3791 	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3792 	 */
3793 	ret = i915_gem_object_set_cache_level(obj,
3794 					      HAS_WT(to_i915(obj->base.dev)) ?
3795 					      I915_CACHE_WT : I915_CACHE_NONE);
3796 	if (ret) {
3797 		vma = ERR_PTR(ret);
3798 		goto err_unpin_display;
3799 	}
3800 
3801 	/* As the user may map the buffer once pinned in the display plane
3802 	 * (e.g. libkms for the bootup splash), we have to ensure that we
3803 	 * always use map_and_fenceable for all scanout buffers. However,
3804 	 * it may simply be too big to fit into the mappable aperture, in which case
3805 	 * put it anyway and hope that userspace can cope (but always first
3806 	 * try to preserve the existing ABI).
3807 	 */
3808 	vma = ERR_PTR(-ENOSPC);
3809 	if (view->type == I915_GGTT_VIEW_NORMAL)
3810 		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
3811 					       PIN_MAPPABLE | PIN_NONBLOCK);
3812 	if (IS_ERR(vma))
3813 		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0);
3814 	if (IS_ERR(vma))
3815 		goto err_unpin_display;
3816 
3817 	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
3818 
3819 	WARN_ON(obj->pin_display > i915_vma_pin_count(vma));
3820 
3821 	i915_gem_object_flush_cpu_write_domain(obj);
3822 
3823 	old_write_domain = obj->base.write_domain;
3824 	old_read_domains = obj->base.read_domains;
3825 
3826 	/* It should now be out of any other write domains, and we can update
3827 	 * the domain values for our changes.
3828 	 */
3829 	obj->base.write_domain = 0;
3830 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3831 
3832 	trace_i915_gem_object_change_domain(obj,
3833 					    old_read_domains,
3834 					    old_write_domain);
3835 
3836 	return vma;
3837 
3838 err_unpin_display:
3839 	obj->pin_display--;
3840 	return vma;
3841 }
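/* Illustrative pairing (comment only): a modeset caller pins the framebuffer
 * for scanout and releases it once the flip has completed, e.g.
 *
 *	vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *	...
 *	i915_gem_object_unpin_from_display_plane(vma);
 *
 * so that pin_display and the VMA pin count stay balanced.
 */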
3842 
3843 void
3844 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
3845 {
3846 	if (WARN_ON(vma->obj->pin_display == 0))
3847 		return;
3848 
3849 	if (--vma->obj->pin_display == 0)
3850 		vma->display_alignment = 0;
3851 
3852 	/* Bump the LRU to try to avoid premature eviction whilst flipping */
3853 	if (!i915_vma_is_active(vma))
3854 		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3855 
3856 	i915_vma_unpin(vma);
3857 	WARN_ON(vma->obj->pin_display > i915_vma_pin_count(vma));
3858 }
3859 
3860 /**
3861  * Moves a single object to the CPU read, and possibly write domain.
3862  * @obj: object to act on
3863  * @write: requesting write or read-only access
3864  *
3865  * This function returns when the move is complete, including waiting on
3866  * flushes to occur.
3867  */
3868 int
3869 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3870 {
3871 	uint32_t old_write_domain, old_read_domains;
3872 	int ret;
3873 
3874 	ret = i915_gem_object_wait_rendering(obj, !write);
3875 	if (ret)
3876 		return ret;
3877 
3878 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3879 		return 0;
3880 
3881 	i915_gem_object_flush_gtt_write_domain(obj);
3882 
3883 	old_write_domain = obj->base.write_domain;
3884 	old_read_domains = obj->base.read_domains;
3885 
3886 	/* Flush the CPU cache if it's still invalid. */
3887 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3888 		i915_gem_clflush_object(obj, false);
3889 
3890 		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3891 	}
3892 
3893 	/* It should now be out of any other write domains, and we can update
3894 	 * the domain values for our changes.
3895 	 */
3896 	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3897 
3898 	/* If we're writing through the CPU, then the GPU read domains will
3899 	 * need to be invalidated at next use.
3900 	 */
3901 	if (write) {
3902 		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3903 		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3904 	}
3905 
3906 	trace_i915_gem_object_change_domain(obj,
3907 					    old_read_domains,
3908 					    old_write_domain);
3909 
3910 	return 0;
3911 }
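/* Illustrative caller (comment only): i915_gem_object_create_from_data() at
 * the end of this file does
 *
 *	ret = i915_gem_object_set_to_cpu_domain(obj, true);
 *
 * before copying the payload into the backing pages, so any GPU read domains
 * are invalidated before the CPU dirties the pages.
 */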
3912 
3913 /* Throttle our rendering by waiting until the ring has completed our requests
3914  * emitted over 20 msec ago.
3915  *
3916  * Note that if we were to use the current jiffies each time around the loop,
3917  * we wouldn't escape the function with any frames outstanding if the time to
3918  * render a frame was over 20ms.
3919  *
3920  * This should get us reasonable parallelism between CPU and GPU but also
3921  * relatively low latency when blocking on a particular request to finish.
3922  */
3923 static int
3924 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3925 {
3926 	struct drm_i915_private *dev_priv = to_i915(dev);
3927 	struct drm_i915_file_private *file_priv = file->driver_priv;
3928 	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
3929 	struct drm_i915_gem_request *request, *target = NULL;
3930 	int ret;
3931 
3932 	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
3933 	if (ret)
3934 		return ret;
3935 
3936 	/* ABI: return -EIO if already wedged */
3937 	if (i915_terminally_wedged(&dev_priv->gpu_error))
3938 		return -EIO;
3939 
3940 	lockmgr(&file_priv->mm.lock, LK_EXCLUSIVE);
3941 	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3942 		if (time_after_eq(request->emitted_jiffies, recent_enough))
3943 			break;
3944 
3945 		/*
3946 		 * Note that the request might not have been submitted yet,
3947 		 * in which case emitted_jiffies will be zero.
3948 		 */
3949 		if (!request->emitted_jiffies)
3950 			continue;
3951 
3952 		target = request;
3953 	}
3954 	if (target)
3955 		i915_gem_request_get(target);
3956 	lockmgr(&file_priv->mm.lock, LK_RELEASE);
3957 
3958 	if (target == NULL)
3959 		return 0;
3960 
3961 	ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
3962 	i915_gem_request_put(target);
3963 
3964 	return ret;
3965 }
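/* Illustrative walkthrough: the client's request list is scanned oldest
 * first and the scan stops at the first request emitted inside the throttle
 * window, so target ends up being the most recent request older than the
 * (nominally ~20ms, see DRM_I915_THROTTLE_JIFFIES) window and that is the
 * request waited upon.
 */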
3966 
3967 static bool
3968 i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
3969 {
3970 	if (!drm_mm_node_allocated(&vma->node))
3971 		return false;
3972 
3973 	if (vma->node.size < size)
3974 		return true;
3975 
3976 	if (alignment && vma->node.start & (alignment - 1))
3977 		return true;
3978 
3979 	if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
3980 		return true;
3981 
3982 	if (flags & PIN_OFFSET_BIAS &&
3983 	    vma->node.start < (flags & PIN_OFFSET_MASK))
3984 		return true;
3985 
3986 	if (flags & PIN_OFFSET_FIXED &&
3987 	    vma->node.start != (flags & PIN_OFFSET_MASK))
3988 		return true;
3989 
3990 	return false;
3991 }
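/* Worked example (comment only): alignment is assumed to be a power of two,
 * so "start & (alignment - 1)" isolates the misaligned low bits. With
 * alignment == 4096, a node at 0x11000 passes (0x11000 & 0xfff == 0) while
 * one at 0x11800 is reported as misplaced.
 */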
3992 
3993 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
3994 {
3995 	struct drm_i915_gem_object *obj = vma->obj;
3996 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3997 	bool mappable, fenceable;
3998 	u32 fence_size, fence_alignment;
3999 
4000 	fence_size = i915_gem_get_ggtt_size(dev_priv,
4001 					    vma->size,
4002 					    i915_gem_object_get_tiling(obj));
4003 	fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
4004 						      vma->size,
4005 						      i915_gem_object_get_tiling(obj),
4006 						      true);
4007 
4008 	fenceable = (vma->node.size == fence_size &&
4009 		     (vma->node.start & (fence_alignment - 1)) == 0);
4010 
4011 	mappable = (vma->node.start + fence_size <=
4012 		    dev_priv->ggtt.mappable_end);
4013 
4014 	if (mappable && fenceable)
4015 		vma->flags |= I915_VMA_CAN_FENCE;
4016 	else
4017 		vma->flags &= ~I915_VMA_CAN_FENCE;
4018 }
4019 
4020 int __i915_vma_do_pin(struct i915_vma *vma,
4021 		      u64 size, u64 alignment, u64 flags)
4022 {
4023 	unsigned int bound = vma->flags;
4024 	int ret;
4025 
4026 	GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
4027 	GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
4028 
4029 	if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
4030 		ret = -EBUSY;
4031 		goto err;
4032 	}
4033 
4034 	if ((bound & I915_VMA_BIND_MASK) == 0) {
4035 		ret = i915_vma_insert(vma, size, alignment, flags);
4036 		if (ret)
4037 			goto err;
4038 	}
4039 
4040 	ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
4041 	if (ret)
4042 		goto err;
4043 
4044 	if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
4045 		__i915_vma_set_map_and_fenceable(vma);
4046 
4047 	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
4048 	return 0;
4049 
4050 err:
4051 	__i915_vma_unpin(vma);
4052 	return ret;
4053 }
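/* Illustrative usage (comment only): callers normally go through the
 * i915_vma_pin() wrapper in the headers, e.g. i915_gem_object_ggtt_pin()
 * below does
 *
 *	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
 *
 * which is expected to fast-path the already-bound case and fall back to
 * __i915_vma_do_pin() otherwise.
 */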
4054 
4055 struct i915_vma *
4056 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
4057 			 const struct i915_ggtt_view *view,
4058 			 u64 size,
4059 			 u64 alignment,
4060 			 u64 flags)
4061 {
4062 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
4063 	struct i915_address_space *vm = &dev_priv->ggtt.base;
4064 	struct i915_vma *vma;
4065 	int ret;
4066 
4067 	vma = i915_gem_obj_lookup_or_create_vma(obj, vm, view);
4068 	if (IS_ERR(vma))
4069 		return vma;
4070 
4071 	if (i915_vma_misplaced(vma, size, alignment, flags)) {
4072 		if (flags & PIN_NONBLOCK &&
4073 		    (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
4074 			return ERR_PTR(-ENOSPC);
4075 
4076 		if (flags & PIN_MAPPABLE) {
4077 			u32 fence_size;
4078 
4079 			fence_size = i915_gem_get_ggtt_size(dev_priv, vma->size,
4080 							    i915_gem_object_get_tiling(obj));
4081 			/* If the required space is larger than the available
4082 			 * aperture, we will not able to find a slot for the
4083 	 * aperture, we will not be able to find a slot for the
4084 			 * vain. Worse, doing so may cause us to ping-pong
4085 			 * the object in and out of the Global GTT and
4086 			 * waste a lot of cycles under the mutex.
4087 			 */
4088 			if (fence_size > dev_priv->ggtt.mappable_end)
4089 				return ERR_PTR(-E2BIG);
4090 
4091 			/* If NONBLOCK is set the caller is optimistically
4092 			 * trying to cache the full object within the mappable
4093 			 * aperture, and *must* have a fallback in place for
4094 			 * situations where we cannot bind the object. We
4095 			 * can be a little more lax here and use the fallback
4096 			 * more often to avoid costly migrations of ourselves
4097 			 * and other objects within the aperture.
4098 			 *
4099 			 * Half-the-aperture is used as a simple heuristic.
4100 	 * More interesting would be to search for a free
4101 			 * block prior to making the commitment to unbind.
4102 			 * That caters for the self-harm case, and with a
4103 			 * little more heuristics (e.g. NOFAULT, NOEVICT)
4104 			 * we could try to minimise harm to others.
4105 			 */
4106 			if (flags & PIN_NONBLOCK &&
4107 			    fence_size > dev_priv->ggtt.mappable_end / 2)
4108 				return ERR_PTR(-ENOSPC);
4109 		}
4110 
4111 		WARN(i915_vma_is_pinned(vma),
4112 		     "bo is already pinned in ggtt with incorrect alignment:"
4113 		     " offset=%08x, req.alignment=%llx,"
4114 		     " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
4115 		     i915_ggtt_offset(vma), alignment,
4116 		     !!(flags & PIN_MAPPABLE),
4117 		     i915_vma_is_map_and_fenceable(vma));
4118 		ret = i915_vma_unbind(vma);
4119 		if (ret)
4120 			return ERR_PTR(ret);
4121 	}
4122 
4123 	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
4124 	if (ret)
4125 		return ERR_PTR(ret);
4126 
4127 	return vma;
4128 }
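/* Worked example (comment only, aperture size purely illustrative): with a
 * 256 MiB mappable aperture, a PIN_MAPPABLE | PIN_NONBLOCK request whose
 * fence_size exceeds 128 MiB is refused with -ENOSPC above rather than
 * evicting half the aperture, and anything larger than the full aperture
 * gets -E2BIG.
 */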
4129 
4130 static __always_inline unsigned int __busy_read_flag(unsigned int id)
4131 {
4132 	/* Note that we could alias engines in the execbuf API, but
4133 	 * that would be very unwise as it prevents userspace from
4134 	 * fine control over engine selection. Ahem.
4135 	 *
4136 	 * This should be something like EXEC_MAX_ENGINE instead of
4137 	 * I915_NUM_ENGINES.
4138 	 */
4139 	BUILD_BUG_ON(I915_NUM_ENGINES > 16);
4140 	return 0x10000 << id;
4141 }
4142 
4143 static __always_inline unsigned int __busy_write_id(unsigned int id)
4144 {
4145 	/* The uABI guarantees an active writer is also amongst the read
4146 	 * engines. This would be true if we accessed the activity tracking
4147 	 * under the lock, but as we perform the lookup of the object and
4148 	 * its activity locklessly we cannot guarantee that the last_write
4149 	 * being active implies that we have set the same engine flag from
4150 	 * last_read - hence we always set both read and write busy for
4151 	 * last_write.
4152 	 */
4153 	return id | __busy_read_flag(id);
4154 }
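/* Worked example (comment only): for engine exec_id 2, __busy_read_flag()
 * yields 0x10000 << 2 == 0x40000 and __busy_write_id() yields
 * 2 | 0x40000 == 0x40002, i.e. the write id occupies the low 16 bits and the
 * per-engine read flags the upper bits of args->busy.
 */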
4155 
4156 static __always_inline unsigned int
4157 __busy_set_if_active(const struct i915_gem_active *active,
4158 		     unsigned int (*flag)(unsigned int id))
4159 {
4160 	struct drm_i915_gem_request *request;
4161 
4162 	request = rcu_dereference(active->request);
4163 	if (!request || i915_gem_request_completed(request))
4164 		return 0;
4165 
4166 	/* This is racy. See __i915_gem_active_get_rcu() for a detailed
4167 	 * discussion of how to handle the race correctly, but for reporting
4168 	 * the busy state we err on the side of potentially reporting the
4169 	 * wrong engine as being busy (but we guarantee that the result
4170 	 * is at least self-consistent).
4171 	 *
4172 	 * As we use SLAB_DESTROY_BY_RCU, the request may be reallocated
4173 	 * whilst we are inspecting it, even under the RCU read lock we hold here.
4174 	 * This means that there is a small window for the engine and/or the
4175 	 * seqno to have been overwritten. The seqno will always be in the
4176 	 * future compared to the intended one, and so we know that if that
4177 	 * seqno is idle (on whatever engine) our request is idle and the
4178 	 * return 0 above is correct.
4179 	 *
4180 	 * The issue is that if the engine is switched, it is just as likely
4181 	 * to report that it is busy (but since the switch happened, we know
4182 	 * the request should be idle). So there is a small chance that a busy
4183 	 * result is actually the wrong engine.
4184 	 *
4185 	 * So why don't we care?
4186 	 *
4187 	 * For starters, the busy ioctl is a heuristic that is by definition
4188 	 * racy. Even with perfect serialisation in the driver, the hardware
4189 	 * state is constantly advancing - the state we report to the user
4190 	 * is stale.
4191 	 *
4192 	 * The critical information for the busy-ioctl is whether the object
4193 	 * is idle as userspace relies on that to detect whether its next
4194 	 * access will stall, or if it has missed submitting commands to
4195 	 * the hardware allowing the GPU to stall. We never generate a
4196 	 * false-positive for idleness, thus busy-ioctl is reliable at the
4197 	 * most fundamental level, and we maintain the guarantee that a
4198 	 * busy object left to itself will eventually become idle (and stay
4199 	 * idle!).
4200 	 *
4201 	 * We allow ourselves the leeway of potentially misreporting the busy
4202 	 * state because that is an optimisation heuristic that is constantly
4203 	 * in flux. Being quickly able to detect the busy/idle state is much
4204 	 * more important than accurate logging of exactly which engines were
4205 	 * busy.
4206 	 *
4207 	 * For accuracy in reporting the engine, we could use
4208 	 *
4209 	 *	result = 0;
4210 	 *	request = __i915_gem_active_get_rcu(active);
4211 	 *	if (request) {
4212 	 *		if (!i915_gem_request_completed(request))
4213 	 *			result = flag(request->engine->exec_id);
4214 	 *		i915_gem_request_put(request);
4215 	 *	}
4216 	 *
4217 	 * but that still remains susceptible to both hardware and userspace
4218 	 * races. So we accept making the result of that race slightly worse,
4219 	 * given the rarity of the race and its low impact on the result.
4220 	 */
4221 	return flag(READ_ONCE(request->engine->exec_id));
4222 }
4223 
4224 static __always_inline unsigned int
4225 busy_check_reader(const struct i915_gem_active *active)
4226 {
4227 	return __busy_set_if_active(active, __busy_read_flag);
4228 }
4229 
4230 static __always_inline unsigned int
4231 busy_check_writer(const struct i915_gem_active *active)
4232 {
4233 	return __busy_set_if_active(active, __busy_write_id);
4234 }
4235 
4236 int
4237 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4238 		    struct drm_file *file)
4239 {
4240 	struct drm_i915_gem_busy *args = data;
4241 	struct drm_i915_gem_object *obj;
4242 	unsigned long active;
4243 
4244 	obj = i915_gem_object_lookup(file, args->handle);
4245 	if (!obj)
4246 		return -ENOENT;
4247 
4248 	args->busy = 0;
4249 	active = __I915_BO_ACTIVE(obj);
4250 	if (active) {
4251 		int idx;
4252 
4253 		/* Yes, the lookups are intentionally racy.
4254 		 *
4255 		 * First, we cannot simply rely on __I915_BO_ACTIVE. We have
4256 		 * to regard the value as stale and as our ABI guarantees
4257 		 * forward progress, we confirm the status of each active
4258 		 * request with the hardware.
4259 		 *
4260 		 * Even though we guard the pointer lookup by RCU, that only
4261 		 * guarantees that the pointer and its contents remain
4262 		 * dereferenceable and does *not* mean that the request we
4263 		 * have is the same as the one being tracked by the object.
4264 		 *
4265 		 * Consider that we lookup the request just as it is being
4266 		 * retired and freed. We take a local copy of the pointer,
4267 		 * but before we add its engine into the busy set, the other
4268 		 * thread reallocates it and assigns it to a task on another
4269 		 * engine with a fresh and incomplete seqno. Guarding against
4270 		 * that requires careful serialisation and reference counting,
4271 		 * i.e. using __i915_gem_active_get_request_rcu(). We don't;
4272 		 * instead we expect that if the result is busy, which engines
4273 		 * are busy is not completely reliable - we only guarantee
4274 		 * that the object was busy.
4275 		 */
4276 		rcu_read_lock();
4277 
4278 		for_each_active(active, idx)
4279 			args->busy |= busy_check_reader(&obj->last_read[idx]);
4280 
4281 		/* For ABI sanity, we only care that the write engine is in
4282 		 * the set of read engines. This is ensured by the ordering
4283 		 * of setting last_read/last_write in i915_vma_move_to_active,
4284 		 * and then in reverse in retire.
4285 		 *
4286 		 * We don't care that the set of active read/write engines
4287 		 * may change during construction of the result, as it is
4288 		 * equally liable to change before userspace can inspect
4289 		 * the result.
4290 		 */
4291 		args->busy |= busy_check_writer(&obj->last_write);
4292 
4293 		rcu_read_unlock();
4294 	}
4295 
4296 	i915_gem_object_put_unlocked(obj);
4297 	return 0;
4298 }
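/* Userspace-side sketch (illustrative only, assuming the libdrm drmIoctl()
 * helper; reuse_when_idle() is a hypothetical placeholder): polling for
 * idleness looks roughly like
 *
 *	struct drm_i915_gem_busy busy = { .handle = handle };
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
 *	if (!busy.busy)
 *		reuse_when_idle(handle);
 *
 * relying only on the idle guarantee discussed above, not on exactly which
 * engines are reported busy.
 */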
4299 
4300 int
4301 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4302 			struct drm_file *file_priv)
4303 {
4304 	return i915_gem_ring_throttle(dev, file_priv);
4305 }
4306 
4307 int
4308 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4309 		       struct drm_file *file_priv)
4310 {
4311 	struct drm_i915_private *dev_priv = to_i915(dev);
4312 	struct drm_i915_gem_madvise *args = data;
4313 	struct drm_i915_gem_object *obj;
4314 	int ret;
4315 
4316 	switch (args->madv) {
4317 	case I915_MADV_DONTNEED:
4318 	case I915_MADV_WILLNEED:
4319 	    break;
4320 	default:
4321 	    return -EINVAL;
4322 	}
4323 
4324 	ret = i915_mutex_lock_interruptible(dev);
4325 	if (ret)
4326 		return ret;
4327 
4328 	obj = i915_gem_object_lookup(file_priv, args->handle);
4329 	if (!obj) {
4330 		ret = -ENOENT;
4331 		goto unlock;
4332 	}
4333 
4334 	if (obj->pages &&
4335 	    i915_gem_object_is_tiled(obj) &&
4336 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
4337 		if (obj->madv == I915_MADV_WILLNEED)
4338 			i915_gem_object_unpin_pages(obj);
4339 		if (args->madv == I915_MADV_WILLNEED)
4340 			i915_gem_object_pin_pages(obj);
4341 	}
4342 
4343 	if (obj->madv != __I915_MADV_PURGED)
4344 		obj->madv = args->madv;
4345 
4346 	/* if the object is no longer attached, discard its backing storage */
4347 	if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
4348 		i915_gem_object_truncate(obj);
4349 
4350 	args->retained = obj->madv != __I915_MADV_PURGED;
4351 
4352 	i915_gem_object_put(obj);
4353 unlock:
4354 	mutex_unlock(&dev->struct_mutex);
4355 	return ret;
4356 }
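/* Userspace-side sketch (illustrative only, assuming the libdrm drmIoctl()
 * helper): a buffer cache marks idle buffers purgeable and checks whether
 * they survived before reuse, e.g.
 *
 *	struct drm_i915_gem_madvise madv = {
 *		.handle = handle,
 *		.madv = I915_MADV_DONTNEED,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 *	...
 *	madv.madv = I915_MADV_WILLNEED;
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 *	if (!madv.retained)
 *		reupload_contents(handle);
 *
 * where reupload_contents() is a hypothetical helper for the case that the
 * backing store was purged while marked DONTNEED.
 */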
4357 
4358 void i915_gem_object_init(struct drm_i915_gem_object *obj,
4359 			  const struct drm_i915_gem_object_ops *ops)
4360 {
4361 	int i;
4362 
4363 	INIT_LIST_HEAD(&obj->global_list);
4364 	for (i = 0; i < I915_NUM_ENGINES; i++)
4365 		init_request_active(&obj->last_read[i],
4366 				    i915_gem_object_retire__read);
4367 	init_request_active(&obj->last_write,
4368 			    i915_gem_object_retire__write);
4369 	INIT_LIST_HEAD(&obj->obj_exec_link);
4370 	INIT_LIST_HEAD(&obj->vma_list);
4371 	INIT_LIST_HEAD(&obj->batch_pool_link);
4372 
4373 	obj->ops = ops;
4374 
4375 	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
4376 	obj->madv = I915_MADV_WILLNEED;
4377 
4378 	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
4379 }
4380 
4381 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4382 	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
4383 	.get_pages = i915_gem_object_get_pages_gtt,
4384 	.put_pages = i915_gem_object_put_pages_gtt,
4385 };
4386 
4387 struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev,
4388 						  size_t size)
4389 {
4390 	struct drm_i915_gem_object *obj;
4391 #if 0
4392 	struct address_space *mapping;
4393 	gfp_t mask;
4394 #endif
4395 	int ret;
4396 
4397 	obj = i915_gem_object_alloc(dev);
4398 	if (obj == NULL)
4399 		return ERR_PTR(-ENOMEM);
4400 
4401 	ret = drm_gem_object_init(dev, &obj->base, size);
4402 	if (ret)
4403 		goto fail;
4404 
4405 #if 0
4406 	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4407 	if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
4408 		/* 965gm cannot relocate objects above 4GiB. */
4409 		mask &= ~__GFP_HIGHMEM;
4410 		mask |= __GFP_DMA32;
4411 	}
4412 
4413 	mapping = obj->base.filp->f_mapping;
4414 	mapping_set_gfp_mask(mapping, mask);
4415 #endif
4416 
4417 	i915_gem_object_init(obj, &i915_gem_object_ops);
4418 
4419 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4420 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4421 
4422 	if (HAS_LLC(dev)) {
4423 		/* On some devices, we can have the GPU use the LLC (the CPU
4424 		 * cache) for about a 10% performance improvement
4425 		 * compared to uncached.  Graphics requests other than
4426 		 * display scanout are coherent with the CPU in
4427 		 * accessing this cache.  This means in this mode we
4428 		 * don't need to clflush on the CPU side, and on the
4429 		 * GPU side we only need to flush internal caches to
4430 		 * get data visible to the CPU.
4431 		 *
4432 		 * However, we maintain the display planes as UC, and so
4433 		 * need to rebind when first used as such.
4434 		 */
4435 		obj->cache_level = I915_CACHE_LLC;
4436 	} else
4437 		obj->cache_level = I915_CACHE_NONE;
4438 
4439 	trace_i915_gem_object_create(obj);
4440 
4441 	return obj;
4442 
4443 fail:
4444 	i915_gem_object_free(obj);
4445 
4446 	return ERR_PTR(ret);
4447 }
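/* Illustrative usage (comment only): i915_gem_object_create() returns an
 * ERR_PTR() rather than NULL on failure, so callers such as
 * i915_gem_object_create_from_data() below check it with
 *
 *	obj = i915_gem_object_create(dev, round_up(size, PAGE_SIZE));
 *	if (IS_ERR(obj))
 *		return obj;
 */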
4448 
4449 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4450 {
4451 	/* If we are the last user of the backing storage (be it shmemfs
4452 	 * pages or stolen etc), we know that the pages are going to be
4453 	 * immediately released. In this case, we can then skip copying
4454 	 * back the contents from the GPU.
4455 	 */
4456 
4457 	if (obj->madv != I915_MADV_WILLNEED)
4458 		return false;
4459 
4460 	if (obj->base.filp == NULL)
4461 		return true;
4462 
4463 	/* At first glance, this looks racy, but then again so would
4464 	 * userspace racing mmap against close. However, the first external
4465 	 * reference to the filp can only be obtained through the
4466 	 * i915_gem_mmap_ioctl() which safeguards us against the user
4467 	 * acquiring such a reference whilst we are in the middle of
4468 	 * freeing the object.
4469 	 */
4470 #if 0
4471 	return atomic_long_read(&obj->base.filp->f_count) == 1;
4472 #else
4473 	return false;
4474 #endif
4475 }
4476 
4477 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4478 {
4479 	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4480 	struct drm_device *dev = obj->base.dev;
4481 	struct drm_i915_private *dev_priv = to_i915(dev);
4482 	struct i915_vma *vma, *next;
4483 
4484 	intel_runtime_pm_get(dev_priv);
4485 
4486 	trace_i915_gem_object_destroy(obj);
4487 
4488 	/* All file-owned VMA should have been released by this point through
4489 	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
4490 	 * However, the object may also be bound into the global GTT (e.g.
4491 	 * older GPUs without per-process support, or for direct access through
4492 	 * the GTT either for the user or for scanout). Those VMA still need to
4493 	 * be unbound now.
4494 	 */
4495 	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
4496 		GEM_BUG_ON(!i915_vma_is_ggtt(vma));
4497 		GEM_BUG_ON(i915_vma_is_active(vma));
4498 		vma->flags &= ~I915_VMA_PIN_MASK;
4499 		i915_vma_close(vma);
4500 	}
4501 	GEM_BUG_ON(obj->bind_count);
4502 
4503 	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
4504 	 * before progressing. */
4505 	if (obj->stolen)
4506 		i915_gem_object_unpin_pages(obj);
4507 
4508 	WARN_ON(atomic_read(&obj->frontbuffer_bits));
4509 
4510 	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
4511 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
4512 	    i915_gem_object_is_tiled(obj))
4513 		i915_gem_object_unpin_pages(obj);
4514 
4515 	if (WARN_ON(obj->pages_pin_count))
4516 		obj->pages_pin_count = 0;
4517 	if (discard_backing_storage(obj))
4518 		obj->madv = I915_MADV_DONTNEED;
4519 	i915_gem_object_put_pages(obj);
4520 
4521 	BUG_ON(obj->pages);
4522 
4523 #if 0
4524 	if (obj->base.import_attach)
4525 		drm_prime_gem_destroy(&obj->base, NULL);
4526 #endif
4527 
4528 	if (obj->ops->release)
4529 		obj->ops->release(obj);
4530 
4531 	drm_gem_object_release(&obj->base);
4532 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
4533 
4534 	kfree(obj->bit_17);
4535 	i915_gem_object_free(obj);
4536 
4537 	intel_runtime_pm_put(dev_priv);
4538 }
4539 
4540 int i915_gem_suspend(struct drm_device *dev)
4541 {
4542 	struct drm_i915_private *dev_priv = to_i915(dev);
4543 	int ret;
4544 
4545 	intel_suspend_gt_powersave(dev_priv);
4546 
4547 	mutex_lock(&dev->struct_mutex);
4548 
4549 	/* We have to flush all the executing contexts to main memory so
4550 	 * that they can be saved in the hibernation image. To ensure the last
4551 	 * context image is coherent, we have to switch away from it. That
4552 	 * leaves the dev_priv->kernel_context still active when
4553 	 * we actually suspend, and its image in memory may not match the GPU
4554 	 * state. Fortunately, the kernel_context is disposable and we do
4555 	 * not rely on its state.
4556 	 */
4557 	ret = i915_gem_switch_to_kernel_context(dev_priv);
4558 	if (ret)
4559 		goto err;
4560 
4561 	ret = i915_gem_wait_for_idle(dev_priv,
4562 				     I915_WAIT_INTERRUPTIBLE |
4563 				     I915_WAIT_LOCKED);
4564 	if (ret)
4565 		goto err;
4566 
4567 	i915_gem_retire_requests(dev_priv);
4568 
4569 	i915_gem_context_lost(dev_priv);
4570 	mutex_unlock(&dev->struct_mutex);
4571 
4572 	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
4573 	cancel_delayed_work_sync(&dev_priv->gt.retire_work);
4574 	flush_delayed_work(&dev_priv->gt.idle_work);
4575 
4576 	/* Assert that we successfully flushed all the work and
4577 	 * reset the GPU back to its idle, low power state.
4578 	 */
4579 	WARN_ON(dev_priv->gt.awake);
4580 
4581 	/*
4582 	 * Neither the BIOS, ourselves, nor any other kernel
4583 	 * expects the system to be in execlists mode on startup,
4584 	 * so we need to reset the GPU back to legacy mode. And the only
4585 	 * known way to disable logical contexts is through a GPU reset.
4586 	 *
4587 	 * So in order to leave the system in a known default configuration,
4588 	 * always reset the GPU upon unload and suspend. Afterwards we then
4589 	 * clean up the GEM state tracking, flushing off the requests and
4590 	 * leaving the system in a known idle state.
4591 	 *
4592 	 * Note that it is of the utmost importance that the GPU is idle and
4593 	 * all stray writes are flushed *before* we dismantle the backing
4594 	 * storage for the pinned objects.
4595 	 *
4596 	 * However, since we are uncertain that resetting the GPU on older
4597 	 * machines is a good idea, we don't - just in case it leaves the
4598 	 * machine in an unusable condition.
4599 	 */
4600 	if (HAS_HW_CONTEXTS(dev)) {
4601 		int reset = intel_gpu_reset(dev_priv, ALL_ENGINES);
4602 		WARN_ON(reset && reset != -ENODEV);
4603 	}
4604 
4605 	return 0;
4606 
4607 err:
4608 	mutex_unlock(&dev->struct_mutex);
4609 	return ret;
4610 }
4611 
4612 void i915_gem_resume(struct drm_device *dev)
4613 {
4614 	struct drm_i915_private *dev_priv = to_i915(dev);
4615 
4616 	mutex_lock(&dev->struct_mutex);
4617 	i915_gem_restore_gtt_mappings(dev);
4618 
4619 	/* As we didn't flush the kernel context before suspend, we cannot
4620 	 * guarantee that the context image is complete. So let's just reset
4621 	 * it and start again.
4622 	 */
4623 	dev_priv->gt.resume(dev_priv);
4624 
4625 	mutex_unlock(&dev->struct_mutex);
4626 }
4627 
4628 void i915_gem_init_swizzling(struct drm_device *dev)
4629 {
4630 	struct drm_i915_private *dev_priv = to_i915(dev);
4631 
4632 	if (INTEL_INFO(dev)->gen < 5 ||
4633 	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4634 		return;
4635 
4636 	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4637 				 DISP_TILE_SURFACE_SWIZZLING);
4638 
4639 	if (IS_GEN5(dev_priv))
4640 		return;
4641 
4642 	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4643 	if (IS_GEN6(dev_priv))
4644 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
4645 	else if (IS_GEN7(dev_priv))
4646 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4647 	else if (IS_GEN8(dev_priv))
4648 		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
4649 	else
4650 		BUG();
4651 }
4652 
4653 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
4654 {
4655 	I915_WRITE(RING_CTL(base), 0);
4656 	I915_WRITE(RING_HEAD(base), 0);
4657 	I915_WRITE(RING_TAIL(base), 0);
4658 	I915_WRITE(RING_START(base), 0);
4659 }
4660 
4661 static void init_unused_rings(struct drm_i915_private *dev_priv)
4662 {
4663 	if (IS_I830(dev_priv)) {
4664 		init_unused_ring(dev_priv, PRB1_BASE);
4665 		init_unused_ring(dev_priv, SRB0_BASE);
4666 		init_unused_ring(dev_priv, SRB1_BASE);
4667 		init_unused_ring(dev_priv, SRB2_BASE);
4668 		init_unused_ring(dev_priv, SRB3_BASE);
4669 	} else if (IS_GEN2(dev_priv)) {
4670 		init_unused_ring(dev_priv, SRB0_BASE);
4671 		init_unused_ring(dev_priv, SRB1_BASE);
4672 	} else if (IS_GEN3(dev_priv)) {
4673 		init_unused_ring(dev_priv, PRB1_BASE);
4674 		init_unused_ring(dev_priv, PRB2_BASE);
4675 	}
4676 }
4677 
4678 int
4679 i915_gem_init_hw(struct drm_device *dev)
4680 {
4681 	struct drm_i915_private *dev_priv = to_i915(dev);
4682 	struct intel_engine_cs *engine;
4683 	enum intel_engine_id id;
4684 	int ret;
4685 
4686 	/* Double layer security blanket, see i915_gem_init() */
4687 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4688 
4689 	if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9)
4690 		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
4691 
4692 	if (IS_HASWELL(dev_priv))
4693 		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
4694 			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
4695 
4696 	if (HAS_PCH_NOP(dev_priv)) {
4697 		if (IS_IVYBRIDGE(dev_priv)) {
4698 			u32 temp = I915_READ(GEN7_MSG_CTL);
4699 			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
4700 			I915_WRITE(GEN7_MSG_CTL, temp);
4701 		} else if (INTEL_INFO(dev)->gen >= 7) {
4702 			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
4703 			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
4704 			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
4705 		}
4706 	}
4707 
4708 	i915_gem_init_swizzling(dev);
4709 
4710 	/*
4711 	 * At least 830 can leave some of the unused rings
4712 	 * "active" (i.e. head != tail) after resume, which
4713 	 * will prevent c3 entry. Make sure all unused rings
4714 	 * are totally idle.
4715 	 */
4716 	init_unused_rings(dev_priv);
4717 
4718 	BUG_ON(!dev_priv->kernel_context);
4719 
4720 	ret = i915_ppgtt_init_hw(dev);
4721 	if (ret) {
4722 		DRM_ERROR("PPGTT enable HW failed %d\n", ret);
4723 		goto out;
4724 	}
4725 
4726 	/* Need to do basic initialisation of all rings first: */
4727 	for_each_engine(engine, dev_priv, id) {
4728 		ret = engine->init_hw(engine);
4729 		if (ret)
4730 			goto out;
4731 	}
4732 
4733 	intel_mocs_init_l3cc_table(dev);
4734 
4735 	/* We can't enable contexts until all firmware is loaded */
4736 	ret = intel_guc_setup(dev);
4737 	if (ret)
4738 		goto out;
4739 
4740 out:
4741 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4742 	return ret;
4743 }
4744 
4745 bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
4746 {
4747 	if (INTEL_INFO(dev_priv)->gen < 6)
4748 		return false;
4749 
4750 	/* TODO: make semaphores and Execlists play nicely together */
4751 	if (i915.enable_execlists)
4752 		return false;
4753 
4754 	if (value >= 0)
4755 		return value;
4756 
4757 #ifdef CONFIG_INTEL_IOMMU
4758 	/* Enable semaphores on SNB when IO remapping is off */
4759 	if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
4760 		return false;
4761 #endif
4762 
4763 	return true;
4764 }
4765 
4766 int i915_gem_init(struct drm_device *dev)
4767 {
4768 	struct drm_i915_private *dev_priv = to_i915(dev);
4769 	int ret;
4770 
4771 	mutex_lock(&dev->struct_mutex);
4772 
4773 	if (!i915.enable_execlists) {
4774 		dev_priv->gt.resume = intel_legacy_submission_resume;
4775 		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
4776 	} else {
4777 		dev_priv->gt.resume = intel_lr_context_resume;
4778 		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
4779 	}
4780 
4781 	/* This is just a security blanket to placate dragons.
4782 	 * On some systems, we very sporadically observe that the first TLBs
4783 	 * used by the CS may be stale, despite us poking the TLB reset. If
4784 	 * we hold the forcewake during initialisation these problems
4785 	 * just magically go away.
4786 	 */
4787 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4788 
4789 	i915_gem_init_userptr(dev_priv);
4790 
4791 	ret = i915_gem_init_ggtt(dev_priv);
4792 	if (ret)
4793 		goto out_unlock;
4794 
4795 	ret = i915_gem_context_init(dev);
4796 	if (ret)
4797 		goto out_unlock;
4798 
4799 	ret = intel_engines_init(dev);
4800 	if (ret)
4801 		goto out_unlock;
4802 
4803 	ret = i915_gem_init_hw(dev);
4804 	if (ret == -EIO) {
4805 		/* Allow engine initialisation to fail by marking the GPU as
4806 		 * wedged. But we only want to do this where the GPU is angry,
4807 		 * for all other failure, such as an allocation failure, bail.
4808 		 * for all other failures, such as an allocation failure, bail.
4809 		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
4810 		i915_gem_set_wedged(dev_priv);
4811 		ret = 0;
4812 	}
4813 
4814 out_unlock:
4815 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4816 	mutex_unlock(&dev->struct_mutex);
4817 
4818 	return ret;
4819 }
4820 
4821 void
4822 i915_gem_cleanup_engines(struct drm_device *dev)
4823 {
4824 	struct drm_i915_private *dev_priv = to_i915(dev);
4825 	struct intel_engine_cs *engine;
4826 	enum intel_engine_id id;
4827 
4828 	for_each_engine(engine, dev_priv, id)
4829 		dev_priv->gt.cleanup_engine(engine);
4830 }
4831 
4832 void
4833 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
4834 {
4835 	struct drm_device *dev = &dev_priv->drm;
4836 	int i;
4837 
4838 	if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
4839 	    !IS_CHERRYVIEW(dev_priv))
4840 		dev_priv->num_fence_regs = 32;
4841 	else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) ||
4842 		 IS_I945GM(dev_priv) || IS_G33(dev_priv))
4843 		dev_priv->num_fence_regs = 16;
4844 	else
4845 		dev_priv->num_fence_regs = 8;
4846 
4847 	if (intel_vgpu_active(dev_priv))
4848 		dev_priv->num_fence_regs =
4849 				I915_READ(vgtif_reg(avail_rs.fence_num));
4850 
4851 	/* Initialize fence registers to zero */
4852 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
4853 		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
4854 
4855 		fence->i915 = dev_priv;
4856 		fence->id = i;
4857 		list_add_tail(&fence->link, &dev_priv->mm.fence_list);
4858 	}
4859 	i915_gem_restore_fences(dev);
4860 
4861 	i915_gem_detect_bit_6_swizzle(dev);
4862 }
4863 
4864 void
4865 i915_gem_load_init(struct drm_device *dev)
4866 {
4867 	struct drm_i915_private *dev_priv = to_i915(dev);
4868 
4869 #if 0
4870 	dev_priv->objects =
4871 		kmem_cache_create("i915_gem_object",
4872 				  sizeof(struct drm_i915_gem_object), 0,
4873 				  SLAB_HWCACHE_ALIGN,
4874 				  NULL);
4875 	dev_priv->vmas =
4876 		kmem_cache_create("i915_gem_vma",
4877 				  sizeof(struct i915_vma), 0,
4878 				  SLAB_HWCACHE_ALIGN,
4879 				  NULL);
4880 	dev_priv->requests =
4881 		kmem_cache_create("i915_gem_request",
4882 				  sizeof(struct drm_i915_gem_request), 0,
4883 				  SLAB_HWCACHE_ALIGN |
4884 				  SLAB_RECLAIM_ACCOUNT |
4885 				  SLAB_DESTROY_BY_RCU,
4886 				  NULL);
4887 #endif
4888 
4889 	INIT_LIST_HEAD(&dev_priv->context_list);
4890 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
4891 	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
4892 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4893 	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
4894 			  i915_gem_retire_work_handler);
4895 	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
4896 			  i915_gem_idle_work_handler);
4897 	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
4898 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
4899 
4900 	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
4901 
4902 	init_waitqueue_head(&dev_priv->pending_flip_queue);
4903 
4904 	dev_priv->mm.interruptible = true;
4905 
4906 	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
4907 
4908 	lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, 0);
4909 }
4910 
4911 void i915_gem_load_cleanup(struct drm_device *dev)
4912 {
4913 #if 0
4914 	struct drm_i915_private *dev_priv = to_i915(dev);
4915 
4916 	kmem_cache_destroy(dev_priv->requests);
4917 	kmem_cache_destroy(dev_priv->vmas);
4918 	kmem_cache_destroy(dev_priv->objects);
4919 #endif
4920 
4921 	/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
4922 	rcu_barrier();
4923 }
4924 
4925 int i915_gem_freeze(struct drm_i915_private *dev_priv)
4926 {
4927 	intel_runtime_pm_get(dev_priv);
4928 
4929 	mutex_lock(&dev_priv->drm.struct_mutex);
4930 	i915_gem_shrink_all(dev_priv);
4931 	mutex_unlock(&dev_priv->drm.struct_mutex);
4932 
4933 	intel_runtime_pm_put(dev_priv);
4934 
4935 	return 0;
4936 }
4937 
4938 int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
4939 {
4940 	struct drm_i915_gem_object *obj;
4941 	struct list_head *phases[] = {
4942 		&dev_priv->mm.unbound_list,
4943 		&dev_priv->mm.bound_list,
4944 		NULL
4945 	}, **p;
4946 
4947 	/* Called just before we write the hibernation image.
4948 	 *
4949 	 * We need to update the domain tracking to reflect that the CPU
4950 	 * will be accessing all the pages to create and restore from the
4951 	 * hibernation, and so upon restoration those pages will be in the
4952 	 * CPU domain.
4953 	 *
4954 	 * To make sure the hibernation image contains the latest state,
4955 	 * we update that state just before writing out the image.
4956 	 *
4957 	 * To try and reduce the hibernation image, we manually shrink
4958 	 * the objects as well.
4959 	 */
4960 
4961 	mutex_lock(&dev_priv->drm.struct_mutex);
4962 	i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND);
4963 
4964 	for (p = phases; *p; p++) {
4965 		list_for_each_entry(obj, *p, global_list) {
4966 			obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4967 			obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4968 		}
4969 	}
4970 	mutex_unlock(&dev_priv->drm.struct_mutex);
4971 
4972 	return 0;
4973 }
4974 
4975 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4976 {
4977 	struct drm_i915_file_private *file_priv = file->driver_priv;
4978 	struct drm_i915_gem_request *request;
4979 
4980 	/* Clean up our request list when the client is going away, so that
4981 	 * later retire_requests won't dereference our soon-to-be-gone
4982 	 * file_priv.
4983 	 */
4984 	lockmgr(&file_priv->mm.lock, LK_EXCLUSIVE);
4985 	list_for_each_entry(request, &file_priv->mm.request_list, client_list)
4986 		request->file_priv = NULL;
4987 	lockmgr(&file_priv->mm.lock, LK_RELEASE);
4988 
4989 	if (!list_empty(&file_priv->rps.link)) {
4990 		lockmgr(&to_i915(dev)->rps.client_lock, LK_EXCLUSIVE);
4991 		list_del(&file_priv->rps.link);
4992 		lockmgr(&to_i915(dev)->rps.client_lock, LK_RELEASE);
4993 	}
4994 }
4995 
4996 int
4997 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
4998     vm_ooffset_t foff, struct ucred *cred, u_short *color)
4999 {
5000 	*color = 0; /* XXXKIB */
5001 	return (0);
5002 }
5003 
5004 void
5005 i915_gem_pager_dtor(void *handle)
5006 {
5007 	struct drm_gem_object *obj = handle;
5008 	struct drm_device *dev = obj->dev;
5009 
5010 	mutex_lock(&dev->struct_mutex);
5011 	drm_gem_free_mmap_offset(obj);
5012 	i915_gem_release_mmap(to_intel_bo(obj));
5013 	drm_gem_object_unreference(obj);
5014 	mutex_unlock(&dev->struct_mutex);
5015 }
5016 
5017 int i915_gem_open(struct drm_device *dev, struct drm_file *file)
5018 {
5019 	struct drm_i915_file_private *file_priv;
5020 	int ret;
5021 
5022 	DRM_DEBUG_DRIVER("\n");
5023 
5024 	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
5025 	if (!file_priv)
5026 		return -ENOMEM;
5027 
5028 	file->driver_priv = file_priv;
5029 	file_priv->dev_priv = to_i915(dev);
5030 	file_priv->file = file;
5031 	INIT_LIST_HEAD(&file_priv->rps.link);
5032 
5033 	lockinit(&file_priv->mm.lock, "i915_priv", 0, 0);
5034 	INIT_LIST_HEAD(&file_priv->mm.request_list);
5035 
5036 	file_priv->bsd_engine = -1;
5037 
5038 	ret = i915_gem_context_open(dev, file);
5039 	if (ret)
5040 		kfree(file_priv);
5041 
5042 	return ret;
5043 }
5044 
5045 /**
5046  * i915_gem_track_fb - update frontbuffer tracking
5047  * @old: current GEM buffer for the frontbuffer slots
5048  * @new: new GEM buffer for the frontbuffer slots
5049  * @frontbuffer_bits: bitmask of frontbuffer slots
5050  *
5051  * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
5052  * from @old and setting them in @new. Both @old and @new can be NULL.
5053  */
5054 void i915_gem_track_fb(struct drm_i915_gem_object *old,
5055 		       struct drm_i915_gem_object *new,
5056 		       unsigned frontbuffer_bits)
5057 {
5058 	/* Control of individual bits within the mask is guarded by
5059 	 * the owning plane->mutex, i.e. we can never see concurrent
5060 	 * manipulation of individual bits. But since the bitfield as a whole
5061 	 * is updated using RMW, we need to use atomics in order to update
5062 	 * the bits.
5063 	 */
5064 	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
5065 		     sizeof(atomic_t) * BITS_PER_BYTE);
5066 
5067 	if (old) {
5068 		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
5069 		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
5070 	}
5071 
5072 	if (new) {
5073 		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
5074 		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
5075 	}
5076 }
5077 
5078 /* Like i915_gem_object_get_page(), but mark the returned page dirty */
5079 struct page *
5080 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
5081 {
5082 	struct page *page;
5083 
5084 	/* Only default objects have per-page dirty tracking */
5085 	if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
5086 		return NULL;
5087 
5088 	page = i915_gem_object_get_page(obj, n);
5089 	set_page_dirty(page);
5090 	return page;
5091 }
5092 
5093 /* Allocate a new GEM object and fill it with the supplied data */
5094 struct drm_i915_gem_object *
5095 i915_gem_object_create_from_data(struct drm_device *dev,
5096 			         const void *data, size_t size)
5097 {
5098 	struct drm_i915_gem_object *obj;
5099 	struct sg_table *sg;
5100 	size_t bytes;
5101 	int ret;
5102 
5103 	obj = i915_gem_object_create(dev, round_up(size, PAGE_SIZE));
5104 	if (IS_ERR(obj))
5105 		return obj;
5106 
5107 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
5108 	if (ret)
5109 		goto fail;
5110 
5111 	ret = i915_gem_object_get_pages(obj);
5112 	if (ret)
5113 		goto fail;
5114 
5115 	i915_gem_object_pin_pages(obj);
5116 	sg = obj->pages;
5117 	bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
5118 	obj->dirty = 1;		/* Backing store is now out of date */
5119 	i915_gem_object_unpin_pages(obj);
5120 
5121 	if (WARN_ON(bytes != size)) {
5122 		DRM_ERROR("Incomplete copy, wrote %zu of %zu\n", bytes, size);
5123 		ret = -EFAULT;
5124 		goto fail;
5125 	}
5126 
5127 	return obj;
5128 
5129 fail:
5130 	i915_gem_object_put(obj);
5131 	return ERR_PTR(ret);
5132 }
5133