xref: /dragonfly/sys/dev/drm/i915/i915_gem.c (revision 89656a4e)
1 /*
2  * Copyright © 2008-2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27 
28 #include <drm/drmP.h>
29 #include <drm/drm_vma_manager.h>
30 #include <drm/i915_drm.h>
31 #include "i915_drv.h"
32 #include "i915_gem_dmabuf.h"
33 #include "i915_vgpu.h"
34 #include "i915_trace.h"
35 #include "intel_drv.h"
36 #include "intel_frontbuffer.h"
37 #include "intel_mocs.h"
38 #include <linux/reservation.h>
39 #include <linux/shmem_fs.h>
40 #include <linux/slab.h>
41 #include <linux/swap.h>
42 #include <linux/pci.h>
43 #include <linux/dma-buf.h>
44 
45 #include <sys/mman.h>
46 #include <vm/vm_map.h>
47 #include <vm/vm_param.h>
48 
49 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
50 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
51 
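/*
 * A CPU write is coherent with the GPU's view either when the platform has
 * an LLC shared with the GPU or when the object is snooped (any cache level
 * other than I915_CACHE_NONE).
 */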
52 static bool cpu_cache_is_coherent(struct drm_device *dev,
53 				  enum i915_cache_level level)
54 {
55 	return HAS_LLC(dev) || level != I915_CACHE_NONE;
56 }
57 
58 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
59 {
60 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
61 		return false;
62 
63 	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
64 		return true;
65 
66 	return obj->pin_display;
67 }
68 
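/*
 * Reserve/release a node in the mappable part of the global GTT.  The
 * pread/pwrite fallback paths below use a single page-sized node as a
 * sliding window when the object itself cannot be pinned mappable.
 */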
69 static int
70 insert_mappable_node(struct drm_i915_private *i915,
71                      struct drm_mm_node *node, u32 size)
72 {
73 	memset(node, 0, sizeof(*node));
74 	return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node,
75 						   size, 0, 0, 0,
76 						   i915->ggtt.mappable_end,
77 						   DRM_MM_SEARCH_DEFAULT,
78 						   DRM_MM_CREATE_DEFAULT);
79 }
80 
81 static void
82 remove_mappable_node(struct drm_mm_node *node)
83 {
84 	drm_mm_remove_node(node);
85 }
86 
87 /* some bookkeeping */
88 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
89 				  size_t size)
90 {
91 	lockmgr(&dev_priv->mm.object_stat_lock, LK_EXCLUSIVE);
92 	dev_priv->mm.object_count++;
93 	dev_priv->mm.object_memory += size;
94 	lockmgr(&dev_priv->mm.object_stat_lock, LK_RELEASE);
95 }
96 
97 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
98 				     size_t size)
99 {
100 	lockmgr(&dev_priv->mm.object_stat_lock, LK_EXCLUSIVE);
101 	dev_priv->mm.object_count--;
102 	dev_priv->mm.object_memory -= size;
103 	lockmgr(&dev_priv->mm.object_stat_lock, LK_RELEASE);
104 }
105 
106 static int
107 i915_gem_wait_for_error(struct i915_gpu_error *error)
108 {
109 	int ret;
110 
111 	if (!i915_reset_in_progress(error))
112 		return 0;
113 
114 	/*
115 	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
116 	 * userspace. If it takes that long something really bad is going on and
117 	 * we should simply try to bail out and fail as gracefully as possible.
118 	 */
119 	ret = wait_event_interruptible_timeout(error->reset_queue,
120 					       !i915_reset_in_progress(error),
121 					       10*HZ);
122 	if (ret == 0) {
123 		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
124 		return -EIO;
125 	} else if (ret < 0) {
126 		return ret;
127 	} else {
128 		return 0;
129 	}
130 }
131 
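/*
 * Take struct_mutex interruptibly, first waiting for any pending GPU reset
 * so callers do not start operating on a device that is about to be
 * re-initialised.
 */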
132 int i915_mutex_lock_interruptible(struct drm_device *dev)
133 {
134 	struct drm_i915_private *dev_priv = to_i915(dev);
135 	int ret;
136 
137 	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
138 	if (ret)
139 		return ret;
140 
141 	ret = mutex_lock_interruptible(&dev->struct_mutex);
142 	if (ret)
143 		return ret;
144 
145 	return 0;
146 }
147 
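/*
 * DRM_IOCTL_I915_GEM_GET_APERTURE: report the total global GTT size and an
 * estimate of the space that is not currently pinned (and hence available).
 */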
148 int
149 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
150 			    struct drm_file *file)
151 {
152 	struct drm_i915_private *dev_priv = to_i915(dev);
153 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
154 	struct drm_i915_gem_get_aperture *args = data;
155 	struct i915_vma *vma;
156 	size_t pinned;
157 
158 	pinned = 0;
159 	mutex_lock(&dev->struct_mutex);
160 	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
161 		if (i915_vma_is_pinned(vma))
162 			pinned += vma->node.size;
163 	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
164 		if (i915_vma_is_pinned(vma))
165 			pinned += vma->node.size;
166 	mutex_unlock(&dev->struct_mutex);
167 
168 	args->aper_size = ggtt->base.total;
169 	args->aper_available_size = args->aper_size - pinned;
170 
171 	return 0;
172 }
173 
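/*
 * Back a "phys" object: copy every backing page of the object into the
 * physically contiguous buffer obtained from drm_pci_alloc() and publish a
 * single-entry sg_table pointing at that buffer.
 */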
174 static int
175 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
176 {
177 #if 0
178 	struct address_space *mapping = obj->base.filp->f_mapping;
179 #else
180 	vm_object_t vm_obj = obj->base.filp;
181 #endif
182 	char *vaddr = obj->phys_handle->vaddr;
183 	struct sg_table *st;
184 	struct scatterlist *sg;
185 	int i;
186 
187 	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
188 		return -EINVAL;
189 
190 	VM_OBJECT_LOCK(vm_obj);
191 	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
192 		struct page *page;
193 		char *src;
194 
195 #if 0
196 		page = shmem_read_mapping_page(mapping, i);
197 #else
198 		page = shmem_read_mapping_page(vm_obj, i);
199 #endif
200 		if (IS_ERR(page)) {
			VM_OBJECT_UNLOCK(vm_obj);
			return PTR_ERR(page);
		}
202 
203 		src = kmap_atomic(page);
204 		memcpy(vaddr, src, PAGE_SIZE);
205 		drm_clflush_virt_range(vaddr, PAGE_SIZE);
206 		kunmap_atomic(src);
207 
208 		put_page(page);
209 		vaddr += PAGE_SIZE;
210 	}
211 	VM_OBJECT_UNLOCK(vm_obj);
212 
213 	i915_gem_chipset_flush(to_i915(obj->base.dev));
214 
215 	st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL);
216 	if (st == NULL)
217 		return -ENOMEM;
218 
219 	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
220 		kfree(st);
221 		return -ENOMEM;
222 	}
223 
224 	sg = st->sgl;
225 	sg->offset = 0;
226 	sg->length = obj->base.size;
227 
228 	sg_dma_address(sg) = obj->phys_handle->busaddr;
229 	sg_dma_len(sg) = obj->base.size;
230 
231 	obj->pages = st;
232 	return 0;
233 }
234 
235 static void
236 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
237 {
238 	int ret;
239 
240 	BUG_ON(obj->madv == __I915_MADV_PURGED);
241 
242 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
243 	if (WARN_ON(ret)) {
244 		/* In the event of a disaster, abandon all caches and
245 		 * hope for the best.
246 		 */
247 		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
248 	}
249 
250 	if (obj->madv == I915_MADV_DONTNEED)
251 		obj->dirty = 0;
252 
253 	if (obj->dirty) {
254 #if 0
255 		struct address_space *mapping = obj->base.filp->f_mapping;
256 #else
257 		vm_object_t vm_obj = obj->base.filp;
258 #endif
259 		char *vaddr = obj->phys_handle->vaddr;
260 		int i;
261 
262 		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
263 			struct page *page;
264 			char *dst;
265 
266 			page = shmem_read_mapping_page(vm_obj, i);
267 			if (IS_ERR(page))
268 				continue;
269 
270 			dst = kmap_atomic(page);
271 			drm_clflush_virt_range(vaddr, PAGE_SIZE);
272 			memcpy(dst, vaddr, PAGE_SIZE);
273 			kunmap_atomic(dst);
274 
275 			set_page_dirty(page);
276 			if (obj->madv == I915_MADV_WILLNEED)
277 				mark_page_accessed(page);
278 			put_page(page);
279 			vaddr += PAGE_SIZE;
280 		}
281 		obj->dirty = 0;
282 	}
283 
284 	sg_free_table(obj->pages);
285 	kfree(obj->pages);
286 }
287 
288 static void
289 i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
290 {
291 	drm_pci_free(obj->base.dev, obj->phys_handle);
292 }
293 
294 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
295 	.get_pages = i915_gem_object_get_pages_phys,
296 	.put_pages = i915_gem_object_put_pages_phys,
297 	.release = i915_gem_object_release_phys,
298 };
299 
300 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
301 {
302 	struct i915_vma *vma;
303 	LINUX_LIST_HEAD(still_in_list);
304 	int ret;
305 
306 	lockdep_assert_held(&obj->base.dev->struct_mutex);
307 
308 	/* Closed vma are removed from the obj->vma_list - but they may
309 	 * still have an active binding on the object. To remove those we
310 	 * must wait for all rendering to the object to complete (as unbinding
311 	 * must do anyway), and retire the requests.
312 	 */
313 	ret = i915_gem_object_wait_rendering(obj, false);
314 	if (ret)
315 		return ret;
316 
317 	i915_gem_retire_requests(to_i915(obj->base.dev));
318 
319 	while ((vma = list_first_entry_or_null(&obj->vma_list,
320 					       struct i915_vma,
321 					       obj_link))) {
322 		list_move_tail(&vma->obj_link, &still_in_list);
323 		ret = i915_vma_unbind(vma);
324 		if (ret)
325 			break;
326 	}
327 	list_splice(&still_in_list, &obj->vma_list);
328 
329 	return ret;
330 }
331 
332 /**
333  * Ensures that all rendering to the object has completed and the object is
334  * safe to unbind from the GTT or access from the CPU.
335  * @obj: i915 gem object
336  * @readonly: waiting for just read access or read-write access
337  */
338 int
339 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
340 			       bool readonly)
341 {
342 	struct reservation_object *resv;
343 	struct i915_gem_active *active;
344 	unsigned long active_mask;
345 	int idx;
346 
347 	lockdep_assert_held(&obj->base.dev->struct_mutex);
348 
349 	if (!readonly) {
350 		active = obj->last_read;
351 		active_mask = i915_gem_object_get_active(obj);
352 	} else {
353 		active_mask = 1;
354 		active = &obj->last_write;
355 	}
356 
357 	for_each_active(active_mask, idx) {
358 		int ret;
359 
360 		ret = i915_gem_active_wait(&active[idx],
361 					   &obj->base.dev->struct_mutex);
362 		if (ret)
363 			return ret;
364 	}
365 
366 	resv = i915_gem_object_get_dmabuf_resv(obj);
367 	if (resv) {
368 		long err;
369 
370 		err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
371 							  MAX_SCHEDULE_TIMEOUT);
372 		if (err < 0)
373 			return err;
374 	}
375 
376 	return 0;
377 }
378 
379 /* A nonblocking variant of the above wait. Must be called prior to
380  * acquiring the mutex for the object, as the object state may change
381  * during this call. A reference must be held by the caller for the object.
382  */
383 static __must_check int
384 __unsafe_wait_rendering(struct drm_i915_gem_object *obj,
385 			struct intel_rps_client *rps,
386 			bool readonly)
387 {
388 	struct i915_gem_active *active;
389 	unsigned long active_mask;
390 	int idx;
391 
392 	active_mask = __I915_BO_ACTIVE(obj);
393 	if (!active_mask)
394 		return 0;
395 
396 	if (!readonly) {
397 		active = obj->last_read;
398 	} else {
399 		active_mask = 1;
400 		active = &obj->last_write;
401 	}
402 
403 	for_each_active(active_mask, idx) {
404 		int ret;
405 
406 		ret = i915_gem_active_wait_unlocked(&active[idx],
407 						    I915_WAIT_INTERRUPTIBLE,
408 						    NULL, rps);
409 		if (ret)
410 			return ret;
411 	}
412 
413 	return 0;
414 }
415 
416 static struct intel_rps_client *to_rps_client(struct drm_file *file)
417 {
418 	struct drm_i915_file_private *fpriv = file->driver_priv;
419 
420 	return &fpriv->rps;
421 }
422 
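/*
 * Convert a shmem-backed object into a physically contiguous ("phys")
 * object, as needed by old hardware for e.g. cursors: drop any GTT bindings
 * and pages, allocate a contiguous DMA buffer with the requested alignment
 * and repopulate the object through i915_gem_phys_ops.
 */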
423 int
424 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
425 			    int align)
426 {
427 	drm_dma_handle_t *phys;
428 	int ret;
429 
430 	if (obj->phys_handle) {
431 		if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
432 			return -EBUSY;
433 
434 		return 0;
435 	}
436 
437 	if (obj->madv != I915_MADV_WILLNEED)
438 		return -EFAULT;
439 
440 	if (obj->base.filp == NULL)
441 		return -EINVAL;
442 
443 	ret = i915_gem_object_unbind(obj);
444 	if (ret)
445 		return ret;
446 
447 	ret = i915_gem_object_put_pages(obj);
448 	if (ret)
449 		return ret;
450 
451 	/* create a new object */
452 	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
453 	if (!phys)
454 		return -ENOMEM;
455 
456 	obj->phys_handle = phys;
457 	obj->ops = &i915_gem_phys_ops;
458 
459 	return i915_gem_object_get_pages(obj);
460 }
461 
462 static int
463 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
464 		     struct drm_i915_gem_pwrite *args,
465 		     struct drm_file *file_priv)
466 {
467 	struct drm_device *dev = obj->base.dev;
468 	void *vaddr = obj->phys_handle->vaddr + args->offset;
469 	char __user *user_data = u64_to_user_ptr(args->data_ptr);
470 	int ret = 0;
471 
472 	/* We manually control the domain here and pretend that it
473 	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
474 	 */
475 	ret = i915_gem_object_wait_rendering(obj, false);
476 	if (ret)
477 		return ret;
478 
479 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
480 	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
481 		unsigned long unwritten;
482 
483 		/* The physical object once assigned is fixed for the lifetime
484 		 * of the obj, so we can safely drop the lock and continue
485 		 * to access vaddr.
486 		 */
487 		mutex_unlock(&dev->struct_mutex);
488 		unwritten = copy_from_user(vaddr, user_data, args->size);
489 		mutex_lock(&dev->struct_mutex);
490 		if (unwritten) {
491 			ret = -EFAULT;
492 			goto out;
493 		}
494 	}
495 
496 	drm_clflush_virt_range(vaddr, args->size);
497 	i915_gem_chipset_flush(to_i915(dev));
498 
499 out:
500 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
501 	return ret;
502 }
503 
504 void *i915_gem_object_alloc(struct drm_device *dev)
505 {
506 	struct drm_i915_private *dev_priv = to_i915(dev);
507 	return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
508 }
509 
510 void i915_gem_object_free(struct drm_i915_gem_object *obj)
511 {
512 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
513 	kmem_cache_free(dev_priv->objects, obj);
514 }
515 
516 static int
517 i915_gem_create(struct drm_file *file,
518 		struct drm_device *dev,
519 		uint64_t size,
520 		uint32_t *handle_p)
521 {
522 	struct drm_i915_gem_object *obj;
523 	int ret;
524 	u32 handle;
525 
526 	size = roundup(size, PAGE_SIZE);
527 	if (size == 0)
528 		return -EINVAL;
529 
530 	/* Allocate the new object */
531 	obj = i915_gem_object_create(dev, size);
532 	if (IS_ERR(obj))
533 		return PTR_ERR(obj);
534 
535 	ret = drm_gem_handle_create(file, &obj->base, &handle);
536 	/* drop reference from allocate - handle holds it now */
537 	i915_gem_object_put_unlocked(obj);
538 	if (ret)
539 		return ret;
540 
541 	*handle_p = handle;
542 	return 0;
543 }
544 
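/*
 * Dumb buffers: the pitch is the per-row byte count rounded up to 64 bytes,
 * the size is pitch * height.  For example, a 1920x1080 XRGB8888 buffer gets
 * pitch = ALIGN(1920 * 4, 64) = 7680 and size = 7680 * 1080 = 8294400 bytes,
 * which i915_gem_create() then rounds up to whole pages.
 */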
545 int
546 i915_gem_dumb_create(struct drm_file *file,
547 		     struct drm_device *dev,
548 		     struct drm_mode_create_dumb *args)
549 {
550 	/* have to work out size/pitch and return them */
551 	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
552 	args->size = args->pitch * args->height;
553 	return i915_gem_create(file, dev,
554 			       args->size, &args->handle);
555 }
556 
557 /**
558  * Creates a new mm object and returns a handle to it.
559  * @dev: drm device pointer
560  * @data: ioctl data blob
561  * @file: drm file pointer
562  */
563 int
564 i915_gem_create_ioctl(struct drm_device *dev, void *data,
565 		      struct drm_file *file)
566 {
567 	struct drm_i915_gem_create *args = data;
568 
569 	return i915_gem_create(file, dev,
570 			       args->size, &args->handle);
571 }
572 
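/*
 * Copy helpers that undo (pread) or apply (pwrite) bit-17 swizzling by
 * exchanging the two 64-byte halves of each 128-byte span: the GPU-side
 * offset is XORed with 64, one cacheline at a time.
 */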
573 static inline int
574 __copy_to_user_swizzled(char __user *cpu_vaddr,
575 			const char *gpu_vaddr, int gpu_offset,
576 			int length)
577 {
578 	int ret, cpu_offset = 0;
579 
580 	while (length > 0) {
581 		int cacheline_end = ALIGN(gpu_offset + 1, 64);
582 		int this_length = min(cacheline_end - gpu_offset, length);
583 		int swizzled_gpu_offset = gpu_offset ^ 64;
584 
585 		ret = __copy_to_user(cpu_vaddr + cpu_offset,
586 				     gpu_vaddr + swizzled_gpu_offset,
587 				     this_length);
588 		if (ret)
589 			return ret + length;
590 
591 		cpu_offset += this_length;
592 		gpu_offset += this_length;
593 		length -= this_length;
594 	}
595 
596 	return 0;
597 }
598 
599 static inline int
600 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
601 			  const char __user *cpu_vaddr,
602 			  int length)
603 {
604 	int ret, cpu_offset = 0;
605 
606 	while (length > 0) {
607 		int cacheline_end = ALIGN(gpu_offset + 1, 64);
608 		int this_length = min(cacheline_end - gpu_offset, length);
609 		int swizzled_gpu_offset = gpu_offset ^ 64;
610 
611 		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
612 				       cpu_vaddr + cpu_offset,
613 				       this_length);
614 		if (ret)
615 			return ret + length;
616 
617 		cpu_offset += this_length;
618 		gpu_offset += this_length;
619 		length -= this_length;
620 	}
621 
622 	return 0;
623 }
624 
625 /*
626  * Pins the specified object's pages and synchronizes the object with
627  * GPU accesses. Sets needs_clflush to non-zero if the caller should
628  * flush the object from the CPU cache.
629  */
630 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
631 				    unsigned int *needs_clflush)
632 {
633 	int ret;
634 
635 	*needs_clflush = 0;
636 
637 	if (!i915_gem_object_has_struct_page(obj))
638 		return -ENODEV;
639 
640 	ret = i915_gem_object_wait_rendering(obj, true);
641 	if (ret)
642 		return ret;
643 
644 	ret = i915_gem_object_get_pages(obj);
645 	if (ret)
646 		return ret;
647 
648 	i915_gem_object_pin_pages(obj);
649 
650 	i915_gem_object_flush_gtt_write_domain(obj);
651 
652 	/* If we're not in the cpu read domain, set ourselves into the gtt
653 	 * read domain and manually flush cachelines (if required). This
654 	 * optimizes for the case when the gpu will dirty the data
655 	 * anyway again before the next pread happens.
656 	 */
657 	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
658 		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
659 							obj->cache_level);
660 
661 	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
662 		ret = i915_gem_object_set_to_cpu_domain(obj, false);
663 		if (ret)
664 			goto err_unpin;
665 
666 		*needs_clflush = 0;
667 	}
668 
669 	/* return with the pages pinned */
670 	return 0;
671 
672 err_unpin:
673 	i915_gem_object_unpin_pages(obj);
674 	return ret;
675 }
676 
677 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
678 				     unsigned int *needs_clflush)
679 {
680 	int ret;
681 
682 	*needs_clflush = 0;
683 	if (!i915_gem_object_has_struct_page(obj))
684 		return -ENODEV;
685 
686 	ret = i915_gem_object_wait_rendering(obj, false);
687 	if (ret)
688 		return ret;
689 
690 	ret = i915_gem_object_get_pages(obj);
691 	if (ret)
692 		return ret;
693 
694 	i915_gem_object_pin_pages(obj);
695 
696 	i915_gem_object_flush_gtt_write_domain(obj);
697 
698 	/* If we're not in the cpu write domain, set ourselves into the
699 	 * gtt write domain and manually flush cachelines (as required).
700 	 * This optimizes for the case when the gpu will use the data
701 	 * right away and we therefore have to clflush anyway.
702 	 */
703 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
704 		*needs_clflush |= cpu_write_needs_clflush(obj) << 1;
705 
706 	/* Same trick applies to invalidate partially written cachelines read
707 	 * before writing.
708 	 */
709 	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
710 		*needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
711 							 obj->cache_level);
712 
713 	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
714 		ret = i915_gem_object_set_to_cpu_domain(obj, true);
715 		if (ret)
716 			goto err_unpin;
717 
718 		*needs_clflush = 0;
719 	}
720 
721 	if ((*needs_clflush & CLFLUSH_AFTER) == 0)
722 		obj->cache_dirty = true;
723 
724 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
725 	obj->dirty = 1;
726 	/* return with the pages pinned */
727 	return 0;
728 
729 err_unpin:
730 	i915_gem_object_unpin_pages(obj);
731 	return ret;
732 }
733 
734 /* Per-page copy function for the shmem pread fastpath.
735  * Flushes invalid cachelines before reading the target if
736  * needs_clflush is set. */
737 static int
738 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
739 		 char __user *user_data,
740 		 bool page_do_bit17_swizzling, bool needs_clflush)
741 {
742 	char *vaddr;
743 	int ret;
744 
745 	if (unlikely(page_do_bit17_swizzling))
746 		return -EINVAL;
747 
748 	vaddr = kmap_atomic(page);
749 	if (needs_clflush)
750 		drm_clflush_virt_range(vaddr + shmem_page_offset,
751 				       page_length);
752 	ret = __copy_to_user_inatomic(user_data,
753 				      vaddr + shmem_page_offset,
754 				      page_length);
755 	kunmap_atomic(vaddr);
756 
757 	return ret ? -EFAULT : 0;
758 }
759 
760 static void
761 shmem_clflush_swizzled_range(char *addr, unsigned long length,
762 			     bool swizzled)
763 {
764 	if (unlikely(swizzled)) {
765 		unsigned long start = (unsigned long) addr;
766 		unsigned long end = (unsigned long) addr + length;
767 
768 		/* For swizzling simply ensure that we always flush both
769 		 * channels. Lame, but simple and it works. Swizzled
770 		 * pwrite/pread is far from a hotpath - current userspace
771 		 * doesn't use it at all. */
772 		start = round_down(start, 128);
773 		end = round_up(end, 128);
774 
775 		drm_clflush_virt_range((void *)start, end - start);
776 	} else {
777 		drm_clflush_virt_range(addr, length);
778 	}
779 
780 }
781 
782 /* The only difference from the fast-path function is that this can handle
783  * bit17 swizzling and uses non-atomic copy and kmap functions. */
784 static int
785 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
786 		 char __user *user_data,
787 		 bool page_do_bit17_swizzling, bool needs_clflush)
788 {
789 	char *vaddr;
790 	int ret;
791 
792 	vaddr = kmap(page);
793 	if (needs_clflush)
794 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
795 					     page_length,
796 					     page_do_bit17_swizzling);
797 
798 	if (page_do_bit17_swizzling)
799 		ret = __copy_to_user_swizzled(user_data,
800 					      vaddr, shmem_page_offset,
801 					      page_length);
802 	else
803 		ret = __copy_to_user(user_data,
804 				     vaddr + shmem_page_offset,
805 				     page_length);
806 	kunmap(page);
807 
808 	return ret ? -EFAULT : 0;
809 }
810 
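/*
 * Slow GTT access helper: map one GGTT page write-combined and copy to or
 * from user memory with functions that may fault, so callers must drop
 * struct_mutex around it.
 */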
811 static inline unsigned long
812 slow_user_access(struct io_mapping *mapping,
813 		 uint64_t page_base, int page_offset,
814 		 char __user *user_data,
815 		 unsigned long length, bool pwrite)
816 {
817 	void __iomem *ioaddr;
818 	void *vaddr;
819 	uint64_t unwritten;
820 
821 	ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
822 	/* We can use the cpu mem copy function because this is X86. */
823 	vaddr = (void __force *)ioaddr + page_offset;
824 	if (pwrite)
825 		unwritten = __copy_from_user(vaddr, user_data, length);
826 	else
827 		unwritten = __copy_to_user(user_data, vaddr, length);
828 
829 	io_mapping_unmap(ioaddr);
830 	return unwritten;
831 }
832 
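/*
 * Fallback pread through the GGTT aperture, used when the object has no
 * struct pages (or the shmem path returned -EFAULT).  Pin the object
 * mappable if possible, otherwise walk it one page at a time through a
 * scratch PTE inserted into the mappable GTT.
 */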
833 static int
834 i915_gem_gtt_pread(struct drm_device *dev,
835 		   struct drm_i915_gem_object *obj, uint64_t size,
836 		   uint64_t data_offset, uint64_t data_ptr)
837 {
838 	struct drm_i915_private *dev_priv = to_i915(dev);
839 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
840 	struct i915_vma *vma;
841 	struct drm_mm_node node;
842 	char __user *user_data;
843 	uint64_t remain;
844 	uint64_t offset;
845 	int ret;
846 
847 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
848 	if (!IS_ERR(vma)) {
849 		node.start = i915_ggtt_offset(vma);
850 		node.allocated = false;
851 		ret = i915_vma_put_fence(vma);
852 		if (ret) {
853 			i915_vma_unpin(vma);
854 			vma = ERR_PTR(ret);
855 		}
856 	}
857 	if (IS_ERR(vma)) {
858 		ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE);
859 		if (ret)
860 			goto out;
861 
862 		ret = i915_gem_object_get_pages(obj);
863 		if (ret) {
864 			remove_mappable_node(&node);
865 			goto out;
866 		}
867 
868 		i915_gem_object_pin_pages(obj);
869 	}
870 
871 	ret = i915_gem_object_set_to_gtt_domain(obj, false);
872 	if (ret)
873 		goto out_unpin;
874 
875 	user_data = u64_to_user_ptr(data_ptr);
876 	remain = size;
877 	offset = data_offset;
878 
879 	mutex_unlock(&dev->struct_mutex);
880 	if (likely(!i915.prefault_disable)) {
881 		ret = fault_in_pages_writeable(user_data, remain);
882 		if (ret) {
883 			mutex_lock(&dev->struct_mutex);
884 			goto out_unpin;
885 		}
886 	}
887 
888 	while (remain > 0) {
889 		/* Operation in this page
890 		 *
891 		 * page_base = page offset within aperture
892 		 * page_offset = offset within page
893 		 * page_length = bytes to copy for this page
894 		 */
895 		u32 page_base = node.start;
896 		unsigned page_offset = offset_in_page(offset);
897 		unsigned page_length = PAGE_SIZE - page_offset;
898 		page_length = remain < page_length ? remain : page_length;
899 		if (node.allocated) {
900 			wmb();
901 			ggtt->base.insert_page(&ggtt->base,
902 					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
903 					       node.start,
904 					       I915_CACHE_NONE, 0);
905 			wmb();
906 		} else {
907 			page_base += offset & LINUX_PAGE_MASK;
908 		}
909 		/* This is a slow read/write as it tries to read from
910 		 * and write to user memory, which may result in page
911 		 * faults, and so we cannot perform this under struct_mutex.
912 		 */
913 		if (slow_user_access(&ggtt->mappable, page_base,
914 				     page_offset, user_data,
915 				     page_length, false)) {
916 			ret = -EFAULT;
917 			break;
918 		}
919 
920 		remain -= page_length;
921 		user_data += page_length;
922 		offset += page_length;
923 	}
924 
925 	mutex_lock(&dev->struct_mutex);
926 	if (ret == 0 && (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
927 		/* The user has modified the object whilst we tried
928 		 * reading from it, and we now have no idea what domain
929 		 * the pages should be in. As we have just been touching
930 		 * them directly, flush everything back to the GTT
931 		 * domain.
932 		 */
933 		ret = i915_gem_object_set_to_gtt_domain(obj, false);
934 	}
935 
936 out_unpin:
937 	if (node.allocated) {
938 		wmb();
939 		ggtt->base.clear_range(&ggtt->base,
940 				       node.start, node.size);
941 		i915_gem_object_unpin_pages(obj);
942 		remove_mappable_node(&node);
943 	} else {
944 		i915_vma_unpin(vma);
945 	}
946 out:
947 	return ret;
948 }
949 
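/*
 * Copy object contents to userspace via the CPU, page by page: try the
 * atomic kmap fastpath first and fall back to the sleeping slowpath (which
 * also handles bit-17 swizzling) with struct_mutex dropped.
 */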
950 static int
951 i915_gem_shmem_pread(struct drm_device *dev,
952 		     struct drm_i915_gem_object *obj,
953 		     struct drm_i915_gem_pread *args,
954 		     struct drm_file *file)
955 {
956 	char __user *user_data;
957 	ssize_t remain;
958 	loff_t offset;
959 	int shmem_page_offset, page_length, ret = 0;
960 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
961 	int prefaulted = 0;
962 	int needs_clflush = 0;
963 	struct sg_page_iter sg_iter;
964 
965 	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
966 	if (ret)
967 		return ret;
968 
969 	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
970 	user_data = u64_to_user_ptr(args->data_ptr);
971 	offset = args->offset;
972 	remain = args->size;
973 
974 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
975 			 offset >> PAGE_SHIFT) {
976 		struct page *page = sg_page_iter_page(&sg_iter);
977 
978 		if (remain <= 0)
979 			break;
980 
981 		/* Operation in this page
982 		 *
983 		 * shmem_page_offset = offset within page in shmem file
984 		 * page_length = bytes to copy for this page
985 		 */
986 		shmem_page_offset = offset_in_page(offset);
987 		page_length = remain;
988 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
989 			page_length = PAGE_SIZE - shmem_page_offset;
990 
991 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
992 			(page_to_phys(page) & (1 << 17)) != 0;
993 
994 		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
995 				       user_data, page_do_bit17_swizzling,
996 				       needs_clflush);
997 		if (ret == 0)
998 			goto next_page;
999 
1000 		mutex_unlock(&dev->struct_mutex);
1001 
1002 		if (likely(!i915.prefault_disable) && !prefaulted) {
1003 			ret = fault_in_pages_writeable(user_data, remain);
1004 			/* Userspace is tricking us, but we've already clobbered
1005 			 * its pages with the prefault and promised to write the
1006 			 * data up to the first fault. Hence ignore any errors
1007 			 * and just continue. */
1008 			(void)ret;
1009 			prefaulted = 1;
1010 		}
1011 
1012 		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
1013 				       user_data, page_do_bit17_swizzling,
1014 				       needs_clflush);
1015 
1016 		mutex_lock(&dev->struct_mutex);
1017 
1018 		if (ret)
1019 			goto out;
1020 
1021 next_page:
1022 		remain -= page_length;
1023 		user_data += page_length;
1024 		offset += page_length;
1025 	}
1026 
1027 out:
1028 	i915_gem_obj_finish_shmem_access(obj);
1029 
1030 	return ret;
1031 }
1032 
1033 /**
1034  * Reads data from the object referenced by handle.
1035  * @dev: drm device pointer
1036  * @data: ioctl data blob
1037  * @file: drm file pointer
1038  *
1039  * On error, the contents of *data are undefined.
1040  */
1041 int
1042 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
1043 		     struct drm_file *file)
1044 {
1045 	struct drm_i915_gem_pread *args = data;
1046 	struct drm_i915_gem_object *obj;
1047 	int ret = 0;
1048 
1049 	if (args->size == 0)
1050 		return 0;
1051 
1052 #if 0
1053 	if (!access_ok(VERIFY_WRITE,
1054 		       u64_to_user_ptr(args->data_ptr),
1055 		       args->size))
1056 		return -EFAULT;
1057 #endif
1058 
1059 	obj = i915_gem_object_lookup(file, args->handle);
1060 	if (!obj)
1061 		return -ENOENT;
1062 
1063 	/* Bounds check source.  */
1064 	if (args->offset > obj->base.size ||
1065 	    args->size > obj->base.size - args->offset) {
1066 		ret = -EINVAL;
1067 		goto err;
1068 	}
1069 
1070 	trace_i915_gem_object_pread(obj, args->offset, args->size);
1071 
1072 	ret = __unsafe_wait_rendering(obj, to_rps_client(file), true);
1073 	if (ret)
1074 		goto err;
1075 
1076 	ret = i915_mutex_lock_interruptible(dev);
1077 	if (ret)
1078 		goto err;
1079 
1080 	ret = i915_gem_shmem_pread(dev, obj, args, file);
1081 
1082 	/* pread for non shmem backed objects */
1083 	if (ret == -EFAULT || ret == -ENODEV) {
1084 		intel_runtime_pm_get(to_i915(dev));
1085 		ret = i915_gem_gtt_pread(dev, obj, args->size,
1086 					args->offset, args->data_ptr);
1087 		intel_runtime_pm_put(to_i915(dev));
1088 	}
1089 
1090 	i915_gem_object_put(obj);
1091 	mutex_unlock(&dev->struct_mutex);
1092 
1093 	return ret;
1094 
1095 err:
1096 	i915_gem_object_put_unlocked(obj);
1097 	return ret;
1098 }
1099 
1100 /* This is the fast write path which cannot handle
1101  * page faults in the source data
1102  */
1103 
1104 static inline int
1105 fast_user_write(struct io_mapping *mapping,
1106 		loff_t page_base, int page_offset,
1107 		char __user *user_data,
1108 		int length)
1109 {
1110 	void __iomem *vaddr_atomic;
1111 	void *vaddr;
1112 	unsigned long unwritten;
1113 
1114 	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
1115 	/* We can use the cpu mem copy function because this is X86. */
1116 	vaddr = (void __force*)vaddr_atomic + page_offset;
1117 	unwritten = __copy_from_user_inatomic_nocache(vaddr,
1118 						      user_data, length);
1119 	io_mapping_unmap_atomic(vaddr_atomic);
1120 	return unwritten;
1121 }
1122 
1123 /**
1124  * This is the fast pwrite path, where we copy the data directly from the
1125  * user into the GTT, uncached.
1126  * @i915: i915 device private data
1127  * @obj: i915 gem object
1128  * @args: pwrite arguments structure
1129  * @file: drm file pointer
1130  */
1131 static int
1132 i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
1133 			 struct drm_i915_gem_object *obj,
1134 			 struct drm_i915_gem_pwrite *args,
1135 			 struct drm_file *file)
1136 {
1137 	struct i915_ggtt *ggtt = &i915->ggtt;
1138 	struct drm_device *dev = obj->base.dev;
1139 	struct i915_vma *vma;
1140 	struct drm_mm_node node;
1141 	uint64_t remain, offset;
1142 	char __user *user_data;
1143 	int ret;
1144 	bool hit_slow_path = false;
1145 
1146 	if (i915_gem_object_is_tiled(obj))
1147 		return -EFAULT;
1148 
1149 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1150 				       PIN_MAPPABLE | PIN_NONBLOCK);
1151 	if (!IS_ERR(vma)) {
1152 		node.start = i915_ggtt_offset(vma);
1153 		node.allocated = false;
1154 		ret = i915_vma_put_fence(vma);
1155 		if (ret) {
1156 			i915_vma_unpin(vma);
1157 			vma = ERR_PTR(ret);
1158 		}
1159 	}
1160 	if (IS_ERR(vma)) {
1161 		ret = insert_mappable_node(i915, &node, PAGE_SIZE);
1162 		if (ret)
1163 			goto out;
1164 
1165 		ret = i915_gem_object_get_pages(obj);
1166 		if (ret) {
1167 			remove_mappable_node(&node);
1168 			goto out;
1169 		}
1170 
1171 		i915_gem_object_pin_pages(obj);
1172 	}
1173 
1174 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
1175 	if (ret)
1176 		goto out_unpin;
1177 
1178 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1179 	obj->dirty = true;
1180 
1181 	user_data = u64_to_user_ptr(args->data_ptr);
1182 	offset = args->offset;
1183 	remain = args->size;
1184 	while (remain) {
1185 		/* Operation in this page
1186 		 *
1187 		 * page_base = page offset within aperture
1188 		 * page_offset = offset within page
1189 		 * page_length = bytes to copy for this page
1190 		 */
1191 		u32 page_base = node.start;
1192 		unsigned page_offset = offset_in_page(offset);
1193 		unsigned page_length = PAGE_SIZE - page_offset;
1194 		page_length = remain < page_length ? remain : page_length;
1195 		if (node.allocated) {
1196 			wmb(); /* flush the write before we modify the GGTT */
1197 			ggtt->base.insert_page(&ggtt->base,
1198 					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1199 					       node.start, I915_CACHE_NONE, 0);
1200 			wmb(); /* flush modifications to the GGTT (insert_page) */
1201 		} else {
1202 			page_base += offset & LINUX_PAGE_MASK;
1203 		}
1204 		/* If we get a fault while copying data, then (presumably) our
1205 		 * source page isn't available.  Return the error and we'll
1206 		 * retry in the slow path.
1207 		 * If the object is not shmem backed, we retry with the
1208 		 * path that handles page faults.
1209 		 */
1210 		if (fast_user_write(&ggtt->mappable, page_base,
1211 				    page_offset, user_data, page_length)) {
1212 			hit_slow_path = true;
1213 			mutex_unlock(&dev->struct_mutex);
1214 			if (slow_user_access(&ggtt->mappable,
1215 					     page_base,
1216 					     page_offset, user_data,
1217 					     page_length, true)) {
1218 				ret = -EFAULT;
1219 				mutex_lock(&dev->struct_mutex);
1220 				goto out_flush;
1221 			}
1222 
1223 			mutex_lock(&dev->struct_mutex);
1224 		}
1225 
1226 		remain -= page_length;
1227 		user_data += page_length;
1228 		offset += page_length;
1229 	}
1230 
1231 out_flush:
1232 	if (hit_slow_path) {
1233 		if (ret == 0 &&
1234 		    (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
1235 			/* The user has modified the object whilst we tried
1236 			 * reading from it, and we now have no idea what domain
1237 			 * the pages should be in. As we have just been touching
1238 			 * them directly, flush everything back to the GTT
1239 			 * domain.
1240 			 */
1241 			ret = i915_gem_object_set_to_gtt_domain(obj, false);
1242 		}
1243 	}
1244 
1245 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
1246 out_unpin:
1247 	if (node.allocated) {
1248 		wmb();
1249 		ggtt->base.clear_range(&ggtt->base,
1250 				       node.start, node.size);
1251 		i915_gem_object_unpin_pages(obj);
1252 		remove_mappable_node(&node);
1253 	} else {
1254 		i915_vma_unpin(vma);
1255 	}
1256 out:
1257 	return ret;
1258 }
1259 
1260 /* Per-page copy function for the shmem pwrite fastpath.
1261  * Flushes invalid cachelines before writing to the target if
1262  * needs_clflush_before is set and flushes out any written cachelines after
1263  * writing if needs_clflush_after is set. */
1264 static int
1265 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
1266 		  char __user *user_data,
1267 		  bool page_do_bit17_swizzling,
1268 		  bool needs_clflush_before,
1269 		  bool needs_clflush_after)
1270 {
1271 	char *vaddr;
1272 	int ret;
1273 
1274 	if (unlikely(page_do_bit17_swizzling))
1275 		return -EINVAL;
1276 
1277 	vaddr = kmap_atomic(page);
1278 	if (needs_clflush_before)
1279 		drm_clflush_virt_range(vaddr + shmem_page_offset,
1280 				       page_length);
1281 	ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
1282 					user_data, page_length);
1283 	if (needs_clflush_after)
1284 		drm_clflush_virt_range(vaddr + shmem_page_offset,
1285 				       page_length);
1286 	kunmap_atomic(vaddr);
1287 
1288 	return ret ? -EFAULT : 0;
1289 }
1290 
1291 /* The only difference from the fast-path function is that this can handle
1292  * bit17 swizzling and uses non-atomic copy and kmap functions. */
1293 static int
1294 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
1295 		  char __user *user_data,
1296 		  bool page_do_bit17_swizzling,
1297 		  bool needs_clflush_before,
1298 		  bool needs_clflush_after)
1299 {
1300 	char *vaddr;
1301 	int ret;
1302 
1303 	vaddr = kmap(page);
1304 	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
1305 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
1306 					     page_length,
1307 					     page_do_bit17_swizzling);
1308 	if (page_do_bit17_swizzling)
1309 		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
1310 						user_data,
1311 						page_length);
1312 	else
1313 		ret = __copy_from_user(vaddr + shmem_page_offset,
1314 				       user_data,
1315 				       page_length);
1316 	if (needs_clflush_after)
1317 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
1318 					     page_length,
1319 					     page_do_bit17_swizzling);
1320 	kunmap(page);
1321 
1322 	return ret ? -EFAULT : 0;
1323 }
1324 
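/*
 * CPU write path mirroring i915_gem_shmem_pread(): per-page fastpath with an
 * atomic kmap, slowpath with struct_mutex dropped for copies that may fault,
 * plus the clflushes requested by i915_gem_obj_prepare_shmem_write().
 */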
1325 static int
1326 i915_gem_shmem_pwrite(struct drm_device *dev,
1327 		      struct drm_i915_gem_object *obj,
1328 		      struct drm_i915_gem_pwrite *args,
1329 		      struct drm_file *file)
1330 {
1331 	ssize_t remain;
1332 	loff_t offset;
1333 	char __user *user_data;
1334 	int shmem_page_offset, page_length, ret = 0;
1335 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
1336 	int hit_slowpath = 0;
1337 	unsigned int needs_clflush;
1338 	struct sg_page_iter sg_iter;
1339 
1340 	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
1341 	if (ret)
1342 		return ret;
1343 
1344 	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
1345 	user_data = u64_to_user_ptr(args->data_ptr);
1346 	offset = args->offset;
1347 	remain = args->size;
1348 
1349 	VM_OBJECT_LOCK(obj->base.filp);
1350 	vm_object_pip_add(obj->base.filp, 1);
1351 
1352 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
1353 			 offset >> PAGE_SHIFT) {
1354 		struct page *page = sg_page_iter_page(&sg_iter);
1355 		int partial_cacheline_write;
1356 
1357 		if (remain <= 0)
1358 			break;
1359 
1360 		/* Operation in this page
1361 		 *
1362 		 * shmem_page_offset = offset within page in shmem file
1363 		 * page_length = bytes to copy for this page
1364 		 */
1365 		shmem_page_offset = offset_in_page(offset);
1366 
1367 		page_length = remain;
1368 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
1369 			page_length = PAGE_SIZE - shmem_page_offset;
1370 
1371 		/* If we don't overwrite a cacheline completely we need to be
1372 		 * careful to have up-to-date data by first clflushing. Don't
1373 		 * overcomplicate things and flush the entire span being written. */
1374 		partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE &&
1375 			((shmem_page_offset | page_length)
1376 				& (boot_cpu_data.x86_clflush_size - 1));
1377 
1378 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
1379 			(page_to_phys(page) & (1 << 17)) != 0;
1380 
1381 		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1382 					user_data, page_do_bit17_swizzling,
1383 					partial_cacheline_write,
1384 					needs_clflush & CLFLUSH_AFTER);
1385 		if (ret == 0)
1386 			goto next_page;
1387 
1388 		hit_slowpath = 1;
1389 		mutex_unlock(&dev->struct_mutex);
1390 		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1391 					user_data, page_do_bit17_swizzling,
1392 					partial_cacheline_write,
1393 					needs_clflush & CLFLUSH_AFTER);
1394 
1395 		mutex_lock(&dev->struct_mutex);
1396 
1397 		if (ret)
1398 			goto out;
1399 
1400 next_page:
1401 		remain -= page_length;
1402 		user_data += page_length;
1403 		offset += page_length;
1404 	}
1405 
1406 out:
1407 	vm_object_pip_wakeup(obj->base.filp);
1408 	VM_OBJECT_UNLOCK(obj->base.filp);
1409 	i915_gem_obj_finish_shmem_access(obj);
1410 
1411 	if (hit_slowpath) {
1412 		/*
1413 		 * Fixup: Flush cpu caches in case we didn't flush the dirty
1414 		 * cachelines in-line while writing and the object moved
1415 		 * out of the cpu write domain while we had dropped the lock.
1416 		 */
1417 		if (!(needs_clflush & CLFLUSH_AFTER) &&
1418 		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1419 			if (i915_gem_clflush_object(obj, obj->pin_display))
1420 				needs_clflush |= CLFLUSH_AFTER;
1421 		}
1422 	}
1423 
1424 	if (needs_clflush & CLFLUSH_AFTER)
1425 		i915_gem_chipset_flush(to_i915(dev));
1426 
1427 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
1428 	return ret;
1429 }
1430 
1431 /**
1432  * Writes data to the object referenced by handle.
1433  * @dev: drm device
1434  * @data: ioctl data blob
1435  * @file: drm file
1436  *
1437  * On error, the contents of the buffer that were to be modified are undefined.
1438  */
1439 int
1440 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1441 		      struct drm_file *file)
1442 {
1443 	struct drm_i915_private *dev_priv = to_i915(dev);
1444 	struct drm_i915_gem_pwrite *args = data;
1445 	struct drm_i915_gem_object *obj;
1446 	int ret;
1447 
1448 	if (args->size == 0)
1449 		return 0;
1450 
1451 #if 0
1452 	if (!access_ok(VERIFY_READ,
1453 		       u64_to_user_ptr(args->data_ptr),
1454 		       args->size))
1455 		return -EFAULT;
1456 #endif
1457 
1458 	if (likely(!i915.prefault_disable)) {
1459 		ret = fault_in_pages_readable(u64_to_user_ptr(args->data_ptr),
1460 						   args->size);
1461 		if (ret)
1462 			return -EFAULT;
1463 	}
1464 
1465 	obj = i915_gem_object_lookup(file, args->handle);
1466 	if (!obj)
1467 		return -ENOENT;
1468 
1469 	/* Bounds check destination. */
1470 	if (args->offset > obj->base.size ||
1471 	    args->size > obj->base.size - args->offset) {
1472 		ret = -EINVAL;
1473 		goto err;
1474 	}
1475 
1476 	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1477 
1478 	ret = __unsafe_wait_rendering(obj, to_rps_client(file), false);
1479 	if (ret)
1480 		goto err;
1481 
1482 	intel_runtime_pm_get(dev_priv);
1483 
1484 	ret = i915_mutex_lock_interruptible(dev);
1485 	if (ret)
1486 		goto err_rpm;
1487 
1488 	ret = -EFAULT;
1489 	/* We can only do the GTT pwrite on untiled buffers, as otherwise
1490 	 * it would end up going through the fenced access, and we'll get
1491 	 * different detiling behavior between reading and writing.
1492 	 * pread/pwrite currently are reading and writing from the CPU
1493 	 * perspective, requiring manual detiling by the client.
1494 	 */
1495 	if (!i915_gem_object_has_struct_page(obj) ||
1496 	    cpu_write_needs_clflush(obj)) {
1497 		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
1498 		/* Note that the gtt paths might fail with non-page-backed user
1499 		 * pointers (e.g. gtt mappings when moving data between
1500 		 * textures). Fall back to the shmem path in that case. */
1501 	}
1502 
1503 	if (ret == -EFAULT || ret == -ENOSPC) {
1504 		if (obj->phys_handle)
1505 			ret = i915_gem_phys_pwrite(obj, args, file);
1506 		else
1507 			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
1508 	}
1509 
1510 	i915_gem_object_put(obj);
1511 	mutex_unlock(&dev->struct_mutex);
1512 	intel_runtime_pm_put(dev_priv);
1513 
1514 	return ret;
1515 
1516 err_rpm:
1517 	intel_runtime_pm_put(dev_priv);
1518 err:
1519 	i915_gem_object_put_unlocked(obj);
1520 	return ret;
1521 }
1522 
1523 static inline enum fb_op_origin
1524 write_origin(struct drm_i915_gem_object *obj, unsigned domain)
1525 {
1526 	return (domain == I915_GEM_DOMAIN_GTT ?
1527 		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
1528 }
1529 
1530 /**
1531  * Called when user space prepares to use an object with the CPU, either
1532  * through the mmap ioctl's mapping or a GTT mapping.
1533  * @dev: drm device
1534  * @data: ioctl data blob
1535  * @file: drm file
1536  */
1537 int
1538 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1539 			  struct drm_file *file)
1540 {
1541 	struct drm_i915_gem_set_domain *args = data;
1542 	struct drm_i915_gem_object *obj;
1543 	uint32_t read_domains = args->read_domains;
1544 	uint32_t write_domain = args->write_domain;
1545 	int ret;
1546 
1547 	/* Only handle setting domains to types used by the CPU. */
1548 	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
1549 		return -EINVAL;
1550 
1551 	/* Having something in the write domain implies it's in the read
1552 	 * domain, and only that read domain.  Enforce that in the request.
1553 	 */
1554 	if (write_domain != 0 && read_domains != write_domain)
1555 		return -EINVAL;
1556 
1557 	obj = i915_gem_object_lookup(file, args->handle);
1558 	if (!obj)
1559 		return -ENOENT;
1560 
1561 	/* Try to flush the object off the GPU without holding the lock.
1562 	 * We will repeat the flush holding the lock in the normal manner
1563 	 * to catch cases where we are gazumped.
1564 	 */
1565 	ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain);
1566 	if (ret)
1567 		goto err;
1568 
1569 	ret = i915_mutex_lock_interruptible(dev);
1570 	if (ret)
1571 		goto err;
1572 
1573 	if (read_domains & I915_GEM_DOMAIN_GTT)
1574 		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1575 	else
1576 		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1577 
1578 	if (write_domain != 0)
1579 		intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
1580 
1581 	i915_gem_object_put(obj);
1582 	mutex_unlock(&dev->struct_mutex);
1583 	return ret;
1584 
1585 err:
1586 	i915_gem_object_put_unlocked(obj);
1587 	return ret;
1588 }
1589 
1590 /**
1591  * Called when user space has done writes to this buffer
1592  * @dev: drm device
1593  * @data: ioctl data blob
1594  * @file: drm file
1595  */
1596 int
1597 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1598 			 struct drm_file *file)
1599 {
1600 	struct drm_i915_gem_sw_finish *args = data;
1601 	struct drm_i915_gem_object *obj;
1602 	int err = 0;
1603 
1604 	obj = i915_gem_object_lookup(file, args->handle);
1605 	if (!obj)
1606 		return -ENOENT;
1607 
1608 	/* Pinned buffers may be scanout, so flush the cache */
1609 	if (READ_ONCE(obj->pin_display)) {
1610 		err = i915_mutex_lock_interruptible(dev);
1611 		if (!err) {
1612 			i915_gem_object_flush_cpu_write_domain(obj);
1613 			mutex_unlock(&dev->struct_mutex);
1614 		}
1615 	}
1616 
1617 	i915_gem_object_put_unlocked(obj);
1618 	return err;
1619 }
1620 
1621 /**
1622  * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1623  *			 it is mapped to.
1624  * @dev: drm device
1625  * @data: ioctl data blob
1626  * @file: drm file
1627  *
1628  * While the mapping holds a reference on the contents of the object, it doesn't
1629  * imply a ref on the object itself.
1630  *
1631  * IMPORTANT:
1632  *
1633  * DRM driver writers who look at this function as an example of how to do GEM
1634  * mmap support: please don't implement mmap support like this. The modern way
1635  * to implement DRM mmap support is with an mmap offset ioctl (like
1636  * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1637  * That way debug tooling like valgrind will understand what's going on; hiding
1638  * the mmap call in a driver-private ioctl breaks that. The i915 driver only
1639  * does cpu mmaps this way because we didn't know better.
1640  */
1641 int
1642 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1643 		    struct drm_file *file)
1644 {
1645 	struct drm_i915_gem_mmap *args = data;
1646 	struct drm_i915_gem_object *obj;
1647 	unsigned long addr = 0;	/* avoid returning garbage on the size == 0 path */
1648 
1649 	struct proc *p = curproc;
1650 	vm_map_t map = &p->p_vmspace->vm_map;
1651 	vm_size_t size;
1652 	int error = 0, rv;
1653 
1654 	if (args->flags & ~(I915_MMAP_WC))
1655 		return -EINVAL;
1656 
1657 #if 0
1658 	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1659 		return -ENODEV;
1660 #endif
1661 
1662 	obj = i915_gem_object_lookup(file, args->handle);
1663 	if (!obj)
1664 		return -ENOENT;
1665 
1666 	/* prime objects have no backing filp to GEM mmap
1667 	 * pages from.
1668 	 */
1669 	if (!obj->base.filp) {
1670 		i915_gem_object_put_unlocked(obj);
1671 		return -EINVAL;
1672 	}
1673 
1674 	if (args->size == 0)
1675 		goto out;
1676 
1677 	size = round_page(args->size);
1678 	if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
1679 		error = -ENOMEM;
1680 		goto out;
1681 	}
1682 
1683 	/*
1684 	 * Use a placement hint to ensure that NULL is not returned as a
1685 	 * valid address and to reduce vm_map traversals. XXX the hint causes
1686 	 * instability, so use a fixed low address as the start point instead
1687 	 * to avoid the NULL return issue.
1688 	 */
1689 	addr = PAGE_SIZE;
1690 
1691 	/*
1692 	 * Use 256KB alignment.  It is unclear why this matters for a
1693 	 * virtual address but it appears to fix a number of application/X
1694 	 * crashes, and kms console switching is much faster.
1695 	 */
1696 	vm_object_hold(obj->base.filp);
1697 	vm_object_reference_locked(obj->base.filp);
1698 	vm_object_drop(obj->base.filp);
1699 
1700 	/* Something goes wrong here: an mmap of size 4096 fails */
1701 	rv = vm_map_find(map, obj->base.filp, NULL,
1702 			 args->offset, &addr, args->size,
1703 			 256 * 1024, /* align */
1704 			 TRUE, /* fitit */
1705 			 VM_MAPTYPE_NORMAL, VM_SUBSYS_DRM_GEM,
1706 			 VM_PROT_READ | VM_PROT_WRITE, /* prot */
1707 			 VM_PROT_READ | VM_PROT_WRITE, /* max */
1708 			 MAP_SHARED /* cow */);
1709 	if (rv != KERN_SUCCESS) {
1710 		vm_object_deallocate(obj->base.filp);
1711 		error = -vm_mmap_to_errno(rv);
1712 	} else {
1713 		args->addr_ptr = (uint64_t)addr;
1714 	}
1715 
1716 	if (args->flags & I915_MMAP_WC) {	/* I915_PARAM_MMAP_VERSION */
1717 #if 0
1718 		struct mm_struct *mm = current->mm;
1719 		struct vm_area_struct *vma;
1720 
1721 		if (down_write_killable(&mm->mmap_sem)) {
1722 			i915_gem_object_put_unlocked(obj);
1723 			return -EINTR;
1724 		}
1725 		vma = find_vma(mm, addr);
1726 		if (vma)
1727 			vma->vm_page_prot =
1728 				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1729 		else
1730 			addr = -ENOMEM;
1731 		up_write(&mm->mmap_sem);
1732 #endif
1733 
1734 		/* This may race, but that's ok, it only gets set */
1735 		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
1736 	}
1737 
1738 out:
1739 	i915_gem_object_put_unlocked(obj);
1740 	if (error != 0)
1741 		return error;
1742 
1743 	args->addr_ptr = (uint64_t) addr;
1744 
1745 	return 0;
1746 }
1747 
1748 #if 0
1749 static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
1750 {
1751 	u64 size;
1752 
1753 	size = i915_gem_object_get_stride(obj);
1754 	size *= i915_gem_object_get_tiling(obj) == I915_TILING_Y ? 32 : 8;
1755 
1756 	return size >> PAGE_SHIFT;
1757 }
1758 #endif
1759 
1760 /**
1761  * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
1762  *
1763  * A history of the GTT mmap interface:
1764  *
1765  * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
1766  *     be aligned and suitable for fencing, and still fit into the available
1767  *     mappable space left by the pinned display objects. A classic problem
1768  *     we called the page-fault-of-doom where we would ping-pong between
1769  *     two objects that could not fit inside the GTT and so the memcpy
1770  *     would page one object in at the expense of the other between every
1771  *     single byte.
1772  *
1773  * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
1774  *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
1775  *     object is too large for the available space (or simply too large
1776  *     for the mappable aperture!), a view is created instead and faulted
1777  *     into userspace. (This view is aligned and sized appropriately for
1778  *     fenced access.)
1779  *
1780  * Restrictions:
1781  *
1782  *  * snoopable objects cannot be accessed via the GTT. It can cause machine
1783  *  * snoopable objects cannot be accessed via the GTT. Doing so can cause machine
1784  *    a GTT page fault from a snoopable object will generate a SIGBUS.
1785  *
1786  *  * the object must be able to fit into RAM (physical memory, though not
1787  *    limited to the mappable aperture).
1788  *
1789  *
1790  * Caveats:
1791  *
1792  *  * a new GTT page fault will synchronize rendering from the GPU and flush
1793  *    all data to system memory. Subsequent access will not be synchronized.
1794  *
1795  *  * all mappings are revoked on runtime device suspend.
1796  *
1797  *  * there are only 8, 16 or 32 fence registers to share between all users
1798  *    (older machines require a fence register for display and blitter access
1799  *    as well). Contention of the fence registers will cause the previous users
1800  *    to be unmapped and any new access will generate new page faults.
1801  *
1802  *  * running out of memory while servicing a fault may generate a SIGBUS,
1803  *    rather than the expected SIGSEGV.
1804  */
1805 int i915_gem_mmap_gtt_version(void)
1806 {
1807 	return 1;
1808 }
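
/*
 * Illustrative userspace use of a GTT mmap (not part of the driver): the
 * client asks for the fake offset with DRM_IOCTL_I915_GEM_MMAP_GTT and then
 * mmap()s the DRM fd at that offset, which routes faults through
 * i915_gem_fault() below, e.g.
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);
 */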
1809 
1810 /**
1811  * i915_gem_fault - fault a page into the GTT
1812  *
1813  * vm_obj is locked on entry and expected to be locked on return.
1814  *
1815  * The vm_pager has placemarked the object with an anonymous memory page
1816  * which we must replace atomically to avoid races against concurrent faults
1817  * on the same page.  XXX we currently are unable to do this atomically.
1818  *
1819  * If we are to return an error we should not touch the anonymous page,
1820  * the caller will deallocate it.
1821  *
1822  * XXX Most GEM calls appear to be interruptible, but we can't hard loop
1823  * in that case.  Release all resources and wait 1 tick before retrying.
1824  * This is a huge problem which needs to be fixed by getting rid of most
1825  * of the interruptibility.  The Linux code does not retry but does appear
1826  * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level
1827  * to be able to retry.
1828  *
1829  * --
1830  * @vma: VMA in question
1831  * @vmf: fault info
1832  *
1833  * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1834  * from userspace.  The fault handler takes care of binding the object to
1835  * the GTT (if needed), allocating and programming a fence register (again,
1836  * only if needed based on whether the old reg is still valid or the object
1837  * is tiled) and inserting a new PTE into the faulting process.
1838  *
1839  * Note that the faulting process may involve evicting existing objects
1840  * from the GTT and/or fence registers to make room.  So performance may
1841  * suffer if the GTT working set is large or there are few fence registers
1842  * left.
1843  *
1844  * The current feature set supported by i915_gem_fault() and thus GTT mmaps
1845  * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
1846  * vm_obj is locked on entry and expected to be locked on return.  The VM
1847  * pager has placed an anonymous memory page at (obj,offset) which we have
1848  * to replace.
1849  */
1850 int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres)
1851 {
1852 #define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
1853 	struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle);
1854 	struct drm_device *dev = obj->base.dev;
1855 	struct drm_i915_private *dev_priv = to_i915(dev);
1856 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
1857 	bool write = !!(prot & VM_PROT_WRITE);
1858 	struct i915_vma *vma;
1859 	unsigned long page_offset;
1860 	vm_page_t m;
1861 	unsigned int flags;
1862 	int ret;
1863 
1864 	/* We don't use vmf->pgoff since that has the fake offset */
1865 	page_offset = (unsigned long)offset;
1866 
1867 	/*
1868 	 * vm_fault() has supplied us with a busied page placeholding
1869 	 * the operation.  This presents a lock order reversal issue
1870 	 * against i915_gem_release_mmap() for our device mutex.
1871 	 *
1872 	 * Deal with the problem by getting rid of the placeholder now,
1873 	 * and then dealing with the potential for a new placeholder when
1874 	 * we try to insert later.
1875 	 */
1876 	if (*mres != NULL) {
1877 		m = *mres;
1878 		*mres = NULL;
1879 		if ((m->busy_count & PBUSY_LOCKED) == 0)
1880 			kprintf("i915_gem_fault: Page was not busy\n");
1881 		else
1882 			vm_page_remove(m);
1883 		vm_page_free(m);
1884 	}
1885 
1886 	m = NULL;
1887 
1888 retry:
1889 	trace_i915_gem_object_fault(obj, page_offset, true, write);
1890 
1891 	/* Try to flush the object off the GPU first without holding the lock.
1892 	 * Upon acquiring the lock, we will perform our sanity checks and then
1893 	 * repeat the flush holding the lock in the normal manner to catch cases
1894 	 * where we are gazumped.
1895 	 */
1896 	ret = __unsafe_wait_rendering(obj, NULL, !write);
1897 	if (ret)
1898 		goto err;
1899 
1900 	intel_runtime_pm_get(dev_priv);
1901 
1902 	ret = i915_mutex_lock_interruptible(dev);
1903 	if (ret)
1904 		goto err_rpm;
1905 
1906 	/* Access to snoopable pages through the GTT is incoherent. */
1907 	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
1908 		ret = -EFAULT;
1909 		goto err_unlock;
1910 	}
1911 
1912 	/* If the object is smaller than a couple of partial vma, it is
1913 	 * not worth only creating a single partial vma - we may as well
1914 	 * clear enough space for the full object.
1915 	 */
1916 	flags = PIN_MAPPABLE;
1917 	if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT)
1918 		flags |= PIN_NONBLOCK | PIN_NONFAULT;
1919 
1920 	/* Now pin it into the GTT as needed */
1921 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
1922 #if 0
1923 	if (IS_ERR(vma)) {
1924 		struct i915_ggtt_view view;
1925 		unsigned int chunk_size;
1926 
1927 		/* Use a partial view if it is bigger than available space */
1928 		chunk_size = MIN_CHUNK_PAGES;
1929 		if (i915_gem_object_is_tiled(obj))
1930 			chunk_size = max(chunk_size, tile_row_pages(obj));
1931 
1932 		memset(&view, 0, sizeof(view));
1933 		view.type = I915_GGTT_VIEW_PARTIAL;
1934 		view.params.partial.offset = rounddown(page_offset, chunk_size);
1935 		view.params.partial.size =
1936 			min_t(unsigned int, chunk_size,
1937 			      vma_pages(area) - view.params.partial.offset);
1938 
1939 		/* If the partial covers the entire object, just create a
1940 		 * normal VMA.
1941 		 */
1942 		if (chunk_size >= obj->base.size >> PAGE_SHIFT)
1943 			view.type = I915_GGTT_VIEW_NORMAL;
1944 
1945 		/* Userspace is now writing through an untracked VMA, abandon
1946 		 * all hope that the hardware is able to track future writes.
1947 		 */
1948 		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
1949 
1950 		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
1951 	}
1952 #endif
1953 	if (IS_ERR(vma)) {
1954 		ret = PTR_ERR(vma);
1955 		goto err_unlock;
1956 	}
1957 
1958 	ret = i915_gem_object_set_to_gtt_domain(obj, write);
1959 	if (ret)
1960 		goto err_unpin;
1961 
1962 	ret = i915_vma_get_fence(vma);
1963 	if (ret)
1964 		goto err_unpin;
1965 
1966 	/*
1967 	 * START FREEBSD MAGIC
1968 	 *
1969 	 * Add a pip count to avoid destruction and certain other
1970 	 * complex operations (such as collapses?) while unlocked.
1971 	 */
1972 	vm_object_pip_add(vm_obj, 1);
1973 
1974 	ret = 0;
1975 	m = NULL;
1976 
1977 	/*
1978 	 * Since the object lock was dropped, another thread might have
1979 	 * faulted on the same GTT address and instantiated the mapping.
1980 	 * Recheck.
1981 	 */
1982 	m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
1983 	if (m != NULL) {
1984 		/*
1985 		 * Try to busy the page, retry on failure (non-zero ret).
1986 		 */
1987 		if (vm_page_busy_try(m, false)) {
1988 			kprintf("i915_gem_fault: BUSY\n");
1989 			ret = -EINTR;
1990 			goto err_unlock;
1991 		}
1992 		goto have_page;
1993 	}
1994 	/* END FREEBSD MAGIC */
1995 
1996 	obj->fault_mappable = true;
1997 
1998 	/* Finally, remap it using the new GTT offset */
1999 	m = vm_phys_fictitious_to_vm_page(ggtt->mappable_base +
2000 			i915_ggtt_offset(vma) + offset);
2001 	if (m == NULL) {
2002 		ret = -EFAULT;
2003 		goto err_unpin;
2004 	}
2005 	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m));
2006 	KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));
2007 
2008 	/*
2009 	 * Try to busy the page.  Fails on non-zero return.
2010 	 */
2011 	if (vm_page_busy_try(m, false)) {
2012 		kprintf("i915_gem_fault: BUSY(2)\n");
2013 		ret = -EINTR;
2014 		goto err_unpin;
2015 	}
2016 	m->valid = VM_PAGE_BITS_ALL;
2017 
2018 #if 1
2019 	/*
2020 	 * This should always work since we already checked via a lookup
2021 	 * above.
2022 	 */
2023 	if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset)) == FALSE) {
2024 		kprintf("i915_gem_fault: page %p,%jd already in object\n",
2025 			vm_obj,
2026 			OFF_TO_IDX(offset));
2027 		vm_page_wakeup(m);
2028 		ret = -EINTR;
2029 		goto err_unpin;
2030 	}
2031 #else
2032 	/* NOT COMPILED ATM */
2033 	if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
2034 		if (!obj->fault_mappable) {
2035 			unsigned long size =
2036 				min_t(unsigned long,
2037 				      area->vm_end - area->vm_start,
2038 				      obj->base.size) >> PAGE_SHIFT;
2039 			unsigned long base = area->vm_start;
2040 			int i;
2041 
2042 			for (i = 0; i < size; i++) {
2043 				ret = vm_insert_pfn(area,
2044 						    base + i * PAGE_SIZE,
2045 						    pfn + i);
2046 				if (ret)
2047 					break;
2048 			}
2049 
2050 		} else
2051 			ret = vm_insert_pfn(area,
2052 					    (unsigned long)vmf->virtual_address,
2053 					    pfn + page_offset);
2054 	} else {
2055 		/* Overriding existing pages in partial view does not cause
2056 		 * us any trouble as TLBs are still valid because the fault
2057 		 * is due to userspace losing part of the mapping or never
2058 		 * having accessed it before (at this partial's range).
2059 		 */
2060 		const struct i915_ggtt_view *view = &vma->ggtt_view;
2061 		unsigned long base = area->vm_start +
2062 			(view->params.partial.offset << PAGE_SHIFT);
2063 		unsigned int i;
2064 
2065 		for (i = 0; i < view->params.partial.size; i++) {
2066 			ret = vm_insert_pfn(area,
2067 					    base + i * PAGE_SIZE,
2068 					    pfn + i);
2069 			if (ret)
2070 				break;
2071 		}
2072 	}
2073 
2074 	obj->fault_mappable = true;
2075 #endif
2076 
2077 have_page:
2078 	*mres = m;
2079 
2080 	__i915_vma_unpin(vma);
2081 	mutex_unlock(&dev->struct_mutex);
2082 	ret = VM_PAGER_OK;
2083 	goto done;
2084 
2085 	/*
2086 	 * ALTERNATIVE ERROR RETURN.
2087 	 *
2088 	 * OBJECT EXPECTED TO BE LOCKED.
2089 	 */
2090 err_unpin:
2091 	__i915_vma_unpin(vma);
2092 err_unlock:
2093 	mutex_unlock(&dev->struct_mutex);
2094 err_rpm:
2095 	intel_runtime_pm_put(dev_priv);
2096 err:
2097 	switch (ret) {
2098 	case -EIO:
2099 		/*
2100 		 * We eat errors when the gpu is terminally wedged to avoid
2101 		 * userspace unduly crashing (gl has no provisions for mmaps to
2102 		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
2103 		 * and so needs to be reported.
2104 		 */
2105 		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
2106 //			ret = VM_FAULT_SIGBUS;
2107 			break;
2108 		}
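		/* FALLTHROUGH: a wedged GPU is treated like -EAGAIN below. */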
2109 	case -EAGAIN:
2110 		/*
2111 		 * EAGAIN means the gpu is hung and we'll wait for the error
2112 		 * handler to reset everything when re-faulting in
2113 		 * i915_mutex_lock_interruptible.
2114 		 */
2115 	case -ERESTARTSYS:
2116 	case -EINTR:
2117 		VM_OBJECT_UNLOCK(vm_obj);
2118 		int dummy;
2119 		tsleep(&dummy, 0, "delay", 1); /* XXX */
2120 		VM_OBJECT_LOCK(vm_obj);
2121 		goto retry;
2122 	default:
2123 		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
2124 		ret = VM_PAGER_ERROR;
2125 		break;
2126 	}
2127 
2128 done:
2129 	vm_object_pip_wakeup(vm_obj);
2130 
2131 	return ret;
2132 }
2133 
2134 /**
2135  * i915_gem_release_mmap - remove physical page mappings
2136  * @obj: obj in question
2137  *
2138  * Preserve the reservation of the mmapping with the DRM core code, but
2139  * relinquish ownership of the pages back to the system.
2140  *
2141  * It is vital that we remove the page mapping if we have mapped a tiled
2142  * object through the GTT and then lose the fence register due to
2143  * resource pressure. Similarly if the object has been moved out of the
2144  * aperture, then pages mapped into userspace must be revoked. Removing the
2145  * mapping will then trigger a page fault on the next user access, allowing
2146  * fixup by i915_gem_fault().
2147  */
2148 void
2149 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
2150 {
2151 	vm_object_t devobj;
2152 	vm_page_t m;
2153 	int i, page_count;
2154 
2155 	/* Serialisation between user GTT access and our code depends upon
2156 	 * revoking the CPU's PTE whilst the mutex is held. The next user
2157 	 * pagefault then has to wait until we release the mutex.
2158 	 */
2159 	lockdep_assert_held(&obj->base.dev->struct_mutex);
2160 
2161 	if (!obj->fault_mappable)
2162 		return;
2163 
2164 	devobj = cdev_pager_lookup(obj);
2165 	if (devobj != NULL) {
2166 		page_count = OFF_TO_IDX(obj->base.size);
2167 
2168 		VM_OBJECT_LOCK(devobj);
2169 		for (i = 0; i < page_count; i++) {
2170 			m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm");
2171 			if (m == NULL)
2172 				continue;
2173 			cdev_pager_free_page(devobj, m);
2174 		}
2175 		VM_OBJECT_UNLOCK(devobj);
2176 		vm_object_deallocate(devobj);
2177 	}
2178 
2179 	/* Ensure that the CPU's PTEs are revoked and there are no outstanding
2180 	 * memory transactions from userspace before we return. The TLB
2181 	 * flushing implied by changing the PTE above *should* be
2182 	 * sufficient; an extra barrier here just provides us with a bit
2183 	 * of paranoid documentation about our requirement to serialise
2184 	 * memory writes before touching registers / GSM.
2185 	 */
2186 	wmb();
2187 
2188 	obj->fault_mappable = false;
2189 }
2190 
2191 void
2192 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
2193 {
2194 	struct drm_i915_gem_object *obj;
2195 
2196 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
2197 		i915_gem_release_mmap(obj);
2198 }
2199 
2200 /**
2201  * i915_gem_get_ggtt_size - return required global GTT size for an object
2202  * @dev_priv: i915 device
2203  * @size: object size
2204  * @tiling_mode: tiling mode
2205  *
2206  * Return the required global GTT size for an object, taking into account
2207  * potential fence register mapping.
2208  */
2209 u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv,
2210 			   u64 size, int tiling_mode)
2211 {
2212 	u64 ggtt_size;
2213 
2214 	GEM_BUG_ON(size == 0);
2215 
2216 	if (INTEL_GEN(dev_priv) >= 4 ||
2217 	    tiling_mode == I915_TILING_NONE)
2218 		return size;
2219 
2220 	/* Previous chips need a power-of-two fence region when tiling */
2221 	if (IS_GEN3(dev_priv))
2222 		ggtt_size = 1024*1024;
2223 	else
2224 		ggtt_size = 512*1024;
2225 
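	/* Round up to the next power of two; e.g. a 1.5 MiB tiled buffer
	 * on gen3 ends up occupying a 2 MiB fence region.
	 */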
2226 	while (ggtt_size < size)
2227 		ggtt_size <<= 1;
2228 
2229 	return ggtt_size;
2230 }
2231 
2232 /**
2233  * i915_gem_get_ggtt_alignment - return required global GTT alignment
2234  * @dev_priv: i915 device
2235  * @size: object size
2236  * @tiling_mode: tiling mode
2237  * @fenced: is fenced alignment required or not
2238  *
2239  * Return the required global GTT alignment for an object, taking into account
2240  * potential fence register mapping.
2241  */
2242 u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
2243 				int tiling_mode, bool fenced)
2244 {
2245 	GEM_BUG_ON(size == 0);
2246 
2247 	/*
2248 	 * Minimum alignment is 4k (GTT page size), but might be greater
2249 	 * if a fence register is needed for the object.
2250 	 */
2251 	if (INTEL_GEN(dev_priv) >= 4 || (!fenced && IS_G33(dev_priv)) ||
2252 	    tiling_mode == I915_TILING_NONE)
2253 		return 4096;
2254 
2255 	/*
2256 	 * Previous chips need to be aligned to the size of the smallest
2257 	 * fence register that can contain the object.
2258 	 */
2259 	return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode);
2260 }
2261 
2262 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2263 {
2264 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2265 	int err;
2266 
2267 	err = drm_gem_create_mmap_offset(&obj->base);
2268 	if (!err)
2269 		return 0;
2270 
2271 	/* We can idle the GPU locklessly to flush stale objects, but in order
2272 	 * to claim that space for ourselves, we need to take the big
2273 	 * struct_mutex to free the requests+objects and allocate our slot.
2274 	 */
2275 	err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
2276 	if (err)
2277 		return err;
2278 
2279 	err = i915_mutex_lock_interruptible(&dev_priv->drm);
2280 	if (!err) {
2281 		i915_gem_retire_requests(dev_priv);
2282 		err = drm_gem_create_mmap_offset(&obj->base);
2283 		mutex_unlock(&dev_priv->drm.struct_mutex);
2284 	}
2285 
2286 	return err;
2287 }
2288 
2289 #if 0
2290 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2291 {
2292 	drm_gem_free_mmap_offset(&obj->base);
2293 }
2294 #endif
2295 
2296 int
2297 i915_gem_mmap_gtt(struct drm_file *file,
2298 		  struct drm_device *dev,
2299 		  uint32_t handle,
2300 		  uint64_t *offset)
2301 {
2302 	struct drm_i915_gem_object *obj;
2303 	int ret;
2304 
2305 	obj = i915_gem_object_lookup(file, handle);
2306 	if (!obj)
2307 		return -ENOENT;
2308 
2309 	ret = i915_gem_object_create_mmap_offset(obj);
2310 	if (ret == 0)
2311 		*offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) |
2312 		    DRM_GEM_MAPPING_KEY;
2313 
2314 	i915_gem_object_put_unlocked(obj);
2315 	return ret;
2316 }
2317 
2318 /**
2319  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2320  * @dev: DRM device
2321  * @data: GTT mapping ioctl data
2322  * @file: GEM object info
2323  *
2324  * Simply returns the fake offset to userspace so it can mmap it.
2325  * The mmap call will end up in drm_gem_mmap(), which will set things
2326  * up so we can get faults in the handler above.
2327  *
2328  * The fault handler will take care of binding the object into the GTT
2329  * (since it may have been evicted to make room for something), allocating
2330  * a fence register, and mapping the appropriate aperture address into
2331  * userspace.
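 *
 * A rough userspace sketch (an illustrative assumption, not code from this
 * driver; fd, handle and size are supplied by the caller):
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);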
2332  */
2333 int
2334 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2335 			struct drm_file *file)
2336 {
2337 	struct drm_i915_gem_mmap_gtt *args = data;
2338 
2339 	return i915_gem_mmap_gtt(file, dev, args->handle, (uint64_t *)&args->offset);
2340 }
2341 
2342 /* Immediately discard the backing storage */
2343 static void
2344 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2345 {
2346 	vm_object_t vm_obj = obj->base.filp;
2347 
2348 	if (obj->base.filp == NULL)
2349 		return;
2350 
2351 	VM_OBJECT_LOCK(vm_obj);
2352 	vm_object_page_remove(vm_obj, 0, 0, false);
2353 	VM_OBJECT_UNLOCK(vm_obj);
2354 
2355 	/* Our goal here is to return as much of the memory as
2356 	 * possible back to the system, as we are called from OOM.
2357 	 * To do this we must instruct the shmfs to drop all of its
2358 	 * backing pages, *now*.
2359 	 */
2360 #if 0
2361 	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
2362 #endif
2363 	obj->madv = __I915_MADV_PURGED;
2364 }
2365 
2366 /* Try to discard unwanted pages */
2367 static void
2368 i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
2369 {
2370 #if 0
2371 	struct address_space *mapping;
2372 #endif
2373 
2374 	switch (obj->madv) {
2375 	case I915_MADV_DONTNEED:
2376 		i915_gem_object_truncate(obj);
2377 	case __I915_MADV_PURGED:
2378 		return;
2379 	}
2380 
2381 	if (obj->base.filp == NULL)
2382 		return;
2383 
2384 #if 0
2385 	mapping = file_inode(obj->base.filp)->i_mapping,
2386 #endif
2387 	invalidate_mapping_pages(obj->base.filp, 0, (loff_t)-1);
2388 }
2389 
2390 static void
2391 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2392 {
2393 	struct sgt_iter sgt_iter;
2394 	struct page *page;
2395 	int ret;
2396 
2397 	BUG_ON(obj->madv == __I915_MADV_PURGED);
2398 
2399 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
2400 	if (WARN_ON(ret)) {
2401 		/* In the event of a disaster, abandon all caches and
2402 		 * hope for the best.
2403 		 */
2404 		i915_gem_clflush_object(obj, true);
2405 		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2406 	}
2407 
2408 	i915_gem_gtt_finish_object(obj);
2409 
2410 	if (i915_gem_object_needs_bit17_swizzle(obj))
2411 		i915_gem_object_save_bit_17_swizzle(obj);
2412 
2413 	if (obj->madv == I915_MADV_DONTNEED)
2414 		obj->dirty = 0;
2415 
2416 	for_each_sgt_page(page, sgt_iter, obj->pages) {
2417 		if (obj->dirty)
2418 			set_page_dirty(page);
2419 
2420 		if (obj->madv == I915_MADV_WILLNEED)
2421 			mark_page_accessed(page);
2422 
2423 		vm_page_busy_wait((struct vm_page *)page, FALSE, "i915gem");
2424 		vm_page_unwire((struct vm_page *)page, 1);
2425 		vm_page_wakeup((struct vm_page *)page);
2426 	}
2427 	obj->dirty = 0;
2428 
2429 	sg_free_table(obj->pages);
2430 	kfree(obj->pages);
2431 }
2432 
2433 int
2434 i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2435 {
2436 	const struct drm_i915_gem_object_ops *ops = obj->ops;
2437 
2438 	if (obj->pages == NULL)
2439 		return 0;
2440 
2441 	if (obj->pages_pin_count)
2442 		return -EBUSY;
2443 
2444 	GEM_BUG_ON(obj->bind_count);
2445 
2446 	/* ->put_pages might need to allocate memory for the bit17 swizzle
2447 	 * array, hence protect them from being reaped by removing them from gtt
2448 	 * lists early. */
2449 	list_del(&obj->global_list);
2450 
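	/* Drop any kernel vmap/kmap of the pages before releasing them. */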
2451 	if (obj->mapping) {
2452 		void *ptr;
2453 
2454 		ptr = ptr_mask_bits(obj->mapping);
2455 		if (is_vmalloc_addr(ptr))
2456 			vunmap(ptr);
2457 		else
2458 			kunmap(kmap_to_page(ptr));
2459 
2460 		obj->mapping = NULL;
2461 	}
2462 
2463 	ops->put_pages(obj);
2464 	obj->pages = NULL;
2465 
2466 	i915_gem_object_invalidate(obj);
2467 
2468 	return 0;
2469 }
2470 
2471 static unsigned long swiotlb_max_size(void)
2472 {
2473 #if IS_ENABLED(CONFIG_SWIOTLB)
2474 	return rounddown(swiotlb_nr_tbl() << IO_TLB_SHIFT, PAGE_SIZE);
2475 #else
2476 	return 0;
2477 #endif
2478 }
2479 
2480 static int
2481 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2482 {
2483 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2484 	int page_count, i;
2485 	vm_object_t vm_obj;
2486 	struct sg_table *st;
2487 	struct scatterlist *sg;
2488 	struct sgt_iter sgt_iter;
2489 	struct page *page;
2490 	unsigned long last_pfn = 0;	/* suppress gcc warning */
2491 	unsigned long max_segment;
2492 	int ret;
2493 
2494 	/* Assert that the object is not currently in any GPU domain. As it
2495 	 * wasn't in the GTT, there shouldn't be any way it could have been in
2496 	 * a GPU cache
2497 	 */
2498 	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2499 	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2500 
2501 	max_segment = swiotlb_max_size();
2502 	if (!max_segment)
2503 		max_segment = obj->base.size;
2504 
2505 	st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL);
2506 	if (st == NULL)
2507 		return -ENOMEM;
2508 
2509 	page_count = obj->base.size / PAGE_SIZE;
2510 	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2511 		kfree(st);
2512 		return -ENOMEM;
2513 	}
2514 
2515 	/* Get the list of pages out of our struct file.  They'll be pinned
2516 	 * at this point until we release them.
2517 	 *
2518 	 * Fail silently without starting the shrinker
2519 	 */
2520 	vm_obj = obj->base.filp;
2521 	VM_OBJECT_LOCK(vm_obj);
2522 	sg = st->sgl;
2523 	st->nents = 0;
2524 	for (i = 0; i < page_count; i++) {
2525 		page = shmem_read_mapping_page(vm_obj, i);
2526 		if (IS_ERR(page)) {
2527 			i915_gem_shrink(dev_priv,
2528 					page_count,
2529 					I915_SHRINK_BOUND |
2530 					I915_SHRINK_UNBOUND |
2531 					I915_SHRINK_PURGEABLE);
2532 			page = shmem_read_mapping_page(vm_obj, i);
2533 		}
2534 		if (IS_ERR(page)) {
2535 			/* We've tried hard to allocate the memory by reaping
2536 			 * our own buffer, now let the real VM do its job and
2537 			 * go down in flames if truly OOM.
2538 			 */
2539 			page = shmem_read_mapping_page(vm_obj, i);
2540 			if (IS_ERR(page)) {
2541 				ret = PTR_ERR(page);
2542 				goto err_pages;
2543 			}
2544 		}
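		/*
		 * Coalesce physically contiguous pages into one sg entry;
		 * start a new entry when the run of pfns breaks or the
		 * segment would grow beyond max_segment.
		 */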
2545 		if (!i ||
2546 		    sg->length >= max_segment ||
2547 		    page_to_pfn(page) != last_pfn + 1) {
2548 			if (i)
2549 				sg = sg_next(sg);
2550 			st->nents++;
2551 			sg_set_page(sg, page, PAGE_SIZE, 0);
2552 		} else {
2553 			sg->length += PAGE_SIZE;
2554 		}
2555 		last_pfn = page_to_pfn(page);
2556 
2557 		/* Check that the i965g/gm workaround works. */
2558 	}
2559 	if (sg) /* loop terminated early; short sg table */
2560 		sg_mark_end(sg);
2561 	obj->pages = st;
2562 	VM_OBJECT_UNLOCK(vm_obj);
2563 
2564 	ret = i915_gem_gtt_prepare_object(obj);
2565 	if (ret)
2566 		goto err_pages;
2567 
2568 	if (i915_gem_object_needs_bit17_swizzle(obj))
2569 		i915_gem_object_do_bit_17_swizzle(obj);
2570 
2571 	if (i915_gem_object_is_tiled(obj) &&
2572 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
2573 		i915_gem_object_pin_pages(obj);
2574 
2575 	return 0;
2576 
2577 err_pages:
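	/* Unwind: unwire every page gathered before the failure. */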
2578 	sg_mark_end(sg);
2579 	for_each_sgt_page(page, sgt_iter, st)
2580 	{
2581 		struct vm_page *vmp = (struct vm_page *)page;
2582 		vm_page_busy_wait(vmp, FALSE, "i915gem");
2583 		vm_page_unwire(vmp, 0);
2584 		vm_page_wakeup(vmp);
2585 	}
2586 	VM_OBJECT_UNLOCK(vm_obj);
2587 	sg_free_table(st);
2588 	kfree(st);
2589 
2590 	/* shmemfs first checks if there is enough memory to allocate the page
2591 	 * and reports ENOSPC should there be insufficient, along with the usual
2592 	 * ENOMEM for a genuine allocation failure.
2593 	 *
2594 	 * We use ENOSPC in our driver to mean that we have run out of aperture
2595 	 * space and so want to translate the error from shmemfs back to our
2596 	 * usual understanding of ENOMEM.
2597 	 */
2598 	if (ret == -ENOSPC)
2599 		ret = -ENOMEM;
2600 
2601 	return ret;
2602 }
2603 
2604 /* Ensure that the associated pages are gathered from the backing storage
2605  * and pinned into our object. i915_gem_object_get_pages() may be called
2606  * multiple times before they are released by a single call to
2607  * i915_gem_object_put_pages() - once the pages are no longer referenced
2608  * either as a result of memory pressure (reaping pages under the shrinker)
2609  * or as the object is itself released.
2610  */
2611 int
2612 i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2613 {
2614 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2615 	const struct drm_i915_gem_object_ops *ops = obj->ops;
2616 	int ret;
2617 
2618 	if (obj->pages)
2619 		return 0;
2620 
2621 	if (obj->madv != I915_MADV_WILLNEED) {
2622 		DRM_DEBUG("Attempting to obtain a purgeable object\n");
2623 		return -EFAULT;
2624 	}
2625 
2626 	BUG_ON(obj->pages_pin_count);
2627 
2628 	ret = ops->get_pages(obj);
2629 	if (ret)
2630 		return ret;
2631 
2632 	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
2633 
2634 	obj->get_page.sg = obj->pages->sgl;
2635 	obj->get_page.last = 0;
2636 
2637 	return 0;
2638 }
2639 
2640 /* The 'mapping' part of i915_gem_object_pin_map() below */
2641 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2642 				 enum i915_map_type type)
2643 {
2644 	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2645 	struct sg_table *sgt = obj->pages;
2646 	struct sgt_iter sgt_iter;
2647 	struct page *page;
2648 	struct page *stack_pages[32];
2649 	struct page **pages = stack_pages;
2650 	unsigned long i = 0;
2651 	pgprot_t pgprot;
2652 	void *addr;
2653 
2654 	/* A single page can always be kmapped */
2655 	if (n_pages == 1 && type == I915_MAP_WB)
2656 		return kmap(sg_page(sgt->sgl));
2657 
2658 	if (n_pages > ARRAY_SIZE(stack_pages)) {
2659 		/* Too big for stack -- allocate temporary array instead */
2660 		pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
2661 		if (!pages)
2662 			return NULL;
2663 	}
2664 
2665 	for_each_sgt_page(page, sgt_iter, sgt)
2666 		pages[i++] = page;
2667 
2668 	/* Check that we have the expected number of pages */
2669 	GEM_BUG_ON(i != n_pages);
2670 
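	/* Pick the kernel page protection for the vmap: cached write-back
	 * or write-combining, as requested by the caller.
	 */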
2671 	switch (type) {
2672 	case I915_MAP_WB:
2673 		pgprot = PAGE_KERNEL;
2674 		break;
2675 	case I915_MAP_WC:
2676 		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
2677 		break;
2678 	}
2679 	addr = vmap(pages, n_pages, 0, pgprot);
2680 
2681 	if (pages != stack_pages)
2682 		drm_free_large(pages);
2683 
2684 	return addr;
2685 }
2686 
2687 /* get, pin, and map the pages of the object into kernel space */
2688 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2689 			      enum i915_map_type type)
2690 {
2691 	enum i915_map_type has_type;
2692 	bool pinned;
2693 	void *ptr;
2694 	int ret;
2695 
2696 	lockdep_assert_held(&obj->base.dev->struct_mutex);
2697 	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
2698 
2699 	ret = i915_gem_object_get_pages(obj);
2700 	if (ret)
2701 		return ERR_PTR(ret);
2702 
2703 	i915_gem_object_pin_pages(obj);
2704 	pinned = obj->pages_pin_count > 1;
2705 
2706 	ptr = ptr_unpack_bits(obj->mapping, has_type);
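	/* A mapping of a different type must be torn down and rebuilt;
	 * that is only safe while we hold the sole pin on the pages.
	 */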
2707 	if (ptr && has_type != type) {
2708 		if (pinned) {
2709 			ret = -EBUSY;
2710 			goto err;
2711 		}
2712 
2713 		if (is_vmalloc_addr(ptr))
2714 			vunmap(ptr);
2715 		else
2716 			kunmap(kmap_to_page(ptr));
2717 
2718 		ptr = obj->mapping = NULL;
2719 	}
2720 
2721 	if (!ptr) {
2722 		ptr = i915_gem_object_map(obj, type);
2723 		if (!ptr) {
2724 			ret = -ENOMEM;
2725 			goto err;
2726 		}
2727 
2728 		obj->mapping = ptr_pack_bits(ptr, type);
2729 	}
2730 
2731 	return ptr;
2732 
2733 err:
2734 	i915_gem_object_unpin_pages(obj);
2735 	return ERR_PTR(ret);
2736 }
2737 
2738 static void
2739 i915_gem_object_retire__write(struct i915_gem_active *active,
2740 			      struct drm_i915_gem_request *request)
2741 {
2742 	struct drm_i915_gem_object *obj =
2743 		container_of(active, struct drm_i915_gem_object, last_write);
2744 
2745 	intel_fb_obj_flush(obj, true, ORIGIN_CS);
2746 }
2747 
2748 static void
2749 i915_gem_object_retire__read(struct i915_gem_active *active,
2750 			     struct drm_i915_gem_request *request)
2751 {
2752 	int idx = request->engine->id;
2753 	struct drm_i915_gem_object *obj =
2754 		container_of(active, struct drm_i915_gem_object, last_read[idx]);
2755 
2756 	GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx));
2757 
2758 	i915_gem_object_clear_active(obj, idx);
2759 	if (i915_gem_object_is_active(obj))
2760 		return;
2761 
2762 	/* Bump our place on the bound list to keep it roughly in LRU order
2763 	 * so that we don't steal from recently used but inactive objects
2764 	 * (unless we are forced to ofc!)
2765 	 */
2766 	if (obj->bind_count)
2767 		list_move_tail(&obj->global_list,
2768 			       &request->i915->mm.bound_list);
2769 
2770 	i915_gem_object_put(obj);
2771 }
2772 
2773 static bool i915_context_is_banned(const struct i915_gem_context *ctx)
2774 {
2775 	unsigned long elapsed;
2776 
2777 	if (ctx->hang_stats.banned)
2778 		return true;
2779 
2780 	elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
2781 	if (ctx->hang_stats.ban_period_seconds &&
2782 	    elapsed <= ctx->hang_stats.ban_period_seconds) {
2783 		DRM_DEBUG("context hanging too fast, banning!\n");
2784 		return true;
2785 	}
2786 
2787 	return false;
2788 }
2789 
2790 static void i915_set_reset_status(struct i915_gem_context *ctx,
2791 				  const bool guilty)
2792 {
2793 	struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
2794 
2795 	if (guilty) {
2796 		hs->banned = i915_context_is_banned(ctx);
2797 		hs->batch_active++;
2798 		hs->guilty_ts = get_seconds();
2799 	} else {
2800 		hs->batch_pending++;
2801 	}
2802 }
2803 
2804 struct drm_i915_gem_request *
2805 i915_gem_find_active_request(struct intel_engine_cs *engine)
2806 {
2807 	struct drm_i915_gem_request *request;
2808 
2809 	/* We are called by the error capture and reset at a random
2810 	 * point in time. In particular, note that neither is crucially
2811 	 * ordered with an interrupt. After a hang, the GPU is dead and we
2812 	 * assume that no more writes can happen (we waited long enough for
2813 	 * all writes that were in transaction to be flushed) - adding an
2814 	 * extra delay for a recent interrupt is pointless. Hence, we do
2815 	 * not need an engine->irq_seqno_barrier() before the seqno reads.
2816 	 */
2817 	list_for_each_entry(request, &engine->request_list, link) {
2818 		if (i915_gem_request_completed(request))
2819 			continue;
2820 
2821 		if (!i915_sw_fence_done(&request->submit))
2822 			break;
2823 
2824 		return request;
2825 	}
2826 
2827 	return NULL;
2828 }
2829 
2830 static void reset_request(struct drm_i915_gem_request *request)
2831 {
2832 	void *vaddr = request->ring->vaddr;
2833 	u32 head;
2834 
2835 	/* As this request likely depends on state from the lost
2836 	 * context, clear out all the user operations leaving the
2837 	 * breadcrumb at the end (so we get the fence notifications).
2838 	 */
2839 	head = request->head;
2840 	if (request->postfix < head) {
2841 		memset(vaddr + head, 0, request->ring->size - head);
2842 		head = 0;
2843 	}
2844 	memset(vaddr + head, 0, request->postfix - head);
2845 }
2846 
2847 static void i915_gem_reset_engine(struct intel_engine_cs *engine)
2848 {
2849 	struct drm_i915_gem_request *request;
2850 	struct i915_gem_context *incomplete_ctx;
2851 	bool ring_hung;
2852 
2853 	if (engine->irq_seqno_barrier)
2854 		engine->irq_seqno_barrier(engine);
2855 
2856 	request = i915_gem_find_active_request(engine);
2857 	if (!request)
2858 		return;
2859 
2860 	ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
2861 	if (engine->hangcheck.seqno != intel_engine_get_seqno(engine))
2862 		ring_hung = false;
2863 
2864 	i915_set_reset_status(request->ctx, ring_hung);
2865 	if (!ring_hung)
2866 		return;
2867 
2868 	DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
2869 			 engine->name, request->fence.seqno);
2870 
2871 	/* Setup the CS to resume from the breadcrumb of the hung request */
2872 	engine->reset_hw(engine, request);
2873 
2874 	/* Users of the default context do not rely on logical state
2875 	 * preserved between batches. They have to emit full state on
2876 	 * every batch and so it is safe to execute queued requests following
2877 	 * the hang.
2878 	 *
2879 	 * Other contexts preserve state, now corrupt. We want to skip all
2880 	 * queued requests that reference the corrupt context.
2881 	 */
2882 	incomplete_ctx = request->ctx;
2883 	if (i915_gem_context_is_default(incomplete_ctx))
2884 		return;
2885 
2886 	list_for_each_entry_continue(request, &engine->request_list, link)
2887 		if (request->ctx == incomplete_ctx)
2888 			reset_request(request);
2889 }
2890 
2891 void i915_gem_reset(struct drm_i915_private *dev_priv)
2892 {
2893 	struct intel_engine_cs *engine;
2894 	enum intel_engine_id id;
2895 
2896 	i915_gem_retire_requests(dev_priv);
2897 
2898 	for_each_engine(engine, dev_priv, id)
2899 		i915_gem_reset_engine(engine);
2900 
2901 	i915_gem_restore_fences(&dev_priv->drm);
2902 
2903 	if (dev_priv->gt.awake) {
2904 		intel_sanitize_gt_powersave(dev_priv);
2905 		intel_enable_gt_powersave(dev_priv);
2906 		if (INTEL_GEN(dev_priv) >= 6)
2907 			gen6_rps_busy(dev_priv);
2908 	}
2909 }
2910 
2911 static void nop_submit_request(struct drm_i915_gem_request *request)
2912 {
2913 }
2914 
2915 static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
2916 {
2917 	engine->submit_request = nop_submit_request;
2918 
2919 	/* Mark all pending requests as complete so that any concurrent
2920 	 * (lockless) lookup doesn't try and wait upon the request as we
2921 	 * reset it.
2922 	 */
2923 	intel_engine_init_seqno(engine, engine->last_submitted_seqno);
2924 
2925 	/*
2926 	 * Clear the execlists queue up before freeing the requests, as those
2927 	 * are the ones that keep the context and ringbuffer backing objects
2928 	 * pinned in place.
2929 	 */
2930 
2931 	if (i915.enable_execlists) {
2932 		lockmgr(&engine->execlist_lock, LK_EXCLUSIVE);
2933 		INIT_LIST_HEAD(&engine->execlist_queue);
2934 		i915_gem_request_put(engine->execlist_port[0].request);
2935 		i915_gem_request_put(engine->execlist_port[1].request);
2936 		memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
2937 		lockmgr(&engine->execlist_lock, LK_RELEASE);
2938 	}
2939 
2940 	engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
2941 }
2942 
2943 void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
2944 {
2945 	struct intel_engine_cs *engine;
2946 	enum intel_engine_id id;
2947 
2948 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
2949 	set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
2950 
2951 	i915_gem_context_lost(dev_priv);
2952 	for_each_engine(engine, dev_priv, id)
2953 		i915_gem_cleanup_engine(engine);
2954 	mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
2955 
2956 	i915_gem_retire_requests(dev_priv);
2957 }
2958 
2959 static void
2960 i915_gem_retire_work_handler(struct work_struct *work)
2961 {
2962 	struct drm_i915_private *dev_priv =
2963 		container_of(work, typeof(*dev_priv), gt.retire_work.work);
2964 	struct drm_device *dev = &dev_priv->drm;
2965 
2966 	/* Come back later if the device is busy... */
2967 	if (mutex_trylock(&dev->struct_mutex)) {
2968 		i915_gem_retire_requests(dev_priv);
2969 		mutex_unlock(&dev->struct_mutex);
2970 	}
2971 
2972 	/* Keep the retire handler running until we are finally idle.
2973 	 * We do not need to do this test under locking as in the worst-case
2974 	 * we queue the retire worker once too often.
2975 	 */
2976 	if (READ_ONCE(dev_priv->gt.awake)) {
2977 		i915_queue_hangcheck(dev_priv);
2978 		queue_delayed_work(dev_priv->wq,
2979 				   &dev_priv->gt.retire_work,
2980 				   round_jiffies_up_relative(HZ));
2981 	}
2982 }
2983 
2984 static void
2985 i915_gem_idle_work_handler(struct work_struct *work)
2986 {
2987 	struct drm_i915_private *dev_priv =
2988 		container_of(work, typeof(*dev_priv), gt.idle_work.work);
2989 	struct drm_device *dev = &dev_priv->drm;
2990 	struct intel_engine_cs *engine;
2991 	enum intel_engine_id id;
2992 	bool rearm_hangcheck;
2993 
2994 	if (!READ_ONCE(dev_priv->gt.awake))
2995 		return;
2996 
2997 	if (READ_ONCE(dev_priv->gt.active_engines))
2998 		return;
2999 
3000 	rearm_hangcheck =
3001 		cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
3002 
3003 	if (!mutex_trylock(&dev->struct_mutex)) {
3004 		/* Currently busy, come back later */
3005 		mod_delayed_work(dev_priv->wq,
3006 				 &dev_priv->gt.idle_work,
3007 				 msecs_to_jiffies(50));
3008 		goto out_rearm;
3009 	}
3010 
3011 	if (dev_priv->gt.active_engines)
3012 		goto out_unlock;
3013 
3014 	for_each_engine(engine, dev_priv, id)
3015 		i915_gem_batch_pool_fini(&engine->batch_pool);
3016 
3017 	GEM_BUG_ON(!dev_priv->gt.awake);
3018 	dev_priv->gt.awake = false;
3019 	rearm_hangcheck = false;
3020 
3021 	if (INTEL_GEN(dev_priv) >= 6)
3022 		gen6_rps_idle(dev_priv);
3023 	intel_runtime_pm_put(dev_priv);
3024 out_unlock:
3025 	mutex_unlock(&dev->struct_mutex);
3026 
3027 out_rearm:
3028 	if (rearm_hangcheck) {
3029 		GEM_BUG_ON(!dev_priv->gt.awake);
3030 		i915_queue_hangcheck(dev_priv);
3031 	}
3032 }
3033 
3034 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
3035 {
3036 	struct drm_i915_gem_object *obj = to_intel_bo(gem);
3037 	struct drm_i915_file_private *fpriv = file->driver_priv;
3038 	struct i915_vma *vma, *vn;
3039 
3040 	mutex_lock(&obj->base.dev->struct_mutex);
3041 	list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
3042 		if (vma->vm->file == fpriv)
3043 			i915_vma_close(vma);
3044 	mutex_unlock(&obj->base.dev->struct_mutex);
3045 }
3046 
3047 /**
3048  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3049  * @dev: drm device pointer
3050  * @data: ioctl data blob
3051  * @file: drm file pointer
3052  *
3053  * Returns 0 if successful, else an error is returned with the remaining time in
3054  * the timeout parameter.
3055  *  -ETIME: object is still busy after timeout
3056  *  -ERESTARTSYS: signal interrupted the wait
3057  *  -ENOENT: object doesn't exist
3058  * Also possible, but rare:
3059  *  -EAGAIN: GPU wedged
3060  *  -ENOMEM: damn
3061  *  -ENODEV: Internal IRQ fail
3062  *  -E?: The add request failed
3063  *
3064  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3065  * non-zero timeout parameter the wait ioctl will wait for the given number of
3066  * nanoseconds on an object becoming unbusy. Since the wait itself does so
3067  * without holding struct_mutex the object may become re-busied before this
3068  * function completes. A similar but shorter race condition exists in the busy
3069  * ioctl.
3070  */
3071 int
3072 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3073 {
3074 	struct drm_i915_gem_wait *args = data;
3075 	struct intel_rps_client *rps = to_rps_client(file);
3076 	struct drm_i915_gem_object *obj;
3077 	unsigned long active;
3078 	int idx, ret = 0;
3079 
3080 	if (args->flags != 0)
3081 		return -EINVAL;
3082 
3083 	obj = i915_gem_object_lookup(file, args->bo_handle);
3084 	if (!obj)
3085 		return -ENOENT;
3086 
3087 	active = __I915_BO_ACTIVE(obj);
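	/* Wait on each engine that still has the object active; a negative
	 * timeout_ns means wait indefinitely.
	 */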
3088 	for_each_active(active, idx) {
3089 		s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL;
3090 		ret = i915_gem_active_wait_unlocked(&obj->last_read[idx],
3091 						    I915_WAIT_INTERRUPTIBLE,
3092 						    timeout, rps);
3093 		if (ret)
3094 			break;
3095 	}
3096 
3097 	i915_gem_object_put_unlocked(obj);
3098 	return ret;
3099 }
3100 
3101 static void __i915_vma_iounmap(struct i915_vma *vma)
3102 {
3103 	GEM_BUG_ON(i915_vma_is_pinned(vma));
3104 
3105 	if (vma->iomap == NULL)
3106 		return;
3107 
3108 	io_mapping_unmap(vma->iomap);
3109 	vma->iomap = NULL;
3110 }
3111 
3112 int i915_vma_unbind(struct i915_vma *vma)
3113 {
3114 	struct drm_i915_gem_object *obj = vma->obj;
3115 	unsigned long active;
3116 	int ret;
3117 
3118 	/* First wait upon any activity as retiring the request may
3119 	 * have side-effects such as unpinning or even unbinding this vma.
3120 	 */
3121 	active = i915_vma_get_active(vma);
3122 	if (active) {
3123 		int idx;
3124 
3125 		/* When a closed VMA is retired, it is unbound - eek.
3126 		 * In order to prevent it from being recursively closed,
3127 		 * take a pin on the vma so that the second unbind is
3128 		 * aborted.
3129 		 */
3130 		__i915_vma_pin(vma);
3131 
3132 		for_each_active(active, idx) {
3133 			ret = i915_gem_active_retire(&vma->last_read[idx],
3134 						   &vma->vm->dev->struct_mutex);
3135 			if (ret)
3136 				break;
3137 		}
3138 
3139 		__i915_vma_unpin(vma);
3140 		if (ret)
3141 			return ret;
3142 
3143 		GEM_BUG_ON(i915_vma_is_active(vma));
3144 	}
3145 
3146 	if (i915_vma_is_pinned(vma))
3147 		return -EBUSY;
3148 
3149 	if (!drm_mm_node_allocated(&vma->node))
3150 		goto destroy;
3151 
3152 	GEM_BUG_ON(obj->bind_count == 0);
3153 	GEM_BUG_ON(!obj->pages);
3154 
3155 	if (i915_vma_is_map_and_fenceable(vma)) {
3156 		/* release the fence reg _after_ flushing */
3157 		ret = i915_vma_put_fence(vma);
3158 		if (ret)
3159 			return ret;
3160 
3161 		/* Force a pagefault for domain tracking on next user access */
3162 		i915_gem_release_mmap(obj);
3163 
3164 		__i915_vma_iounmap(vma);
3165 		vma->flags &= ~I915_VMA_CAN_FENCE;
3166 	}
3167 
3168 	if (likely(!vma->vm->closed)) {
3169 		trace_i915_vma_unbind(vma);
3170 		vma->vm->unbind_vma(vma);
3171 	}
3172 	vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
3173 
3174 	drm_mm_remove_node(&vma->node);
3175 	list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
3176 
3177 	if (vma->pages != obj->pages) {
3178 		GEM_BUG_ON(!vma->pages);
3179 		sg_free_table(vma->pages);
3180 		kfree(vma->pages);
3181 	}
3182 	vma->pages = NULL;
3183 
3184 	/* Since the unbound list is global, only move to that list if
3185 	 * no more VMAs exist. */
3186 	if (--obj->bind_count == 0)
3187 		list_move_tail(&obj->global_list,
3188 			       &to_i915(obj->base.dev)->mm.unbound_list);
3189 
3190 	/* And finally now the object is completely decoupled from this vma,
3191 	 * we can drop its hold on the backing storage and allow it to be
3192 	 * reaped by the shrinker.
3193 	 */
3194 	i915_gem_object_unpin_pages(obj);
3195 
3196 destroy:
3197 	if (unlikely(i915_vma_is_closed(vma)))
3198 		i915_vma_destroy(vma);
3199 
3200 	return 0;
3201 }
3202 
3203 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
3204 			   unsigned int flags)
3205 {
3206 	struct intel_engine_cs *engine;
3207 	enum intel_engine_id id;
3208 	int ret;
3209 
3210 	for_each_engine(engine, dev_priv, id) {
3211 		if (engine->last_context == NULL)
3212 			continue;
3213 
3214 		ret = intel_engine_idle(engine, flags);
3215 		if (ret)
3216 			return ret;
3217 	}
3218 
3219 	return 0;
3220 }
3221 
3222 static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
3223 				     unsigned long cache_level)
3224 {
3225 	struct drm_mm_node *gtt_space = &vma->node;
3226 	struct drm_mm_node *other;
3227 
3228 	/*
3229 	 * On some machines we have to be careful when putting differing types
3230 	 * of snoopable memory together to avoid the prefetcher crossing memory
3231 	 * domains and dying. During vm initialisation, we decide whether or not
3232 	 * these constraints apply and set the drm_mm.color_adjust
3233 	 * appropriately.
3234 	 */
3235 	if (vma->vm->mm.color_adjust == NULL)
3236 		return true;
3237 
3238 	if (!drm_mm_node_allocated(gtt_space))
3239 		return true;
3240 
3241 	if (list_empty(&gtt_space->node_list))
3242 		return true;
3243 
3244 	other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3245 	if (other->allocated && !other->hole_follows && other->color != cache_level)
3246 		return false;
3247 
3248 	other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3249 	if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3250 		return false;
3251 
3252 	return true;
3253 }
3254 
3255 /**
3256  * i915_vma_insert - finds a slot for the vma in its address space
3257  * @vma: the vma
3258  * @size: requested size in bytes (can be larger than the VMA)
3259  * @alignment: required alignment
3260  * @flags: mask of PIN_* flags to use
3261  *
3262  * First we try to allocate some free space that meets the requirements for
3263  * the VMA. Failing that, if the flags permit, it will evict an old VMA,
3264  * preferably the oldest idle entry to make room for the new VMA.
3265  *
3266  * Returns:
3267  * 0 on success, negative error code otherwise.
3268  */
3269 static int
3270 i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
3271 {
3272 	struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
3273 	struct drm_i915_gem_object *obj = vma->obj;
3274 	u64 start, end;
3275 	int ret;
3276 
3277 	GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
3278 	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
3279 
3280 	size = max(size, vma->size);
3281 	if (flags & PIN_MAPPABLE)
3282 		size = i915_gem_get_ggtt_size(dev_priv, size,
3283 					      i915_gem_object_get_tiling(obj));
3284 
3285 	alignment = max(max(alignment, vma->display_alignment),
3286 			i915_gem_get_ggtt_alignment(dev_priv, size,
3287 						    i915_gem_object_get_tiling(obj),
3288 						    flags & PIN_MAPPABLE));
3289 
3290 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3291 
3292 	end = vma->vm->total;
3293 	if (flags & PIN_MAPPABLE)
3294 		end = min_t(u64, end, dev_priv->ggtt.mappable_end);
3295 	if (flags & PIN_ZONE_4G)
3296 		end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
3297 
3298 	/* If binding the object/GGTT view requires more space than the entire
3299 	 * aperture has, reject it early before evicting everything in a vain
3300 	 * attempt to find space.
3301 	 */
3302 	if (size > end) {
3303 		DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
3304 			  size, obj->base.size,
3305 			  flags & PIN_MAPPABLE ? "mappable" : "total",
3306 			  end);
3307 		return -E2BIG;
3308 	}
3309 
3310 	ret = i915_gem_object_get_pages(obj);
3311 	if (ret)
3312 		return ret;
3313 
3314 	i915_gem_object_pin_pages(obj);
3315 
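	/* A fixed GGTT offset was requested: reserve exactly that range,
	 * evicting any overlapping VMA if the first attempt fails.
	 */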
3316 	if (flags & PIN_OFFSET_FIXED) {
3317 		u64 offset = flags & PIN_OFFSET_MASK;
3318 		if (offset & (alignment - 1) || offset > end - size) {
3319 			ret = -EINVAL;
3320 			goto err_unpin;
3321 		}
3322 
3323 		vma->node.start = offset;
3324 		vma->node.size = size;
3325 		vma->node.color = obj->cache_level;
3326 		ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
3327 		if (ret) {
3328 			ret = i915_gem_evict_for_vma(vma);
3329 			if (ret == 0)
3330 				ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
3331 			if (ret)
3332 				goto err_unpin;
3333 		}
3334 	} else {
3335 		u32 search_flag, alloc_flag;
3336 
3337 		if (flags & PIN_HIGH) {
3338 			search_flag = DRM_MM_SEARCH_BELOW;
3339 			alloc_flag = DRM_MM_CREATE_TOP;
3340 		} else {
3341 			search_flag = DRM_MM_SEARCH_DEFAULT;
3342 			alloc_flag = DRM_MM_CREATE_DEFAULT;
3343 		}
3344 
3345 		/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
3346 		 * so we know that we always have a minimum alignment of 4096.
3347 		 * The drm_mm range manager is optimised to return results
3348 		 * with zero alignment, so where possible use the optimal
3349 		 * path.
3350 		 */
3351 		if (alignment <= 4096)
3352 			alignment = 0;
3353 
3354 search_free:
3355 		ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
3356 							  &vma->node,
3357 							  size, alignment,
3358 							  obj->cache_level,
3359 							  start, end,
3360 							  search_flag,
3361 							  alloc_flag);
3362 		if (ret) {
3363 			ret = i915_gem_evict_something(vma->vm, size, alignment,
3364 						       obj->cache_level,
3365 						       start, end,
3366 						       flags);
3367 			if (ret == 0)
3368 				goto search_free;
3369 
3370 			goto err_unpin;
3371 		}
3372 
3373 		GEM_BUG_ON(vma->node.start < start);
3374 		GEM_BUG_ON(vma->node.start + vma->node.size > end);
3375 	}
3376 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
3377 
3378 	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
3379 	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3380 	obj->bind_count++;
3381 
3382 	return 0;
3383 
3384 err_unpin:
3385 	i915_gem_object_unpin_pages(obj);
3386 	return ret;
3387 }
3388 
3389 bool
3390 i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3391 			bool force)
3392 {
3393 	/* If we don't have a page list set up, then we're not pinned
3394 	 * to GPU, and we can ignore the cache flush because it'll happen
3395 	 * again at bind time.
3396 	 */
3397 	if (obj->pages == NULL)
3398 		return false;
3399 
3400 	/*
3401 	 * Stolen memory is always coherent with the GPU as it is explicitly
3402 	 * marked as wc by the system, or the system is cache-coherent.
3403 	 */
3404 	if (obj->stolen || obj->phys_handle)
3405 		return false;
3406 
3407 	/* If the GPU is snooping the contents of the CPU cache,
3408 	 * we do not need to manually clear the CPU cache lines.  However,
3409 	 * the caches are only snooped when the render cache is
3410 	 * flushed/invalidated.  As we always have to emit invalidations
3411 	 * and flushes when moving into and out of the RENDER domain, correct
3412 	 * snooping behaviour occurs naturally as the result of our domain
3413 	 * tracking.
3414 	 */
3415 	if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3416 		obj->cache_dirty = true;
3417 		return false;
3418 	}
3419 
3420 	trace_i915_gem_object_clflush(obj);
3421 	drm_clflush_sg(obj->pages);
3422 	obj->cache_dirty = false;
3423 
3424 	return true;
3425 }
3426 
3427 /** Flushes the GTT write domain for the object if it's dirty. */
3428 static void
3429 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3430 {
3431 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3432 
3433 	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3434 		return;
3435 
3436 	/* No actual flushing is required for the GTT write domain.  Writes
3437 	 * to it "immediately" go to main memory as far as we know, so there's
3438 	 * no chipset flush.  It also doesn't land in render cache.
3439 	 *
3440 	 * However, we do have to enforce the order so that all writes through
3441 	 * the GTT land before any writes to the device, such as updates to
3442 	 * the GATT itself.
3443 	 *
3444 	 * We also have to wait a bit for the writes to land from the GTT.
3445 	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
3446 	 * timing. This issue has only been observed when switching quickly
3447 	 * between GTT writes and CPU reads from inside the kernel on recent hw,
3448 	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
3449 	 * system agents we cannot reproduce this behaviour).
3450 	 */
3451 	wmb();
3452 	if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
3453 		POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
3454 
3455 	intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT));
3456 
3457 	obj->base.write_domain = 0;
3458 	trace_i915_gem_object_change_domain(obj,
3459 					    obj->base.read_domains,
3460 					    I915_GEM_DOMAIN_GTT);
3461 }
3462 
3463 /** Flushes the CPU write domain for the object if it's dirty. */
3464 static void
3465 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
3466 {
3467 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
3468 		return;
3469 
3470 	if (i915_gem_clflush_object(obj, obj->pin_display))
3471 		i915_gem_chipset_flush(to_i915(obj->base.dev));
3472 
3473 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
3474 
3475 	obj->base.write_domain = 0;
3476 	trace_i915_gem_object_change_domain(obj,
3477 					    obj->base.read_domains,
3478 					    I915_GEM_DOMAIN_CPU);
3479 }
3480 
3481 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
3482 {
3483 	struct i915_vma *vma;
3484 
3485 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
3486 		if (!i915_vma_is_ggtt(vma))
3487 			continue;
3488 
3489 		if (i915_vma_is_active(vma))
3490 			continue;
3491 
3492 		if (!drm_mm_node_allocated(&vma->node))
3493 			continue;
3494 
3495 		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3496 	}
3497 }
3498 
3499 /**
3500  * Moves a single object to the GTT read, and possibly write domain.
3501  * @obj: object to act on
3502  * @write: ask for write access or read only
3503  *
3504  * This function returns when the move is complete, including waiting on
3505  * flushes to occur.
3506  */
3507 int
3508 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3509 {
3510 	uint32_t old_write_domain, old_read_domains;
3511 	int ret;
3512 
3513 	ret = i915_gem_object_wait_rendering(obj, !write);
3514 	if (ret)
3515 		return ret;
3516 
3517 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3518 		return 0;
3519 
3520 	/* Flush and acquire obj->pages so that we are coherent through
3521 	 * direct access in memory with previous cached writes through
3522 	 * shmemfs and that our cache domain tracking remains valid.
3523 	 * For example, if the obj->filp was moved to swap without us
3524 	 * being notified and releasing the pages, we would mistakenly
3525 	 * continue to assume that the obj remained out of the CPU cached
3526 	 * domain.
3527 	 */
3528 	ret = i915_gem_object_get_pages(obj);
3529 	if (ret)
3530 		return ret;
3531 
3532 	i915_gem_object_flush_cpu_write_domain(obj);
3533 
3534 	/* Serialise direct access to this object with the barriers for
3535 	 * coherent writes from the GPU, by effectively invalidating the
3536 	 * GTT domain upon first access.
3537 	 */
3538 	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3539 		mb();
3540 
3541 	old_write_domain = obj->base.write_domain;
3542 	old_read_domains = obj->base.read_domains;
3543 
3544 	/* It should now be out of any other write domains, and we can update
3545 	 * the domain values for our changes.
3546 	 */
3547 	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3548 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3549 	if (write) {
3550 		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3551 		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3552 		obj->dirty = 1;
3553 	}
3554 
3555 	trace_i915_gem_object_change_domain(obj,
3556 					    old_read_domains,
3557 					    old_write_domain);
3558 
3559 	/* And bump the LRU for this access */
3560 	i915_gem_object_bump_inactive_ggtt(obj);
3561 
3562 	return 0;
3563 }
3564 
3565 /**
3566  * Changes the cache-level of an object across all VMA.
3567  * @obj: object to act on
3568  * @cache_level: new cache level to set for the object
3569  *
3570  * After this function returns, the object will be in the new cache-level
3571  * across all GTT and the contents of the backing storage will be coherent,
3572  * with respect to the new cache-level. In order to keep the backing storage
3573  * coherent for all users, we only allow a single cache level to be set
3574  * globally on the object and prevent it from being changed whilst the
3575  * hardware is reading from the object. That is, if the object is currently
3576  * on the scanout it will be set to uncached (or equivalent display
3577  * cache coherency) and all non-MOCS GPU access will also be uncached so
3578  * that all direct access to the scanout remains coherent.
3579  */
3580 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3581 				    enum i915_cache_level cache_level)
3582 {
3583 	struct i915_vma *vma;
3584 	int ret = 0;
3585 
3586 	if (obj->cache_level == cache_level)
3587 		goto out;
3588 
3589 	/* Inspect the list of currently bound VMA and unbind any that would
3590 	 * be invalid given the new cache-level. This is principally to
3591 	 * catch the issue of the CS prefetch crossing page boundaries and
3592 	 * reading an invalid PTE on older architectures.
3593 	 */
3594 restart:
3595 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
3596 		if (!drm_mm_node_allocated(&vma->node))
3597 			continue;
3598 
3599 		if (i915_vma_is_pinned(vma)) {
3600 			DRM_DEBUG("can not change the cache level of pinned objects\n");
3601 			return -EBUSY;
3602 		}
3603 
3604 		if (i915_gem_valid_gtt_space(vma, cache_level))
3605 			continue;
3606 
3607 		ret = i915_vma_unbind(vma);
3608 		if (ret)
3609 			return ret;
3610 
3611 		/* As unbinding may affect other elements in the
3612 		 * obj->vma_list (due to side-effects from retiring
3613 		 * an active vma), play safe and restart the iterator.
3614 		 */
3615 		goto restart;
3616 	}
3617 
3618 	/* We can reuse the existing drm_mm nodes but need to change the
3619 	 * cache-level on the PTE. We could simply unbind them all and
3620 	 * rebind with the correct cache-level on next use. However since
3621 	 * we already have a valid slot, dma mapping, pages etc, we may as well
3622 	 * rewrite the PTE in the belief that doing so tramples upon less
3623 	 * state and so involves less work.
3624 	 */
3625 	if (obj->bind_count) {
3626 		/* Before we change the PTE, the GPU must not be accessing it.
3627 		 * If we wait upon the object, we know that all the bound
3628 		 * VMA are no longer active.
3629 		 */
3630 		ret = i915_gem_object_wait_rendering(obj, false);
3631 		if (ret)
3632 			return ret;
3633 
3634 		if (!HAS_LLC(obj->base.dev) && cache_level != I915_CACHE_NONE) {
3635 			/* Access to snoopable pages through the GTT is
3636 			 * incoherent and on some machines causes a hard
3637 			 * lockup. Relinquish the CPU mmapping to force
3638 			 * userspace to refault in the pages and we can
3639 			 * then double check if the GTT mapping is still
3640 			 * valid for that pointer access.
3641 			 */
3642 			i915_gem_release_mmap(obj);
3643 
3644 			/* As we no longer need a fence for GTT access,
3645 			 * we can relinquish it now (and so prevent having
3646 			 * to steal a fence from someone else on the next
3647 			 * fence request). Note GPU activity would have
3648 			 * dropped the fence as all snoopable access is
3649 			 * supposed to be linear.
3650 			 */
3651 			list_for_each_entry(vma, &obj->vma_list, obj_link) {
3652 				ret = i915_vma_put_fence(vma);
3653 				if (ret)
3654 					return ret;
3655 			}
3656 		} else {
3657 			/* We either have incoherent backing store and
3658 			 * so no GTT access or the architecture is fully
3659 			 * coherent. In such cases, existing GTT mmaps
3660 			 * ignore the cache bit in the PTE and we can
3661 			 * rewrite it without confusing the GPU or having
3662 			 * to force userspace to fault back in its mmaps.
3663 			 */
3664 		}
3665 
3666 		list_for_each_entry(vma, &obj->vma_list, obj_link) {
3667 			if (!drm_mm_node_allocated(&vma->node))
3668 				continue;
3669 
3670 			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3671 			if (ret)
3672 				return ret;
3673 		}
3674 	}
3675 
3676 	list_for_each_entry(vma, &obj->vma_list, obj_link)
3677 		vma->node.color = cache_level;
3678 	obj->cache_level = cache_level;
3679 
3680 out:
3681 	/* Flush the dirty CPU caches to the backing storage so that the
3682 	 * object is now coherent at its new cache level (with respect
3683 	 * to the access domain).
3684 	 */
3685 	if (obj->cache_dirty && cpu_write_needs_clflush(obj)) {
3686 		if (i915_gem_clflush_object(obj, true))
3687 			i915_gem_chipset_flush(to_i915(obj->base.dev));
3688 	}
3689 
3690 	return 0;
3691 }
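/*
 * A minimal usage sketch, mirroring the two callers in this file
 * (i915_gem_set_caching_ioctl() and i915_gem_object_pin_to_display_plane()):
 *
 *	ret = i915_gem_object_set_cache_level(obj,
 *					      HAS_WT(dev_priv) ? I915_CACHE_WT :
 *								 I915_CACHE_NONE);
 *	if (ret)
 *		return ret;
 */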
3692 
3693 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3694 			       struct drm_file *file)
3695 {
3696 	struct drm_i915_gem_caching *args = data;
3697 	struct drm_i915_gem_object *obj;
3698 
3699 	obj = i915_gem_object_lookup(file, args->handle);
3700 	if (!obj)
3701 		return -ENOENT;
3702 
3703 	switch (obj->cache_level) {
3704 	case I915_CACHE_LLC:
3705 	case I915_CACHE_L3_LLC:
3706 		args->caching = I915_CACHING_CACHED;
3707 		break;
3708 
3709 	case I915_CACHE_WT:
3710 		args->caching = I915_CACHING_DISPLAY;
3711 		break;
3712 
3713 	default:
3714 		args->caching = I915_CACHING_NONE;
3715 		break;
3716 	}
3717 
3718 	i915_gem_object_put_unlocked(obj);
3719 	return 0;
3720 }
3721 
3722 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3723 			       struct drm_file *file)
3724 {
3725 	struct drm_i915_private *dev_priv = to_i915(dev);
3726 	struct drm_i915_gem_caching *args = data;
3727 	struct drm_i915_gem_object *obj;
3728 	enum i915_cache_level level;
3729 	int ret;
3730 
3731 	switch (args->caching) {
3732 	case I915_CACHING_NONE:
3733 		level = I915_CACHE_NONE;
3734 		break;
3735 	case I915_CACHING_CACHED:
3736 		/*
3737 		 * Due to a HW issue on BXT A stepping, GPU stores via a
3738 		 * snooped mapping may leave stale data in a corresponding CPU
3739 		 * cacheline, whereas normally such cachelines would get
3740 		 * invalidated.
3741 		 */
3742 		if (!HAS_LLC(dev) && !HAS_SNOOP(dev))
3743 			return -ENODEV;
3744 
3745 		level = I915_CACHE_LLC;
3746 		break;
3747 	case I915_CACHING_DISPLAY:
3748 		level = HAS_WT(dev_priv) ? I915_CACHE_WT : I915_CACHE_NONE;
3749 		break;
3750 	default:
3751 		return -EINVAL;
3752 	}
3753 
3754 	intel_runtime_pm_get(dev_priv);
3755 
3756 	ret = i915_mutex_lock_interruptible(dev);
3757 	if (ret)
3758 		goto rpm_put;
3759 
3760 	obj = i915_gem_object_lookup(file, args->handle);
3761 	if (!obj) {
3762 		ret = -ENOENT;
3763 		goto unlock;
3764 	}
3765 
3766 	ret = i915_gem_object_set_cache_level(obj, level);
3767 
3768 	i915_gem_object_put(obj);
3769 unlock:
3770 	mutex_unlock(&dev->struct_mutex);
3771 rpm_put:
3772 	intel_runtime_pm_put(dev_priv);
3773 
3774 	return ret;
3775 }
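/*
 * Illustrative sketch of the userspace side of the ioctl above (assuming
 * libdrm's drmIoctl(); not part of the driver):
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 */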
3776 
3777 /*
3778  * Prepare buffer for display plane (scanout, cursors, etc).
3779  * Can be called from an uninterruptible phase (modesetting) and allows
3780  * any flushes to be pipelined (for pageflips).
3781  */
3782 struct i915_vma *
3783 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3784 				     u32 alignment,
3785 				     const struct i915_ggtt_view *view)
3786 {
3787 	struct i915_vma *vma;
3788 	u32 old_read_domains, old_write_domain;
3789 	int ret;
3790 
3791 	/* Mark the pin_display early so that we account for the
3792 	 * display coherency whilst setting up the cache domains.
3793 	 */
3794 	obj->pin_display++;
3795 
3796 	/* The display engine is not coherent with the LLC cache on gen6.  As
3797 	 * a result, we make sure that the pinning that is about to occur is
3798 	 * done with uncached PTEs. This is lowest common denominator for all
3799 	 * chipsets.
3800 	 *
3801 	 * However for gen6+, we could do better by using the GFDT bit instead
3802 	 * of uncaching, which would allow us to flush all the LLC-cached data
3803 	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3804 	 */
3805 	ret = i915_gem_object_set_cache_level(obj,
3806 					      HAS_WT(to_i915(obj->base.dev)) ?
3807 					      I915_CACHE_WT : I915_CACHE_NONE);
3808 	if (ret) {
3809 		vma = ERR_PTR(ret);
3810 		goto err_unpin_display;
3811 	}
3812 
3813 	/* As the user may map the buffer once pinned in the display plane
3814 	 * (e.g. libkms for the bootup splash), we have to ensure that we
3815 	 * always use map_and_fenceable for all scanout buffers. However,
3816 	 * it may simply be too big to fit into mappable, in which case
3817 	 * put it anyway and hope that userspace can cope (but always first
3818 	 * try to preserve the existing ABI).
3819 	 */
3820 	vma = ERR_PTR(-ENOSPC);
3821 	if (view->type == I915_GGTT_VIEW_NORMAL)
3822 		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
3823 					       PIN_MAPPABLE | PIN_NONBLOCK);
3824 	if (IS_ERR(vma))
3825 		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0);
3826 	if (IS_ERR(vma))
3827 		goto err_unpin_display;
3828 
3829 	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
3830 
3831 	WARN_ON(obj->pin_display > i915_vma_pin_count(vma));
3832 
3833 	i915_gem_object_flush_cpu_write_domain(obj);
3834 
3835 	old_write_domain = obj->base.write_domain;
3836 	old_read_domains = obj->base.read_domains;
3837 
3838 	/* It should now be out of any other write domains, and we can update
3839 	 * the domain values for our changes.
3840 	 */
3841 	obj->base.write_domain = 0;
3842 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3843 
3844 	trace_i915_gem_object_change_domain(obj,
3845 					    old_read_domains,
3846 					    old_write_domain);
3847 
3848 	return vma;
3849 
3850 err_unpin_display:
3851 	obj->pin_display--;
3852 	return vma;
3853 }
3854 
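/*
 * Release a scanout pin taken by i915_gem_object_pin_to_display_plane()
 * above: drop the object's pin_display count, bump the vma on the inactive
 * LRU so it is not evicted prematurely, and unpin the vma itself. Calls
 * must be balanced with the pin above.
 */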
3855 void
3856 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
3857 {
3858 	if (WARN_ON(vma->obj->pin_display == 0))
3859 		return;
3860 
3861 	if (--vma->obj->pin_display == 0)
3862 		vma->display_alignment = 0;
3863 
3864 	/* Bump the LRU to try and avoid premature eviction whilst flipping  */
3865 	if (!i915_vma_is_active(vma))
3866 		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3867 
3868 	i915_vma_unpin(vma);
3869 	WARN_ON(vma->obj->pin_display > i915_vma_pin_count(vma));
3870 }
3871 
3872 /**
3873  * Moves a single object to the CPU read, and possibly write domain.
3874  * @obj: object to act on
3875  * @write: requesting write or read-only access
3876  *
3877  * This function returns when the move is complete, including waiting on
3878  * flushes to occur.
3879  */
3880 int
3881 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3882 {
3883 	uint32_t old_write_domain, old_read_domains;
3884 	int ret;
3885 
3886 	ret = i915_gem_object_wait_rendering(obj, !write);
3887 	if (ret)
3888 		return ret;
3889 
3890 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3891 		return 0;
3892 
3893 	i915_gem_object_flush_gtt_write_domain(obj);
3894 
3895 	old_write_domain = obj->base.write_domain;
3896 	old_read_domains = obj->base.read_domains;
3897 
3898 	/* Flush the CPU cache if it's still invalid. */
3899 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3900 		i915_gem_clflush_object(obj, false);
3901 
3902 		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3903 	}
3904 
3905 	/* It should now be out of any other write domains, and we can update
3906 	 * the domain values for our changes.
3907 	 */
3908 	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3909 
3910 	/* If we're writing through the CPU, then the GPU read domains will
3911 	 * need to be invalidated at next use.
3912 	 */
3913 	if (write) {
3914 		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3915 		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3916 	}
3917 
3918 	trace_i915_gem_object_change_domain(obj,
3919 					    old_read_domains,
3920 					    old_write_domain);
3921 
3922 	return 0;
3923 }
3924 
3925 /* Throttle our rendering by waiting until the ring has completed our requests
3926  * emitted over 20 msec ago.
3927  *
3928  * Note that if we were to use the current jiffies each time around the loop,
3929  * we wouldn't escape the function with any frames outstanding if the time to
3930  * render a frame was over 20ms.
3931  *
3932  * This should get us reasonable parallelism between CPU and GPU but also
3933  * relatively low latency when blocking on a particular request to finish.
3934  */
3935 static int
3936 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3937 {
3938 	struct drm_i915_private *dev_priv = to_i915(dev);
3939 	struct drm_i915_file_private *file_priv = file->driver_priv;
3940 	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
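	/* i.e. we only wait on requests emitted more than ~20ms ago (see above) */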
3941 	struct drm_i915_gem_request *request, *target = NULL;
3942 	int ret;
3943 
3944 	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
3945 	if (ret)
3946 		return ret;
3947 
3948 	/* ABI: return -EIO if already wedged */
3949 	if (i915_terminally_wedged(&dev_priv->gpu_error))
3950 		return -EIO;
3951 
3952 	lockmgr(&file_priv->mm.lock, LK_EXCLUSIVE);
3953 	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3954 		if (time_after_eq(request->emitted_jiffies, recent_enough))
3955 			break;
3956 
3957 		/*
3958 		 * Note that the request might not have been submitted yet, in
3959 		 * which case emitted_jiffies will be zero.
3960 		 */
3961 		if (!request->emitted_jiffies)
3962 			continue;
3963 
3964 		target = request;
3965 	}
3966 	if (target)
3967 		i915_gem_request_get(target);
3968 	lockmgr(&file_priv->mm.lock, LK_RELEASE);
3969 
3970 	if (target == NULL)
3971 		return 0;
3972 
3973 	ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
3974 	i915_gem_request_put(target);
3975 
3976 	return ret;
3977 }
3978 
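/*
 * Check whether an existing binding still satisfies the caller's size,
 * alignment and placement constraints (PIN_MAPPABLE, PIN_OFFSET_BIAS,
 * PIN_OFFSET_FIXED). If it does not, the caller unbinds the vma and rebinds
 * it in a suitable location.
 */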
3979 static bool
3980 i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
3981 {
3982 	if (!drm_mm_node_allocated(&vma->node))
3983 		return false;
3984 
3985 	if (vma->node.size < size)
3986 		return true;
3987 
3988 	if (alignment && vma->node.start & (alignment - 1))
3989 		return true;
3990 
3991 	if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
3992 		return true;
3993 
3994 	if (flags & PIN_OFFSET_BIAS &&
3995 	    vma->node.start < (flags & PIN_OFFSET_MASK))
3996 		return true;
3997 
3998 	if (flags & PIN_OFFSET_FIXED &&
3999 	    vma->node.start != (flags & PIN_OFFSET_MASK))
4000 		return true;
4001 
4002 	return false;
4003 }
4004 
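/*
 * Recompute whether this vma can back a fence register: its node must be
 * exactly the fence size and alignment required by the object's tiling and
 * must lie entirely within the mappable aperture. The result is cached in
 * the I915_VMA_CAN_FENCE flag.
 */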
4005 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
4006 {
4007 	struct drm_i915_gem_object *obj = vma->obj;
4008 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
4009 	bool mappable, fenceable;
4010 	u32 fence_size, fence_alignment;
4011 
4012 	fence_size = i915_gem_get_ggtt_size(dev_priv,
4013 					    vma->size,
4014 					    i915_gem_object_get_tiling(obj));
4015 	fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
4016 						      vma->size,
4017 						      i915_gem_object_get_tiling(obj),
4018 						      true);
4019 
4020 	fenceable = (vma->node.size == fence_size &&
4021 		     (vma->node.start & (fence_alignment - 1)) == 0);
4022 
4023 	mappable = (vma->node.start + fence_size <=
4024 		    dev_priv->ggtt.mappable_end);
4025 
4026 	if (mappable && fenceable)
4027 		vma->flags |= I915_VMA_CAN_FENCE;
4028 	else
4029 		vma->flags &= ~I915_VMA_CAN_FENCE;
4030 }
4031 
4032 int __i915_vma_do_pin(struct i915_vma *vma,
4033 		      u64 size, u64 alignment, u64 flags)
4034 {
4035 	unsigned int bound = vma->flags;
4036 	int ret;
4037 
4038 	GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
4039 	GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
4040 
4041 	if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
4042 		ret = -EBUSY;
4043 		goto err;
4044 	}
4045 
4046 	if ((bound & I915_VMA_BIND_MASK) == 0) {
4047 		ret = i915_vma_insert(vma, size, alignment, flags);
4048 		if (ret)
4049 			goto err;
4050 	}
4051 
4052 	ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
4053 	if (ret)
4054 		goto err;
4055 
4056 	if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
4057 		__i915_vma_set_map_and_fenceable(vma);
4058 
4059 	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
4060 	return 0;
4061 
4062 err:
4063 	__i915_vma_unpin(vma);
4064 	return ret;
4065 }
4066 
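/*
 * Look up (or create) the GGTT vma for the given view of the object and pin
 * it, first unbinding the vma if its current placement does not satisfy the
 * requested size, alignment or flags. The binding itself is performed via
 * i915_vma_pin() with PIN_GLOBAL added (see __i915_vma_do_pin() above).
 */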
4067 struct i915_vma *
4068 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
4069 			 const struct i915_ggtt_view *view,
4070 			 u64 size,
4071 			 u64 alignment,
4072 			 u64 flags)
4073 {
4074 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
4075 	struct i915_address_space *vm = &dev_priv->ggtt.base;
4076 	struct i915_vma *vma;
4077 	int ret;
4078 
4079 	vma = i915_gem_obj_lookup_or_create_vma(obj, vm, view);
4080 	if (IS_ERR(vma))
4081 		return vma;
4082 
4083 	if (i915_vma_misplaced(vma, size, alignment, flags)) {
4084 		if (flags & PIN_NONBLOCK &&
4085 		    (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
4086 			return ERR_PTR(-ENOSPC);
4087 
4088 		if (flags & PIN_MAPPABLE) {
4089 			u32 fence_size;
4090 
4091 			fence_size = i915_gem_get_ggtt_size(dev_priv, vma->size,
4092 							    i915_gem_object_get_tiling(obj));
4093 			/* If the required space is larger than the available
4094 			 * aperture, we will not be able to find a slot for the
4095 			 * object and unbinding the object now will be in
4096 			 * vain. Worse, doing so may cause us to ping-pong
4097 			 * the object in and out of the Global GTT and
4098 			 * waste a lot of cycles under the mutex.
4099 			 */
4100 			if (fence_size > dev_priv->ggtt.mappable_end)
4101 				return ERR_PTR(-E2BIG);
4102 
4103 			/* If NONBLOCK is set the caller is optimistically
4104 			 * trying to cache the full object within the mappable
4105 			 * aperture, and *must* have a fallback in place for
4106 			 * situations where we cannot bind the object. We
4107 			 * can be a little more lax here and use the fallback
4108 			 * more often to avoid costly migrations of ourselves
4109 			 * and other objects within the aperture.
4110 			 *
4111 			 * Half-the-aperture is used as a simple heuristic.
4112 			 * More interesting would be to search for a free
4113 			 * block prior to making the commitment to unbind.
4114 			 * That caters for the self-harm case, and with a
4115 			 * little more heuristics (e.g. NOFAULT, NOEVICT)
4116 			 * we could try to minimise harm to others.
4117 			 */
4118 			if (flags & PIN_NONBLOCK &&
4119 			    fence_size > dev_priv->ggtt.mappable_end / 2)
4120 				return ERR_PTR(-ENOSPC);
4121 		}
4122 
4123 		WARN(i915_vma_is_pinned(vma),
4124 		     "bo is already pinned in ggtt with incorrect alignment:"
4125 		     " offset=%08x, req.alignment=%llx,"
4126 		     " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
4127 		     i915_ggtt_offset(vma), alignment,
4128 		     !!(flags & PIN_MAPPABLE),
4129 		     i915_vma_is_map_and_fenceable(vma));
4130 		ret = i915_vma_unbind(vma);
4131 		if (ret)
4132 			return ERR_PTR(ret);
4133 	}
4134 
4135 	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
4136 	if (ret)
4137 		return ERR_PTR(ret);
4138 
4139 	return vma;
4140 }
4141 
4142 static __always_inline unsigned int __busy_read_flag(unsigned int id)
4143 {
4144 	/* Note that we could alias engines in the execbuf API, but
4145 	 * that would be very unwise as it prevents userspace from
4146 	 * fine control over engine selection. Ahem.
4147 	 * having fine control over engine selection. Ahem.
4148 	 * This should be something like EXEC_MAX_ENGINE instead of
4149 	 * I915_NUM_ENGINES.
4150 	 */
4151 	BUILD_BUG_ON(I915_NUM_ENGINES > 16);
4152 	return 0x10000 << id;
4153 }
4154 
4155 static __always_inline unsigned int __busy_write_id(unsigned int id)
4156 {
4157 	/* The uABI guarantees an active writer is also amongst the read
4158 	 * engines. This would be true if we accessed the activity tracking
4159 	 * under the lock, but as we perform the lookup of the object and
4160 	 * its activity locklessly we cannot guarantee that the last_write
4161 	 * being active implies that we have set the same engine flag from
4162 	 * last_read - hence we always set both read and write busy for
4163 	 * last_write.
4164 	 */
4165 	return id | __busy_read_flag(id);
4166 }
4167 
4168 static __always_inline unsigned int
4169 __busy_set_if_active(const struct i915_gem_active *active,
4170 		     unsigned int (*flag)(unsigned int id))
4171 {
4172 	struct drm_i915_gem_request *request;
4173 
4174 	request = rcu_dereference(active->request);
4175 	if (!request || i915_gem_request_completed(request))
4176 		return 0;
4177 
4178 	/* This is racy. See __i915_gem_active_get_rcu() for a detailed
4179 	 * discussion of how to handle the race correctly, but for reporting
4180 	 * the busy state we err on the side of potentially reporting the
4181 	 * wrong engine as being busy (but we guarantee that the result
4182 	 * is at least self-consistent).
4183 	 *
4184 	 * As we use SLAB_DESTROY_BY_RCU, the request may be reallocated
4185 	 * whilst we are inspecting it, even under the RCU read lock we hold.
4186 	 * This means that there is a small window for the engine and/or the
4187 	 * seqno to have been overwritten. The seqno will always be in the
4188 	 * future compared to the intended, and so we know that if that
4189 	 * seqno is idle (on whatever engine) our request is idle and the
4190 	 * return 0 above is correct.
4191 	 *
4192 	 * The issue is that if the engine is switched, it is just as likely
4193 	 * to report that it is busy (but since the switch happened, we know
4194 	 * the request should be idle). So there is a small chance that a busy
4195 	 * result is actually the wrong engine.
4196 	 *
4197 	 * So why don't we care?
4198 	 *
4199 	 * For starters, the busy ioctl is a heuristic that is by definition
4200 	 * racy. Even with perfect serialisation in the driver, the hardware
4201 	 * state is constantly advancing - the state we report to the user
4202 	 * is stale.
4203 	 *
4204 	 * The critical information for the busy-ioctl is whether the object
4205 	 * is idle as userspace relies on that to detect whether its next
4206 	 * access will stall, or if it has missed submitting commands to
4207 	 * the hardware allowing the GPU to stall. We never generate a
4208 	 * false-positive for idleness, thus busy-ioctl is reliable at the
4209 	 * most fundamental level, and we maintain the guarantee that a
4210 	 * busy object left to itself will eventually become idle (and stay
4211 	 * idle!).
4212 	 *
4213 	 * We allow ourselves the leeway of potentially misreporting the busy
4214 	 * state because that is an optimisation heuristic that is constantly
4215 	 * in flux. Being quickly able to detect the busy/idle state is much
4216 	 * more important than accurate logging of exactly which engines were
4217 	 * busy.
4218 	 *
4219 	 * For accuracy in reporting the engine, we could use
4220 	 *
4221 	 *	result = 0;
4222 	 *	request = __i915_gem_active_get_rcu(active);
4223 	 *	if (request) {
4224 	 *		if (!i915_gem_request_completed(request))
4225 	 *			result = flag(request->engine->exec_id);
4226 	 *		i915_gem_request_put(request);
4227 	 *	}
4228 	 *
4229 	 * but that still remains susceptible to both hardware and userspace
4230 	 * races. So we accept making the result of that race slightly worse,
4231 	 * given the rarity of the race and its low impact on the result.
4232 	 */
4233 	return flag(READ_ONCE(request->engine->exec_id));
4234 }
4235 
4236 static __always_inline unsigned int
4237 busy_check_reader(const struct i915_gem_active *active)
4238 {
4239 	return __busy_set_if_active(active, __busy_read_flag);
4240 }
4241 
4242 static __always_inline unsigned int
4243 busy_check_writer(const struct i915_gem_active *active)
4244 {
4245 	return __busy_set_if_active(active, __busy_write_id);
4246 }
4247 
4248 int
4249 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4250 		    struct drm_file *file)
4251 {
4252 	struct drm_i915_gem_busy *args = data;
4253 	struct drm_i915_gem_object *obj;
4254 	unsigned long active;
4255 
4256 	obj = i915_gem_object_lookup(file, args->handle);
4257 	if (!obj)
4258 		return -ENOENT;
4259 
4260 	args->busy = 0;
4261 	active = __I915_BO_ACTIVE(obj);
4262 	if (active) {
4263 		int idx;
4264 
4265 		/* Yes, the lookups are intentionally racy.
4266 		 *
4267 		 * First, we cannot simply rely on __I915_BO_ACTIVE. We have
4268 		 * to regard the value as stale and as our ABI guarantees
4269 		 * forward progress, we confirm the status of each active
4270 		 * request with the hardware.
4271 		 *
4272 		 * Even though we guard the pointer lookup by RCU, that only
4273 		 * guarantees that the pointer and its contents remain
4274 		 * dereferenceable and does *not* mean that the request we
4275 		 * have is the same as the one being tracked by the object.
4276 		 *
4277 		 * Consider that we lookup the request just as it is being
4278 		 * retired and freed. We take a local copy of the pointer,
4279 		 * but before we add its engine into the busy set, the other
4280 		 * thread reallocates it and assigns it to a task on another
4281 		 * engine with a fresh and incomplete seqno. Guarding against
4282 		 * that requires careful serialisation and reference counting,
4283 		 * i.e. using __i915_gem_active_get_request_rcu(). We don't,
4284 		 * instead we expect that if the result is busy, which engines
4285 		 * are busy is not completely reliable - we only guarantee
4286 		 * that the object was busy.
4287 		 */
4288 		rcu_read_lock();
4289 
4290 		for_each_active(active, idx)
4291 			args->busy |= busy_check_reader(&obj->last_read[idx]);
4292 
4293 		/* For ABI sanity, we only care that the write engine is in
4294 		 * the set of read engines. This is ensured by the ordering
4295 		 * of setting last_read/last_write in i915_vma_move_to_active,
4296 		 * and then in reverse in retire.
4297 		 *
4298 		 * We don't care that the set of active read/write engines
4299 		 * may change during construction of the result, as it is
4300 		 * equally liable to change before userspace can inspect
4301 		 * the result.
4302 		 */
4303 		args->busy |= busy_check_writer(&obj->last_write);
4304 
4305 		rcu_read_unlock();
4306 	}
4307 
4308 	i915_gem_object_put_unlocked(obj);
4309 	return 0;
4310 }
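/*
 * The reported args->busy therefore packs the writer's engine exec_id into
 * the low 16 bits and a bitmask of reading engines into the high 16 bits
 * (see __busy_read_flag() and __busy_write_id() above), e.g. for userspace:
 *
 *	if (args.busy) {
 *		unsigned int write_engine = args.busy & 0xffff;
 *		unsigned int read_engines = args.busy >> 16;
 *	}
 */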
4311 
4312 int
4313 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4314 			struct drm_file *file_priv)
4315 {
4316 	return i915_gem_ring_throttle(dev, file_priv);
4317 }
4318 
4319 int
4320 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4321 		       struct drm_file *file_priv)
4322 {
4323 	struct drm_i915_private *dev_priv = to_i915(dev);
4324 	struct drm_i915_gem_madvise *args = data;
4325 	struct drm_i915_gem_object *obj;
4326 	int ret;
4327 
4328 	switch (args->madv) {
4329 	case I915_MADV_DONTNEED:
4330 	case I915_MADV_WILLNEED:
4331 	    break;
4332 	default:
4333 	    return -EINVAL;
4334 	}
4335 
4336 	ret = i915_mutex_lock_interruptible(dev);
4337 	if (ret)
4338 		return ret;
4339 
4340 	obj = i915_gem_object_lookup(file_priv, args->handle);
4341 	if (!obj) {
4342 		ret = -ENOENT;
4343 		goto unlock;
4344 	}
4345 
4346 	if (obj->pages &&
4347 	    i915_gem_object_is_tiled(obj) &&
4348 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
4349 		if (obj->madv == I915_MADV_WILLNEED)
4350 			i915_gem_object_unpin_pages(obj);
4351 		if (args->madv == I915_MADV_WILLNEED)
4352 			i915_gem_object_pin_pages(obj);
4353 	}
4354 
4355 	if (obj->madv != __I915_MADV_PURGED)
4356 		obj->madv = args->madv;
4357 
4358 	/* if the object is no longer attached, discard its backing storage */
4359 	if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
4360 		i915_gem_object_truncate(obj);
4361 
4362 	args->retained = obj->madv != __I915_MADV_PURGED;
4363 
4364 	i915_gem_object_put(obj);
4365 unlock:
4366 	mutex_unlock(&dev->struct_mutex);
4367 	return ret;
4368 }
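/*
 * Illustrative userspace pattern (e.g. a libdrm buffer cache; not part of
 * the driver): mark an idle buffer purgeable, then on reuse ask for it back
 * and check whether the kernel kept the backing pages:
 *
 *	struct drm_i915_gem_madvise madv = {
 *		.handle = handle,
 *		.madv = I915_MADV_DONTNEED,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 *	...
 *	madv.madv = I915_MADV_WILLNEED;
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 *	if (!madv.retained)
 *		... the contents were purged and must be regenerated ...
 */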
4369 
4370 void i915_gem_object_init(struct drm_i915_gem_object *obj,
4371 			  const struct drm_i915_gem_object_ops *ops)
4372 {
4373 	int i;
4374 
4375 	INIT_LIST_HEAD(&obj->global_list);
4376 	for (i = 0; i < I915_NUM_ENGINES; i++)
4377 		init_request_active(&obj->last_read[i],
4378 				    i915_gem_object_retire__read);
4379 	init_request_active(&obj->last_write,
4380 			    i915_gem_object_retire__write);
4381 	INIT_LIST_HEAD(&obj->obj_exec_link);
4382 	INIT_LIST_HEAD(&obj->vma_list);
4383 	INIT_LIST_HEAD(&obj->batch_pool_link);
4384 
4385 	obj->ops = ops;
4386 
4387 	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
4388 	obj->madv = I915_MADV_WILLNEED;
4389 
4390 	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
4391 }
4392 
4393 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4394 	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
4395 	.get_pages = i915_gem_object_get_pages_gtt,
4396 	.put_pages = i915_gem_object_put_pages_gtt,
4397 };
4398 
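/*
 * Allocate a new GEM object of @size bytes using the default (shmemfs
 * backed) object ops, starting in the CPU read/write domain and with a
 * cache level chosen from whether the platform has an LLC.
 */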
4399 struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev,
4400 						  size_t size)
4401 {
4402 	struct drm_i915_gem_object *obj;
4403 #if 0
4404 	struct address_space *mapping;
4405 	gfp_t mask;
4406 #endif
4407 	int ret;
4408 
4409 	obj = i915_gem_object_alloc(dev);
4410 	if (obj == NULL)
4411 		return ERR_PTR(-ENOMEM);
4412 
4413 	ret = drm_gem_object_init(dev, &obj->base, size);
4414 	if (ret)
4415 		goto fail;
4416 
4417 #if 0
4418 	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4419 	if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
4420 		/* 965gm cannot relocate objects above 4GiB. */
4421 		mask &= ~__GFP_HIGHMEM;
4422 		mask |= __GFP_DMA32;
4423 	}
4424 
4425 	mapping = obj->base.filp->f_mapping;
4426 	mapping_set_gfp_mask(mapping, mask);
4427 #endif
4428 
4429 	i915_gem_object_init(obj, &i915_gem_object_ops);
4430 
4431 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4432 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4433 
4434 	if (HAS_LLC(dev)) {
4435 		/* On some devices, we can have the GPU use the LLC (the CPU
4436 		 * cache) for about a 10% performance improvement
4437 		 * compared to uncached.  Graphics requests other than
4438 		 * display scanout are coherent with the CPU in
4439 		 * accessing this cache.  This means in this mode we
4440 		 * don't need to clflush on the CPU side, and on the
4441 		 * GPU side we only need to flush internal caches to
4442 		 * get data visible to the CPU.
4443 		 *
4444 		 * However, we maintain the display planes as UC, and so
4445 		 * need to rebind when first used as such.
4446 		 */
4447 		obj->cache_level = I915_CACHE_LLC;
4448 	} else
4449 		obj->cache_level = I915_CACHE_NONE;
4450 
4451 	trace_i915_gem_object_create(obj);
4452 
4453 	return obj;
4454 
4455 fail:
4456 	i915_gem_object_free(obj);
4457 
4458 	return ERR_PTR(ret);
4459 }
4460 
4461 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4462 {
4463 	/* If we are the last user of the backing storage (be it shmemfs
4464 	 * pages or stolen etc), we know that the pages are going to be
4465 	 * immediately released. In this case, we can then skip copying
4466 	 * back the contents from the GPU.
4467 	 */
4468 
4469 	if (obj->madv != I915_MADV_WILLNEED)
4470 		return false;
4471 
4472 	if (obj->base.filp == NULL)
4473 		return true;
4474 
4475 	/* At first glance, this looks racy, but then again so would be
4476 	 * userspace racing mmap against close. However, the first external
4477 	 * reference to the filp can only be obtained through the
4478 	 * i915_gem_mmap_ioctl() which safeguards us against the user
4479 	 * acquiring such a reference whilst we are in the middle of
4480 	 * freeing the object.
4481 	 */
4482 #if 0
4483 	return atomic_long_read(&obj->base.filp->f_count) == 1;
4484 #else
4485 	return false;
4486 #endif
4487 }
4488 
4489 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4490 {
4491 	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4492 	struct drm_device *dev = obj->base.dev;
4493 	struct drm_i915_private *dev_priv = to_i915(dev);
4494 	struct i915_vma *vma, *next;
4495 
4496 	intel_runtime_pm_get(dev_priv);
4497 
4498 	trace_i915_gem_object_destroy(obj);
4499 
4500 	/* All file-owned VMA should have been released by this point through
4501 	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
4502 	 * However, the object may also be bound into the global GTT (e.g.
4503 	 * older GPUs without per-process support, or for direct access through
4504 	 * the GTT either for the user or for scanout). Those VMA still need to
4505 	 * unbound now.
4506 	 * be unbound now.
4507 	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
4508 		GEM_BUG_ON(!i915_vma_is_ggtt(vma));
4509 		GEM_BUG_ON(i915_vma_is_active(vma));
4510 		vma->flags &= ~I915_VMA_PIN_MASK;
4511 		i915_vma_close(vma);
4512 	}
4513 	GEM_BUG_ON(obj->bind_count);
4514 
4515 	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
4516 	 * before progressing. */
4517 	if (obj->stolen)
4518 		i915_gem_object_unpin_pages(obj);
4519 
4520 	WARN_ON(atomic_read(&obj->frontbuffer_bits));
4521 
4522 	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
4523 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
4524 	    i915_gem_object_is_tiled(obj))
4525 		i915_gem_object_unpin_pages(obj);
4526 
4527 	if (WARN_ON(obj->pages_pin_count))
4528 		obj->pages_pin_count = 0;
4529 	if (discard_backing_storage(obj))
4530 		obj->madv = I915_MADV_DONTNEED;
4531 	i915_gem_object_put_pages(obj);
4532 
4533 	BUG_ON(obj->pages);
4534 
4535 #if 0
4536 	if (obj->base.import_attach)
4537 		drm_prime_gem_destroy(&obj->base, NULL);
4538 #endif
4539 
4540 	if (obj->ops->release)
4541 		obj->ops->release(obj);
4542 
4543 	drm_gem_object_release(&obj->base);
4544 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
4545 
4546 	kfree(obj->bit_17);
4547 	i915_gem_object_free(obj);
4548 
4549 	intel_runtime_pm_put(dev_priv);
4550 }
4551 
4552 int i915_gem_suspend(struct drm_device *dev)
4553 {
4554 	struct drm_i915_private *dev_priv = to_i915(dev);
4555 	int ret;
4556 
4557 	intel_suspend_gt_powersave(dev_priv);
4558 
4559 	mutex_lock(&dev->struct_mutex);
4560 
4561 	/* We have to flush all the executing contexts to main memory so
4562 	 * that they can be saved in the hibernation image. To ensure the last
4563 	 * context image is coherent, we have to switch away from it. That
4564 	 * leaves the dev_priv->kernel_context still active when
4565 	 * we actually suspend, and its image in memory may not match the GPU
4566 	 * state. Fortunately, the kernel_context is disposable and we do
4567 	 * not rely on its state.
4568 	 */
4569 	ret = i915_gem_switch_to_kernel_context(dev_priv);
4570 	if (ret)
4571 		goto err;
4572 
4573 	ret = i915_gem_wait_for_idle(dev_priv,
4574 				     I915_WAIT_INTERRUPTIBLE |
4575 				     I915_WAIT_LOCKED);
4576 	if (ret)
4577 		goto err;
4578 
4579 	i915_gem_retire_requests(dev_priv);
4580 
4581 	i915_gem_context_lost(dev_priv);
4582 	mutex_unlock(&dev->struct_mutex);
4583 
4584 	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
4585 	cancel_delayed_work_sync(&dev_priv->gt.retire_work);
4586 	flush_delayed_work(&dev_priv->gt.idle_work);
4587 
4588 	/* Assert that we successfully flushed all the work and
4589 	 * reset the GPU back to its idle, low power state.
4590 	 */
4591 	WARN_ON(dev_priv->gt.awake);
4592 
4593 	/*
4594 	 * Neither the BIOS, ourselves nor any other kernel
4595 	 * expects the system to be in execlists mode on startup,
4596 	 * so we need to reset the GPU back to legacy mode. And the only
4597 	 * known way to disable logical contexts is through a GPU reset.
4598 	 *
4599 	 * So in order to leave the system in a known default configuration,
4600 	 * always reset the GPU upon unload and suspend. Afterwards we then
4601 	 * clean up the GEM state tracking, flushing off the requests and
4602 	 * leaving the system in a known idle state.
4603 	 *
4604 	 * Note that it is of the utmost importance that the GPU is idle and
4605 	 * all stray writes are flushed *before* we dismantle the backing
4606 	 * storage for the pinned objects.
4607 	 *
4608 	 * However, since we are uncertain that resetting the GPU on older
4609 	 * machines is a good idea, we don't - just in case it leaves the
4610 	 * machine in an unusable condition.
4611 	 */
4612 	if (HAS_HW_CONTEXTS(dev)) {
4613 		int reset = intel_gpu_reset(dev_priv, ALL_ENGINES);
4614 		WARN_ON(reset && reset != -ENODEV);
4615 	}
4616 
4617 	return 0;
4618 
4619 err:
4620 	mutex_unlock(&dev->struct_mutex);
4621 	return ret;
4622 }
4623 
4624 void i915_gem_resume(struct drm_device *dev)
4625 {
4626 	struct drm_i915_private *dev_priv = to_i915(dev);
4627 
4628 	mutex_lock(&dev->struct_mutex);
4629 	i915_gem_restore_gtt_mappings(dev);
4630 
4631 	/* As we didn't flush the kernel context before suspend, we cannot
4632 	 * guarantee that the context image is complete. So let's just reset
4633 	 * it and start again.
4634 	 */
4635 	dev_priv->gt.resume(dev_priv);
4636 
4637 	mutex_unlock(&dev->struct_mutex);
4638 }
4639 
4640 void i915_gem_init_swizzling(struct drm_device *dev)
4641 {
4642 	struct drm_i915_private *dev_priv = to_i915(dev);
4643 
4644 	if (INTEL_INFO(dev)->gen < 5 ||
4645 	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4646 		return;
4647 
4648 	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4649 				 DISP_TILE_SURFACE_SWIZZLING);
4650 
4651 	if (IS_GEN5(dev_priv))
4652 		return;
4653 
4654 	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4655 	if (IS_GEN6(dev_priv))
4656 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
4657 	else if (IS_GEN7(dev_priv))
4658 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4659 	else if (IS_GEN8(dev_priv))
4660 		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
4661 	else
4662 		BUG();
4663 }
4664 
4665 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
4666 {
4667 	I915_WRITE(RING_CTL(base), 0);
4668 	I915_WRITE(RING_HEAD(base), 0);
4669 	I915_WRITE(RING_TAIL(base), 0);
4670 	I915_WRITE(RING_START(base), 0);
4671 }
4672 
4673 static void init_unused_rings(struct drm_i915_private *dev_priv)
4674 {
4675 	if (IS_I830(dev_priv)) {
4676 		init_unused_ring(dev_priv, PRB1_BASE);
4677 		init_unused_ring(dev_priv, SRB0_BASE);
4678 		init_unused_ring(dev_priv, SRB1_BASE);
4679 		init_unused_ring(dev_priv, SRB2_BASE);
4680 		init_unused_ring(dev_priv, SRB3_BASE);
4681 	} else if (IS_GEN2(dev_priv)) {
4682 		init_unused_ring(dev_priv, SRB0_BASE);
4683 		init_unused_ring(dev_priv, SRB1_BASE);
4684 	} else if (IS_GEN3(dev_priv)) {
4685 		init_unused_ring(dev_priv, PRB1_BASE);
4686 		init_unused_ring(dev_priv, PRB2_BASE);
4687 	}
4688 }
4689 
4690 int
4691 i915_gem_init_hw(struct drm_device *dev)
4692 {
4693 	struct drm_i915_private *dev_priv = to_i915(dev);
4694 	struct intel_engine_cs *engine;
4695 	enum intel_engine_id id;
4696 	int ret;
4697 
4698 	/* Double layer security blanket, see i915_gem_init() */
4699 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4700 
4701 	if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9)
4702 		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
4703 
4704 	if (IS_HASWELL(dev_priv))
4705 		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
4706 			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
4707 
4708 	if (HAS_PCH_NOP(dev_priv)) {
4709 		if (IS_IVYBRIDGE(dev_priv)) {
4710 			u32 temp = I915_READ(GEN7_MSG_CTL);
4711 			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
4712 			I915_WRITE(GEN7_MSG_CTL, temp);
4713 		} else if (INTEL_INFO(dev)->gen >= 7) {
4714 			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
4715 			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
4716 			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
4717 		}
4718 	}
4719 
4720 	i915_gem_init_swizzling(dev);
4721 
4722 	/*
4723 	 * At least 830 can leave some of the unused rings
4724 	 * "active" (ie. head != tail) after resume which
4725 	 * will prevent c3 entry. Make sure all unused rings
4726 	 * are totally idle.
4727 	 */
4728 	init_unused_rings(dev_priv);
4729 
4730 	BUG_ON(!dev_priv->kernel_context);
4731 
4732 	ret = i915_ppgtt_init_hw(dev);
4733 	if (ret) {
4734 		DRM_ERROR("PPGTT enable HW failed %d\n", ret);
4735 		goto out;
4736 	}
4737 
4738 	/* Need to do basic initialisation of all rings first: */
4739 	for_each_engine(engine, dev_priv, id) {
4740 		ret = engine->init_hw(engine);
4741 		if (ret)
4742 			goto out;
4743 	}
4744 
4745 	intel_mocs_init_l3cc_table(dev);
4746 
4747 	/* We can't enable contexts until all firmware is loaded */
4748 	ret = intel_guc_setup(dev);
4749 	if (ret)
4750 		goto out;
4751 
4752 out:
4753 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4754 	return ret;
4755 }
4756 
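/*
 * Decide whether to use hardware semaphores for inter-engine
 * synchronisation: honour an explicit request (value >= 0, typically the
 * i915.semaphores module parameter), otherwise enable them only where they
 * are known to work (gen6+, no execlists, and not on SNB when IOMMU
 * remapping is active).
 */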
4757 bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
4758 {
4759 	if (INTEL_INFO(dev_priv)->gen < 6)
4760 		return false;
4761 
4762 	/* TODO: make semaphores and Execlists play nicely together */
4763 	if (i915.enable_execlists)
4764 		return false;
4765 
4766 	if (value >= 0)
4767 		return value;
4768 
4769 #ifdef CONFIG_INTEL_IOMMU
4770 	/* Enable semaphores on SNB when IO remapping is off */
4771 	if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
4772 		return false;
4773 #endif
4774 
4775 	return true;
4776 }
4777 
4778 int i915_gem_init(struct drm_device *dev)
4779 {
4780 	struct drm_i915_private *dev_priv = to_i915(dev);
4781 	int ret;
4782 
4783 	mutex_lock(&dev->struct_mutex);
4784 
4785 	if (!i915.enable_execlists) {
4786 		dev_priv->gt.resume = intel_legacy_submission_resume;
4787 		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
4788 	} else {
4789 		dev_priv->gt.resume = intel_lr_context_resume;
4790 		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
4791 	}
4792 
4793 	/* This is just a security blanket to placate dragons.
4794 	 * On some systems, we very sporadically observe that the first TLBs
4795 	 * used by the CS may be stale, despite us poking the TLB reset. If
4796 	 * we hold the forcewake during initialisation these problems
4797 	 * just magically go away.
4798 	 */
4799 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4800 
4801 	i915_gem_init_userptr(dev_priv);
4802 
4803 	ret = i915_gem_init_ggtt(dev_priv);
4804 	if (ret)
4805 		goto out_unlock;
4806 
4807 	ret = i915_gem_context_init(dev);
4808 	if (ret)
4809 		goto out_unlock;
4810 
4811 	ret = intel_engines_init(dev);
4812 	if (ret)
4813 		goto out_unlock;
4814 
4815 	ret = i915_gem_init_hw(dev);
4816 	if (ret == -EIO) {
4817 		/* Allow engine initialisation to fail by marking the GPU as
4818 		 * wedged. But we only want to do this where the GPU is angry,
4819 		 * for all other failure, such as an allocation failure, bail.
4820 		 */
4821 		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
4822 		i915_gem_set_wedged(dev_priv);
4823 		ret = 0;
4824 	}
4825 
4826 out_unlock:
4827 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4828 	mutex_unlock(&dev->struct_mutex);
4829 
4830 	return ret;
4831 }
4832 
4833 void
4834 i915_gem_cleanup_engines(struct drm_device *dev)
4835 {
4836 	struct drm_i915_private *dev_priv = to_i915(dev);
4837 	struct intel_engine_cs *engine;
4838 	enum intel_engine_id id;
4839 
4840 	for_each_engine(engine, dev_priv, id)
4841 		dev_priv->gt.cleanup_engine(engine);
4842 }
4843 
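/*
 * Determine how many fence registers the platform provides (or, under a
 * vGPU, how many the host has granted us), initialise them and restore any
 * existing fence state.
 */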
4844 void
4845 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
4846 {
4847 	struct drm_device *dev = &dev_priv->drm;
4848 	int i;
4849 
4850 	if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
4851 	    !IS_CHERRYVIEW(dev_priv))
4852 		dev_priv->num_fence_regs = 32;
4853 	else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) ||
4854 		 IS_I945GM(dev_priv) || IS_G33(dev_priv))
4855 		dev_priv->num_fence_regs = 16;
4856 	else
4857 		dev_priv->num_fence_regs = 8;
4858 
4859 	if (intel_vgpu_active(dev_priv))
4860 		dev_priv->num_fence_regs =
4861 				I915_READ(vgtif_reg(avail_rs.fence_num));
4862 
4863 	/* Initialize fence registers to zero */
4864 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
4865 		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
4866 
4867 		fence->i915 = dev_priv;
4868 		fence->id = i;
4869 		list_add_tail(&fence->link, &dev_priv->mm.fence_list);
4870 	}
4871 	i915_gem_restore_fences(dev);
4872 
4873 	i915_gem_detect_bit_6_swizzle(dev);
4874 }
4875 
4876 void
4877 i915_gem_load_init(struct drm_device *dev)
4878 {
4879 	struct drm_i915_private *dev_priv = to_i915(dev);
4880 
4881 	dev_priv->objects =
4882 		kmem_cache_create("i915_gem_object",
4883 				  sizeof(struct drm_i915_gem_object), 0,
4884 				  SLAB_HWCACHE_ALIGN,
4885 				  NULL);
4886 	dev_priv->vmas =
4887 		kmem_cache_create("i915_gem_vma",
4888 				  sizeof(struct i915_vma), 0,
4889 				  SLAB_HWCACHE_ALIGN,
4890 				  NULL);
4891 	dev_priv->requests =
4892 		kmem_cache_create("i915_gem_request",
4893 				  sizeof(struct drm_i915_gem_request), 0,
4894 				  SLAB_HWCACHE_ALIGN |
4895 				  SLAB_RECLAIM_ACCOUNT |
4896 				  SLAB_DESTROY_BY_RCU,
4897 				  NULL);
4898 
4899 	INIT_LIST_HEAD(&dev_priv->context_list);
4900 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
4901 	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
4902 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4903 	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
4904 			  i915_gem_retire_work_handler);
4905 	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
4906 			  i915_gem_idle_work_handler);
4907 	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
4908 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
4909 
4910 	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
4911 
4912 	init_waitqueue_head(&dev_priv->pending_flip_queue);
4913 
4914 	dev_priv->mm.interruptible = true;
4915 
4916 	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
4917 
4918 	lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, 0);
4919 }
4920 
4921 void i915_gem_load_cleanup(struct drm_device *dev)
4922 {
4923 	struct drm_i915_private *dev_priv = to_i915(dev);
4924 
4925 	kmem_cache_destroy(dev_priv->requests);
4926 	kmem_cache_destroy(dev_priv->vmas);
4927 	kmem_cache_destroy(dev_priv->objects);
4928 
4929 	/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
4930 	rcu_barrier();
4931 }
4932 
4933 int i915_gem_freeze(struct drm_i915_private *dev_priv)
4934 {
4935 	intel_runtime_pm_get(dev_priv);
4936 
4937 	mutex_lock(&dev_priv->drm.struct_mutex);
4938 	i915_gem_shrink_all(dev_priv);
4939 	mutex_unlock(&dev_priv->drm.struct_mutex);
4940 
4941 	intel_runtime_pm_put(dev_priv);
4942 
4943 	return 0;
4944 }
4945 
4946 int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
4947 {
4948 	struct drm_i915_gem_object *obj;
4949 	struct list_head *phases[] = {
4950 		&dev_priv->mm.unbound_list,
4951 		&dev_priv->mm.bound_list,
4952 		NULL
4953 	}, **p;
4954 
4955 	/* Called just before we write the hibernation image.
4956 	 *
4957 	 * We need to update the domain tracking to reflect that the CPU
4958 	 * will be accessing all the pages to create and restore from the
4959 	 * hibernation, and so upon restoration those pages will be in the
4960 	 * CPU domain.
4961 	 *
4962 	 * To make sure the hibernation image contains the latest state,
4963 	 * we update that state just before writing out the image.
4964 	 *
4965 	 * To try and reduce the hibernation image, we manually shrink
4966 	 * the objects as well.
4967 	 */
4968 
4969 	mutex_lock(&dev_priv->drm.struct_mutex);
4970 	i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND);
4971 
4972 	for (p = phases; *p; p++) {
4973 		list_for_each_entry(obj, *p, global_list) {
4974 			obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4975 			obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4976 		}
4977 	}
4978 	mutex_unlock(&dev_priv->drm.struct_mutex);
4979 
4980 	return 0;
4981 }
4982 
4983 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4984 {
4985 	struct drm_i915_file_private *file_priv = file->driver_priv;
4986 	struct drm_i915_gem_request *request;
4987 
4988 	/* Clean up our request list when the client is going away, so that
4989 	 * later retire_requests won't dereference our soon-to-be-gone
4990 	 * file_priv.
4991 	 */
4992 	lockmgr(&file_priv->mm.lock, LK_EXCLUSIVE);
4993 	list_for_each_entry(request, &file_priv->mm.request_list, client_list)
4994 		request->file_priv = NULL;
4995 	lockmgr(&file_priv->mm.lock, LK_RELEASE);
4996 
4997 	if (!list_empty(&file_priv->rps.link)) {
4998 		lockmgr(&to_i915(dev)->rps.client_lock, LK_EXCLUSIVE);
4999 		list_del(&file_priv->rps.link);
5000 		lockmgr(&to_i915(dev)->rps.client_lock, LK_RELEASE);
5001 	}
5002 }
5003 
5004 int
5005 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
5006     vm_ooffset_t foff, struct ucred *cred, u_short *color)
5007 {
5008 	*color = 0; /* XXXKIB */
5009 	return (0);
5010 }
5011 
5012 void
5013 i915_gem_pager_dtor(void *handle)
5014 {
5015 	struct drm_gem_object *obj = handle;
5016 	struct drm_device *dev = obj->dev;
5017 
5018 	mutex_lock(&dev->struct_mutex);
5019 	drm_gem_free_mmap_offset(obj);
5020 	i915_gem_release_mmap(to_intel_bo(obj));
5021 	drm_gem_object_unreference(obj);
5022 	mutex_unlock(&dev->struct_mutex);
5023 }
5024 
5025 int i915_gem_open(struct drm_device *dev, struct drm_file *file)
5026 {
5027 	struct drm_i915_file_private *file_priv;
5028 	int ret;
5029 
5030 	DRM_DEBUG_DRIVER("\n");
5031 
5032 	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
5033 	if (!file_priv)
5034 		return -ENOMEM;
5035 
5036 	file->driver_priv = file_priv;
5037 	file_priv->dev_priv = to_i915(dev);
5038 	file_priv->file = file;
5039 	INIT_LIST_HEAD(&file_priv->rps.link);
5040 
5041 	lockinit(&file_priv->mm.lock, "i915_priv", 0, 0);
5042 	INIT_LIST_HEAD(&file_priv->mm.request_list);
5043 
5044 	file_priv->bsd_engine = -1;
5045 
5046 	ret = i915_gem_context_open(dev, file);
5047 	if (ret)
5048 		kfree(file_priv);
5049 
5050 	return ret;
5051 }
5052 
5053 /**
5054  * i915_gem_track_fb - update frontbuffer tracking
5055  * @old: current GEM buffer for the frontbuffer slots
5056  * @new: new GEM buffer for the frontbuffer slots
5057  * @frontbuffer_bits: bitmask of frontbuffer slots
5058  *
5059  * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
5060  * from @old and setting them in @new. Both @old and @new can be NULL.
5061  */
5062 void i915_gem_track_fb(struct drm_i915_gem_object *old,
5063 		       struct drm_i915_gem_object *new,
5064 		       unsigned frontbuffer_bits)
5065 {
5066 	/* Control of individual bits within the mask are guarded by
5067 	 * the owning plane->mutex, i.e. we can never see concurrent
5068 	 * manipulation of individual bits. But since the bitfield as a whole
5069 	 * is updated using RMW, we need to use atomics in order to update
5070 	 * the bits.
5071 	 */
5072 	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
5073 		     sizeof(atomic_t) * BITS_PER_BYTE);
5074 
5075 	if (old) {
5076 		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
5077 		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
5078 	}
5079 
5080 	if (new) {
5081 		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
5082 		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
5083 	}
5084 }
5085 
5086 /* Like i915_gem_object_get_page(), but mark the returned page dirty */
5087 struct page *
5088 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
5089 {
5090 	struct page *page;
5091 
5092 	/* Only default objects have per-page dirty tracking */
5093 	if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
5094 		return NULL;
5095 
5096 	page = i915_gem_object_get_page(obj, n);
5097 	set_page_dirty(page);
5098 	return page;
5099 }
5100 
5101 /* Allocate a new GEM object and fill it with the supplied data */
5102 struct drm_i915_gem_object *
5103 i915_gem_object_create_from_data(struct drm_device *dev,
5104 			         const void *data, size_t size)
5105 {
5106 	struct drm_i915_gem_object *obj;
5107 	struct sg_table *sg;
5108 	size_t bytes;
5109 	int ret;
5110 
5111 	obj = i915_gem_object_create(dev, round_up(size, PAGE_SIZE));
5112 	if (IS_ERR(obj))
5113 		return obj;
5114 
5115 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
5116 	if (ret)
5117 		goto fail;
5118 
5119 	ret = i915_gem_object_get_pages(obj);
5120 	if (ret)
5121 		goto fail;
5122 
5123 	i915_gem_object_pin_pages(obj);
5124 	sg = obj->pages;
5125 	bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
5126 	obj->dirty = 1;		/* Backing store is now out of date */
5127 	i915_gem_object_unpin_pages(obj);
5128 
5129 	if (WARN_ON(bytes != size)) {
5130 		DRM_ERROR("Incomplete copy, wrote %zu of %zu\n", bytes, size);
5131 		ret = -EFAULT;
5132 		goto fail;
5133 	}
5134 
5135 	return obj;
5136 
5137 fail:
5138 	i915_gem_object_put(obj);
5139 	return ERR_PTR(ret);
5140 }
5141