xref: /dragonfly/sys/dev/drm/i915/i915_gem.c (revision 74ad0aa1)
1 /*
2  * Copyright © 2008-2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27 
28 #include <drm/drmP.h>
29 #include <drm/drm_vma_manager.h>
30 #include <drm/i915_drm.h>
31 #include "i915_drv.h"
32 #include "i915_vgpu.h"
33 #include "i915_trace.h"
34 #include "intel_drv.h"
35 #include "intel_mocs.h"
36 #include <linux/shmem_fs.h>
37 #include <linux/slab.h>
38 #include <linux/swap.h>
39 #include <linux/pci.h>
40 #include <linux/dma-buf.h>
41 
42 #include <sys/mman.h>
43 #include <vm/vm_map.h>
44 #include <vm/vm_param.h>
45 
46 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
47 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
48 static void
49 i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
50 static void
51 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
52 
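/* The CPU cache is coherent with GPU access when the platform shares its
 * last-level cache with the GPU (LLC) or when the object uses a snooped
 * cache level (anything other than I915_CACHE_NONE). */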
53 static bool cpu_cache_is_coherent(struct drm_device *dev,
54 				  enum i915_cache_level level)
55 {
56 	return HAS_LLC(dev) || level != I915_CACHE_NONE;
57 }
58 
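/* Decide whether CPU writes must be flushed before the GPU (or scanout)
 * can see them: not needed if the object already lives in the CPU write
 * domain, otherwise needed for non-coherent cache levels and for
 * display-pinned objects. pwrite uses this to prefer the uncached GTT
 * path for such objects. */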
59 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
60 {
61 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
62 		return false;
63 
64 	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
65 		return true;
66 
67 	return obj->pin_display;
68 }
69 
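/* Reserve a node in the CPU-mappable range of the global GTT so that an
 * unbound object can be read or written through the aperture one page at
 * a time. */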
70 static int
71 insert_mappable_node(struct drm_i915_private *i915,
72                      struct drm_mm_node *node, u32 size)
73 {
74 	memset(node, 0, sizeof(*node));
75 	return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node,
76 						   size, 0, 0, 0,
77 						   i915->ggtt.mappable_end,
78 						   DRM_MM_SEARCH_DEFAULT,
79 						   DRM_MM_CREATE_DEFAULT);
80 }
81 
82 static void
83 remove_mappable_node(struct drm_mm_node *node)
84 {
85 	drm_mm_remove_node(node);
86 }
87 
88 /* some bookkeeping */
89 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
90 				  size_t size)
91 {
92 	spin_lock(&dev_priv->mm.object_stat_lock);
93 	dev_priv->mm.object_count++;
94 	dev_priv->mm.object_memory += size;
95 	spin_unlock(&dev_priv->mm.object_stat_lock);
96 }
97 
98 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
99 				     size_t size)
100 {
101 	spin_lock(&dev_priv->mm.object_stat_lock);
102 	dev_priv->mm.object_count--;
103 	dev_priv->mm.object_memory -= size;
104 	spin_unlock(&dev_priv->mm.object_stat_lock);
105 }
106 
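/* Block (interruptibly, for at most 10 seconds) until any pending GPU
 * reset completes, returning -EIO on timeout so callers can bail out
 * instead of stalling behind the reset handler. */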
107 static int
108 i915_gem_wait_for_error(struct i915_gpu_error *error)
109 {
110 	int ret;
111 
112 	if (!i915_reset_in_progress(error))
113 		return 0;
114 
115 	/*
116 	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
117 	 * userspace. If it takes that long something really bad is going on and
118 	 * we should simply try to bail out and fail as gracefully as possible.
119 	 */
120 	ret = wait_event_interruptible_timeout(error->reset_queue,
121 					       !i915_reset_in_progress(error),
122 					       10*HZ);
123 	if (ret == 0) {
124 		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
125 		return -EIO;
126 	} else if (ret < 0) {
127 		return ret;
128 	} else {
129 		return 0;
130 	}
131 }
132 
133 int i915_mutex_lock_interruptible(struct drm_device *dev)
134 {
135 	struct drm_i915_private *dev_priv = to_i915(dev);
136 	int ret;
137 
138 	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
139 	if (ret)
140 		return ret;
141 
142 	ret = mutex_lock_interruptible(&dev->struct_mutex);
143 	if (ret)
144 		return ret;
145 
146 	WARN_ON(i915_verify_lists(dev));
147 	return 0;
148 }
149 
150 int
151 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
152 			    struct drm_file *file)
153 {
154 	struct drm_i915_private *dev_priv = to_i915(dev);
155 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
156 	struct drm_i915_gem_get_aperture *args = data;
157 	struct i915_vma *vma;
158 	size_t pinned;
159 
160 	pinned = 0;
161 	mutex_lock(&dev->struct_mutex);
162 	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
163 		if (vma->pin_count)
164 			pinned += vma->node.size;
165 	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
166 		if (vma->pin_count)
167 			pinned += vma->node.size;
168 	mutex_unlock(&dev->struct_mutex);
169 
170 	args->aper_size = ggtt->base.total;
171 	args->aper_available_size = args->aper_size - pinned;
172 
173 	return 0;
174 }
175 
176 #if 0
177 static int
178 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
179 {
180 	struct address_space *mapping = obj->base.filp->f_mapping;
181 	char *vaddr = obj->phys_handle->vaddr;
182 	struct sg_table *st;
183 	struct scatterlist *sg;
184 	int i;
185 
186 	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
187 		return -EINVAL;
188 
189 	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
190 		struct page *page;
191 		char *src;
192 
193 		page = shmem_read_mapping_page(mapping, i);
194 		if (IS_ERR(page))
195 			return PTR_ERR(page);
196 
197 		src = kmap_atomic(page);
198 		memcpy(vaddr, src, PAGE_SIZE);
199 		drm_clflush_virt_range(vaddr, PAGE_SIZE);
200 		kunmap_atomic(src);
201 
202 		put_page(page);
203 		vaddr += PAGE_SIZE;
204 	}
205 
206 	i915_gem_chipset_flush(to_i915(obj->base.dev));
207 
208 	st = kmalloc(sizeof(*st), GFP_KERNEL);
209 	if (st == NULL)
210 		return -ENOMEM;
211 
212 	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
213 		kfree(st);
214 		return -ENOMEM;
215 	}
216 
217 	sg = st->sgl;
218 	sg->offset = 0;
219 	sg->length = obj->base.size;
220 
221 	sg_dma_address(sg) = obj->phys_handle->busaddr;
222 	sg_dma_len(sg) = obj->base.size;
223 
224 	obj->pages = st;
225 	return 0;
226 }
227 
228 static void
229 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
230 {
231 	int ret;
232 
233 	BUG_ON(obj->madv == __I915_MADV_PURGED);
234 
235 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
236 	if (WARN_ON(ret)) {
237 		/* In the event of a disaster, abandon all caches and
238 		 * hope for the best.
239 		 */
240 		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
241 	}
242 
243 	if (obj->madv == I915_MADV_DONTNEED)
244 		obj->dirty = 0;
245 
246 	if (obj->dirty) {
247 		struct address_space *mapping = obj->base.filp->f_mapping;
248 		char *vaddr = obj->phys_handle->vaddr;
249 		int i;
250 
251 		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
252 			struct page *page;
253 			char *dst;
254 
255 			page = shmem_read_mapping_page(mapping, i);
256 			if (IS_ERR(page))
257 				continue;
258 
259 			dst = kmap_atomic(page);
260 			drm_clflush_virt_range(vaddr, PAGE_SIZE);
261 			memcpy(dst, vaddr, PAGE_SIZE);
262 			kunmap_atomic(dst);
263 
264 			set_page_dirty(page);
265 			if (obj->madv == I915_MADV_WILLNEED)
266 				mark_page_accessed(page);
267 			put_page(page);
268 			vaddr += PAGE_SIZE;
269 		}
270 		obj->dirty = 0;
271 	}
272 
273 	sg_free_table(obj->pages);
274 	kfree(obj->pages);
275 }
276 
277 static void
278 i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
279 {
280 	drm_pci_free(obj->base.dev, obj->phys_handle);
281 }
282 #endif
283 
284 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
285 #if 0
286 	.get_pages = i915_gem_object_get_pages_phys,
287 	.put_pages = i915_gem_object_put_pages_phys,
288 	.release = i915_gem_object_release_phys,
289 #endif
290 };
291 
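/* Unbind every VMA of the object and release its backing pages, holding
 * a temporary reference so the object cannot disappear underneath us. */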
292 static int
293 drop_pages(struct drm_i915_gem_object *obj)
294 {
295 	struct i915_vma *vma, *next;
296 	int ret;
297 
298 	drm_gem_object_reference(&obj->base);
299 	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link)
300 		if (i915_vma_unbind(vma))
301 			break;
302 
303 	ret = i915_gem_object_put_pages(obj);
304 	drm_gem_object_unreference(&obj->base);
305 
306 	return ret;
307 }
308 
309 int
310 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
311 			    int align)
312 {
313 	drm_dma_handle_t *phys;
314 	int ret;
315 
316 	if (obj->phys_handle) {
317 		if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
318 			return -EBUSY;
319 
320 		return 0;
321 	}
322 
323 	if (obj->madv != I915_MADV_WILLNEED)
324 		return -EFAULT;
325 
326 	if (obj->base.filp == NULL)
327 		return -EINVAL;
328 
329 	ret = drop_pages(obj);
330 	if (ret)
331 		return ret;
332 
333 	/* create a new object */
334 	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
335 	if (!phys)
336 		return -ENOMEM;
337 
338 	obj->phys_handle = phys;
339 	obj->ops = &i915_gem_phys_ops;
340 
341 	return i915_gem_object_get_pages(obj);
342 }
343 
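/* pwrite into an object backed by a contiguous physical allocation
 * (i915_gem_object_attach_phys). The fast path uses the atomic, uncached
 * copy; if that faults we drop struct_mutex and retry with a plain
 * copy_from_user, which is safe because the physical backing store never
 * moves. */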
344 static int
345 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
346 		     struct drm_i915_gem_pwrite *args,
347 		     struct drm_file *file_priv)
348 {
349 	struct drm_device *dev = obj->base.dev;
350 	void *vaddr = obj->phys_handle->vaddr + args->offset;
351 	char __user *user_data = u64_to_user_ptr(args->data_ptr);
352 	int ret = 0;
353 
354 	/* We manually control the domain here and pretend that it
355 	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
356 	 */
357 	ret = i915_gem_object_wait_rendering(obj, false);
358 	if (ret)
359 		return ret;
360 
361 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
362 	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
363 		unsigned long unwritten;
364 
365 		/* The physical object once assigned is fixed for the lifetime
366 		 * of the obj, so we can safely drop the lock and continue
367 		 * to access vaddr.
368 		 */
369 		mutex_unlock(&dev->struct_mutex);
370 		unwritten = copy_from_user(vaddr, user_data, args->size);
371 		mutex_lock(&dev->struct_mutex);
372 		if (unwritten) {
373 			ret = -EFAULT;
374 			goto out;
375 		}
376 	}
377 
378 	drm_clflush_virt_range(vaddr, args->size);
379 	i915_gem_chipset_flush(to_i915(dev));
380 
381 out:
382 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
383 	return ret;
384 }
385 
386 void *i915_gem_object_alloc(struct drm_device *dev)
387 {
388 	return kzalloc(sizeof(struct drm_i915_gem_object), GFP_KERNEL);
389 }
390 
391 void i915_gem_object_free(struct drm_i915_gem_object *obj)
392 {
393 	kfree(obj);
394 }
395 
396 static int
397 i915_gem_create(struct drm_file *file,
398 		struct drm_device *dev,
399 		uint64_t size,
400 		uint32_t *handle_p)
401 {
402 	struct drm_i915_gem_object *obj;
403 	int ret;
404 	u32 handle;
405 
406 	size = roundup(size, PAGE_SIZE);
407 	if (size == 0)
408 		return -EINVAL;
409 
410 	/* Allocate the new object */
411 	obj = i915_gem_object_create(dev, size);
412 	if (IS_ERR(obj))
413 		return PTR_ERR(obj);
414 
415 	ret = drm_gem_handle_create(file, &obj->base, &handle);
416 	/* drop reference from allocate - handle holds it now */
417 	drm_gem_object_unreference_unlocked(&obj->base);
418 	if (ret)
419 		return ret;
420 
421 	*handle_p = handle;
422 	return 0;
423 }
424 
425 int
426 i915_gem_dumb_create(struct drm_file *file,
427 		     struct drm_device *dev,
428 		     struct drm_mode_create_dumb *args)
429 {
430 	/* have to work out size/pitch and return them */
431 	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
432 	args->size = args->pitch * args->height;
433 	return i915_gem_create(file, dev,
434 			       args->size, &args->handle);
435 }
436 
437 /**
438  * Creates a new mm object and returns a handle to it.
439  * @dev: drm device pointer
440  * @data: ioctl data blob
441  * @file: drm file pointer
442  */
443 int
444 i915_gem_create_ioctl(struct drm_device *dev, void *data,
445 		      struct drm_file *file)
446 {
447 	struct drm_i915_gem_create *args = data;
448 
449 	return i915_gem_create(file, dev,
450 			       args->size, &args->handle);
451 }
452 
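/* Copy out of a bit-17-swizzled object: work one 64-byte cacheline at a
 * time and XOR bit 6 of the GPU offset so the data is unswizzled as it is
 * copied. __copy_from_user_swizzled below is the write-side twin. */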
453 static inline int
454 __copy_to_user_swizzled(char __user *cpu_vaddr,
455 			const char *gpu_vaddr, int gpu_offset,
456 			int length)
457 {
458 	int ret, cpu_offset = 0;
459 
460 	while (length > 0) {
461 		int cacheline_end = ALIGN(gpu_offset + 1, 64);
462 		int this_length = min(cacheline_end - gpu_offset, length);
463 		int swizzled_gpu_offset = gpu_offset ^ 64;
464 
465 		ret = __copy_to_user(cpu_vaddr + cpu_offset,
466 				     gpu_vaddr + swizzled_gpu_offset,
467 				     this_length);
468 		if (ret)
469 			return ret + length;
470 
471 		cpu_offset += this_length;
472 		gpu_offset += this_length;
473 		length -= this_length;
474 	}
475 
476 	return 0;
477 }
478 
479 static inline int
480 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
481 			  const char __user *cpu_vaddr,
482 			  int length)
483 {
484 	int ret, cpu_offset = 0;
485 
486 	while (length > 0) {
487 		int cacheline_end = ALIGN(gpu_offset + 1, 64);
488 		int this_length = min(cacheline_end - gpu_offset, length);
489 		int swizzled_gpu_offset = gpu_offset ^ 64;
490 
491 		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
492 				       cpu_vaddr + cpu_offset,
493 				       this_length);
494 		if (ret)
495 			return ret + length;
496 
497 		cpu_offset += this_length;
498 		gpu_offset += this_length;
499 		length -= this_length;
500 	}
501 
502 	return 0;
503 }
504 
505 /*
506  * Pins the specified object's pages and synchronizes the object with
507  * GPU accesses. Sets needs_clflush to non-zero if the caller should
508  * flush the object from the CPU cache.
509  */
510 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
511 				    int *needs_clflush)
512 {
513 	int ret;
514 
515 	*needs_clflush = 0;
516 
517 	if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
518 		return -EINVAL;
519 
520 	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
521 		/* If we're not in the cpu read domain, set ourself into the gtt
522 		 * read domain and manually flush cachelines (if required). This
523 		 * optimizes for the case when the gpu will dirty the data
524 		 * anyway again before the next pread happens. */
525 		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
526 							obj->cache_level);
527 		ret = i915_gem_object_wait_rendering(obj, true);
528 		if (ret)
529 			return ret;
530 	}
531 
532 	ret = i915_gem_object_get_pages(obj);
533 	if (ret)
534 		return ret;
535 
536 	i915_gem_object_pin_pages(obj);
537 
538 	return ret;
539 }
540 
541 /* Per-page copy function for the shmem pread fastpath.
542  * Flushes invalid cachelines before reading the target if
543  * needs_clflush is set. */
544 static int
545 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
546 		 char __user *user_data,
547 		 bool page_do_bit17_swizzling, bool needs_clflush)
548 {
549 	char *vaddr;
550 	int ret;
551 
552 	if (unlikely(page_do_bit17_swizzling))
553 		return -EINVAL;
554 
555 	vaddr = kmap_atomic(page);
556 	if (needs_clflush)
557 		drm_clflush_virt_range(vaddr + shmem_page_offset,
558 				       page_length);
559 	ret = __copy_to_user_inatomic(user_data,
560 				      vaddr + shmem_page_offset,
561 				      page_length);
562 	kunmap_atomic(vaddr);
563 
564 	return ret ? -EFAULT : 0;
565 }
566 
567 static void
568 shmem_clflush_swizzled_range(char *addr, unsigned long length,
569 			     bool swizzled)
570 {
571 	if (unlikely(swizzled)) {
572 		unsigned long start = (unsigned long) addr;
573 		unsigned long end = (unsigned long) addr + length;
574 
575 		/* For swizzling simply ensure that we always flush both
576 		 * channels. Lame, but simple and it works. Swizzled
577 		 * pwrite/pread is far from a hotpath - current userspace
578 		 * doesn't use it at all. */
579 		start = round_down(start, 128);
580 		end = round_up(end, 128);
581 
582 		drm_clflush_virt_range((void *)start, end - start);
583 	} else {
584 		drm_clflush_virt_range(addr, length);
585 	}
586 
587 }
588 
589 /* Only difference to the fast-path function is that this can handle bit17
590  * and uses non-atomic copy and kmap functions. */
591 static int
592 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
593 		 char __user *user_data,
594 		 bool page_do_bit17_swizzling, bool needs_clflush)
595 {
596 	char *vaddr;
597 	int ret;
598 
599 	vaddr = kmap(page);
600 	if (needs_clflush)
601 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
602 					     page_length,
603 					     page_do_bit17_swizzling);
604 
605 	if (page_do_bit17_swizzling)
606 		ret = __copy_to_user_swizzled(user_data,
607 					      vaddr, shmem_page_offset,
608 					      page_length);
609 	else
610 		ret = __copy_to_user(user_data,
611 				     vaddr + shmem_page_offset,
612 				     page_length);
613 	kunmap(page);
614 
615 	return ret ? -EFAULT : 0;
616 }
617 
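/* Copy to or from one page of the GTT aperture through a fresh WC
 * io-mapping. The user access may fault, so callers must not hold
 * struct_mutex; returns the number of bytes left uncopied. */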
618 static inline unsigned long
619 slow_user_access(struct io_mapping *mapping,
620 		 uint64_t page_base, int page_offset,
621 		 char __user *user_data,
622 		 unsigned long length, bool pwrite)
623 {
624 	void __iomem *ioaddr;
625 	void *vaddr;
626 	uint64_t unwritten;
627 
628 	ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
629 	/* We can use the cpu mem copy function because this is X86. */
630 	vaddr = (void __force *)ioaddr + page_offset;
631 	if (pwrite)
632 		unwritten = __copy_from_user(vaddr, user_data, length);
633 	else
634 		unwritten = __copy_to_user(user_data, vaddr, length);
635 
636 	io_mapping_unmap(ioaddr);
637 	return unwritten;
638 }
639 
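/* Fallback pread path through the GTT aperture, used when the object has
 * no struct pages or the shmem pread faulted: pin the object into the
 * mappable aperture, or rebind its pages one at a time into a temporary
 * single-page GGTT node, and copy out with slow_user_access() while
 * struct_mutex is dropped. */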
640 static int
641 i915_gem_gtt_pread(struct drm_device *dev,
642 		   struct drm_i915_gem_object *obj, uint64_t size,
643 		   uint64_t data_offset, uint64_t data_ptr)
644 {
645 	struct drm_i915_private *dev_priv = to_i915(dev);
646 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
647 	struct drm_mm_node node;
648 	char __user *user_data;
649 	uint64_t remain;
650 	uint64_t offset;
651 	int ret;
652 
653 	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
654 	if (ret) {
655 		ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE);
656 		if (ret)
657 			goto out;
658 
659 		ret = i915_gem_object_get_pages(obj);
660 		if (ret) {
661 			remove_mappable_node(&node);
662 			goto out;
663 		}
664 
665 		i915_gem_object_pin_pages(obj);
666 	} else {
667 		node.start = i915_gem_obj_ggtt_offset(obj);
668 		node.allocated = false;
669 		ret = i915_gem_object_put_fence(obj);
670 		if (ret)
671 			goto out_unpin;
672 	}
673 
674 	ret = i915_gem_object_set_to_gtt_domain(obj, false);
675 	if (ret)
676 		goto out_unpin;
677 
678 	user_data = u64_to_user_ptr(data_ptr);
679 	remain = size;
680 	offset = data_offset;
681 
682 	mutex_unlock(&dev->struct_mutex);
683 	if (likely(!i915.prefault_disable)) {
684 		ret = fault_in_multipages_writeable(user_data, remain);
685 		if (ret) {
686 			mutex_lock(&dev->struct_mutex);
687 			goto out_unpin;
688 		}
689 	}
690 
691 	while (remain > 0) {
692 		/* Operation in this page
693 		 *
694 		 * page_base = page offset within aperture
695 		 * page_offset = offset within page
696 		 * page_length = bytes to copy for this page
697 		 */
698 		u32 page_base = node.start;
699 		unsigned page_offset = offset_in_page(offset);
700 		unsigned page_length = PAGE_SIZE - page_offset;
701 		page_length = remain < page_length ? remain : page_length;
702 		if (node.allocated) {
703 			wmb();
704 			ggtt->base.insert_page(&ggtt->base,
705 					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
706 					       node.start,
707 					       I915_CACHE_NONE, 0);
708 			wmb();
709 		} else {
710 			page_base += offset & LINUX_PAGE_MASK; /* page-aligned bits, as in gtt_pwrite_fast */
711 		}
712 		/* This is a slow read/write as it tries to read from
713 		 * and write to user memory which may result in page
714 		 * faults, and so we cannot perform this under struct_mutex.
715 		 */
716 		if (slow_user_access(ggtt->mappable, page_base,
717 				     page_offset, user_data,
718 				     page_length, false)) {
719 			ret = -EFAULT;
720 			break;
721 		}
722 
723 		remain -= page_length;
724 		user_data += page_length;
725 		offset += page_length;
726 	}
727 
728 	mutex_lock(&dev->struct_mutex);
729 	if (ret == 0 && (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
730 		/* The user has modified the object whilst we tried
731 		 * reading from it, and we now have no idea what domain
732 		 * the pages should be in. As we have just been touching
733 		 * them directly, flush everything back to the GTT
734 		 * domain.
735 		 */
736 		ret = i915_gem_object_set_to_gtt_domain(obj, false);
737 	}
738 
739 out_unpin:
740 	if (node.allocated) {
741 		wmb();
742 		ggtt->base.clear_range(&ggtt->base,
743 				       node.start, node.size,
744 				       true);
745 		i915_gem_object_unpin_pages(obj);
746 		remove_mappable_node(&node);
747 	} else {
748 		i915_gem_object_ggtt_unpin(obj);
749 	}
750 out:
751 	return ret;
752 }
753 
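/* pread via the shmem backing pages. Each page is first tried with the
 * atomic kmap fast path; on failure struct_mutex is dropped and the slow
 * path (which may fault and understands bit-17 swizzling) takes over. */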
754 static int
755 i915_gem_shmem_pread(struct drm_device *dev,
756 		     struct drm_i915_gem_object *obj,
757 		     struct drm_i915_gem_pread *args,
758 		     struct drm_file *file)
759 {
760 	char __user *user_data;
761 	ssize_t remain;
762 	loff_t offset;
763 	int shmem_page_offset, page_length, ret = 0;
764 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
765 	int prefaulted = 0;
766 	int needs_clflush = 0;
767 	struct sg_page_iter sg_iter;
768 
769 	if (!i915_gem_object_has_struct_page(obj))
770 		return -ENODEV;
771 
772 	user_data = u64_to_user_ptr(args->data_ptr);
773 	remain = args->size;
774 
775 	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
776 
777 	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
778 	if (ret)
779 		return ret;
780 
781 	offset = args->offset;
782 
783 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
784 			 offset >> PAGE_SHIFT) {
785 		struct page *page = sg_page_iter_page(&sg_iter);
786 
787 		if (remain <= 0)
788 			break;
789 
790 		/* Operation in this page
791 		 *
792 		 * shmem_page_offset = offset within page in shmem file
793 		 * page_length = bytes to copy for this page
794 		 */
795 		shmem_page_offset = offset_in_page(offset);
796 		page_length = remain;
797 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
798 			page_length = PAGE_SIZE - shmem_page_offset;
799 
800 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
801 			(page_to_phys(page) & (1 << 17)) != 0;
802 
803 		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
804 				       user_data, page_do_bit17_swizzling,
805 				       needs_clflush);
806 		if (ret == 0)
807 			goto next_page;
808 
809 		mutex_unlock(&dev->struct_mutex);
810 
811 		if (likely(!i915.prefault_disable) && !prefaulted) {
812 			ret = fault_in_multipages_writeable(user_data, remain);
813 			/* Userspace is tricking us, but we've already clobbered
814 			 * its pages with the prefault and promised to write the
815 			 * data up to the first fault. Hence ignore any errors
816 			 * and just continue. */
817 			(void)ret;
818 			prefaulted = 1;
819 		}
820 
821 		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
822 				       user_data, page_do_bit17_swizzling,
823 				       needs_clflush);
824 
825 		mutex_lock(&dev->struct_mutex);
826 
827 		if (ret)
828 			goto out;
829 
830 next_page:
831 		remain -= page_length;
832 		user_data += page_length;
833 		offset += page_length;
834 	}
835 
836 out:
837 	i915_gem_object_unpin_pages(obj);
838 
839 	return ret;
840 }
841 
842 /**
843  * Reads data from the object referenced by handle.
844  * @dev: drm device pointer
845  * @data: ioctl data blob
846  * @file: drm file pointer
847  *
848  * On error, the contents of *data are undefined.
849  */
850 int
851 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
852 		     struct drm_file *file)
853 {
854 	struct drm_i915_gem_pread *args = data;
855 	struct drm_i915_gem_object *obj;
856 	int ret = 0;
857 
858 	if (args->size == 0)
859 		return 0;
860 
861 #if 0
862 	if (!access_ok(VERIFY_WRITE,
863 		       u64_to_user_ptr(args->data_ptr),
864 		       args->size))
865 		return -EFAULT;
866 #endif
867 
868 	ret = i915_mutex_lock_interruptible(dev);
869 	if (ret)
870 		return ret;
871 
872 	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
873 	if (&obj->base == NULL) {
874 		ret = -ENOENT;
875 		goto unlock;
876 	}
877 
878 	/* Bounds check source.  */
879 	if (args->offset > obj->base.size ||
880 	    args->size > obj->base.size - args->offset) {
881 		ret = -EINVAL;
882 		goto out;
883 	}
884 
885 	trace_i915_gem_object_pread(obj, args->offset, args->size);
886 
887 	ret = i915_gem_shmem_pread(dev, obj, args, file);
888 
889 	/* pread for non-shmem-backed objects */
890 	if (ret == -EFAULT || ret == -ENODEV) {
891 		intel_runtime_pm_get(to_i915(dev));
892 		ret = i915_gem_gtt_pread(dev, obj, args->size,
893 					args->offset, args->data_ptr);
894 		intel_runtime_pm_put(to_i915(dev));
895 	}
896 
897 out:
898 	drm_gem_object_unreference(&obj->base);
899 unlock:
900 	mutex_unlock(&dev->struct_mutex);
901 	return ret;
902 }
903 
904 /* This is the fast write path which cannot handle
905  * page faults in the source data
906  */
907 
908 static inline int
909 fast_user_write(struct io_mapping *mapping,
910 		loff_t page_base, int page_offset,
911 		char __user *user_data,
912 		int length)
913 {
914 	void __iomem *vaddr_atomic;
915 	void *vaddr;
916 	unsigned long unwritten;
917 
918 	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
919 	/* We can use the cpu mem copy function because this is X86. */
920 	vaddr = (void __force*)vaddr_atomic + page_offset;
921 	unwritten = __copy_from_user_inatomic_nocache(vaddr,
922 						      user_data, length);
923 	io_mapping_unmap_atomic(vaddr_atomic);
924 	return unwritten;
925 }
926 
927 /**
928  * This is the fast pwrite path, where we copy the data directly from the
929  * user into the GTT, uncached.
930  * @i915: i915 device private
931  * @obj: i915 gem object
932  * @args: pwrite arguments structure
933  * @file: drm file pointer
934  */
935 static int
936 i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
937 			 struct drm_i915_gem_object *obj,
938 			 struct drm_i915_gem_pwrite *args,
939 			 struct drm_file *file)
940 {
941 	struct i915_ggtt *ggtt = &i915->ggtt;
942 	struct drm_device *dev = obj->base.dev;
943 	struct drm_mm_node node;
944 	uint64_t remain, offset;
945 	char __user *user_data;
946 	int ret;
947 	bool hit_slow_path = false;
948 
949 	if (obj->tiling_mode != I915_TILING_NONE)
950 		return -EFAULT;
951 
952 	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
953 	if (ret) {
954 		ret = insert_mappable_node(i915, &node, PAGE_SIZE);
955 		if (ret)
956 			goto out;
957 
958 		ret = i915_gem_object_get_pages(obj);
959 		if (ret) {
960 			remove_mappable_node(&node);
961 			goto out;
962 		}
963 
964 		i915_gem_object_pin_pages(obj);
965 	} else {
966 		node.start = i915_gem_obj_ggtt_offset(obj);
967 		node.allocated = false;
968 		ret = i915_gem_object_put_fence(obj);
969 		if (ret)
970 			goto out_unpin;
971 	}
972 
973 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
974 	if (ret)
975 		goto out_unpin;
976 
977 	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
978 	obj->dirty = true;
979 
980 	user_data = u64_to_user_ptr(args->data_ptr);
981 	offset = args->offset;
982 	remain = args->size;
983 	while (remain) {
984 		/* Operation in this page
985 		 *
986 		 * page_base = page offset within aperture
987 		 * page_offset = offset within page
988 		 * page_length = bytes to copy for this page
989 		 */
990 		u32 page_base = node.start;
991 		unsigned page_offset = offset_in_page(offset);
992 		unsigned page_length = PAGE_SIZE - page_offset;
993 		page_length = remain < page_length ? remain : page_length;
994 		if (node.allocated) {
995 			wmb(); /* flush the write before we modify the GGTT */
996 			ggtt->base.insert_page(&ggtt->base,
997 					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
998 					       node.start, I915_CACHE_NONE, 0);
999 			wmb(); /* flush modifications to the GGTT (insert_page) */
1000 		} else {
1001 			page_base += offset & LINUX_PAGE_MASK;
1002 		}
1003 		/* If we get a fault while copying data, then (presumably) our
1004 		 * source page isn't available.  Return the error and we'll
1005 		 * retry in the slow path.
1006 		 * If the object is non-shmem-backed, we retry with the
1007 		 * path that handles page faults.
1008 		 */
1009 		if (fast_user_write(ggtt->mappable, page_base,
1010 				    page_offset, user_data, page_length)) {
1011 			hit_slow_path = true;
1012 			mutex_unlock(&dev->struct_mutex);
1013 			if (slow_user_access(ggtt->mappable,
1014 					     page_base,
1015 					     page_offset, user_data,
1016 					     page_length, true)) {
1017 				ret = -EFAULT;
1018 				mutex_lock(&dev->struct_mutex);
1019 				goto out_flush;
1020 			}
1021 
1022 			mutex_lock(&dev->struct_mutex);
1023 		}
1024 
1025 		remain -= page_length;
1026 		user_data += page_length;
1027 		offset += page_length;
1028 	}
1029 
1030 out_flush:
1031 	if (hit_slow_path) {
1032 		if (ret == 0 &&
1033 		    (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
1034 			/* The user has modified the object whilst we tried
1035 			 * reading from it, and we now have no idea what domain
1036 			 * the pages should be in. As we have just been touching
1037 			 * them directly, flush everything back to the GTT
1038 			 * domain.
1039 			 */
1040 			ret = i915_gem_object_set_to_gtt_domain(obj, false);
1041 		}
1042 	}
1043 
1044 	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
1045 out_unpin:
1046 	if (node.allocated) {
1047 		wmb();
1048 		ggtt->base.clear_range(&ggtt->base,
1049 				       node.start, node.size,
1050 				       true);
1051 		i915_gem_object_unpin_pages(obj);
1052 		remove_mappable_node(&node);
1053 	} else {
1054 		i915_gem_object_ggtt_unpin(obj);
1055 	}
1056 out:
1057 	return ret;
1058 }
1059 
1060 /* Per-page copy function for the shmem pwrite fastpath.
1061  * Flushes invalid cachelines before writing to the target if
1062  * needs_clflush_before is set and flushes out any written cachelines after
1063  * writing if needs_clflush is set. */
1064 static int
1065 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
1066 		  char __user *user_data,
1067 		  bool page_do_bit17_swizzling,
1068 		  bool needs_clflush_before,
1069 		  bool needs_clflush_after)
1070 {
1071 	char *vaddr;
1072 	int ret;
1073 
1074 	if (unlikely(page_do_bit17_swizzling))
1075 		return -EINVAL;
1076 
1077 	vaddr = kmap_atomic(page);
1078 	if (needs_clflush_before)
1079 		drm_clflush_virt_range(vaddr + shmem_page_offset,
1080 				       page_length);
1081 	ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
1082 					user_data, page_length);
1083 	if (needs_clflush_after)
1084 		drm_clflush_virt_range(vaddr + shmem_page_offset,
1085 				       page_length);
1086 	kunmap_atomic(vaddr);
1087 
1088 	return ret ? -EFAULT : 0;
1089 }
1090 
1091 /* Only difference to the fast-path function is that this can handle bit17
1092  * and uses non-atomic copy and kmap functions. */
1093 static int
1094 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
1095 		  char __user *user_data,
1096 		  bool page_do_bit17_swizzling,
1097 		  bool needs_clflush_before,
1098 		  bool needs_clflush_after)
1099 {
1100 	char *vaddr;
1101 	int ret;
1102 
1103 	vaddr = kmap(page);
1104 	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
1105 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
1106 					     page_length,
1107 					     page_do_bit17_swizzling);
1108 	if (page_do_bit17_swizzling)
1109 		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
1110 						user_data,
1111 						page_length);
1112 	else
1113 		ret = __copy_from_user(vaddr + shmem_page_offset,
1114 				       user_data,
1115 				       page_length);
1116 	if (needs_clflush_after)
1117 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
1118 					     page_length,
1119 					     page_do_bit17_swizzling);
1120 	kunmap(page);
1121 
1122 	return ret ? -EFAULT : 0;
1123 }
1124 
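/* pwrite via the shmem backing pages, mirroring shmem_pread: a per-page
 * atomic fast path with a faulting slow path, plus clflushes before
 * partially overwritten cachelines and after writes that the GPU will
 * read without snooping. */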
1125 static int
1126 i915_gem_shmem_pwrite(struct drm_device *dev,
1127 		      struct drm_i915_gem_object *obj,
1128 		      struct drm_i915_gem_pwrite *args,
1129 		      struct drm_file *file)
1130 {
1131 	ssize_t remain;
1132 	loff_t offset;
1133 	char __user *user_data;
1134 	int shmem_page_offset, page_length, ret = 0;
1135 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
1136 	int hit_slowpath = 0;
1137 	int needs_clflush_after = 0;
1138 	int needs_clflush_before = 0;
1139 	struct sg_page_iter sg_iter;
1140 
1141 	user_data = u64_to_user_ptr(args->data_ptr);
1142 	remain = args->size;
1143 
1144 	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
1145 
1146 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1147 		/* If we're not in the cpu write domain, set ourselves into the gtt
1148 		 * write domain and manually flush cachelines (if required). This
1149 		 * optimizes for the case when the gpu will use the data
1150 		 * right away and we therefore have to clflush anyway. */
1151 		needs_clflush_after = cpu_write_needs_clflush(obj);
1152 		ret = i915_gem_object_wait_rendering(obj, false);
1153 		if (ret)
1154 			return ret;
1155 	}
1156 	/* Same trick applies to invalidate partially written cachelines read
1157 	 * before writing. */
1158 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
1159 		needs_clflush_before =
1160 			!cpu_cache_is_coherent(dev, obj->cache_level);
1161 
1162 	ret = i915_gem_object_get_pages(obj);
1163 	if (ret)
1164 		return ret;
1165 
1166 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1167 
1168 	i915_gem_object_pin_pages(obj);
1169 
1170 	offset = args->offset;
1171 	obj->dirty = 1;
1172 
1173 	VM_OBJECT_LOCK(obj->base.filp);
1174 	vm_object_pip_add(obj->base.filp, 1);
1175 
1176 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
1177 			 offset >> PAGE_SHIFT) {
1178 		struct page *page = sg_page_iter_page(&sg_iter);
1179 		int partial_cacheline_write;
1180 
1181 		if (remain <= 0)
1182 			break;
1183 
1184 		/* Operation in this page
1185 		 *
1186 		 * shmem_page_offset = offset within page in shmem file
1187 		 * page_length = bytes to copy for this page
1188 		 */
1189 		shmem_page_offset = offset_in_page(offset);
1190 
1191 		page_length = remain;
1192 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
1193 			page_length = PAGE_SIZE - shmem_page_offset;
1194 
1195 		/* If we don't overwrite a cacheline completely we need to be
1196 		 * careful to have up-to-date data by first clflushing. Don't
1197 		 * overcomplicate things and flush the entire range. */
1198 		partial_cacheline_write = needs_clflush_before &&
1199 			((shmem_page_offset | page_length)
1200 				& (cpu_clflush_line_size - 1));
1201 
1202 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
1203 			(page_to_phys(page) & (1 << 17)) != 0;
1204 
1205 		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1206 					user_data, page_do_bit17_swizzling,
1207 					partial_cacheline_write,
1208 					needs_clflush_after);
1209 		if (ret == 0)
1210 			goto next_page;
1211 
1212 		hit_slowpath = 1;
1213 		mutex_unlock(&dev->struct_mutex);
1214 		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1215 					user_data, page_do_bit17_swizzling,
1216 					partial_cacheline_write,
1217 					needs_clflush_after);
1218 
1219 		mutex_lock(&dev->struct_mutex);
1220 
1221 		if (ret)
1222 			goto out;
1223 
1224 next_page:
1225 		remain -= page_length;
1226 		user_data += page_length;
1227 		offset += page_length;
1228 	}
1229 	vm_object_pip_wakeup(obj->base.filp);
1230 	VM_OBJECT_UNLOCK(obj->base.filp);
1231 
1232 out:
1233 	i915_gem_object_unpin_pages(obj);
1234 
1235 	if (hit_slowpath) {
1236 		/*
1237 		 * Fixup: Flush cpu caches in case we didn't flush the dirty
1238 		 * cachelines in-line while writing and the object moved
1239 		 * out of the cpu write domain while we've dropped the lock.
1240 		 */
1241 		if (!needs_clflush_after &&
1242 		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1243 			if (i915_gem_clflush_object(obj, obj->pin_display))
1244 				needs_clflush_after = true;
1245 		}
1246 	}
1247 
1248 	if (needs_clflush_after)
1249 		i915_gem_chipset_flush(to_i915(dev));
1250 	else
1251 		obj->cache_dirty = true;
1252 
1253 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
1254 	return ret;
1255 }
1256 
1257 /**
1258  * Writes data to the object referenced by handle.
1259  * @dev: drm device
1260  * @data: ioctl data blob
1261  * @file: drm file
1262  *
1263  * On error, the contents of the buffer that were to be modified are undefined.
1264  */
1265 int
1266 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1267 		      struct drm_file *file)
1268 {
1269 	struct drm_i915_private *dev_priv = to_i915(dev);
1270 	struct drm_i915_gem_pwrite *args = data;
1271 	struct drm_i915_gem_object *obj;
1272 	int ret;
1273 
1274 	if (args->size == 0)
1275 		return 0;
1276 
1277 #if 0
1278 	if (!access_ok(VERIFY_READ,
1279 		       u64_to_user_ptr(args->data_ptr),
1280 		       args->size))
1281 		return -EFAULT;
1282 #endif
1283 
1284 	if (likely(!i915.prefault_disable)) {
1285 		ret = fault_in_multipages_readable(u64_to_user_ptr(args->data_ptr),
1286 						   args->size);
1287 		if (ret)
1288 			return -EFAULT;
1289 	}
1290 
1291 	intel_runtime_pm_get(dev_priv);
1292 
1293 	ret = i915_mutex_lock_interruptible(dev);
1294 	if (ret)
1295 		goto put_rpm;
1296 
1297 	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
1298 	if (&obj->base == NULL) {
1299 		ret = -ENOENT;
1300 		goto unlock;
1301 	}
1302 
1303 	/* Bounds check destination. */
1304 	if (args->offset > obj->base.size ||
1305 	    args->size > obj->base.size - args->offset) {
1306 		ret = -EINVAL;
1307 		goto out;
1308 	}
1309 
1310 	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1311 
1312 	ret = -EFAULT;
1313 	/* We can only do the GTT pwrite on untiled buffers, as otherwise
1314 	 * it would end up going through the fenced access, and we'll get
1315 	 * different detiling behavior between reading and writing.
1316 	 * pread/pwrite currently are reading and writing from the CPU
1317 	 * perspective, requiring manual detiling by the client.
1318 	 */
1319 	if (!i915_gem_object_has_struct_page(obj) ||
1320 	    cpu_write_needs_clflush(obj)) {
1321 		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
1322 		/* Note that the gtt paths might fail with non-page-backed user
1323 		 * pointers (e.g. gtt mappings when moving data between
1324 		 * textures). Fallback to the shmem path in that case. */
1325 	}
1326 
1327 	if (ret == -EFAULT || ret == -ENOSPC) {
1328 		if (obj->phys_handle)
1329 			ret = i915_gem_phys_pwrite(obj, args, file);
1330 		else if (i915_gem_object_has_struct_page(obj))
1331 			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
1332 		else
1333 			ret = -ENODEV;
1334 	}
1335 
1336 out:
1337 	drm_gem_object_unreference(&obj->base);
1338 unlock:
1339 	mutex_unlock(&dev->struct_mutex);
1340 put_rpm:
1341 	intel_runtime_pm_put(dev_priv);
1342 
1343 	return ret;
1344 }
1345 
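/* Translate the sampled reset counter into an errno: -EIO once the GPU is
 * terminally wedged, -EAGAIN (or -EIO for non-interruptible callers)
 * while a reset is still in progress, 0 otherwise. */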
1346 static int
1347 i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
1348 {
1349 	if (__i915_terminally_wedged(reset_counter))
1350 		return -EIO;
1351 
1352 	if (__i915_reset_in_progress(reset_counter)) {
1353 		/* Non-interruptible callers can't handle -EAGAIN, hence return
1354 		 * -EIO unconditionally for these. */
1355 		if (!interruptible)
1356 			return -EIO;
1357 
1358 		return -EAGAIN;
1359 	}
1360 
1361 	return 0;
1362 }
1363 
1364 static unsigned long local_clock_us(unsigned *cpu)
1365 {
1366 	unsigned long t;
1367 
1368 	/* Cheaply and approximately convert from nanoseconds to microseconds.
1369 	 * The result and subsequent calculations are also defined in the same
1370 	 * approximate microseconds units. The principal source of timing
1371 	 * error here is from the simple truncation.
1372 	 *
1373 	 * Note that local_clock() is only defined with respect to the current CPU;
1374 	 * the comparisons are no longer valid if we switch CPUs. Instead of
1375 	 * blocking preemption for the entire busywait, we can detect the CPU
1376 	 * switch and use that as indicator of system load and a reason to
1377 	 * stop busywaiting, see busywait_stop().
1378 	 */
1379 	*cpu = get_cpu();
1380 	t = local_clock() >> 10;
1381 	put_cpu();
1382 
1383 	return t;
1384 }
1385 
1386 static bool busywait_stop(unsigned long timeout, unsigned cpu)
1387 {
1388 	unsigned this_cpu;
1389 
1390 	if (time_after(local_clock_us(&this_cpu), timeout))
1391 		return true;
1392 
1393 	return this_cpu != cpu;
1394 }
1395 
1396 bool __i915_spin_request(const struct drm_i915_gem_request *req,
1397 			 int state, unsigned long timeout_us)
1398 {
1399 	unsigned cpu;
1400 
1401 	/* When waiting for high frequency requests, e.g. during synchronous
1402 	 * rendering split between the CPU and GPU, the finite amount of time
1403 	 * required to set up the irq and wait upon it limits the response
1404 	 * rate. By busywaiting on the request completion for a short while we
1405 	 * can service the high frequency waits as quickly as possible. However,
1406 	 * if it is a slow request, we want to sleep as quickly as possible.
1407 	 * The tradeoff between waiting and sleeping is roughly the time it
1408 	 * takes to sleep on a request, on the order of a microsecond.
1409 	 */
1410 
1411 	timeout_us += local_clock_us(&cpu);
1412 	do {
1413 		if (i915_gem_request_completed(req))
1414 			return true;
1415 
1416 		if (signal_pending_state(state, current))
1417 			break;
1418 
1419 		if (busywait_stop(timeout_us, cpu))
1420 			break;
1421 
1422 		cpu_relax();
1423 	} while (!need_resched());
1424 
1425 	return false;
1426 }
1427 
1428 /**
1429  * __i915_wait_request - wait until execution of request has finished
1430  * @req: the request to wait upon
1431  * @interruptible: do an interruptible wait (normally yes)
1432  * @timeout: in - how long to wait (NULL forever); out - how much time remaining
1433  * @rps: RPS client
1434  *
1435  * Note: It is of utmost importance that the passed in seqno and reset_counter
1436  * values have been read by the caller in an smp safe manner. Where read-side
1437  * locks are involved, it is sufficient to read the reset_counter before
1438  * unlocking the lock that protects the seqno. For lockless tricks, the
1439  * reset_counter _must_ be read before, and an appropriate smp_rmb must be
1440  * inserted.
1441  *
1442  * Returns 0 if the request was found within the allotted time. Else returns the
1443  * errno with remaining time filled in timeout argument.
1444  */
1445 int __i915_wait_request(struct drm_i915_gem_request *req,
1446 			bool interruptible,
1447 			s64 *timeout,
1448 			struct intel_rps_client *rps)
1449 {
1450 	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
1451 	DEFINE_WAIT(reset);
1452 	struct intel_wait wait;
1453 	unsigned long timeout_remain;
1454 	s64 before = 0; /* Only to silence a compiler warning. */
1455 	int ret = 0;
1456 
1457 	might_sleep();
1458 
1459 	if (list_empty(&req->list))
1460 		return 0;
1461 
1462 	if (i915_gem_request_completed(req))
1463 		return 0;
1464 
1465 	timeout_remain = MAX_SCHEDULE_TIMEOUT;
1466 	if (timeout) {
1467 		if (WARN_ON(*timeout < 0))
1468 			return -EINVAL;
1469 
1470 		if (*timeout == 0)
1471 			return -ETIME;
1472 
1473 		timeout_remain = nsecs_to_jiffies_timeout(*timeout);
1474 
1475 		/*
1476 		 * Record current time in case interrupted by signal, or wedged.
1477 		 */
1478 		before = ktime_get_raw_ns();
1479 	}
1480 
1481 	trace_i915_gem_request_wait_begin(req);
1482 
1483 	/* This client is about to stall waiting for the GPU. In many cases
1484 	 * this is undesirable and limits the throughput of the system, as
1485 	 * many clients cannot continue processing user input/output whilst
1486 	 * blocked. RPS autotuning may take tens of milliseconds to respond
1487 	 * to the GPU load and thus incurs additional latency for the client.
1488 	 * We can circumvent that by promoting the GPU frequency to maximum
1489 	 * before we wait. This makes the GPU throttle up much more quickly
1490 	 * (good for benchmarks and user experience, e.g. window animations),
1491 	 * but at a cost of spending more power processing the workload
1492 	 * (bad for battery). Not all clients even want their results
1493 	 * immediately and for them we should just let the GPU select its own
1494 	 * frequency to maximise efficiency. To prevent a single client from
1495 	 * forcing the clocks too high for the whole system, we only allow
1496 	 * each client to waitboost once in a busy period.
1497 	 */
1498 	if (INTEL_INFO(req->i915)->gen >= 6)
1499 		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
1500 
1501 	/* Optimistic spin for the next ~jiffie before touching IRQs */
1502 	if (i915_spin_request(req, state, 5))
1503 		goto complete;
1504 
1505 	set_current_state(state);
1506 	add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
1507 
1508 	intel_wait_init(&wait, req->seqno);
1509 	if (intel_engine_add_wait(req->engine, &wait))
1510 		/* In order to check that we haven't missed the interrupt
1511 		 * as we enabled it, we need to kick ourselves to do a
1512 		 * coherent check on the seqno before we sleep.
1513 		 */
1514 		goto wakeup;
1515 
1516 	for (;;) {
1517 		if (signal_pending_state(state, current)) {
1518 			ret = -ERESTARTSYS;
1519 			break;
1520 		}
1521 
1522 		timeout_remain = io_schedule_timeout(timeout_remain);
1523 		if (timeout_remain == 0) {
1524 			ret = -ETIME;
1525 			break;
1526 		}
1527 
1528 		if (intel_wait_complete(&wait))
1529 			break;
1530 
1531 		set_current_state(state);
1532 
1533 wakeup:
1534 		/* Carefully check if the request is complete, giving time
1535 		 * for the seqno to be visible following the interrupt.
1536 		 * We also have to check in case we are kicked by the GPU
1537 		 * reset in order to drop the struct_mutex.
1538 		 */
1539 		if (__i915_request_irq_complete(req))
1540 			break;
1541 
1542 		/* Only spin if we know the GPU is processing this request */
1543 		if (i915_spin_request(req, state, 2))
1544 			break;
1545 	}
1546 	remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
1547 
1548 	intel_engine_remove_wait(req->engine, &wait);
1549 	__set_current_state(TASK_RUNNING);
1550 complete:
1551 	trace_i915_gem_request_wait_end(req);
1552 
1553 	if (timeout) {
1554 		s64 tres = *timeout - (ktime_get_raw_ns() - before);
1555 
1556 		*timeout = tres < 0 ? 0 : tres;
1557 
1558 		/*
1559 		 * Apparently ktime isn't accurate enough and occasionally has a
1560 		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
1561 		 * things up to make the test happy. We allow up to 1 jiffy.
1562 		 *
1563 		 * This is a regression from the timespec->ktime conversion.
1564 		 */
1565 		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
1566 			*timeout = 0;
1567 	}
1568 
1569 	if (rps && req->seqno == req->engine->last_submitted_seqno) {
1570 		/* The GPU is now idle and this client has stalled.
1571 		 * Since no other client has submitted a request in the
1572 		 * meantime, assume that this client is the only one
1573 		 * supplying work to the GPU but is unable to keep that
1574 		 * work supplied because it is waiting. Since the GPU is
1575 		 * then never kept fully busy, RPS autoclocking will
1576 		 * keep the clocks relatively low, causing further delays.
1577 		 * Compensate by giving the synchronous client credit for
1578 		 * a waitboost next time.
1579 		 */
1580 		lockmgr(&req->i915->rps.client_lock, LK_EXCLUSIVE);
1581 		list_del_init(&rps->link);
1582 		lockmgr(&req->i915->rps.client_lock, LK_RELEASE);
1583 	}
1584 
1585 	return ret;
1586 }
1587 
1588 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
1589 				   struct drm_file *file)
1590 {
1591 	struct drm_i915_file_private *file_priv;
1592 
1593 	WARN_ON(!req || !file || req->file_priv);
1594 
1595 	if (!req || !file)
1596 		return -EINVAL;
1597 
1598 	if (req->file_priv)
1599 		return -EINVAL;
1600 
1601 	file_priv = file->driver_priv;
1602 
1603 	spin_lock(&file_priv->mm.lock);
1604 	req->file_priv = file_priv;
1605 	list_add_tail(&req->client_list, &file_priv->mm.request_list);
1606 	spin_unlock(&file_priv->mm.lock);
1607 
1608 	req->pid = curproc->p_pid;
1609 
1610 	return 0;
1611 }
1612 
1613 static inline void
1614 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1615 {
1616 	struct drm_i915_file_private *file_priv = request->file_priv;
1617 
1618 	if (!file_priv)
1619 		return;
1620 
1621 	spin_lock(&file_priv->mm.lock);
1622 	list_del(&request->client_list);
1623 	request->file_priv = NULL;
1624 	spin_unlock(&file_priv->mm.lock);
1625 
1626 #if 0
1627 	put_pid(request->pid);
1628 	request->pid = NULL;
1629 #endif
1630 }
1631 
1632 static void i915_gem_request_retire(struct drm_i915_gem_request *request)
1633 {
1634 	trace_i915_gem_request_retire(request);
1635 
1636 	/* We know the GPU must have read the request to have
1637 	 * sent us the seqno + interrupt, so use the position
1638 	 * of tail of the request to update the last known position
1639 	 * of the GPU head.
1640 	 *
1641 	 * Note this requires that we are always called in request
1642 	 * completion order.
1643 	 */
1644 	request->ringbuf->last_retired_head = request->postfix;
1645 
1646 	list_del_init(&request->list);
1647 	i915_gem_request_remove_from_client(request);
1648 
1649 	if (request->previous_context) {
1650 		if (i915.enable_execlists)
1651 			intel_lr_context_unpin(request->previous_context,
1652 					       request->engine);
1653 	}
1654 
1655 	i915_gem_context_unreference(request->ctx);
1656 	i915_gem_request_unreference(request);
1657 }
1658 
1659 static void
1660 __i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
1661 {
1662 	struct intel_engine_cs *engine = req->engine;
1663 	struct drm_i915_gem_request *tmp;
1664 
1665 	lockdep_assert_held(&engine->i915->drm.struct_mutex);
1666 
1667 	if (list_empty(&req->list))
1668 		return;
1669 
1670 	do {
1671 		tmp = list_first_entry(&engine->request_list,
1672 				       typeof(*tmp), list);
1673 
1674 		i915_gem_request_retire(tmp);
1675 	} while (tmp != req);
1676 
1677 	WARN_ON(i915_verify_lists(engine->dev));
1678 }
1679 
1680 /**
1681  * Waits for a request to be signaled, and cleans up the
1682  * request and object lists appropriately for that event.
1683  * @req: request to wait on
1684  */
1685 int
1686 i915_wait_request(struct drm_i915_gem_request *req)
1687 {
1688 	struct drm_i915_private *dev_priv = req->i915;
1689 	bool interruptible;
1690 	int ret;
1691 
1692 	interruptible = dev_priv->mm.interruptible;
1693 
1694 	BUG_ON(!mutex_is_locked(&dev_priv->drm.struct_mutex));
1695 
1696 	ret = __i915_wait_request(req, interruptible, NULL, NULL);
1697 	if (ret)
1698 		return ret;
1699 
1700 	/* If the GPU hung, we want to keep the requests to find the guilty. */
1701 	if (!i915_reset_in_progress(&dev_priv->gpu_error))
1702 		__i915_gem_request_retire__upto(req);
1703 
1704 	return 0;
1705 }
1706 
1707 /**
1708  * Ensures that all rendering to the object has completed and the object is
1709  * safe to unbind from the GTT or access from the CPU.
1710  * @obj: i915 gem object
1711  * @readonly: waiting for read access or write
1712  */
1713 int
1714 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1715 			       bool readonly)
1716 {
1717 	int ret, i;
1718 
1719 	if (!obj->active)
1720 		return 0;
1721 
1722 	if (readonly) {
1723 		if (obj->last_write_req != NULL) {
1724 			ret = i915_wait_request(obj->last_write_req);
1725 			if (ret)
1726 				return ret;
1727 
1728 			i = obj->last_write_req->engine->id;
1729 			if (obj->last_read_req[i] == obj->last_write_req)
1730 				i915_gem_object_retire__read(obj, i);
1731 			else
1732 				i915_gem_object_retire__write(obj);
1733 		}
1734 	} else {
1735 		for (i = 0; i < I915_NUM_ENGINES; i++) {
1736 			if (obj->last_read_req[i] == NULL)
1737 				continue;
1738 
1739 			ret = i915_wait_request(obj->last_read_req[i]);
1740 			if (ret)
1741 				return ret;
1742 
1743 			i915_gem_object_retire__read(obj, i);
1744 		}
1745 		GEM_BUG_ON(obj->active);
1746 	}
1747 
1748 	return 0;
1749 }
1750 
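/* Retire the object's bookkeeping for a request known to have completed,
 * and, unless a GPU reset is pending, retire every request up to and
 * including it. */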
1751 static void
1752 i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
1753 			       struct drm_i915_gem_request *req)
1754 {
1755 	int ring = req->engine->id;
1756 
1757 	if (obj->last_read_req[ring] == req)
1758 		i915_gem_object_retire__read(obj, ring);
1759 	else if (obj->last_write_req == req)
1760 		i915_gem_object_retire__write(obj);
1761 
1762 	if (!i915_reset_in_progress(&req->i915->gpu_error))
1763 		__i915_gem_request_retire__upto(req);
1764 }
1765 
1766 /* A nonblocking variant of the above wait. This is a highly dangerous routine
1767  * as the object state may change during this call.
1768  */
1769 static __must_check int
1770 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
1771 					    struct intel_rps_client *rps,
1772 					    bool readonly)
1773 {
1774 	struct drm_device *dev = obj->base.dev;
1775 	struct drm_i915_private *dev_priv = to_i915(dev);
1776 	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
1777 	int ret, i, n = 0;
1778 
1779 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1780 	BUG_ON(!dev_priv->mm.interruptible);
1781 
1782 	if (!obj->active)
1783 		return 0;
1784 
1785 	if (readonly) {
1786 		struct drm_i915_gem_request *req;
1787 
1788 		req = obj->last_write_req;
1789 		if (req == NULL)
1790 			return 0;
1791 
1792 		requests[n++] = i915_gem_request_reference(req);
1793 	} else {
1794 		for (i = 0; i < I915_NUM_ENGINES; i++) {
1795 			struct drm_i915_gem_request *req;
1796 
1797 			req = obj->last_read_req[i];
1798 			if (req == NULL)
1799 				continue;
1800 
1801 			requests[n++] = i915_gem_request_reference(req);
1802 		}
1803 	}
1804 
1805 	mutex_unlock(&dev->struct_mutex);
1806 	ret = 0;
1807 	for (i = 0; ret == 0 && i < n; i++)
1808 		ret = __i915_wait_request(requests[i], true, NULL, rps);
1809 	mutex_lock(&dev->struct_mutex);
1810 
1811 	for (i = 0; i < n; i++) {
1812 		if (ret == 0)
1813 			i915_gem_object_retire_request(obj, requests[i]);
1814 		i915_gem_request_unreference(requests[i]);
1815 	}
1816 
1817 	return ret;
1818 }
1819 
1820 static struct intel_rps_client *to_rps_client(struct drm_file *file)
1821 {
1822 	struct drm_i915_file_private *fpriv = file->driver_priv;
1823 	return &fpriv->rps;
1824 }
1825 
1826 static enum fb_op_origin
1827 write_origin(struct drm_i915_gem_object *obj, unsigned domain)
1828 {
1829 	return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ?
1830 	       ORIGIN_GTT : ORIGIN_CPU;
1831 }
1832 
1833 /**
1834  * Called when user space prepares to use an object with the CPU, either
1835  * through the mmap ioctl's mapping or a GTT mapping.
1836  * @dev: drm device
1837  * @data: ioctl data blob
1838  * @file: drm file
1839  */
1840 int
1841 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1842 			  struct drm_file *file)
1843 {
1844 	struct drm_i915_gem_set_domain *args = data;
1845 	struct drm_i915_gem_object *obj;
1846 	uint32_t read_domains = args->read_domains;
1847 	uint32_t write_domain = args->write_domain;
1848 	int ret;
1849 
1850 	/* Only handle setting domains to types used by the CPU. */
1851 	if (write_domain & I915_GEM_GPU_DOMAINS)
1852 		return -EINVAL;
1853 
1854 	if (read_domains & I915_GEM_GPU_DOMAINS)
1855 		return -EINVAL;
1856 
1857 	/* Having something in the write domain implies it's in the read
1858 	 * domain, and only that read domain.  Enforce that in the request.
1859 	 */
1860 	if (write_domain != 0 && read_domains != write_domain)
1861 		return -EINVAL;
1862 
1863 	ret = i915_mutex_lock_interruptible(dev);
1864 	if (ret)
1865 		return ret;
1866 
1867 	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
1868 	if (&obj->base == NULL) {
1869 		ret = -ENOENT;
1870 		goto unlock;
1871 	}
1872 
1873 	/* Try to flush the object off the GPU without holding the lock.
1874 	 * We will repeat the flush holding the lock in the normal manner
1875 	 * to catch cases where we are gazumped.
1876 	 */
1877 	ret = i915_gem_object_wait_rendering__nonblocking(obj,
1878 							  to_rps_client(file),
1879 							  !write_domain);
1880 	if (ret)
1881 		goto unref;
1882 
1883 	if (read_domains & I915_GEM_DOMAIN_GTT)
1884 		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1885 	else
1886 		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1887 
1888 	if (write_domain != 0)
1889 		intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
1890 
1891 unref:
1892 	drm_gem_object_unreference(&obj->base);
1893 unlock:
1894 	mutex_unlock(&dev->struct_mutex);
1895 	return ret;
1896 }
1897 
1898 /**
1899  * Called when user space has done writes to this buffer
1900  * @dev: drm device
1901  * @data: ioctl data blob
1902  * @file: drm file
1903  */
1904 int
1905 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1906 			 struct drm_file *file)
1907 {
1908 	struct drm_i915_gem_sw_finish *args = data;
1909 	struct drm_i915_gem_object *obj;
1910 	int ret = 0;
1911 
1912 	ret = i915_mutex_lock_interruptible(dev);
1913 	if (ret)
1914 		return ret;
1915 
1916 	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
1917 	if (&obj->base == NULL) {
1918 		ret = -ENOENT;
1919 		goto unlock;
1920 	}
1921 
1922 	/* Pinned buffers may be scanout, so flush the cache */
1923 	if (obj->pin_display)
1924 		i915_gem_object_flush_cpu_write_domain(obj);
1925 
1926 	drm_gem_object_unreference(&obj->base);
1927 unlock:
1928 	mutex_unlock(&dev->struct_mutex);
1929 	return ret;
1930 }
1931 
1932 /**
1933  * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1934  *			 it is mapped to.
1935  * @dev: drm device
1936  * @data: ioctl data blob
1937  * @file: drm file
1938  *
1939  * While the mapping holds a reference on the contents of the object, it doesn't
1940  * imply a ref on the object itself.
1941  *
1942  * IMPORTANT:
1943  *
1944  * DRM driver writers who look at this function as an example for how to do GEM
1945  * mmap support, please don't implement mmap support like this. The modern way
1946  * to implement DRM mmap support is with an mmap offset ioctl (like
1947  * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1948  * That way debug tooling like valgrind will understand what's going on; hiding
1949  * the mmap call in a driver-private ioctl will break that. The i915 driver only
1950  * does CPU mmaps this way because we didn't know better.
1951  */
1952 int
1953 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1954 		    struct drm_file *file)
1955 {
1956 	struct drm_i915_gem_mmap *args = data;
1957 	struct drm_gem_object *obj;
1958 	unsigned long addr;
1959 
1960 	struct proc *p = curproc;
1961 	vm_map_t map = &p->p_vmspace->vm_map;
1962 	vm_size_t size;
1963 	int error = 0, rv;
1964 
1965 	if (args->flags & ~(I915_MMAP_WC))
1966 		return -EINVAL;
1967 
1968 #if 0
1969 	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1970 		return -ENODEV;
1971 #endif
1972 
1973 	obj = drm_gem_object_lookup(file, args->handle);
1974 	if (obj == NULL)
1975 		return -ENOENT;
1976 
1977 	if (args->size == 0)
1978 		goto out;
1979 
1980 	size = round_page(args->size);
1981 	if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
1982 		error = -ENOMEM;
1983 		goto out;
1984 	}
1985 
1986 	/* prime objects have no backing filp to GEM mmap
1987 	 * pages from.
1988 	 */
1989 	if (!obj->filp) {
1990 		drm_gem_object_unreference_unlocked(obj);
1991 		return -EINVAL;
1992 	}
1993 
1994 	/*
1995 	 * Call hint to ensure that NULL is not returned as a valid address
1996 	 * and to reduce vm_map traversals. XXX causes instability, use a
1997 	 * fixed low address as the start point instead to avoid the NULL
1998 	 * return issue.
1999 	 */
2000 	addr = PAGE_SIZE;
2001 
2002 	/*
2003 	 * Use 256KB alignment.  It is unclear why this matters for a
2004 	 * virtual address, but it appears to fix a number of application/X
2005 	 * crashes, and KMS console switching is much faster.
2006 	 */
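	/*
	 * Take an extra reference on the backing VM object (under its
	 * token) so it stays alive for the lifetime of the new user
	 * mapping; the reference is dropped again if vm_map_find() fails.
	 */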
2007 	vm_object_hold(obj->filp);
2008 	vm_object_reference_locked(obj->filp);
2009 	vm_object_drop(obj->filp);
2010 
2011 /* Something goes wrong here: fails to mmap 4096 */
2012 	rv = vm_map_find(map, obj->filp, NULL,
2013 			 args->offset, &addr, args->size,
2014 			 256 * 1024, /* align */
2015 			 TRUE, /* fitit */
2016 			 VM_MAPTYPE_NORMAL, VM_SUBSYS_DRM_GEM,
2017 			 VM_PROT_READ | VM_PROT_WRITE, /* prot */
2018 			 VM_PROT_READ | VM_PROT_WRITE, /* max */
2019 			 MAP_SHARED /* cow */);
2020 	if (rv != KERN_SUCCESS) {
2021 		vm_object_deallocate(obj->filp);
2022 		error = -vm_mmap_to_errno(rv);
2023 	} else {
2024 		args->addr_ptr = (uint64_t)addr;
2025 	}
2026 out:
2027 	drm_gem_object_unreference_unlocked(obj);
2028 	return (error);
2029 }
2030 
2031 /**
2032  * i915_gem_fault - fault a page into the GTT
2033  *
2034  * vm_obj is locked on entry and expected to be locked on return.
2035  *
2036  * The vm_pager has placemarked the object with an anonymous memory page
2037  * which we must replace atomically to avoid races against concurrent faults
2038  * on the same page.  XXX we currently are unable to do this atomically.
2039  *
2040  * If we are to return an error we should not touch the anonymous page,
2041  * the caller will deallocate it.
2042  *
2043  * XXX Most GEM calls appear to be interruptible, but we can't hard loop
2044  * in that case.  Release all resources and wait 1 tick before retrying.
2045  * This is a huge problem which needs to be fixed by getting rid of most
2046  * of the interruptibility.  The Linux code does not retry but does appear
2047  * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level
2048  * to be able to retry.
2049  *
2050  * --
2051  * @vma: VMA in question
2052  * @vmf: fault info
2053  *
2054  * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
2055  * from userspace.  The fault handler takes care of binding the object to
2056  * the GTT (if needed), allocating and programming a fence register (again,
2057  * only if needed based on whether the old reg is still valid or the object
2058  * is tiled) and inserting a new PTE into the faulting process.
2059  *
2060  * Note that the faulting process may involve evicting existing objects
2061  * from the GTT and/or fence registers to make room.  So performance may
2062  * suffer if the GTT working set is large or there are few fence registers
2063  * left.
2064  *
2065  * vm_obj is locked on entry and expected to be locked on return.  The VM
2066  * pager has placed an anonymous memory page at (obj,offset) which we have
2067  * to replace.
2068  */
2069 int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres)
2070 {
2071 	struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle);
2072 	struct drm_device *dev = obj->base.dev;
2073 	struct drm_i915_private *dev_priv = to_i915(dev);
2074 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
2075 	struct i915_ggtt_view view = i915_ggtt_view_normal;
2076 	unsigned long page_offset;
2077 	vm_page_t m;
2078 	int ret = 0;
2079 	bool write = !!(prot & VM_PROT_WRITE);
2080 
2081 	intel_runtime_pm_get(dev_priv);
2082 
2083 	/* We don't use vmf->pgoff since that has the fake offset */
2084 	page_offset = (unsigned long)offset;
2085 
2086 	/*
2087 	 * vm_fault() has supplied us with a busied page placeholding
2088 	 * the operation.  This presents a lock order reversal issue
2089 	 * against i915_gem_release_mmap() for our device mutex.
2090 	 *
2091 	 * Deal with the problem by getting rid of the placeholder now,
2092 	 * and then dealing with the potential for a new placeholder when
2093 	 * we try to insert later.
2094 	 */
2095 	if (*mres != NULL) {
2096 		m = *mres;
2097 		*mres = NULL;
2098 		if ((m->busy_count & PBUSY_LOCKED) == 0)
2099 			kprintf("i915_gem_fault: Page was not busy\n");
2100 		else
2101 			vm_page_remove(m);
2102 		vm_page_free(m);
2103 	}
2104 
2105 	m = NULL;
2106 
2107 retry:
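	/*
	 * Everything from here on may be repeated: on -EINTR, -ERESTARTSYS
	 * or a hung GPU the error path below drops the VM object lock for a
	 * tick and then jumps back here to try again.
	 */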
2108 	ret = i915_mutex_lock_interruptible(dev);
2109 	if (ret)
2110 		goto out;
2111 
2112 	trace_i915_gem_object_fault(obj, page_offset, true, write);
2113 
2114 	/* Try to flush the object off the GPU first without holding the lock.
2115 	 * Upon reacquiring the lock, we will perform our sanity checks and then
2116 	 * repeat the flush holding the lock in the normal manner to catch cases
2117 	 * where we are gazumped.
2118 	 */
2119 	ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
2120 	if (ret)
2121 		goto unlock;
2122 
2123 	/* Access to snoopable pages through the GTT is incoherent. */
2124 	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
2125 		ret = -EFAULT;
2126 		goto unlock;
2127 	}
2128 
2129 	/* Use a partial view if the object is bigger than the aperture. */
2130 	if (obj->base.size >= ggtt->mappable_end &&
2131 	    obj->tiling_mode == I915_TILING_NONE) {
2132 #if 0
2133 		static const unsigned int chunk_size = 256; // 1 MiB
2134 
2135 		memset(&view, 0, sizeof(view));
2136 		view.type = I915_GGTT_VIEW_PARTIAL;
2137 		view.params.partial.offset = rounddown(page_offset, chunk_size);
2138 		view.params.partial.size =
2139 			min_t(unsigned int,
2140 			      chunk_size,
2141 			      (vma->vm_end - vma->vm_start)/PAGE_SIZE -
2142 			      view.params.partial.offset);
2143 #endif
2144 	}
2145 
2146 	/* Now pin it into the GTT if needed */
2147 	ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
2148 	if (ret)
2149 		goto unlock;
2150 
2151 	ret = i915_gem_object_set_to_gtt_domain(obj, write);
2152 	if (ret)
2153 		goto unpin;
2154 
2155 	ret = i915_gem_object_get_fence(obj);
2156 	if (ret)
2157 		goto unpin;
2158 
2159 	/*
2160 	 * START FREEBSD MAGIC
2161 	 *
2162 	 * Add a pip count to avoid destruction and certain other
2163 	 * complex operations (such as collapses?) while unlocked.
2164 	 */
2165 	vm_object_pip_add(vm_obj, 1);
2166 
2167 	ret = 0;
2168 	m = NULL;
2169 
2170 	/*
2171 	 * Since the object lock was dropped, another thread might have
2172 	 * faulted on the same GTT address and instantiated the mapping.
2173 	 * Recheck.
2174 	 */
2175 	m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
2176 	if (m != NULL) {
2177 		/*
2178 		 * Try to busy the page, retry on failure (non-zero ret).
2179 		 */
2180 		if (vm_page_busy_try(m, false)) {
2181 			kprintf("i915_gem_fault: BUSY\n");
2182 			ret = -EINTR;
2183 			goto unlock;
2184 		}
2185 		goto have_page;
2186 	}
2187 	/* END FREEBSD MAGIC */
2188 
2189 	obj->fault_mappable = true;
2190 
2191 	/* Finally, remap it using the new GTT offset */
2192 	m = vm_phys_fictitious_to_vm_page(ggtt->mappable_base +
2193 			i915_gem_obj_ggtt_offset_view(obj, &view) + offset);
2194 	if (m == NULL) {
2195 		ret = -EFAULT;
2196 		goto unpin;
2197 	}
2198 	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m));
2199 	KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));
2200 
2201 	/*
2202 	 * Try to busy the page.  Fails on non-zero return.
2203 	 */
2204 	if (vm_page_busy_try(m, false)) {
2205 		kprintf("i915_gem_fault: BUSY(2)\n");
2206 		ret = -EINTR;
2207 		goto unpin;
2208 	}
2209 	m->valid = VM_PAGE_BITS_ALL;
2210 
2211 #if 1
2212 	/*
2213 	 * This should always work since we already checked via a lookup
2214 	 * above.
2215 	 */
2216 	if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset)) == FALSE) {
2217 		kprintf("i915_gem_fault: page %p,%jd already in object\n",
2218 			vm_obj,
2219 			OFF_TO_IDX(offset));
2220 		vm_page_wakeup(m);
2221 		ret = -EINTR;
2222 		goto unpin;
2223 	}
2224 #else
2225 	/* NOT COMPILED ATM */
2226 	if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
2227 		/* Overriding existing pages in partial view does not cause
2228 		 * us any trouble as TLBs are still valid because the fault
2229 		 * is due to userspace losing part of the mapping or never
2230 		 * having accessed it before (at this partial's range).
2231 		 */
2232 		unsigned long base = vma->vm_start +
2233 				     (view.params.partial.offset << PAGE_SHIFT);
2234 		unsigned int i;
2235 
2236 		for (i = 0; i < view.params.partial.size; i++) {
2237 			ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i);
2238 			if (ret)
2239 				break;
2240 		}
2241 
2242 		obj->fault_mappable = true;
2243 	} else {
2244 		if (!obj->fault_mappable) {
2245 			unsigned long size = min_t(unsigned long,
2246 						   vma->vm_end - vma->vm_start,
2247 						   obj->base.size);
2248 			int i;
2249 
2250 			for (i = 0; i < size >> PAGE_SHIFT; i++) {
2251 				ret = vm_insert_pfn(vma,
2252 						    (unsigned long)vma->vm_start + i * PAGE_SIZE,
2253 						    pfn + i);
2254 				if (ret)
2255 					break;
2256 			}
2257 
2258 			obj->fault_mappable = true;
2259 		} else
2260 			ret = vm_insert_pfn(vma,
2261 					    (unsigned long)vmf->virtual_address,
2262 					    pfn + page_offset);
2263 	}
2264 #endif
2265 
2266 have_page:
2267 	*mres = m;
2268 
2269 	i915_gem_object_ggtt_unpin_view(obj, &view);
2270 	mutex_unlock(&dev->struct_mutex);
2271 	ret = VM_PAGER_OK;
2272 	goto done;
2273 
2274 	/*
2275 	 * ALTERNATIVE ERROR RETURN.
2276 	 *
2277 	 * OBJECT EXPECTED TO BE LOCKED.
2278 	 */
2279 unpin:
2280 	i915_gem_object_ggtt_unpin_view(obj, &view);
2281 unlock:
2282 	mutex_unlock(&dev->struct_mutex);
2283 out:
2284 	switch (ret) {
2285 	case -EIO:
2286 		/*
2287 		 * We eat errors when the gpu is terminally wedged to avoid
2288 		 * userspace unduly crashing (gl has no provisions for mmaps to
2289 		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
2290 		 * and so needs to be reported.
2291 		 */
2292 		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
2293 //			ret = VM_FAULT_SIGBUS;
2294 			break;
2295 		}
2296 	case -EAGAIN:
2297 		/*
2298 		 * EAGAIN means the gpu is hung and we'll wait for the error
2299 		 * handler to reset everything when re-faulting in
2300 		 * i915_mutex_lock_interruptible.
2301 		 */
2302 	case -ERESTARTSYS:
2303 	case -EINTR:
2304 		VM_OBJECT_UNLOCK(vm_obj);
2305 		int dummy;
2306 		tsleep(&dummy, 0, "delay", 1); /* XXX */
2307 		VM_OBJECT_LOCK(vm_obj);
2308 		goto retry;
2309 	default:
2310 		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
2311 		ret = VM_PAGER_ERROR;
2312 		break;
2313 	}
2314 
2315 done:
2316 	vm_object_pip_wakeup(vm_obj);
2317 
2318 	intel_runtime_pm_put(dev_priv);
2319 	return ret;
2320 }
2321 
2322 /**
2323  * i915_gem_release_mmap - remove physical page mappings
2324  * @obj: obj in question
2325  *
2326  * Preserve the reservation of the mmapping with the DRM core code, but
2327  * relinquish ownership of the pages back to the system.
2328  *
2329  * It is vital that we remove the page mapping if we have mapped a tiled
2330  * object through the GTT and then lose the fence register due to
2331  * resource pressure. Similarly if the object has been moved out of the
2332  * aperture, then pages mapped into userspace must be revoked. Removing the
2333  * mapping will then trigger a page fault on the next user access, allowing
2334  * fixup by i915_gem_fault().
2335  */
2336 void
2337 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
2338 {
2339 	vm_object_t devobj;
2340 	vm_page_t m;
2341 	int i, page_count;
2342 
2343 	/* Serialisation between user GTT access and our code depends upon
2344 	 * revoking the CPU's PTE whilst the mutex is held. The next user
2345 	 * pagefault then has to wait until we release the mutex.
2346 	 */
2347 	lockdep_assert_held(&obj->base.dev->struct_mutex);
2348 
2349 	if (!obj->fault_mappable)
2350 		return;
2351 
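	/*
	 * The GTT mmap is backed by a device pager object on DragonFly;
	 * look it up and free every resident page so that the next user
	 * access faults back into i915_gem_fault().
	 */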
2352 	devobj = cdev_pager_lookup(obj);
2353 	if (devobj != NULL) {
2354 		page_count = OFF_TO_IDX(obj->base.size);
2355 
2356 		VM_OBJECT_LOCK(devobj);
2357 		for (i = 0; i < page_count; i++) {
2358 			m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm");
2359 			if (m == NULL)
2360 				continue;
2361 			cdev_pager_free_page(devobj, m);
2362 		}
2363 		VM_OBJECT_UNLOCK(devobj);
2364 		vm_object_deallocate(devobj);
2365 	}
2366 
2367 	/* Ensure that the CPU's PTEs are revoked and there are no outstanding
2368 	 * memory transactions from userspace before we return. The TLB
2369 	 * flushing implied by changing the PTEs above *should* be
2370 	 * sufficient; an extra barrier here just provides us with a bit
2371 	 * of paranoid documentation about our requirement to serialise
2372 	 * memory writes before touching registers / GSM.
2373 	 */
2374 	wmb();
2375 
2376 	obj->fault_mappable = false;
2377 }
2378 
2379 void
2380 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
2381 {
2382 	struct drm_i915_gem_object *obj;
2383 
2384 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
2385 		i915_gem_release_mmap(obj);
2386 }
2387 
2388 uint32_t
2389 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
2390 {
2391 	uint32_t gtt_size;
2392 
2393 	if (INTEL_INFO(dev)->gen >= 4 ||
2394 	    tiling_mode == I915_TILING_NONE)
2395 		return size;
2396 
2397 	/* Previous chips need a power-of-two fence region when tiling */
2398 	if (IS_GEN3(dev))
2399 		gtt_size = 1024*1024;
2400 	else
2401 		gtt_size = 512*1024;
2402 
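	/*
	 * Round up to the next power of two, e.g. a 1.5 MiB tiled object
	 * on gen3 needs a 2 MiB fence region.
	 */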
2403 	while (gtt_size < size)
2404 		gtt_size <<= 1;
2405 
2406 	return gtt_size;
2407 }
2408 
2409 /**
2410  * i915_gem_get_gtt_alignment - return required GTT alignment for an object
2411  * @dev: drm device
2412  * @size: object size
2413  * @tiling_mode: tiling mode
2414  * @fenced: is fenced alignment required or not
2415  *
2416  * Return the required GTT alignment for an object, taking into account
2417  * potential fence register mapping.
2418  */
2419 uint32_t
2420 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
2421 			   int tiling_mode, bool fenced)
2422 {
2423 	/*
2424 	 * Minimum alignment is 4k (GTT page size), but might be greater
2425 	 * if a fence register is needed for the object.
2426 	 */
2427 	if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
2428 	    tiling_mode == I915_TILING_NONE)
2429 		return 4096;
2430 
2431 	/*
2432 	 * Previous chips need to be aligned to the size of the smallest
2433 	 * fence register that can contain the object.
2434 	 */
2435 	return i915_gem_get_gtt_size(dev, size, tiling_mode);
2436 }
2437 
2438 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2439 {
2440 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2441 	int ret;
2442 
2443 	dev_priv->mm.shrinker_no_lock_stealing = true;
2444 
2445 	ret = drm_gem_create_mmap_offset(&obj->base);
2446 	if (ret != -ENOSPC)
2447 		goto out;
2448 
2449 	/* Badly fragmented mmap space? The only way we can recover
2450 	 * space is by destroying unwanted objects. We can't randomly release
2451 	 * mmap_offsets as userspace expects them to be persistent for the
2452 	 * lifetime of the objects. The closest we can do is to release the
2453 	 * offsets on purgeable objects by truncating them and marking them purged,
2454 	 * which prevents userspace from ever using that object again.
2455 	 */
2456 	i915_gem_shrink(dev_priv,
2457 			obj->base.size >> PAGE_SHIFT,
2458 			I915_SHRINK_BOUND |
2459 			I915_SHRINK_UNBOUND |
2460 			I915_SHRINK_PURGEABLE);
2461 	ret = drm_gem_create_mmap_offset(&obj->base);
2462 	if (ret != -ENOSPC)
2463 		goto out;
2464 
2465 	i915_gem_shrink_all(dev_priv);
2466 	ret = drm_gem_create_mmap_offset(&obj->base);
2467 out:
2468 	dev_priv->mm.shrinker_no_lock_stealing = false;
2469 
2470 	return ret;
2471 }
2472 
2473 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2474 {
2475 	drm_gem_free_mmap_offset(&obj->base);
2476 }
2477 
2478 int
2479 i915_gem_mmap_gtt(struct drm_file *file,
2480 		  struct drm_device *dev,
2481 		  uint32_t handle,
2482 		  uint64_t *offset)
2483 {
2484 	struct drm_i915_gem_object *obj;
2485 	int ret;
2486 
2487 	ret = i915_mutex_lock_interruptible(dev);
2488 	if (ret)
2489 		return ret;
2490 
2491 	obj = to_intel_bo(drm_gem_object_lookup(file, handle));
2492 	if (&obj->base == NULL) {
2493 		ret = -ENOENT;
2494 		goto unlock;
2495 	}
2496 
2497 	if (obj->madv != I915_MADV_WILLNEED) {
2498 		DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
2499 		ret = -EFAULT;
2500 		goto out;
2501 	}
2502 
2503 	ret = i915_gem_object_create_mmap_offset(obj);
2504 	if (ret)
2505 		goto out;
2506 
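	/*
	 * Hand back the DragonFly-style fake offset; userspace passes it
	 * to mmap(2) on the DRM device to reach the fault handler above.
	 */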
2507 	*offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) |
2508 	    DRM_GEM_MAPPING_KEY;
2509 
2510 out:
2511 	drm_gem_object_unreference(&obj->base);
2512 unlock:
2513 	mutex_unlock(&dev->struct_mutex);
2514 	return ret;
2515 }
2516 
2517 /**
2518  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2519  * @dev: DRM device
2520  * @data: GTT mapping ioctl data
2521  * @file: GEM object info
2522  *
2523  * Simply returns the fake offset to userspace so it can mmap it.
2524  * The mmap call will end up in drm_gem_mmap(), which will set things
2525  * up so we can get faults in the handler above.
2526  *
2527  * The fault handler will take care of binding the object into the GTT
2528  * (since it may have been evicted to make room for something), allocating
2529  * a fence register, and mapping the appropriate aperture address into
2530  * userspace.
2531  */
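/*
 * Rough userspace flow (sketch only, error handling omitted):
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);
 */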
2532 int
2533 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2534 			struct drm_file *file)
2535 {
2536 	struct drm_i915_gem_mmap_gtt *args = data;
2537 
2538 	return i915_gem_mmap_gtt(file, dev, args->handle, (uint64_t *)&args->offset);
2539 }
2540 
2541 /* Immediately discard the backing storage */
2542 static void
2543 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2544 {
2545 	vm_object_t vm_obj = obj->base.filp;
2546 
2547 	if (obj->base.filp == NULL)
2548 		return;
2549 
2550 	VM_OBJECT_LOCK(vm_obj);
2551 	vm_object_page_remove(vm_obj, 0, 0, false);
2552 	VM_OBJECT_UNLOCK(vm_obj);
2553 
2554 	/* Our goal here is to return as much of the memory as
2555 	 * possible back to the system, as we are called from OOM.
2556 	 * To do this we must instruct the shmfs to drop all of its
2557 	 * backing pages, *now*.
2558 	 */
2559 	obj->madv = __I915_MADV_PURGED;
2560 }
2561 
2562 /* Try to discard unwanted pages */
2563 static void
2564 i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
2565 {
2566 #if 0
2567 	struct address_space *mapping;
2568 #endif
2569 
2570 	switch (obj->madv) {
2571 	case I915_MADV_DONTNEED:
2572 		i915_gem_object_truncate(obj);
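		/* fall through */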
2573 	case __I915_MADV_PURGED:
2574 		return;
2575 	}
2576 
2577 	if (obj->base.filp == NULL)
2578 		return;
2579 
2580 #if 0
2581 	mapping = obj->base.filp->f_mapping,
2582 #endif
2583 	invalidate_mapping_pages(obj->base.filp, 0, (loff_t)-1);
2584 }
2585 
2586 static void
2587 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2588 {
2589 	struct sgt_iter sgt_iter;
2590 	struct page *page;
2591 	int ret;
2592 
2593 	BUG_ON(obj->madv == __I915_MADV_PURGED);
2594 
2595 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
2596 	if (WARN_ON(ret)) {
2597 		/* In the event of a disaster, abandon all caches and
2598 		 * hope for the best.
2599 		 */
2600 		i915_gem_clflush_object(obj, true);
2601 		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2602 	}
2603 
2604 	i915_gem_gtt_finish_object(obj);
2605 
2606 	if (i915_gem_object_needs_bit17_swizzle(obj))
2607 		i915_gem_object_save_bit_17_swizzle(obj);
2608 
2609 	if (obj->madv == I915_MADV_DONTNEED)
2610 		obj->dirty = 0;
2611 
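	/*
	 * Hand every page back to the VM: propagate the dirty and accessed
	 * state as appropriate, then drop the wiring taken when the pages
	 * were gathered in i915_gem_object_get_pages_gtt().
	 */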
2612 	for_each_sgt_page(page, sgt_iter, obj->pages) {
2613 		if (obj->dirty)
2614 			set_page_dirty(page);
2615 
2616 		if (obj->madv == I915_MADV_WILLNEED)
2617 			mark_page_accessed(page);
2618 
2619 		vm_page_busy_wait((struct vm_page *)page, FALSE, "i915gem");
2620 		vm_page_unwire((struct vm_page *)page, 1);
2621 		vm_page_wakeup((struct vm_page *)page);
2622 	}
2623 	obj->dirty = 0;
2624 
2625 	sg_free_table(obj->pages);
2626 	kfree(obj->pages);
2627 }
2628 
2629 int
2630 i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2631 {
2632 	const struct drm_i915_gem_object_ops *ops = obj->ops;
2633 
2634 	if (obj->pages == NULL)
2635 		return 0;
2636 
2637 	if (obj->pages_pin_count)
2638 		return -EBUSY;
2639 
2640 	BUG_ON(i915_gem_obj_bound_any(obj));
2641 
2642 	/* ->put_pages might need to allocate memory for the bit17 swizzle
2643 	 * array, hence protect them from being reaped by removing them from gtt
2644 	 * lists early. */
2645 	list_del(&obj->global_list);
2646 
2647 	if (obj->mapping) {
2648 		if (is_vmalloc_addr(obj->mapping))
2649 			vunmap(obj->mapping);
2650 		else
2651 			kunmap(kmap_to_page(obj->mapping));
2652 		obj->mapping = NULL;
2653 	}
2654 
2655 	ops->put_pages(obj);
2656 	obj->pages = NULL;
2657 
2658 	i915_gem_object_invalidate(obj);
2659 
2660 	return 0;
2661 }
2662 
2663 static int
2664 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2665 {
2666 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2667 	int page_count, i;
2668 	vm_object_t vm_obj;
2669 	struct sg_table *st;
2670 	struct scatterlist *sg;
2671 	struct sgt_iter sgt_iter;
2672 	struct page *page;
2673 	unsigned long last_pfn = 0;	/* suppress gcc warning */
2674 	int ret;
2675 
2676 	/* Assert that the object is not currently in any GPU domain. As it
2677 	 * wasn't in the GTT, there shouldn't be any way it could have been in
2678 	 * a GPU cache
2679 	 */
2680 	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2681 	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2682 
2683 	st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL);
2684 	if (st == NULL)
2685 		return -ENOMEM;
2686 
2687 	page_count = obj->base.size / PAGE_SIZE;
2688 	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2689 		kfree(st);
2690 		return -ENOMEM;
2691 	}
2692 
2693 	/* Get the list of pages out of our struct file.  They'll be pinned
2694 	 * at this point until we release them.
2695 	 *
2696 	 * Fail silently without starting the shrinker
2697 	 */
2698 	vm_obj = obj->base.filp;
2699 	VM_OBJECT_LOCK(vm_obj);
2700 	sg = st->sgl;
2701 	st->nents = 0;
2702 	for (i = 0; i < page_count; i++) {
2703 		page = shmem_read_mapping_page(vm_obj, i);
2704 		if (IS_ERR(page)) {
2705 			i915_gem_shrink(dev_priv,
2706 					page_count,
2707 					I915_SHRINK_BOUND |
2708 					I915_SHRINK_UNBOUND |
2709 					I915_SHRINK_PURGEABLE);
2710 			page = shmem_read_mapping_page(vm_obj, i);
2711 		}
2712 		if (IS_ERR(page)) {
2713 			/* We've tried hard to allocate the memory by reaping
2714 			 * our own buffer, now let the real VM do its job and
2715 			 * go down in flames if truly OOM.
2716 			 */
2717 			i915_gem_shrink_all(dev_priv);
2718 			page = shmem_read_mapping_page(vm_obj, i);
2719 			if (IS_ERR(page)) {
2720 				ret = PTR_ERR(page);
2721 				goto err_sg;
2722 			}
2723 		}
2724 #ifdef CONFIG_SWIOTLB
2725 		if (swiotlb_nr_tbl()) {
2726 			st->nents++;
2727 			sg_set_page(sg, page, PAGE_SIZE, 0);
2728 			sg = sg_next(sg);
2729 			continue;
2730 		}
2731 #endif
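		/*
		 * Coalesce physically contiguous pages into a single sg
		 * entry; start a new entry whenever the page is not
		 * adjacent to the previous one.
		 */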
2732 		if (!i || page_to_pfn(page) != last_pfn + 1) {
2733 			if (i)
2734 				sg = sg_next(sg);
2735 			st->nents++;
2736 			sg_set_page(sg, page, PAGE_SIZE, 0);
2737 		} else {
2738 			sg->length += PAGE_SIZE;
2739 		}
2740 		last_pfn = page_to_pfn(page);
2741 
2742 		/* Check that the i965g/gm workaround works. */
2743 	}
2744 #ifdef CONFIG_SWIOTLB
2745 	if (!swiotlb_nr_tbl())
2746 #endif
2747 		sg_mark_end(sg);
2748 	obj->pages = st;
2749 	VM_OBJECT_UNLOCK(vm_obj);
2750 
2751 	ret = i915_gem_gtt_prepare_object(obj);
2752 	if (ret)
2753 		goto err_pages;
2754 
2755 	if (i915_gem_object_needs_bit17_swizzle(obj))
2756 		i915_gem_object_do_bit_17_swizzle(obj);
2757 
2758 	if (obj->tiling_mode != I915_TILING_NONE &&
2759 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
2760 		i915_gem_object_pin_pages(obj);
2761 
2762 	return 0;
2763 
2764 err_sg:
2765 	sg_mark_end(sg);
2766 err_pages:
2767 	for_each_sgt_page(page, sgt_iter, st) {
2769 		struct vm_page *vmp = (struct vm_page *)page;
2770 		vm_page_busy_wait(vmp, FALSE, "i915gem");
2771 		vm_page_unwire(vmp, 0);
2772 		vm_page_wakeup(vmp);
2773 	}
2774 	VM_OBJECT_UNLOCK(vm_obj);
2775 	sg_free_table(st);
2776 	kfree(st);
2777 
2778 	/* shmemfs first checks if there is enough memory to allocate the page
2779 	 * and reports ENOSPC should there be insufficient, along with the usual
2780 	 * ENOMEM for a genuine allocation failure.
2781 	 *
2782 	 * We use ENOSPC in our driver to mean that we have run out of aperture
2783 	 * space and so want to translate the error from shmemfs back to our
2784 	 * usual understanding of ENOMEM.
2785 	 */
2786 	if (ret == -ENOSPC)
2787 		ret = -ENOMEM;
2788 
2789 	return ret;
2790 }
2791 
2792 /* Ensure that the associated pages are gathered from the backing storage
2793  * and pinned into our object. i915_gem_object_get_pages() may be called
2794  * multiple times before they are released by a single call to
2795  * i915_gem_object_put_pages() - once the pages are no longer referenced
2796  * either as a result of memory pressure (reaping pages under the shrinker)
2797  * or as the object is itself released.
2798  */
2799 int
2800 i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2801 {
2802 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2803 	const struct drm_i915_gem_object_ops *ops = obj->ops;
2804 	int ret;
2805 
2806 	if (obj->pages)
2807 		return 0;
2808 
2809 	if (obj->madv != I915_MADV_WILLNEED) {
2810 		DRM_DEBUG("Attempting to obtain a purgeable object\n");
2811 		return -EFAULT;
2812 	}
2813 
2814 	BUG_ON(obj->pages_pin_count);
2815 
2816 	ret = ops->get_pages(obj);
2817 	if (ret)
2818 		return ret;
2819 
2820 	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
2821 
2822 	obj->get_page.sg = obj->pages->sgl;
2823 	obj->get_page.last = 0;
2824 
2825 	return 0;
2826 }
2827 
2828 /* The 'mapping' part of i915_gem_object_pin_map() below */
2829 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
2830 {
2831 	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2832 	struct sg_table *sgt = obj->pages;
2833 	struct sgt_iter sgt_iter;
2834 	struct page *page;
2835 	struct page *stack_pages[32];
2836 	struct page **pages = stack_pages;
2837 	unsigned long i = 0;
2838 	void *addr;
2839 
2840 	/* A single page can always be kmapped */
2841 	if (n_pages == 1)
2842 		return kmap(sg_page(sgt->sgl));
2843 
2844 	if (n_pages > ARRAY_SIZE(stack_pages)) {
2845 		/* Too big for stack -- allocate temporary array instead */
2846 		pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
2847 		if (!pages)
2848 			return NULL;
2849 	}
2850 
2851 	for_each_sgt_page(page, sgt_iter, sgt)
2852 		pages[i++] = page;
2853 
2854 	/* Check that we have the expected number of pages */
2855 	GEM_BUG_ON(i != n_pages);
2856 
2857 	addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
2858 
2859 	if (pages != stack_pages)
2860 		drm_free_large(pages);
2861 
2862 	return addr;
2863 }
2864 
2865 /* get, pin, and map the pages of the object into kernel space */
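/*
 * The kernel mapping is cached in obj->mapping and reused by later callers;
 * it is only torn down once the object's pages are released in
 * i915_gem_object_put_pages().  The pages themselves remain pinned until
 * the caller drops the pin acquired here.
 */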
2866 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
2867 {
2868 	int ret;
2869 
2870 	lockdep_assert_held(&obj->base.dev->struct_mutex);
2871 
2872 	ret = i915_gem_object_get_pages(obj);
2873 	if (ret)
2874 		return ERR_PTR(ret);
2875 
2876 	i915_gem_object_pin_pages(obj);
2877 
2878 	if (!obj->mapping) {
2879 		obj->mapping = i915_gem_object_map(obj);
2880 		if (!obj->mapping) {
2881 			i915_gem_object_unpin_pages(obj);
2882 			return ERR_PTR(-ENOMEM);
2883 		}
2884 	}
2885 
2886 	return obj->mapping;
2887 }
2888 
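/*
 * Track @vma as busy on the engine servicing @req: take an object reference
 * on the first transition to active, record the request as the last read on
 * that engine and move the VMA onto its address space's active list.
 */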
2889 void i915_vma_move_to_active(struct i915_vma *vma,
2890 			     struct drm_i915_gem_request *req)
2891 {
2892 	struct drm_i915_gem_object *obj = vma->obj;
2893 	struct intel_engine_cs *engine;
2894 
2895 	engine = i915_gem_request_get_engine(req);
2896 
2897 	/* Add a reference if we're newly entering the active list. */
2898 	if (obj->active == 0)
2899 		drm_gem_object_reference(&obj->base);
2900 	obj->active |= intel_engine_flag(engine);
2901 
2902 	list_move_tail(&obj->engine_list[engine->id], &engine->active_list);
2903 	i915_gem_request_assign(&obj->last_read_req[engine->id], req);
2904 
2905 	list_move_tail(&vma->vm_link, &vma->vm->active_list);
2906 }
2907 
2908 static void
2909 i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
2910 {
2911 	GEM_BUG_ON(obj->last_write_req == NULL);
2912 	GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine)));
2913 
2914 	i915_gem_request_assign(&obj->last_write_req, NULL);
2915 	intel_fb_obj_flush(obj, true, ORIGIN_CS);
2916 }
2917 
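/*
 * Drop the object's last read request for @ring.  Once no engine is still
 * reading from the object it is moved back onto the inactive lists and the
 * reference taken in i915_vma_move_to_active() is released.
 */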
2918 static void
2919 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
2920 {
2921 	struct i915_vma *vma;
2922 
2923 	GEM_BUG_ON(obj->last_read_req[ring] == NULL);
2924 	GEM_BUG_ON(!(obj->active & (1 << ring)));
2925 
2926 	list_del_init(&obj->engine_list[ring]);
2927 	i915_gem_request_assign(&obj->last_read_req[ring], NULL);
2928 
2929 	if (obj->last_write_req && obj->last_write_req->engine->id == ring)
2930 		i915_gem_object_retire__write(obj);
2931 
2932 	obj->active &= ~(1 << ring);
2933 	if (obj->active)
2934 		return;
2935 
2936 	/* Bump our place on the bound list to keep it roughly in LRU order
2937 	 * so that we don't steal from recently used but inactive objects
2938 	 * (unless we are forced to ofc!)
2939 	 */
2940 	list_move_tail(&obj->global_list,
2941 		       &to_i915(obj->base.dev)->mm.bound_list);
2942 
2943 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
2944 		if (!list_empty(&vma->vm_link))
2945 			list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
2946 	}
2947 
2948 	i915_gem_request_assign(&obj->last_fenced_req, NULL);
2949 	drm_gem_object_unreference(&obj->base);
2950 }
2951 
2952 static int
2953 i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
2954 {
2955 	struct intel_engine_cs *engine;
2956 	int ret;
2957 
2958 	/* Carefully retire all requests without writing to the rings */
2959 	for_each_engine(engine, dev_priv) {
2960 		ret = intel_engine_idle(engine);
2961 		if (ret)
2962 			return ret;
2963 	}
2964 	i915_gem_retire_requests(dev_priv);
2965 
2966 	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
2967 	if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) {
2968 		while (intel_kick_waiters(dev_priv) ||
2969 		       intel_kick_signalers(dev_priv))
2970 			yield();
2971 	}
2972 
2973 	/* Finally reset hw state */
2974 	for_each_engine(engine, dev_priv)
2975 		intel_ring_init_seqno(engine, seqno);
2976 
2977 	return 0;
2978 }
2979 
2980 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
2981 {
2982 	struct drm_i915_private *dev_priv = to_i915(dev);
2983 	int ret;
2984 
2985 	if (seqno == 0)
2986 		return -EINVAL;
2987 
2988 	/* HWS page needs to be set to a value less than what we
2989 	 * will inject into the ring
2990 	 */
2991 	ret = i915_gem_init_seqno(dev_priv, seqno - 1);
2992 	if (ret)
2993 		return ret;
2994 
2995 	/* Carefully set the last_seqno value so that wrap
2996 	 * detection still works
2997 	 */
2998 	dev_priv->next_seqno = seqno;
2999 	dev_priv->last_seqno = seqno - 1;
3000 	if (dev_priv->last_seqno == 0)
3001 		dev_priv->last_seqno--;
3002 
3003 	return 0;
3004 }
3005 
3006 int
3007 i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
3008 {
3009 	/* reserve 0 for non-seqno */
3010 	if (dev_priv->next_seqno == 0) {
3011 		int ret = i915_gem_init_seqno(dev_priv, 0);
3012 		if (ret)
3013 			return ret;
3014 
3015 		dev_priv->next_seqno = 1;
3016 	}
3017 
3018 	*seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
3019 	return 0;
3020 }
3021 
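/*
 * Note that an engine is about to receive new work: mark it active and, on
 * the idle to busy transition, take a runtime-pm reference, unpark RPS
 * (gen6+) and kick off the retire worker.
 */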
3022 static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
3023 {
3024 	struct drm_i915_private *dev_priv = engine->i915;
3025 
3026 	dev_priv->gt.active_engines |= intel_engine_flag(engine);
3027 	if (dev_priv->gt.awake)
3028 		return;
3029 
3030 	intel_runtime_pm_get_noresume(dev_priv);
3031 	dev_priv->gt.awake = true;
3032 
3033 	i915_update_gfx_val(dev_priv);
3034 	if (INTEL_GEN(dev_priv) >= 6)
3035 		gen6_rps_busy(dev_priv);
3036 
3037 	queue_delayed_work(dev_priv->wq,
3038 			   &dev_priv->gt.retire_work,
3039 			   round_jiffies_up_relative(HZ));
3040 }
3041 
3042 /*
3043  * NB: This function is not allowed to fail. Doing so would mean that the
3044  * request is not being tracked for completion but the work itself is
3045  * going to happen on the hardware. This would be a Bad Thing(tm).
3046  */
3047 void __i915_add_request(struct drm_i915_gem_request *request,
3048 			struct drm_i915_gem_object *obj,
3049 			bool flush_caches)
3050 {
3051 	struct intel_engine_cs *engine;
3052 	struct intel_ringbuffer *ringbuf;
3053 	u32 request_start;
3054 	u32 reserved_tail;
3055 	int ret;
3056 
3057 	if (WARN_ON(request == NULL))
3058 		return;
3059 
3060 	engine = request->engine;
3061 	ringbuf = request->ringbuf;
3062 
3063 	/*
3064 	 * To ensure that this call will not fail, space for its emissions
3065 	 * should already have been reserved in the ring buffer. Let the ring
3066 	 * know that it is time to use that space up.
3067 	 */
3068 	request_start = intel_ring_get_tail(ringbuf);
3069 	reserved_tail = request->reserved_space;
3070 	request->reserved_space = 0;
3071 
3072 	/*
3073 	 * Emit any outstanding flushes - execbuf can fail to emit the flush
3074 	 * after having emitted the batchbuffer command. Hence we need to fix
3075 	 * things up similar to emitting the lazy request. The difference here
3076 	 * is that the flush _must_ happen before the next request, no matter
3077 	 * what.
3078 	 */
3079 	if (flush_caches) {
3080 		if (i915.enable_execlists)
3081 			ret = logical_ring_flush_all_caches(request);
3082 		else
3083 			ret = intel_ring_flush_all_caches(request);
3084 		/* Not allowed to fail! */
3085 		WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
3086 	}
3087 
3088 	trace_i915_gem_request_add(request);
3089 
3090 	request->head = request_start;
3091 
3092 	/* Whilst this request exists, batch_obj will be on the
3093 	 * active_list, and so will hold the active reference. Only when this
3094 	 * request is retired will the batch_obj be moved onto the
3095 	 * inactive_list and lose its active reference. Hence we do not need
3096 	 * to explicitly hold another reference here.
3097 	 */
3098 	request->batch_obj = obj;
3099 
3100 	/* Seal the request and mark it as pending execution. Note that
3101 	 * we may inspect this state, without holding any locks, during
3102 	 * hangcheck. Hence we apply the barrier to ensure that we do not
3103 	 * see a more recent value in the hws than we are tracking.
3104 	 */
3105 	request->emitted_jiffies = jiffies;
3106 	request->previous_seqno = engine->last_submitted_seqno;
3107 	smp_store_mb(engine->last_submitted_seqno, request->seqno);
3108 	list_add_tail(&request->list, &engine->request_list);
3109 
3110 	/* Record the position of the start of the request so that
3111 	 * should we detect the updated seqno part-way through the
3112 	 * GPU processing the request, we never over-estimate the
3113 	 * position of the head.
3114 	 */
3115 	request->postfix = intel_ring_get_tail(ringbuf);
3116 
3117 	if (i915.enable_execlists)
3118 		ret = engine->emit_request(request);
3119 	else {
3120 		ret = engine->add_request(request);
3121 
3122 		request->tail = intel_ring_get_tail(ringbuf);
3123 	}
3124 	/* Not allowed to fail! */
3125 	WARN(ret, "emit|add_request failed: %d!\n", ret);
3126 	/* Sanity check that the reserved size was large enough. */
3127 	ret = intel_ring_get_tail(ringbuf) - request_start;
3128 	if (ret < 0)
3129 		ret += ringbuf->size;
3130 	WARN_ONCE(ret > reserved_tail,
3131 		  "Not enough space reserved (%d bytes) "
3132 		  "for adding the request (%d bytes)\n",
3133 		  reserved_tail, ret);
3134 
3135 	i915_gem_mark_busy(engine);
3136 }
3137 
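/*
 * A context is banned from further submissions if it has already been
 * banned, or if it hangs again within ban_period_seconds of its previous
 * hang.
 */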
3138 static bool i915_context_is_banned(const struct i915_gem_context *ctx)
3139 {
3140 	unsigned long elapsed;
3141 
3142 	if (ctx->hang_stats.banned)
3143 		return true;
3144 
3145 	elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
3146 	if (ctx->hang_stats.ban_period_seconds &&
3147 	    elapsed <= ctx->hang_stats.ban_period_seconds) {
3148 		DRM_DEBUG("context hanging too fast, banning!\n");
3149 		return true;
3150 	}
3151 
3152 	return false;
3153 }
3154 
3155 static void i915_set_reset_status(struct i915_gem_context *ctx,
3156 				  const bool guilty)
3157 {
3158 	struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
3159 
3160 	if (guilty) {
3161 		hs->banned = i915_context_is_banned(ctx);
3162 		hs->batch_active++;
3163 		hs->guilty_ts = get_seconds();
3164 	} else {
3165 		hs->batch_pending++;
3166 	}
3167 }
3168 
3169 void i915_gem_request_free(struct kref *req_ref)
3170 {
3171 	struct drm_i915_gem_request *req = container_of(req_ref,
3172 						 typeof(*req), ref);
3173 	kmem_cache_free(req->i915->requests, req);
3174 }
3175 
3176 static inline int
3177 __i915_gem_request_alloc(struct intel_engine_cs *engine,
3178 			 struct i915_gem_context *ctx,
3179 			 struct drm_i915_gem_request **req_out)
3180 {
3181 	struct drm_i915_private *dev_priv = engine->i915;
3182 	unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error);
3183 	struct drm_i915_gem_request *req;
3184 	int ret;
3185 
3186 	if (!req_out)
3187 		return -EINVAL;
3188 
3189 	*req_out = NULL;
3190 
3191 	/* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
3192 	 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
3193 	 * and restart.
3194 	 */
3195 	ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
3196 	if (ret)
3197 		return ret;
3198 
3199 	req = kzalloc(sizeof(*req), GFP_KERNEL);
3200 	if (req == NULL)
3201 		return -ENOMEM;
3202 
3203 	ret = i915_gem_get_seqno(engine->i915, &req->seqno);
3204 	if (ret)
3205 		goto err;
3206 
3207 	kref_init(&req->ref);
3208 	req->i915 = dev_priv;
3209 	req->engine = engine;
3210 	req->ctx  = ctx;
3211 	i915_gem_context_reference(req->ctx);
3212 
3213 	/*
3214 	 * Reserve space in the ring buffer for all the commands required to
3215 	 * eventually emit this request. This is to guarantee that the
3216 	 * i915_add_request() call can't fail. Note that the reserve may need
3217 	 * to be redone if the request is not actually submitted straight
3218 	 * away, e.g. because a GPU scheduler has deferred it.
3219 	 */
3220 	req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
3221 
3222 	if (i915.enable_execlists)
3223 		ret = intel_logical_ring_alloc_request_extras(req);
3224 	else
3225 		ret = intel_ring_alloc_request_extras(req);
3226 	if (ret)
3227 		goto err_ctx;
3228 
3229 	*req_out = req;
3230 	return 0;
3231 
3232 err_ctx:
3233 	i915_gem_context_unreference(ctx);
3234 err:
3235 	kfree(req);
3236 	return ret;
3237 }
3238 
3239 /**
3240  * i915_gem_request_alloc - allocate a request structure
3241  *
3242  * @engine: engine that we wish to issue the request on.
3243  * @ctx: context that the request will be associated with.
3244  *       This can be NULL if the request is not directly related to
3245  *       any specific user context, in which case this function will
3246  *       choose an appropriate context to use.
3247  *
3248  * Returns a pointer to the allocated request if successful,
3249  * or an error code if not.
3250  */
3251 struct drm_i915_gem_request *
3252 i915_gem_request_alloc(struct intel_engine_cs *engine,
3253 		       struct i915_gem_context *ctx)
3254 {
3255 	struct drm_i915_gem_request *req;
3256 	int err;
3257 
3258 	if (ctx == NULL)
3259 		ctx = engine->i915->kernel_context;
3260 	err = __i915_gem_request_alloc(engine, ctx, &req);
3261 	return err ? ERR_PTR(err) : req;
3262 }
3263 
3264 struct drm_i915_gem_request *
3265 i915_gem_find_active_request(struct intel_engine_cs *engine)
3266 {
3267 	struct drm_i915_gem_request *request;
3268 
3269 	/* We are called by the error capture and reset at a random
3270 	 * point in time. In particular, note that neither is crucially
3271 	 * ordered with an interrupt. After a hang, the GPU is dead and we
3272 	 * assume that no more writes can happen (we waited long enough for
3273 	 * all writes that were in transaction to be flushed) - adding an
3274 	 * extra delay for a recent interrupt is pointless. Hence, we do
3275 	 * not need an engine->irq_seqno_barrier() before the seqno reads.
3276 	 */
3277 	list_for_each_entry(request, &engine->request_list, list) {
3278 		if (i915_gem_request_completed(request))
3279 			continue;
3280 
3281 		return request;
3282 	}
3283 
3284 	return NULL;
3285 }
3286 
3287 static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
3288 {
3289 	struct drm_i915_gem_request *request;
3290 	bool ring_hung;
3291 
3292 	request = i915_gem_find_active_request(engine);
3293 	if (request == NULL)
3294 		return;
3295 
3296 	ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
3297 
3298 	i915_set_reset_status(request->ctx, ring_hung);
3299 	list_for_each_entry_continue(request, &engine->request_list, list)
3300 		i915_set_reset_status(request->ctx, false);
3301 }
3302 
3303 static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
3304 {
3305 	struct intel_ringbuffer *buffer;
3306 
3307 	while (!list_empty(&engine->active_list)) {
3308 		struct drm_i915_gem_object *obj;
3309 
3310 		obj = list_first_entry(&engine->active_list,
3311 				       struct drm_i915_gem_object,
3312 				       engine_list[engine->id]);
3313 
3314 		i915_gem_object_retire__read(obj, engine->id);
3315 	}
3316 
3317 	/*
3318 	 * Clear the execlists queue up before freeing the requests, as those
3319 	 * are the ones that keep the context and ringbuffer backing objects
3320 	 * pinned in place.
3321 	 */
3322 
3323 	if (i915.enable_execlists) {
3324 		/* Ensure irq handler finishes or is cancelled. */
3325 		tasklet_kill(&engine->irq_tasklet);
3326 
3327 		intel_execlists_cancel_requests(engine);
3328 	}
3329 
3330 	/*
3331 	 * We must free the requests after all the corresponding objects have
3332 	 * been moved off active lists. Which is the same order as the normal
3333 	 * retire_requests function does. This is important if objects hold
3334 	 * implicit references on things like e.g. ppgtt address spaces through
3335 	 * the request.
3336 	 */
3337 	while (!list_empty(&engine->request_list)) {
3338 		struct drm_i915_gem_request *request;
3339 
3340 		request = list_first_entry(&engine->request_list,
3341 					   struct drm_i915_gem_request,
3342 					   list);
3343 
3344 		i915_gem_request_retire(request);
3345 	}
3346 
3347 	/* Having flushed all requests from all queues, we know that all
3348 	 * ringbuffers must now be empty. However, since we do not reclaim
3349 	 * all space when retiring the request (to prevent HEADs colliding
3350 	 * with rapid ringbuffer wraparound) the amount of available space
3351 	 * upon reset is less than when we start. Do one more pass over
3352 	 * all the ringbuffers to reset last_retired_head.
3353 	 */
3354 	list_for_each_entry(buffer, &engine->buffers, link) {
3355 		buffer->last_retired_head = buffer->tail;
3356 		intel_ring_update_space(buffer);
3357 	}
3358 
3359 	intel_ring_init_seqno(engine, engine->last_submitted_seqno);
3360 
3361 	engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
3362 }
3363 
3364 void i915_gem_reset(struct drm_device *dev)
3365 {
3366 	struct drm_i915_private *dev_priv = to_i915(dev);
3367 	struct intel_engine_cs *engine;
3368 
3369 	/*
3370 	 * Before we free the objects from the requests, we need to inspect
3371 	 * them for finding the guilty party. As the requests only borrow
3372 	 * their reference to the objects, the inspection must be done first.
3373 	 */
3374 	for_each_engine(engine, dev_priv)
3375 		i915_gem_reset_engine_status(engine);
3376 
3377 	for_each_engine(engine, dev_priv)
3378 		i915_gem_reset_engine_cleanup(engine);
3379 	mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
3380 
3381 	i915_gem_context_reset(dev);
3382 
3383 	i915_gem_restore_fences(dev);
3384 
3385 	WARN_ON(i915_verify_lists(dev));
3386 }
3387 
3388 /**
3389  * This function clears the request list as sequence numbers are passed.
3390  * @engine: engine to retire requests on
3391  */
3392 void
3393 i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
3394 {
3395 	WARN_ON(i915_verify_lists(engine->dev));
3396 
3397 	/* Retire requests first as we use it above for the early return.
3398 	 * If we retire requests last, we may use a later seqno and so clear
3399 	 * the requests lists without clearing the active list, leading to
3400 	 * confusion.
3401 	 */
3402 	while (!list_empty(&engine->request_list)) {
3403 		struct drm_i915_gem_request *request;
3404 
3405 		request = list_first_entry(&engine->request_list,
3406 					   struct drm_i915_gem_request,
3407 					   list);
3408 
3409 		if (!i915_gem_request_completed(request))
3410 			break;
3411 
3412 		i915_gem_request_retire(request);
3413 	}
3414 
3415 	/* Move any buffers on the active list that are no longer referenced
3416 	 * by the ringbuffer to the flushing/inactive lists as appropriate,
3417 	 * before we free the context associated with the requests.
3418 	 */
3419 	while (!list_empty(&engine->active_list)) {
3420 		struct drm_i915_gem_object *obj;
3421 
3422 		obj = list_first_entry(&engine->active_list,
3423 				       struct drm_i915_gem_object,
3424 				       engine_list[engine->id]);
3425 
3426 		if (!list_empty(&obj->last_read_req[engine->id]->list))
3427 			break;
3428 
3429 		i915_gem_object_retire__read(obj, engine->id);
3430 	}
3431 
3432 	WARN_ON(i915_verify_lists(engine->dev));
3433 }
3434 
3435 void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
3436 {
3437 	struct intel_engine_cs *engine;
3438 
3439 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
3440 
3441 	if (dev_priv->gt.active_engines == 0)
3442 		return;
3443 
3444 	GEM_BUG_ON(!dev_priv->gt.awake);
3445 
3446 	for_each_engine(engine, dev_priv) {
3447 		i915_gem_retire_requests_ring(engine);
3448 		if (list_empty(&engine->request_list))
3449 			dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
3450 	}
3451 
3452 	if (dev_priv->gt.active_engines == 0)
3453 		queue_delayed_work(dev_priv->wq,
3454 				   &dev_priv->gt.idle_work,
3455 				   msecs_to_jiffies(100));
3456 }
3457 
3458 static void
3459 i915_gem_retire_work_handler(struct work_struct *work)
3460 {
3461 	struct drm_i915_private *dev_priv =
3462 		container_of(work, typeof(*dev_priv), gt.retire_work.work);
3463 	struct drm_device *dev = &dev_priv->drm;
3464 
3465 	/* Come back later if the device is busy... */
3466 	if (mutex_trylock(&dev->struct_mutex)) {
3467 		i915_gem_retire_requests(dev_priv);
3468 		mutex_unlock(&dev->struct_mutex);
3469 	}
3470 
3471 	/* Keep the retire handler running until we are finally idle.
3472 	 * We do not need to do this test under locking as in the worst-case
3473 	 * we queue the retire worker once too often.
3474 	 */
3475 	if (READ_ONCE(dev_priv->gt.awake))
3476 		queue_delayed_work(dev_priv->wq,
3477 				   &dev_priv->gt.retire_work,
3478 				   round_jiffies_up_relative(HZ));
3479 }
3480 
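/*
 * Runs a while after the last request has been retired: if the GPU is still
 * idle, park RPS, drop the runtime-pm reference taken in
 * i915_gem_mark_busy() and leave hangcheck cancelled; otherwise back off and
 * let the work be rescheduled.
 */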
3481 static void
3482 i915_gem_idle_work_handler(struct work_struct *work)
3483 {
3484 	struct drm_i915_private *dev_priv =
3485 		container_of(work, typeof(*dev_priv), gt.idle_work.work);
3486 	struct drm_device *dev = &dev_priv->drm;
3487 	struct intel_engine_cs *engine;
3488 	unsigned int stuck_engines;
3489 	bool rearm_hangcheck;
3490 
3491 	if (!READ_ONCE(dev_priv->gt.awake))
3492 		return;
3493 
3494 	if (READ_ONCE(dev_priv->gt.active_engines))
3495 		return;
3496 
3497 	rearm_hangcheck =
3498 		cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
3499 
3500 	if (!mutex_trylock(&dev->struct_mutex)) {
3501 		/* Currently busy, come back later */
3502 		mod_delayed_work(dev_priv->wq,
3503 				 &dev_priv->gt.idle_work,
3504 				 msecs_to_jiffies(50));
3505 		goto out_rearm;
3506 	}
3507 
3508 	if (dev_priv->gt.active_engines)
3509 		goto out_unlock;
3510 
3511 	for_each_engine(engine, dev_priv)
3512 		i915_gem_batch_pool_fini(&engine->batch_pool);
3513 
3514 	GEM_BUG_ON(!dev_priv->gt.awake);
3515 	dev_priv->gt.awake = false;
3516 	rearm_hangcheck = false;
3517 
3518 	stuck_engines = intel_kick_waiters(dev_priv);
3519 	if (unlikely(stuck_engines)) {
3520 		DRM_DEBUG_DRIVER("kicked stuck waiters...missed irq\n");
3521 		dev_priv->gpu_error.missed_irq_rings |= stuck_engines;
3522 	}
3523 
3524 	if (INTEL_GEN(dev_priv) >= 6)
3525 		gen6_rps_idle(dev_priv);
3526 	intel_runtime_pm_put(dev_priv);
3527 out_unlock:
3528 	mutex_unlock(&dev->struct_mutex);
3529 
3530 out_rearm:
3531 	if (rearm_hangcheck) {
3532 		GEM_BUG_ON(!dev_priv->gt.awake);
3533 		i915_queue_hangcheck(dev_priv);
3534 	}
3535 }
3536 
3537 /**
3538  * Ensures that an object will eventually get non-busy by flushing any required
3539  * write domains, emitting any outstanding lazy request and retiring any
3540  * completed requests.
3541  * @obj: object to flush
3542  */
3543 static int
3544 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
3545 {
3546 	int i;
3547 
3548 	if (!obj->active)
3549 		return 0;
3550 
3551 	for (i = 0; i < I915_NUM_ENGINES; i++) {
3552 		struct drm_i915_gem_request *req;
3553 
3554 		req = obj->last_read_req[i];
3555 		if (req == NULL)
3556 			continue;
3557 
3558 		if (i915_gem_request_completed(req))
3559 			i915_gem_object_retire__read(obj, i);
3560 	}
3561 
3562 	return 0;
3563 }
3564 
3565 /**
3566  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3567  * @dev: drm device pointer
3568  * @data: ioctl data blob
3569  * @file: drm file pointer
3570  *
3571  * Returns 0 if successful, else an error is returned with the remaining time in
3572  * the timeout parameter.
3573  *  -ETIME: object is still busy after timeout
3574  *  -ERESTARTSYS: signal interrupted the wait
3575  *  -ENOENT: object doesn't exist
3576  * Also possible, but rare:
3577  *  -EAGAIN: GPU wedged
3578  *  -ENOMEM: damn
3579  *  -ENODEV: Internal IRQ fail
3580  *  -E?: The add request failed
3581  *
3582  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3583  * non-zero timeout parameter the wait ioctl will wait for the given number of
3584  * nanoseconds on an object becoming unbusy. Since the wait itself does so
3585  * without holding struct_mutex the object may become re-busied before this
3586  * function completes. A similar but shorter * race condition exists in the busy
3587  * ioctl
3588  */
3589 int
3590 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3591 {
3592 	struct drm_i915_gem_wait *args = data;
3593 	struct drm_i915_gem_object *obj;
3594 	struct drm_i915_gem_request *req[I915_NUM_ENGINES];
3595 	int i, n = 0;
3596 	int ret;
3597 
3598 	if (args->flags != 0)
3599 		return -EINVAL;
3600 
3601 	ret = i915_mutex_lock_interruptible(dev);
3602 	if (ret)
3603 		return ret;
3604 
3605 	obj = to_intel_bo(drm_gem_object_lookup(file, args->bo_handle));
3606 	if (&obj->base == NULL) {
3607 		mutex_unlock(&dev->struct_mutex);
3608 		return -ENOENT;
3609 	}
3610 
3611 	/* Need to make sure the object gets inactive eventually. */
3612 	ret = i915_gem_object_flush_active(obj);
3613 	if (ret)
3614 		goto out;
3615 
3616 	if (!obj->active)
3617 		goto out;
3618 
3619 	/* Do this after OLR check to make sure we make forward progress polling
3620 	 * on this IOCTL with a timeout == 0 (like busy ioctl)
3621 	 */
3622 	if (args->timeout_ns == 0) {
3623 		ret = -ETIME;
3624 		goto out;
3625 	}
3626 
3627 	drm_gem_object_unreference(&obj->base);
3628 
3629 	for (i = 0; i < I915_NUM_ENGINES; i++) {
3630 		if (obj->last_read_req[i] == NULL)
3631 			continue;
3632 
3633 		req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
3634 	}
3635 
3636 	mutex_unlock(&dev->struct_mutex);
3637 
3638 	for (i = 0; i < n; i++) {
3639 		if (ret == 0)
3640 			ret = __i915_wait_request(req[i], true,
3641 						  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
3642 						  to_rps_client(file));
3643 		i915_gem_request_unreference(req[i]);
3644 	}
3645 	return ret;
3646 
3647 out:
3648 	drm_gem_object_unreference(&obj->base);
3649 	mutex_unlock(&dev->struct_mutex);
3650 	return ret;
3651 }
3652 
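/*
 * Make engine @to wait for @from_req to complete: either wait on the CPU
 * (when semaphores are unavailable) or emit a semaphore wait into *to_req,
 * allocating that request first if the caller did not supply one.
 */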
3653 static int
3654 __i915_gem_object_sync(struct drm_i915_gem_object *obj,
3655 		       struct intel_engine_cs *to,
3656 		       struct drm_i915_gem_request *from_req,
3657 		       struct drm_i915_gem_request **to_req)
3658 {
3659 	struct intel_engine_cs *from;
3660 	int ret;
3661 
3662 	from = i915_gem_request_get_engine(from_req);
3663 	if (to == from)
3664 		return 0;
3665 
3666 	if (i915_gem_request_completed(from_req))
3667 		return 0;
3668 
3669 	if (!i915_semaphore_is_enabled(to_i915(obj->base.dev))) {
3670 		struct drm_i915_private *i915 = to_i915(obj->base.dev);
3671 		ret = __i915_wait_request(from_req,
3672 					  i915->mm.interruptible,
3673 					  NULL,
3674 					  &i915->rps.semaphores);
3675 		if (ret)
3676 			return ret;
3677 
3678 		i915_gem_object_retire_request(obj, from_req);
3679 	} else {
3680 		int idx = intel_ring_sync_index(from, to);
3681 		u32 seqno = i915_gem_request_get_seqno(from_req);
3682 
3683 		WARN_ON(!to_req);
3684 
3685 		if (seqno <= from->semaphore.sync_seqno[idx])
3686 			return 0;
3687 
3688 		if (*to_req == NULL) {
3689 			struct drm_i915_gem_request *req;
3690 
3691 			req = i915_gem_request_alloc(to, NULL);
3692 			if (IS_ERR(req))
3693 				return PTR_ERR(req);
3694 
3695 			*to_req = req;
3696 		}
3697 
3698 		trace_i915_gem_ring_sync_to(*to_req, from, from_req);
3699 		ret = to->semaphore.sync_to(*to_req, from, seqno);
3700 		if (ret)
3701 			return ret;
3702 
3703 		/* We use last_read_req because sync_to()
3704 		 * might have just caused seqno wrap under
3705 		 * the radar.
3706 		 */
3707 		from->semaphore.sync_seqno[idx] =
3708 			i915_gem_request_get_seqno(obj->last_read_req[from->id]);
3709 	}
3710 
3711 	return 0;
3712 }
3713 
3714 /**
3715  * i915_gem_object_sync - sync an object to a ring.
3716  *
3717  * @obj: object which may be in use on another ring.
3718  * @to: ring we wish to use the object on. May be NULL.
3719  * @to_req: request we wish to use the object for. See below.
3720  *          This will be allocated and returned if a request is
3721  *          required but not passed in.
3722  *
3723  * This code is meant to abstract object synchronization with the GPU.
3724  * Calling with NULL implies synchronizing the object with the CPU
3725  * rather than a particular GPU ring. Conceptually we serialise writes
3726  * between engines inside the GPU. We only allow one engine to write
3727  * into a buffer at any time, but multiple readers. To ensure each has
3728  * a coherent view of memory, we must:
3729  *
3730  * - If there is an outstanding write request to the object, the new
3731  *   request must wait for it to complete (either CPU or in hw, requests
3732  *   on the same ring will be naturally ordered).
3733  *
3734  * - If we are a write request (pending_write_domain is set), the new
3735  *   request must wait for outstanding read requests to complete.
3736  *
3737  * For CPU synchronisation (NULL to) no request is required. For syncing with
3738  * rings to_req must be non-NULL. However, a request does not have to be
3739  * pre-allocated. If *to_req is NULL and sync commands will be emitted then a
3740  * request will be allocated automatically and returned through *to_req. Note
3741  * that it is not guaranteed that commands will be emitted (because the system
3742  * might already be idle). Hence there is no need to create a request that
3743  * might never have any work submitted. Note further that if a request is
3744  * returned in *to_req, it is the responsibility of the caller to submit
3745  * that request (after potentially adding more work to it).
3746  *
3747  * Returns 0 if successful, else propagates up the lower layer error.
3748  */
3749 int
3750 i915_gem_object_sync(struct drm_i915_gem_object *obj,
3751 		     struct intel_engine_cs *to,
3752 		     struct drm_i915_gem_request **to_req)
3753 {
3754 	const bool readonly = obj->base.pending_write_domain == 0;
3755 	struct drm_i915_gem_request *req[I915_NUM_ENGINES];
3756 	int ret, i, n;
3757 
3758 	if (!obj->active)
3759 		return 0;
3760 
3761 	if (to == NULL)
3762 		return i915_gem_object_wait_rendering(obj, readonly);
3763 
3764 	n = 0;
3765 	if (readonly) {
3766 		if (obj->last_write_req)
3767 			req[n++] = obj->last_write_req;
3768 	} else {
3769 		for (i = 0; i < I915_NUM_ENGINES; i++)
3770 			if (obj->last_read_req[i])
3771 				req[n++] = obj->last_read_req[i];
3772 	}
3773 	for (i = 0; i < n; i++) {
3774 		ret = __i915_gem_object_sync(obj, to, req[i], to_req);
3775 		if (ret)
3776 			return ret;
3777 	}
3778 
3779 	return 0;
3780 }
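
/*
 * Editorial sketch (not part of the driver): the caller pattern described in
 * the comment above, loosely modelled on the execbuffer path.  Every object
 * the batch will touch is synced to the target engine; if the semaphore path
 * emitted commands, a request was allocated into 'req' and submitting it is
 * the caller's responsibility.  i915_add_request() is assumed to be the
 * submission helper declared elsewhere in the driver.
 */
#if 0
static int sync_objects_to_engine(struct drm_i915_gem_object **objs, int count,
				  struct intel_engine_cs *engine)
{
	struct drm_i915_gem_request *req = NULL;
	int i, ret;

	for (i = 0; i < count; i++) {
		ret = i915_gem_object_sync(objs[i], engine, &req);
		if (ret)
			return ret;	/* a real caller would also clean up req */
	}

	/* Only submit if the sync path actually emitted semaphore commands. */
	if (req)
		i915_add_request(req);

	return 0;
}
#endif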
3781 
3782 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
3783 {
3784 	u32 old_write_domain, old_read_domains;
3785 
3786 	/* Force a pagefault for domain tracking on next user access */
3787 	i915_gem_release_mmap(obj);
3788 
3789 	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3790 		return;
3791 
3792 	old_read_domains = obj->base.read_domains;
3793 	old_write_domain = obj->base.write_domain;
3794 
3795 	obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
3796 	obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
3797 
3798 	trace_i915_gem_object_change_domain(obj,
3799 					    old_read_domains,
3800 					    old_write_domain);
3801 }
3802 
3803 static void __i915_vma_iounmap(struct i915_vma *vma)
3804 {
3805 	GEM_BUG_ON(vma->pin_count);
3806 
3807 	if (vma->iomap == NULL)
3808 		return;
3809 
3810 	io_mapping_unmap(vma->iomap);
3811 	vma->iomap = NULL;
3812 }
3813 
3814 static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
3815 {
3816 	struct drm_i915_gem_object *obj = vma->obj;
3817 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3818 	int ret;
3819 
3820 	if (list_empty(&vma->obj_link))
3821 		return 0;
3822 
3823 	if (!drm_mm_node_allocated(&vma->node)) {
3824 		i915_gem_vma_destroy(vma);
3825 		return 0;
3826 	}
3827 
3828 	if (vma->pin_count)
3829 		return -EBUSY;
3830 
3831 	BUG_ON(obj->pages == NULL);
3832 
3833 	if (wait) {
3834 		ret = i915_gem_object_wait_rendering(obj, false);
3835 		if (ret)
3836 			return ret;
3837 	}
3838 
3839 	if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
3840 		i915_gem_object_finish_gtt(obj);
3841 
3842 		/* release the fence reg _after_ flushing */
3843 		ret = i915_gem_object_put_fence(obj);
3844 		if (ret)
3845 			return ret;
3846 
3847 		__i915_vma_iounmap(vma);
3848 	}
3849 
3850 	trace_i915_vma_unbind(vma);
3851 
3852 	vma->vm->unbind_vma(vma);
3853 	vma->bound = 0;
3854 
3855 	list_del_init(&vma->vm_link);
3856 	if (vma->is_ggtt) {
3857 		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
3858 			obj->map_and_fenceable = false;
3859 		} else if (vma->ggtt_view.pages) {
3860 			sg_free_table(vma->ggtt_view.pages);
3861 			kfree(vma->ggtt_view.pages);
3862 		}
3863 		vma->ggtt_view.pages = NULL;
3864 	}
3865 
3866 	drm_mm_remove_node(&vma->node);
3867 	i915_gem_vma_destroy(vma);
3868 
3869 	/* Since the unbound list is global, only move to that list if
3870 	 * no more VMAs exist. */
3871 	if (list_empty(&obj->vma_list))
3872 		list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
3873 
3874 	/* And finally now the object is completely decoupled from this vma,
3875 	 * we can drop its hold on the backing storage and allow it to be
3876 	 * reaped by the shrinker.
3877 	 */
3878 	i915_gem_object_unpin_pages(obj);
3879 
3880 	return 0;
3881 }
3882 
3883 int i915_vma_unbind(struct i915_vma *vma)
3884 {
3885 	return __i915_vma_unbind(vma, true);
3886 }
3887 
3888 int __i915_vma_unbind_no_wait(struct i915_vma *vma)
3889 {
3890 	return __i915_vma_unbind(vma, false);
3891 }
3892 
3893 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv)
3894 {
3895 	struct intel_engine_cs *engine;
3896 	int ret;
3897 
3898 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
3899 
3900 	for_each_engine(engine, dev_priv) {
3901 		if (engine->last_context == NULL)
3902 			continue;
3903 
3904 		ret = intel_engine_idle(engine);
3905 		if (ret)
3906 			return ret;
3907 	}
3908 
3909 	WARN_ON(i915_verify_lists(dev));
3910 	return 0;
3911 }
3912 
3913 static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
3914 				     unsigned long cache_level)
3915 {
3916 	struct drm_mm_node *gtt_space = &vma->node;
3917 	struct drm_mm_node *other;
3918 
3919 	/*
3920 	 * On some machines we have to be careful when putting differing types
3921 	 * of snoopable memory together to avoid the prefetcher crossing memory
3922 	 * domains and dying. During vm initialisation, we decide whether or not
3923 	 * these constraints apply and set the drm_mm.color_adjust
3924 	 * appropriately.
3925 	 */
3926 	if (vma->vm->mm.color_adjust == NULL)
3927 		return true;
3928 
3929 	if (!drm_mm_node_allocated(gtt_space))
3930 		return true;
3931 
3932 	if (list_empty(&gtt_space->node_list))
3933 		return true;
3934 
3935 	other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3936 	if (other->allocated && !other->hole_follows && other->color != cache_level)
3937 		return false;
3938 
3939 	other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3940 	if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3941 		return false;
3942 
3943 	return true;
3944 }
3945 
3946 /**
3947  * Finds free space in the GTT aperture and binds the object or a view of it
3948  * there.
3949  * @obj: object to bind
3950  * @vm: address space to bind into
3951  * @ggtt_view: global gtt view if applicable
3952  * @alignment: requested alignment
3953  * @flags: mask of PIN_* flags to use
3954  */
3955 static struct i915_vma *
3956 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3957 			   struct i915_address_space *vm,
3958 			   const struct i915_ggtt_view *ggtt_view,
3959 			   unsigned alignment,
3960 			   uint64_t flags)
3961 {
3962 	struct drm_device *dev = obj->base.dev;
3963 	struct drm_i915_private *dev_priv = to_i915(dev);
3964 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
3965 	u32 fence_alignment, unfenced_alignment;
3966 	u32 search_flag, alloc_flag;
3967 	u64 start, end;
3968 	u64 size, fence_size;
3969 	struct i915_vma *vma;
3970 	int ret;
3971 
3972 	if (i915_is_ggtt(vm)) {
3973 		u32 view_size;
3974 
3975 		if (WARN_ON(!ggtt_view))
3976 			return ERR_PTR(-EINVAL);
3977 
3978 		view_size = i915_ggtt_view_size(obj, ggtt_view);
3979 
3980 		fence_size = i915_gem_get_gtt_size(dev,
3981 						   view_size,
3982 						   obj->tiling_mode);
3983 		fence_alignment = i915_gem_get_gtt_alignment(dev,
3984 							     view_size,
3985 							     obj->tiling_mode,
3986 							     true);
3987 		unfenced_alignment = i915_gem_get_gtt_alignment(dev,
3988 								view_size,
3989 								obj->tiling_mode,
3990 								false);
3991 		size = flags & PIN_MAPPABLE ? fence_size : view_size;
3992 	} else {
3993 		fence_size = i915_gem_get_gtt_size(dev,
3994 						   obj->base.size,
3995 						   obj->tiling_mode);
3996 		fence_alignment = i915_gem_get_gtt_alignment(dev,
3997 							     obj->base.size,
3998 							     obj->tiling_mode,
3999 							     true);
4000 		unfenced_alignment =
4001 			i915_gem_get_gtt_alignment(dev,
4002 						   obj->base.size,
4003 						   obj->tiling_mode,
4004 						   false);
4005 		size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
4006 	}
4007 
4008 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
4009 	end = vm->total;
4010 	if (flags & PIN_MAPPABLE)
4011 		end = min_t(u64, end, ggtt->mappable_end);
4012 	if (flags & PIN_ZONE_4G)
4013 		end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
4014 
4015 	if (alignment == 0)
4016 		alignment = flags & PIN_MAPPABLE ? fence_alignment :
4017 						unfenced_alignment;
4018 	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
4019 		DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
4020 			  ggtt_view ? ggtt_view->type : 0,
4021 			  alignment);
4022 		return ERR_PTR(-EINVAL);
4023 	}
4024 
4025 	/* If binding the object/GGTT view requires more space than the entire
4026 	 * aperture has, reject it early before evicting everything in a vain
4027 	 * attempt to find space.
4028 	 */
4029 	if (size > end) {
4030 		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
4031 			  ggtt_view ? ggtt_view->type : 0,
4032 			  size,
4033 			  flags & PIN_MAPPABLE ? "mappable" : "total",
4034 			  end);
4035 		return ERR_PTR(-E2BIG);
4036 	}
4037 
4038 	ret = i915_gem_object_get_pages(obj);
4039 	if (ret)
4040 		return ERR_PTR(ret);
4041 
4042 	i915_gem_object_pin_pages(obj);
4043 
4044 	vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
4045 			  i915_gem_obj_lookup_or_create_vma(obj, vm);
4046 
4047 	if (IS_ERR(vma))
4048 		goto err_unpin;
4049 
4050 	if (flags & PIN_OFFSET_FIXED) {
4051 		uint64_t offset = flags & PIN_OFFSET_MASK;
4052 
4053 		if (offset & (alignment - 1) || offset + size > end) {
4054 			ret = -EINVAL;
4055 			goto err_free_vma;
4056 		}
4057 		vma->node.start = offset;
4058 		vma->node.size = size;
4059 		vma->node.color = obj->cache_level;
4060 		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
4061 		if (ret) {
4062 			ret = i915_gem_evict_for_vma(vma);
4063 			if (ret == 0)
4064 				ret = drm_mm_reserve_node(&vm->mm, &vma->node);
4065 		}
4066 		if (ret)
4067 			goto err_free_vma;
4068 	} else {
4069 		if (flags & PIN_HIGH) {
4070 			search_flag = DRM_MM_SEARCH_BELOW;
4071 			alloc_flag = DRM_MM_CREATE_TOP;
4072 		} else {
4073 			search_flag = DRM_MM_SEARCH_DEFAULT;
4074 			alloc_flag = DRM_MM_CREATE_DEFAULT;
4075 		}
4076 
4077 search_free:
4078 		ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
4079 							  size, alignment,
4080 							  obj->cache_level,
4081 							  start, end,
4082 							  search_flag,
4083 							  alloc_flag);
4084 		if (ret) {
4085 			ret = i915_gem_evict_something(dev, vm, size, alignment,
4086 						       obj->cache_level,
4087 						       start, end,
4088 						       flags);
4089 			if (ret == 0)
4090 				goto search_free;
4091 
4092 			goto err_free_vma;
4093 		}
4094 	}
4095 	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
4096 		ret = -EINVAL;
4097 		goto err_remove_node;
4098 	}
4099 
4100 	trace_i915_vma_bind(vma, flags);
4101 	ret = i915_vma_bind(vma, obj->cache_level, flags);
4102 	if (ret)
4103 		goto err_remove_node;
4104 
4105 	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
4106 	list_add_tail(&vma->vm_link, &vm->inactive_list);
4107 
4108 	return vma;
4109 
4110 err_remove_node:
4111 	drm_mm_remove_node(&vma->node);
4112 err_free_vma:
4113 	i915_gem_vma_destroy(vma);
4114 	vma = ERR_PTR(ret);
4115 err_unpin:
4116 	i915_gem_object_unpin_pages(obj);
4117 	return vma;
4118 }
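
/*
 * Editorial sketch (not part of the driver): how a caller encodes the PIN_*
 * flags consumed above.  PIN_OFFSET_FIXED carries the page-aligned target
 * offset in the same u64 as the flag bits, extracted via PIN_OFFSET_MASK;
 * i915_gem_obj_ggtt_pin() is assumed to be the GGTT convenience wrapper
 * declared in i915_drv.h.
 */
#if 0
static int pin_at_fixed_ggtt_offset(struct drm_i915_gem_object *obj, u64 offset)
{
	/* an alignment of 0 lets the binder pick the fence alignment itself */
	return i915_gem_obj_ggtt_pin(obj, 0,
				     PIN_OFFSET_FIXED |
				     (offset & PIN_OFFSET_MASK));
}
#endif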
4119 
4120 bool
4121 i915_gem_clflush_object(struct drm_i915_gem_object *obj,
4122 			bool force)
4123 {
4124 	/* If we don't have a page list set up, then we're not pinned
4125 	 * to GPU, and we can ignore the cache flush because it'll happen
4126 	 * again at bind time.
4127 	 */
4128 	if (obj->pages == NULL)
4129 		return false;
4130 
4131 	/*
4132 	 * Stolen memory is always coherent with the GPU as it is explicitly
4133 	 * marked as wc by the system, or the system is cache-coherent.
4134 	 */
4135 	if (obj->stolen || obj->phys_handle)
4136 		return false;
4137 
4138 	/* If the GPU is snooping the contents of the CPU cache,
4139 	 * we do not need to manually clear the CPU cache lines.  However,
4140 	 * the caches are only snooped when the render cache is
4141 	 * flushed/invalidated.  As we always have to emit invalidations
4142 	 * and flushes when moving into and out of the RENDER domain, correct
4143 	 * snooping behaviour occurs naturally as the result of our domain
4144 	 * tracking.
4145 	 */
4146 	if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
4147 		obj->cache_dirty = true;
4148 		return false;
4149 	}
4150 
4151 	trace_i915_gem_object_clflush(obj);
4152 	drm_clflush_sg(obj->pages);
4153 	obj->cache_dirty = false;
4154 
4155 	return true;
4156 }
4157 
4158 /** Flushes the GTT write domain for the object if it's dirty. */
4159 static void
4160 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
4161 {
4162 	uint32_t old_write_domain;
4163 
4164 	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
4165 		return;
4166 
4167 	/* No actual flushing is required for the GTT write domain.  Writes
4168 	 * to it immediately go to main memory as far as we know, so there's
4169 	 * no chipset flush.  It also doesn't land in render cache.
4170 	 *
4171 	 * However, we do have to enforce the order so that all writes through
4172 	 * the GTT land before any writes to the device, such as updates to
4173 	 * the GATT itself.
4174 	 */
4175 	wmb();
4176 
4177 	old_write_domain = obj->base.write_domain;
4178 	obj->base.write_domain = 0;
4179 
4180 	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
4181 
4182 	trace_i915_gem_object_change_domain(obj,
4183 					    obj->base.read_domains,
4184 					    old_write_domain);
4185 }
4186 
4187 /** Flushes the CPU write domain for the object if it's dirty. */
4188 static void
4189 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
4190 {
4191 	uint32_t old_write_domain;
4192 
4193 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
4194 		return;
4195 
4196 	if (i915_gem_clflush_object(obj, obj->pin_display))
4197 		i915_gem_chipset_flush(to_i915(obj->base.dev));
4198 
4199 	old_write_domain = obj->base.write_domain;
4200 	obj->base.write_domain = 0;
4201 
4202 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
4203 
4204 	trace_i915_gem_object_change_domain(obj,
4205 					    obj->base.read_domains,
4206 					    old_write_domain);
4207 }
4208 
4209 /**
4210  * Moves a single object to the GTT read, and possibly write, domain.
4211  * @obj: object to act on
4212  * @write: ask for write access or read only
4213  *
4214  * This function returns when the move is complete, including waiting on
4215  * flushes to occur.
4216  */
4217 int
4218 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
4219 {
4220 	struct drm_device *dev = obj->base.dev;
4221 	struct drm_i915_private *dev_priv = to_i915(dev);
4222 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
4223 	uint32_t old_write_domain, old_read_domains;
4224 	struct i915_vma *vma;
4225 	int ret;
4226 
4227 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
4228 		return 0;
4229 
4230 	ret = i915_gem_object_wait_rendering(obj, !write);
4231 	if (ret)
4232 		return ret;
4233 
4234 	/* Flush and acquire obj->pages so that we are coherent through
4235 	 * direct access in memory with previous cached writes through
4236 	 * shmemfs and that our cache domain tracking remains valid.
4237 	 * For example, if the obj->filp was moved to swap without us
4238 	 * being notified and releasing the pages, we would mistakenly
4239 	 * continue to assume that the obj remained out of the CPU cached
4240 	 * domain.
4241 	 */
4242 	ret = i915_gem_object_get_pages(obj);
4243 	if (ret)
4244 		return ret;
4245 
4246 	i915_gem_object_flush_cpu_write_domain(obj);
4247 
4248 	/* Serialise direct access to this object with the barriers for
4249 	 * coherent writes from the GPU, by effectively invalidating the
4250 	 * GTT domain upon first access.
4251 	 */
4252 	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
4253 		mb();
4254 
4255 	old_write_domain = obj->base.write_domain;
4256 	old_read_domains = obj->base.read_domains;
4257 
4258 	/* It should now be out of any other write domains, and we can update
4259 	 * the domain values for our changes.
4260 	 */
4261 	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
4262 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
4263 	if (write) {
4264 		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
4265 		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
4266 		obj->dirty = 1;
4267 	}
4268 
4269 	trace_i915_gem_object_change_domain(obj,
4270 					    old_read_domains,
4271 					    old_write_domain);
4272 
4273 	/* And bump the LRU for this access */
4274 	vma = i915_gem_obj_to_ggtt(obj);
4275 	if (vma && drm_mm_node_allocated(&vma->node) && !obj->active)
4276 		list_move_tail(&vma->vm_link,
4277 			       &ggtt->base.inactive_list);
4278 
4279 	return 0;
4280 }
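
/*
 * Editorial sketch (not part of the driver): the ordering used by GTT write
 * paths - pin into the mappable aperture, move to the GTT write domain, do
 * the access, unpin.  i915_gem_obj_ggtt_pin()/i915_gem_object_ggtt_unpin()
 * are assumed to be the wrappers declared in i915_drv.h; the actual copy
 * through the aperture is elided.
 */
#if 0
static int prepare_for_gtt_write(struct drm_i915_gem_object *obj)
{
	int ret;

	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
	if (ret)
		return ret;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret) {
		i915_gem_object_ggtt_unpin(obj);
		return ret;
	}

	/* ... write through the GTT aperture here ... */

	i915_gem_object_ggtt_unpin(obj);
	return 0;
}
#endif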
4281 
4282 /**
4283  * Changes the cache-level of an object across all VMA.
4284  * @obj: object to act on
4285  * @cache_level: new cache level to set for the object
4286  *
4287  * After this function returns, the object will be in the new cache-level
4288  * across all GTT and the contents of the backing storage will be coherent,
4289  * with respect to the new cache-level. In order to keep the backing storage
4290  * coherent for all users, we only allow a single cache level to be set
4291  * globally on the object and prevent it from being changed whilst the
4292  * hardware is reading from the object. That is, if the object is currently
4293  * on the scanout it will be set to uncached (or equivalent display
4294  * cache coherency) and all non-MOCS GPU access will also be uncached so
4295  * that all direct access to the scanout remains coherent.
4296  */
4297 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
4298 				    enum i915_cache_level cache_level)
4299 {
4300 	struct drm_device *dev = obj->base.dev;
4301 	struct i915_vma *vma, *next;
4302 	bool bound = false;
4303 	int ret = 0;
4304 
4305 	if (obj->cache_level == cache_level)
4306 		goto out;
4307 
4308 	/* Inspect the list of currently bound VMA and unbind any that would
4309 	 * be invalid given the new cache-level. This is principally to
4310 	 * catch the issue of the CS prefetch crossing page boundaries and
4311 	 * reading an invalid PTE on older architectures.
4312 	 */
4313 	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
4314 		if (!drm_mm_node_allocated(&vma->node))
4315 			continue;
4316 
4317 		if (vma->pin_count) {
4318 			DRM_DEBUG("can not change the cache level of pinned objects\n");
4319 			return -EBUSY;
4320 		}
4321 
4322 		if (!i915_gem_valid_gtt_space(vma, cache_level)) {
4323 			ret = i915_vma_unbind(vma);
4324 			if (ret)
4325 				return ret;
4326 		} else
4327 			bound = true;
4328 	}
4329 
4330 	/* We can reuse the existing drm_mm nodes but need to change the
4331 	 * cache-level on the PTE. We could simply unbind them all and
4332 	 * rebind with the correct cache-level on next use. However since
4333  * we already have a valid slot, dma mapping, pages etc., we may as well
4334 	 * rewrite the PTE in the belief that doing so tramples upon less
4335 	 * state and so involves less work.
4336 	 */
4337 	if (bound) {
4338 		/* Before we change the PTE, the GPU must not be accessing it.
4339 		 * If we wait upon the object, we know that all the bound
4340 		 * VMA are no longer active.
4341 		 */
4342 		ret = i915_gem_object_wait_rendering(obj, false);
4343 		if (ret)
4344 			return ret;
4345 
4346 		if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
4347 			/* Access to snoopable pages through the GTT is
4348 			 * incoherent and on some machines causes a hard
4349			 * lockup. Relinquish the CPU mmapping to force
4350 			 * userspace to refault in the pages and we can
4351 			 * then double check if the GTT mapping is still
4352 			 * valid for that pointer access.
4353 			 */
4354 			i915_gem_release_mmap(obj);
4355 
4356 			/* As we no longer need a fence for GTT access,
4357 			 * we can relinquish it now (and so prevent having
4358 			 * to steal a fence from someone else on the next
4359 			 * fence request). Note GPU activity would have
4360 			 * dropped the fence as all snoopable access is
4361 			 * supposed to be linear.
4362 			 */
4363 			ret = i915_gem_object_put_fence(obj);
4364 			if (ret)
4365 				return ret;
4366 		} else {
4367 			/* We either have incoherent backing store and
4368 			 * so no GTT access or the architecture is fully
4369 			 * coherent. In such cases, existing GTT mmaps
4370 			 * ignore the cache bit in the PTE and we can
4371 			 * rewrite it without confusing the GPU or having
4372 			 * to force userspace to fault back in its mmaps.
4373 			 */
4374 		}
4375 
4376 		list_for_each_entry(vma, &obj->vma_list, obj_link) {
4377 			if (!drm_mm_node_allocated(&vma->node))
4378 				continue;
4379 
4380 			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
4381 			if (ret)
4382 				return ret;
4383 		}
4384 	}
4385 
4386 	list_for_each_entry(vma, &obj->vma_list, obj_link)
4387 		vma->node.color = cache_level;
4388 	obj->cache_level = cache_level;
4389 
4390 out:
4391 	/* Flush the dirty CPU caches to the backing storage so that the
4392 	 * object is now coherent at its new cache level (with respect
4393 	 * to the access domain).
4394 	 */
4395 	if (obj->cache_dirty && cpu_write_needs_clflush(obj)) {
4396 		if (i915_gem_clflush_object(obj, true))
4397 			i915_gem_chipset_flush(to_i915(obj->base.dev));
4398 	}
4399 
4400 	return 0;
4401 }
4402 
4403 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
4404 			       struct drm_file *file)
4405 {
4406 	struct drm_i915_gem_caching *args = data;
4407 	struct drm_i915_gem_object *obj;
4408 
4409 	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
4410 	if (&obj->base == NULL)
4411 		return -ENOENT;
4412 
4413 	switch (obj->cache_level) {
4414 	case I915_CACHE_LLC:
4415 	case I915_CACHE_L3_LLC:
4416 		args->caching = I915_CACHING_CACHED;
4417 		break;
4418 
4419 	case I915_CACHE_WT:
4420 		args->caching = I915_CACHING_DISPLAY;
4421 		break;
4422 
4423 	default:
4424 		args->caching = I915_CACHING_NONE;
4425 		break;
4426 	}
4427 
4428 	drm_gem_object_unreference_unlocked(&obj->base);
4429 	return 0;
4430 }
4431 
4432 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
4433 			       struct drm_file *file)
4434 {
4435 	struct drm_i915_private *dev_priv = to_i915(dev);
4436 	struct drm_i915_gem_caching *args = data;
4437 	struct drm_i915_gem_object *obj;
4438 	enum i915_cache_level level;
4439 	int ret;
4440 
4441 	switch (args->caching) {
4442 	case I915_CACHING_NONE:
4443 		level = I915_CACHE_NONE;
4444 		break;
4445 	case I915_CACHING_CACHED:
4446 		/*
4447 		 * Due to a HW issue on BXT A stepping, GPU stores via a
4448 		 * snooped mapping may leave stale data in a corresponding CPU
4449 		 * cacheline, whereas normally such cachelines would get
4450 		 * invalidated.
4451 		 */
4452 		if (!HAS_LLC(dev) && !HAS_SNOOP(dev))
4453 			return -ENODEV;
4454 
4455 		level = I915_CACHE_LLC;
4456 		break;
4457 	case I915_CACHING_DISPLAY:
4458 		level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
4459 		break;
4460 	default:
4461 		return -EINVAL;
4462 	}
4463 
4464 	intel_runtime_pm_get(dev_priv);
4465 
4466 	ret = i915_mutex_lock_interruptible(dev);
4467 	if (ret)
4468 		goto rpm_put;
4469 
4470 	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
4471 	if (&obj->base == NULL) {
4472 		ret = -ENOENT;
4473 		goto unlock;
4474 	}
4475 
4476 	ret = i915_gem_object_set_cache_level(obj, level);
4477 
4478 	drm_gem_object_unreference(&obj->base);
4479 unlock:
4480 	mutex_unlock(&dev->struct_mutex);
4481 rpm_put:
4482 	intel_runtime_pm_put(dev_priv);
4483 
4484 	return ret;
4485 }
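
/*
 * Editorial sketch (not part of the driver): driving the caching ioctls above
 * from userspace.  Assumes an open DRM fd, a valid handle and the libdrm
 * headers; the helper name is made up.
 */
#if 0
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int set_bo_caching(int fd, uint32_t handle, uint32_t caching)
{
	struct drm_i915_gem_caching arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.caching = caching;	/* I915_CACHING_NONE, _CACHED or _DISPLAY */

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg))
		return -errno;	/* e.g. -ENODEV if snooping is unsupported */
	return 0;
}
#endif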
4486 
4487 /*
4488  * Prepare buffer for display plane (scanout, cursors, etc).
4489  * Can be called from an uninterruptible phase (modesetting) and allows
4490  * any flushes to be pipelined (for pageflips).
4491  */
4492 int
4493 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
4494 				     u32 alignment,
4495 				     const struct i915_ggtt_view *view)
4496 {
4497 	u32 old_read_domains, old_write_domain;
4498 	int ret;
4499 
4500 	/* Mark the pin_display early so that we account for the
4501 	 * display coherency whilst setting up the cache domains.
4502 	 */
4503 	obj->pin_display++;
4504 
4505 	/* The display engine is not coherent with the LLC cache on gen6.  As
4506 	 * a result, we make sure that the pinning that is about to occur is
4507	 * done with uncached PTEs. This is the lowest common denominator for all
4508 	 * chipsets.
4509 	 *
4510 	 * However for gen6+, we could do better by using the GFDT bit instead
4511 	 * of uncaching, which would allow us to flush all the LLC-cached data
4512 	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
4513 	 */
4514 	ret = i915_gem_object_set_cache_level(obj,
4515 					      HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
4516 	if (ret)
4517 		goto err_unpin_display;
4518 
4519 	/* As the user may map the buffer once pinned in the display plane
4520 	 * (e.g. libkms for the bootup splash), we have to ensure that we
4521 	 * always use map_and_fenceable for all scanout buffers.
4522 	 */
4523 	ret = i915_gem_object_ggtt_pin(obj, view, alignment,
4524 				       view->type == I915_GGTT_VIEW_NORMAL ?
4525 				       PIN_MAPPABLE : 0);
4526 	if (ret)
4527 		goto err_unpin_display;
4528 
4529 	i915_gem_object_flush_cpu_write_domain(obj);
4530 
4531 	old_write_domain = obj->base.write_domain;
4532 	old_read_domains = obj->base.read_domains;
4533 
4534 	/* It should now be out of any other write domains, and we can update
4535 	 * the domain values for our changes.
4536 	 */
4537 	obj->base.write_domain = 0;
4538 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
4539 
4540 	trace_i915_gem_object_change_domain(obj,
4541 					    old_read_domains,
4542 					    old_write_domain);
4543 
4544 	return 0;
4545 
4546 err_unpin_display:
4547 	obj->pin_display--;
4548 	return ret;
4549 }
4550 
4551 void
4552 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
4553 					 const struct i915_ggtt_view *view)
4554 {
4555 	if (WARN_ON(obj->pin_display == 0))
4556 		return;
4557 
4558 	i915_gem_object_ggtt_unpin_view(obj, view);
4559 
4560 	obj->pin_display--;
4561 }
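
/*
 * Editorial sketch (not part of the driver): the scanout pin/unpin pairing a
 * modeset path would use around the two helpers above.  A real caller derives
 * the GGTT view from the framebuffer's rotation state; i915_ggtt_view_normal
 * is used here purely for illustration, and programming the plane registers
 * is elided.
 */
#if 0
static int pin_fb_for_scanout(struct drm_i915_gem_object *obj)
{
	int ret;

	ret = i915_gem_object_pin_to_display_plane(obj, 0,
						   &i915_ggtt_view_normal);
	if (ret)
		return ret;

	/* ... program the plane with the object's GGTT offset ... */

	i915_gem_object_unpin_from_display_plane(obj, &i915_ggtt_view_normal);
	return 0;
}
#endif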
4562 
4563 /**
4564  * Moves a single object to the CPU read, and possibly write, domain.
4565  * @obj: object to act on
4566  * @write: requesting write or read-only access
4567  *
4568  * This function returns when the move is complete, including waiting on
4569  * flushes to occur.
4570  */
4571 int
4572 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
4573 {
4574 	uint32_t old_write_domain, old_read_domains;
4575 	int ret;
4576 
4577 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
4578 		return 0;
4579 
4580 	ret = i915_gem_object_wait_rendering(obj, !write);
4581 	if (ret)
4582 		return ret;
4583 
4584 	i915_gem_object_flush_gtt_write_domain(obj);
4585 
4586 	old_write_domain = obj->base.write_domain;
4587 	old_read_domains = obj->base.read_domains;
4588 
4589 	/* Flush the CPU cache if it's still invalid. */
4590 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
4591 		i915_gem_clflush_object(obj, false);
4592 
4593 		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
4594 	}
4595 
4596 	/* It should now be out of any other write domains, and we can update
4597 	 * the domain values for our changes.
4598 	 */
4599 	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
4600 
4601 	/* If we're writing through the CPU, then the GPU read domains will
4602 	 * need to be invalidated at next use.
4603 	 */
4604 	if (write) {
4605 		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4606 		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4607 	}
4608 
4609 	trace_i915_gem_object_change_domain(obj,
4610 					    old_read_domains,
4611 					    old_write_domain);
4612 
4613 	return 0;
4614 }
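
/*
 * Editorial sketch (not part of the driver): CPU access bracketed by the
 * domain helper above.  The caller must hold struct_mutex; the page pinning
 * and kmap()/sg_page() usage are illustrative of how the shmem-backed pages
 * would be touched once the object is in the CPU write domain.
 */
#if 0
static int cpu_fill_first_page(struct drm_i915_gem_object *obj, u8 value)
{
	struct page *page;
	void *vaddr;
	int ret;

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;
	i915_gem_object_pin_pages(obj);

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret)
		goto out;

	page = sg_page(obj->pages->sgl);
	vaddr = kmap(page);
	memset(vaddr, value, PAGE_SIZE);
	kunmap(page);
	obj->dirty = 1;
out:
	i915_gem_object_unpin_pages(obj);
	return ret;
}
#endif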
4615 
4616 /* Throttle our rendering by waiting until the ring has completed our requests
4617  * emitted over 20 msec ago.
4618  *
4619  * Note that if we were to use the current jiffies each time around the loop,
4620  * we wouldn't escape the function with any frames outstanding if the time to
4621  * render a frame was over 20ms.
4622  *
4623  * This should get us reasonable parallelism between CPU and GPU but also
4624  * relatively low latency when blocking on a particular request to finish.
4625  */
4626 static int
4627 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
4628 {
4629 	struct drm_i915_private *dev_priv = to_i915(dev);
4630 	struct drm_i915_file_private *file_priv = file->driver_priv;
4631 	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
4632 	struct drm_i915_gem_request *request, *target = NULL;
4633 	int ret;
4634 
4635 	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
4636 	if (ret)
4637 		return ret;
4638 
4639 	/* ABI: return -EIO if already wedged */
4640 	if (i915_terminally_wedged(&dev_priv->gpu_error))
4641 		return -EIO;
4642 
4643 	spin_lock(&file_priv->mm.lock);
4644 	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
4645 		if (time_after_eq(request->emitted_jiffies, recent_enough))
4646 			break;
4647 
4648 		/*
4649		 * Note that the request might not have been submitted yet,
4650		 * in which case emitted_jiffies will be zero.
4651 		 */
4652 		if (!request->emitted_jiffies)
4653 			continue;
4654 
4655 		target = request;
4656 	}
4657 	if (target)
4658 		i915_gem_request_reference(target);
4659 	spin_unlock(&file_priv->mm.lock);
4660 
4661 	if (target == NULL)
4662 		return 0;
4663 
4664 	ret = __i915_wait_request(target, true, NULL, NULL);
4665 	i915_gem_request_unreference(target);
4666 
4667 	return ret;
4668 }
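
/*
 * Editorial sketch (not part of the driver): userspace reaches the throttle
 * logic above through DRM_IOCTL_I915_GEM_THROTTLE (see
 * i915_gem_throttle_ioctl() below).  The ioctl takes no argument structure,
 * so a client simply calls it between frames to keep at most ~20ms of work
 * queued.
 */
#if 0
#include <errno.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int throttle_gpu(int fd)
{
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL))
		return -errno;	/* -EIO once the GPU is terminally wedged */
	return 0;
}
#endif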
4669 
4670 static bool
4671 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
4672 {
4673 	struct drm_i915_gem_object *obj = vma->obj;
4674 
4675 	if (alignment &&
4676 	    vma->node.start & (alignment - 1))
4677 		return true;
4678 
4679 	if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
4680 		return true;
4681 
4682 	if (flags & PIN_OFFSET_BIAS &&
4683 	    vma->node.start < (flags & PIN_OFFSET_MASK))
4684 		return true;
4685 
4686 	if (flags & PIN_OFFSET_FIXED &&
4687 	    vma->node.start != (flags & PIN_OFFSET_MASK))
4688 		return true;
4689 
4690 	return false;
4691 }
4692 
4693 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
4694 {
4695 	struct drm_i915_gem_object *obj = vma->obj;
4696 	bool mappable, fenceable;
4697 	u32 fence_size, fence_alignment;
4698 
4699 	fence_size = i915_gem_get_gtt_size(obj->base.dev,
4700 					   obj->base.size,
4701 					   obj->tiling_mode);
4702 	fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
4703 						     obj->base.size,
4704 						     obj->tiling_mode,
4705 						     true);
4706 
4707 	fenceable = (vma->node.size == fence_size &&
4708 		     (vma->node.start & (fence_alignment - 1)) == 0);
4709 
4710 	mappable = (vma->node.start + fence_size <=
4711 		    to_i915(obj->base.dev)->ggtt.mappable_end);
4712 
4713 	obj->map_and_fenceable = mappable && fenceable;
4714 }
4715 
4716 static int
4717 i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
4718 		       struct i915_address_space *vm,
4719 		       const struct i915_ggtt_view *ggtt_view,
4720 		       uint32_t alignment,
4721 		       uint64_t flags)
4722 {
4723 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
4724 	struct i915_vma *vma;
4725 	unsigned bound;
4726 	int ret;
4727 
4728 	if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
4729 		return -ENODEV;
4730 
4731 	if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
4732 		return -EINVAL;
4733 
4734 	if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
4735 		return -EINVAL;
4736 
4737 	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
4738 		return -EINVAL;
4739 
4740 	vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
4741 			  i915_gem_obj_to_vma(obj, vm);
4742 
4743 	if (vma) {
4744 		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
4745 			return -EBUSY;
4746 
4747 		if (i915_vma_misplaced(vma, alignment, flags)) {
4748 			WARN(vma->pin_count,
4749 			     "bo is already pinned in %s with incorrect alignment:"
4750 			     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
4751 			     " obj->map_and_fenceable=%d\n",
4752 			     ggtt_view ? "ggtt" : "ppgtt",
4753 			     upper_32_bits(vma->node.start),
4754 			     lower_32_bits(vma->node.start),
4755 			     alignment,
4756 			     !!(flags & PIN_MAPPABLE),
4757 			     obj->map_and_fenceable);
4758 			ret = i915_vma_unbind(vma);
4759 			if (ret)
4760 				return ret;
4761 
4762 			vma = NULL;
4763 		}
4764 	}
4765 
4766 	bound = vma ? vma->bound : 0;
4767 	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
4768 		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
4769 						 flags);
4770 		if (IS_ERR(vma))
4771 			return PTR_ERR(vma);
4772 	} else {
4773 		ret = i915_vma_bind(vma, obj->cache_level, flags);
4774 		if (ret)
4775 			return ret;
4776 	}
4777 
4778 	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
4779 	    (bound ^ vma->bound) & GLOBAL_BIND) {
4780 		__i915_vma_set_map_and_fenceable(vma);
4781 		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
4782 	}
4783 
4784 	vma->pin_count++;
4785 	return 0;
4786 }
4787 
4788 int
4789 i915_gem_object_pin(struct drm_i915_gem_object *obj,
4790 		    struct i915_address_space *vm,
4791 		    uint32_t alignment,
4792 		    uint64_t flags)
4793 {
4794 	return i915_gem_object_do_pin(obj, vm,
4795 				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
4796 				      alignment, flags);
4797 }
4798 
4799 int
4800 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
4801 			 const struct i915_ggtt_view *view,
4802 			 uint32_t alignment,
4803 			 uint64_t flags)
4804 {
4805 	struct drm_device *dev = obj->base.dev;
4806 	struct drm_i915_private *dev_priv = to_i915(dev);
4807 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
4808 
4809 	BUG_ON(!view);
4810 
4811 	return i915_gem_object_do_pin(obj, &ggtt->base, view,
4812 				      alignment, flags | PIN_GLOBAL);
4813 }
4814 
4815 void
4816 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
4817 				const struct i915_ggtt_view *view)
4818 {
4819 	struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
4820 
4821 	WARN_ON(vma->pin_count == 0);
4822 	WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
4823 
4824 	--vma->pin_count;
4825 }
4826 
4827 int
4828 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4829 		    struct drm_file *file)
4830 {
4831 	struct drm_i915_gem_busy *args = data;
4832 	struct drm_i915_gem_object *obj;
4833 	int ret;
4834 
4835 	ret = i915_mutex_lock_interruptible(dev);
4836 	if (ret)
4837 		return ret;
4838 
4839 	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
4840 	if (&obj->base == NULL) {
4841 		ret = -ENOENT;
4842 		goto unlock;
4843 	}
4844 
4845 	/* Count all active objects as busy, even if they are currently not used
4846 	 * by the gpu. Users of this interface expect objects to eventually
4847 	 * become non-busy without any further actions, therefore emit any
4848 	 * necessary flushes here.
4849 	 */
4850 	ret = i915_gem_object_flush_active(obj);
4851 	if (ret)
4852 		goto unref;
4853 
4854 	args->busy = 0;
4855 	if (obj->active) {
4856 		int i;
4857 
4858 		for (i = 0; i < I915_NUM_ENGINES; i++) {
4859 			struct drm_i915_gem_request *req;
4860 
4861 			req = obj->last_read_req[i];
4862 			if (req)
4863 				args->busy |= 1 << (16 + req->engine->exec_id);
4864 		}
4865 		if (obj->last_write_req)
4866 			args->busy |= obj->last_write_req->engine->exec_id;
4867 	}
4868 
4869 unref:
4870 	drm_gem_object_unreference(&obj->base);
4871 unlock:
4872 	mutex_unlock(&dev->struct_mutex);
4873 	return ret;
4874 }
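
/*
 * Editorial sketch (not part of the driver): polling the busy ioctl above
 * from userspace.  Per the encoding in the function, the upper 16 bits of
 * 'busy' report the reading engines and the low bits the last writer; this
 * helper simply treats any non-zero value as "still busy".  The helper name
 * is made up.
 */
#if 0
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

/* Returns 1 if the object is still in use by the GPU, 0 if idle,
 * or a negative errno on failure. */
static int bo_is_busy(int fd, uint32_t handle)
{
	struct drm_i915_gem_busy busy;

	memset(&busy, 0, sizeof(busy));
	busy.handle = handle;

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy))
		return -errno;

	return busy.busy != 0;
}
#endif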
4875 
4876 int
4877 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4878 			struct drm_file *file_priv)
4879 {
4880 	return i915_gem_ring_throttle(dev, file_priv);
4881 }
4882 
4883 int
4884 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4885 		       struct drm_file *file_priv)
4886 {
4887 	struct drm_i915_private *dev_priv = to_i915(dev);
4888 	struct drm_i915_gem_madvise *args = data;
4889 	struct drm_i915_gem_object *obj;
4890 	int ret;
4891 
4892 	switch (args->madv) {
4893 	case I915_MADV_DONTNEED:
4894 	case I915_MADV_WILLNEED:
4895 	    break;
4896 	default:
4897 	    return -EINVAL;
4898 	}
4899 
4900 	ret = i915_mutex_lock_interruptible(dev);
4901 	if (ret)
4902 		return ret;
4903 
4904 	obj = to_intel_bo(drm_gem_object_lookup(file_priv, args->handle));
4905 	if (&obj->base == NULL) {
4906 		ret = -ENOENT;
4907 		goto unlock;
4908 	}
4909 
4910 	if (i915_gem_obj_is_pinned(obj)) {
4911 		ret = -EINVAL;
4912 		goto out;
4913 	}
4914 
4915 	if (obj->pages &&
4916 	    obj->tiling_mode != I915_TILING_NONE &&
4917 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
4918 		if (obj->madv == I915_MADV_WILLNEED)
4919 			i915_gem_object_unpin_pages(obj);
4920 		if (args->madv == I915_MADV_WILLNEED)
4921 			i915_gem_object_pin_pages(obj);
4922 	}
4923 
4924 	if (obj->madv != __I915_MADV_PURGED)
4925 		obj->madv = args->madv;
4926 
4927 	/* if the object is no longer attached, discard its backing storage */
4928 	if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
4929 		i915_gem_object_truncate(obj);
4930 
4931 	args->retained = obj->madv != __I915_MADV_PURGED;
4932 
4933 out:
4934 	drm_gem_object_unreference(&obj->base);
4935 unlock:
4936 	mutex_unlock(&dev->struct_mutex);
4937 	return ret;
4938 }
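
/*
 * Editorial sketch (not part of the driver): marking an idle, unpinned
 * object's backing storage as purgeable via the madvise ioctl above.
 * Assumes an open DRM fd and a valid handle; the helper name is made up.
 */
#if 0
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

/* Returns 1 if the pages were retained, 0 if they were already purged,
 * or a negative errno on failure. */
static int mark_bo_purgeable(int fd, uint32_t handle)
{
	struct drm_i915_gem_madvise madv;

	memset(&madv, 0, sizeof(madv));
	madv.handle = handle;
	madv.madv = I915_MADV_DONTNEED;

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv))
		return -errno;

	return madv.retained;
}
#endif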
4939 
4940 void i915_gem_object_init(struct drm_i915_gem_object *obj,
4941 			  const struct drm_i915_gem_object_ops *ops)
4942 {
4943 	int i;
4944 
4945 	INIT_LIST_HEAD(&obj->global_list);
4946 	for (i = 0; i < I915_NUM_ENGINES; i++)
4947 		INIT_LIST_HEAD(&obj->engine_list[i]);
4948 	INIT_LIST_HEAD(&obj->obj_exec_link);
4949 	INIT_LIST_HEAD(&obj->vma_list);
4950 	INIT_LIST_HEAD(&obj->batch_pool_link);
4951 
4952 	obj->ops = ops;
4953 
4954 	obj->fence_reg = I915_FENCE_REG_NONE;
4955 	obj->madv = I915_MADV_WILLNEED;
4956 
4957 	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
4958 }
4959 
4960 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4961 	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
4962 	.get_pages = i915_gem_object_get_pages_gtt,
4963 	.put_pages = i915_gem_object_put_pages_gtt,
4964 };
4965 
4966 struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev,
4967 						  size_t size)
4968 {
4969 	struct drm_i915_gem_object *obj;
4970 #if 0
4971 	struct address_space *mapping;
4972 	gfp_t mask;
4973 #endif
4974 	int ret;
4975 
4976 	obj = i915_gem_object_alloc(dev);
4977 	if (obj == NULL)
4978 		return ERR_PTR(-ENOMEM);
4979 
4980 	ret = drm_gem_object_init(dev, &obj->base, size);
4981 	if (ret)
4982 		goto fail;
4983 
4984 #if 0
4985 	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4986 	if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
4987 		/* 965gm cannot relocate objects above 4GiB. */
4988 		mask &= ~__GFP_HIGHMEM;
4989 		mask |= __GFP_DMA32;
4990 	}
4991 
4992 	mapping = obj->base.filp->f_mapping;
4993 	mapping_set_gfp_mask(mapping, mask);
4994 #endif
4995 
4996 	i915_gem_object_init(obj, &i915_gem_object_ops);
4997 
4998 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4999 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
5000 
5001 	if (HAS_LLC(dev)) {
5002 		/* On some devices, we can have the GPU use the LLC (the CPU
5003 		 * cache) for about a 10% performance improvement
5004 		 * compared to uncached.  Graphics requests other than
5005 		 * display scanout are coherent with the CPU in
5006 		 * accessing this cache.  This means in this mode we
5007 		 * don't need to clflush on the CPU side, and on the
5008 		 * GPU side we only need to flush internal caches to
5009 		 * get data visible to the CPU.
5010 		 *
5011 		 * However, we maintain the display planes as UC, and so
5012 		 * need to rebind when first used as such.
5013 		 */
5014 		obj->cache_level = I915_CACHE_LLC;
5015 	} else
5016 		obj->cache_level = I915_CACHE_NONE;
5017 
5018 	trace_i915_gem_object_create(obj);
5019 
5020 	return obj;
5021 
5022 fail:
5023 	i915_gem_object_free(obj);
5024 
5025 	return ERR_PTR(ret);
5026 }
5027 
5028 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
5029 {
5030 	/* If we are the last user of the backing storage (be it shmemfs
5031 	 * pages or stolen etc), we know that the pages are going to be
5032 	 * immediately released. In this case, we can then skip copying
5033 	 * back the contents from the GPU.
5034 	 */
5035 
5036 	if (obj->madv != I915_MADV_WILLNEED)
5037 		return false;
5038 
5039 	if (obj->base.filp == NULL)
5040 		return true;
5041 
5042 	/* At first glance, this looks racy, but then again so would be
5043 	 * userspace racing mmap against close. However, the first external
5044 	 * reference to the filp can only be obtained through the
5045 	 * i915_gem_mmap_ioctl() which safeguards us against the user
5046 	 * acquiring such a reference whilst we are in the middle of
5047 	 * freeing the object.
5048 	 */
5049 #if 0
5050 	return atomic_long_read(&obj->base.filp->f_count) == 1;
5051 #else
5052 	return false;
5053 #endif
5054 }
5055 
5056 void i915_gem_free_object(struct drm_gem_object *gem_obj)
5057 {
5058 	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
5059 	struct drm_device *dev = obj->base.dev;
5060 	struct drm_i915_private *dev_priv = to_i915(dev);
5061 	struct i915_vma *vma, *next;
5062 
5063 	intel_runtime_pm_get(dev_priv);
5064 
5065 	trace_i915_gem_object_destroy(obj);
5066 
5067 	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
5068 		int ret;
5069 
5070 		vma->pin_count = 0;
5071 		ret = i915_vma_unbind(vma);
5072 		if (WARN_ON(ret == -ERESTARTSYS)) {
5073 			bool was_interruptible;
5074 
5075 			was_interruptible = dev_priv->mm.interruptible;
5076 			dev_priv->mm.interruptible = false;
5077 
5078 			WARN_ON(i915_vma_unbind(vma));
5079 
5080 			dev_priv->mm.interruptible = was_interruptible;
5081 		}
5082 	}
5083 
5084 	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
5085 	 * before progressing. */
5086 	if (obj->stolen)
5087 		i915_gem_object_unpin_pages(obj);
5088 
5089 	WARN_ON(obj->frontbuffer_bits);
5090 
5091 	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
5092 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
5093 	    obj->tiling_mode != I915_TILING_NONE)
5094 		i915_gem_object_unpin_pages(obj);
5095 
5096 	if (WARN_ON(obj->pages_pin_count))
5097 		obj->pages_pin_count = 0;
5098 	if (discard_backing_storage(obj))
5099 		obj->madv = I915_MADV_DONTNEED;
5100 	i915_gem_object_put_pages(obj);
5101 	i915_gem_object_free_mmap_offset(obj);
5102 
5103 	BUG_ON(obj->pages);
5104 
5105 #if 0
5106 	if (obj->base.import_attach)
5107 		drm_prime_gem_destroy(&obj->base, NULL);
5108 #endif
5109 
5110 	if (obj->ops->release)
5111 		obj->ops->release(obj);
5112 
5113 	drm_gem_object_release(&obj->base);
5114 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
5115 
5116 	kfree(obj->bit_17);
5117 	i915_gem_object_free(obj);
5118 
5119 	intel_runtime_pm_put(dev_priv);
5120 }
5121 
5122 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
5123 				     struct i915_address_space *vm)
5124 {
5125 	struct i915_vma *vma;
5126 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
5127 		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
5128 		    vma->vm == vm)
5129 			return vma;
5130 	}
5131 	return NULL;
5132 }
5133 
5134 struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
5135 					   const struct i915_ggtt_view *view)
5136 {
5137 	struct i915_vma *vma;
5138 
5139 	GEM_BUG_ON(!view);
5140 
5141 	list_for_each_entry(vma, &obj->vma_list, obj_link)
5142 		if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view))
5143 			return vma;
5144 	return NULL;
5145 }
5146 
5147 void i915_gem_vma_destroy(struct i915_vma *vma)
5148 {
5149 	WARN_ON(vma->node.allocated);
5150 
5151 	/* Keep the vma as a placeholder in the execbuffer reservation lists */
5152 	if (!list_empty(&vma->exec_list))
5153 		return;
5154 
5155 	if (!vma->is_ggtt)
5156 		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
5157 
5158 	list_del(&vma->obj_link);
5159 
5160 	kfree(vma);
5161 }
5162 
5163 static void
5164 i915_gem_stop_engines(struct drm_device *dev)
5165 {
5166 	struct drm_i915_private *dev_priv = to_i915(dev);
5167 	struct intel_engine_cs *engine;
5168 
5169 	for_each_engine(engine, dev_priv)
5170 		dev_priv->gt.stop_engine(engine);
5171 }
5172 
5173 int
5174 i915_gem_suspend(struct drm_device *dev)
5175 {
5176 	struct drm_i915_private *dev_priv = to_i915(dev);
5177 	int ret = 0;
5178 
5179 	mutex_lock(&dev->struct_mutex);
5180 	ret = i915_gem_wait_for_idle(dev_priv);
5181 	if (ret)
5182 		goto err;
5183 
5184 	i915_gem_retire_requests(dev_priv);
5185 
5186 	i915_gem_stop_engines(dev);
5187 	i915_gem_context_lost(dev_priv);
5188 	mutex_unlock(&dev->struct_mutex);
5189 
5190 	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
5191 	cancel_delayed_work_sync(&dev_priv->gt.retire_work);
5192 	flush_delayed_work(&dev_priv->gt.idle_work);
5193 
5194	 * Assert that we successfully flushed all the work and
5195 	 * reset the GPU back to its idle, low power state.
5196 	 */
5197 	WARN_ON(dev_priv->gt.awake);
5198 
5199 	return 0;
5200 
5201 err:
5202 	mutex_unlock(&dev->struct_mutex);
5203 	return ret;
5204 }
5205 
5206 void i915_gem_init_swizzling(struct drm_device *dev)
5207 {
5208 	struct drm_i915_private *dev_priv = to_i915(dev);
5209 
5210 	if (INTEL_INFO(dev)->gen < 5 ||
5211 	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
5212 		return;
5213 
5214 	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
5215 				 DISP_TILE_SURFACE_SWIZZLING);
5216 
5217 	if (IS_GEN5(dev))
5218 		return;
5219 
5220 	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
5221 	if (IS_GEN6(dev))
5222 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
5223 	else if (IS_GEN7(dev))
5224 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
5225 	else if (IS_GEN8(dev))
5226 		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
5227 	else
5228 		BUG();
5229 }
5230 
5231 static void init_unused_ring(struct drm_device *dev, u32 base)
5232 {
5233 	struct drm_i915_private *dev_priv = to_i915(dev);
5234 
5235 	I915_WRITE(RING_CTL(base), 0);
5236 	I915_WRITE(RING_HEAD(base), 0);
5237 	I915_WRITE(RING_TAIL(base), 0);
5238 	I915_WRITE(RING_START(base), 0);
5239 }
5240 
5241 static void init_unused_rings(struct drm_device *dev)
5242 {
5243 	if (IS_I830(dev)) {
5244 		init_unused_ring(dev, PRB1_BASE);
5245 		init_unused_ring(dev, SRB0_BASE);
5246 		init_unused_ring(dev, SRB1_BASE);
5247 		init_unused_ring(dev, SRB2_BASE);
5248 		init_unused_ring(dev, SRB3_BASE);
5249 	} else if (IS_GEN2(dev)) {
5250 		init_unused_ring(dev, SRB0_BASE);
5251 		init_unused_ring(dev, SRB1_BASE);
5252 	} else if (IS_GEN3(dev)) {
5253 		init_unused_ring(dev, PRB1_BASE);
5254 		init_unused_ring(dev, PRB2_BASE);
5255 	}
5256 }
5257 
5258 int i915_gem_init_engines(struct drm_device *dev)
5259 {
5260 	struct drm_i915_private *dev_priv = to_i915(dev);
5261 	int ret;
5262 
5263 	ret = intel_init_render_ring_buffer(dev);
5264 	if (ret)
5265 		return ret;
5266 
5267 	if (HAS_BSD(dev)) {
5268 		ret = intel_init_bsd_ring_buffer(dev);
5269 		if (ret)
5270 			goto cleanup_render_ring;
5271 	}
5272 
5273 	if (HAS_BLT(dev)) {
5274 		ret = intel_init_blt_ring_buffer(dev);
5275 		if (ret)
5276 			goto cleanup_bsd_ring;
5277 	}
5278 
5279 	if (HAS_VEBOX(dev)) {
5280 		ret = intel_init_vebox_ring_buffer(dev);
5281 		if (ret)
5282 			goto cleanup_blt_ring;
5283 	}
5284 
5285 	if (HAS_BSD2(dev)) {
5286 		ret = intel_init_bsd2_ring_buffer(dev);
5287 		if (ret)
5288 			goto cleanup_vebox_ring;
5289 	}
5290 
5291 	return 0;
5292 
5293 cleanup_vebox_ring:
5294 	intel_cleanup_engine(&dev_priv->engine[VECS]);
5295 cleanup_blt_ring:
5296 	intel_cleanup_engine(&dev_priv->engine[BCS]);
5297 cleanup_bsd_ring:
5298 	intel_cleanup_engine(&dev_priv->engine[VCS]);
5299 cleanup_render_ring:
5300 	intel_cleanup_engine(&dev_priv->engine[RCS]);
5301 
5302 	return ret;
5303 }
5304 
5305 int
5306 i915_gem_init_hw(struct drm_device *dev)
5307 {
5308 	struct drm_i915_private *dev_priv = to_i915(dev);
5309 	struct intel_engine_cs *engine;
5310 	int ret;
5311 
5312 	/* Double layer security blanket, see i915_gem_init() */
5313 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5314 
5315 	if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9)
5316 		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
5317 
5318 	if (IS_HASWELL(dev))
5319 		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
5320 			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
5321 
5322 	if (HAS_PCH_NOP(dev)) {
5323 		if (IS_IVYBRIDGE(dev)) {
5324 			u32 temp = I915_READ(GEN7_MSG_CTL);
5325 			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
5326 			I915_WRITE(GEN7_MSG_CTL, temp);
5327 		} else if (INTEL_INFO(dev)->gen >= 7) {
5328 			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
5329 			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
5330 			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
5331 		}
5332 	}
5333 
5334 	i915_gem_init_swizzling(dev);
5335 
5336 	/*
5337 	 * At least 830 can leave some of the unused rings
5338 	 * "active" (ie. head != tail) after resume which
5339 	 * will prevent c3 entry. Makes sure all unused rings
5340 	 * are totally idle.
5341 	 */
5342 	init_unused_rings(dev);
5343 
5344 	BUG_ON(!dev_priv->kernel_context);
5345 
5346 	ret = i915_ppgtt_init_hw(dev);
5347 	if (ret) {
5348 		DRM_ERROR("PPGTT enable HW failed %d\n", ret);
5349 		goto out;
5350 	}
5351 
5352 	/* Need to do basic initialisation of all rings first: */
5353 	for_each_engine(engine, dev_priv) {
5354 		ret = engine->init_hw(engine);
5355 		if (ret)
5356 			goto out;
5357 	}
5358 
5359 	intel_mocs_init_l3cc_table(dev);
5360 
5361 	/* We can't enable contexts until all firmware is loaded */
5362 	ret = intel_guc_setup(dev);
5363 	if (ret)
5364 		goto out;
5365 
5366 out:
5367 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5368 	return ret;
5369 }
5370 
5371 int i915_gem_init(struct drm_device *dev)
5372 {
5373 	struct drm_i915_private *dev_priv = to_i915(dev);
5374 	int ret;
5375 
5376 	mutex_lock(&dev->struct_mutex);
5377 
5378 	if (!i915.enable_execlists) {
5379 		dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
5380 		dev_priv->gt.init_engines = i915_gem_init_engines;
5381 		dev_priv->gt.cleanup_engine = intel_cleanup_engine;
5382 		dev_priv->gt.stop_engine = intel_stop_engine;
5383 	} else {
5384 		dev_priv->gt.execbuf_submit = intel_execlists_submission;
5385 		dev_priv->gt.init_engines = intel_logical_rings_init;
5386 		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
5387 		dev_priv->gt.stop_engine = intel_logical_ring_stop;
5388 	}
5389 
5390 	/* This is just a security blanket to placate dragons.
5391 	 * On some systems, we very sporadically observe that the first TLBs
5392 	 * used by the CS may be stale, despite us poking the TLB reset. If
5393 	 * we hold the forcewake during initialisation these problems
5394 	 * just magically go away.
5395 	 */
5396 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5397 
5398 	i915_gem_init_userptr(dev_priv);
5399 	i915_gem_init_ggtt(dev);
5400 
5401 	ret = i915_gem_context_init(dev);
5402 	if (ret)
5403 		goto out_unlock;
5404 
5405 	ret = dev_priv->gt.init_engines(dev);
5406 	if (ret)
5407 		goto out_unlock;
5408 
5409 	ret = i915_gem_init_hw(dev);
5410 	if (ret == -EIO) {
5411 		/* Allow ring initialisation to fail by marking the GPU as
5412 		 * wedged. But we only want to do this where the GPU is angry,
5413 		 * for all other failure, such as an allocation failure, bail.
5414 		 */
5415 		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
5416 		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
5417 		ret = 0;
5418 	}
5419 
5420 out_unlock:
5421 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5422 	mutex_unlock(&dev->struct_mutex);
5423 
5424 	return ret;
5425 }
5426 
5427 void
5428 i915_gem_cleanup_engines(struct drm_device *dev)
5429 {
5430 	struct drm_i915_private *dev_priv = to_i915(dev);
5431 	struct intel_engine_cs *engine;
5432 
5433 	for_each_engine(engine, dev_priv)
5434 		dev_priv->gt.cleanup_engine(engine);
5435 }
5436 
5437 static void
5438 init_engine_lists(struct intel_engine_cs *engine)
5439 {
5440 	INIT_LIST_HEAD(&engine->active_list);
5441 	INIT_LIST_HEAD(&engine->request_list);
5442 }
5443 
5444 void
5445 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
5446 {
5447 	struct drm_device *dev = &dev_priv->drm;
5448 
5449 	if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
5450 	    !IS_CHERRYVIEW(dev_priv))
5451 		dev_priv->num_fence_regs = 32;
5452 	else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) ||
5453 		 IS_I945GM(dev_priv) || IS_G33(dev_priv))
5454 		dev_priv->num_fence_regs = 16;
5455 	else
5456 		dev_priv->num_fence_regs = 8;
5457 
5458 	if (intel_vgpu_active(dev_priv))
5459 		dev_priv->num_fence_regs =
5460 				I915_READ(vgtif_reg(avail_rs.fence_num));
5461 
5462 	/* Initialize fence registers to zero */
5463 	i915_gem_restore_fences(dev);
5464 
5465 	i915_gem_detect_bit_6_swizzle(dev);
5466 }
5467 
5468 void
5469 i915_gem_load_init(struct drm_device *dev)
5470 {
5471 	struct drm_i915_private *dev_priv = to_i915(dev);
5472 	int i;
5473 
5474 	INIT_LIST_HEAD(&dev_priv->vm_list);
5475 	INIT_LIST_HEAD(&dev_priv->context_list);
5476 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
5477 	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
5478 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
5479 	for (i = 0; i < I915_NUM_ENGINES; i++)
5480 		init_engine_lists(&dev_priv->engine[i]);
5481 	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
5482 		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
5483 	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
5484 			  i915_gem_retire_work_handler);
5485 	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
5486 			  i915_gem_idle_work_handler);
5487 	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
5488 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
5489 
5490 	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
5491 
5494 	init_waitqueue_head(&dev_priv->pending_flip_queue);
5495 
5496 	dev_priv->mm.interruptible = true;
5497 
5498 	lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, LK_CANRECURSE);
5499 }
5500 
5501 void i915_gem_load_cleanup(struct drm_device *dev)
5502 {
5503 #if 0
5504 	struct drm_i915_private *dev_priv = to_i915(dev);
5505 
5506 	kmem_cache_destroy(dev_priv->requests);
5507 	kmem_cache_destroy(dev_priv->vmas);
5508 	kmem_cache_destroy(dev_priv->objects);
5509 #endif
5510 }
5511 
5512 int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
5513 {
5514 	struct drm_i915_gem_object *obj;
5515 
5516 	/* Called just before we write the hibernation image.
5517 	 *
5518 	 * We need to update the domain tracking to reflect that the CPU
5519 	 * will be accessing all the pages to create the hibernation image
5520 	 * and to restore from it, so upon restoration those pages will be
5521 	 * in the CPU domain.
5522 	 *
5523 	 * To make sure the hibernation image contains the latest state,
5524 	 * we update that state just before writing out the image.
5525 	 */
5526 
5527 	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
5528 		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
5529 		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
5530 	}
5531 
5532 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
5533 		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
5534 		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
5535 	}
5536 
5537 	return 0;
5538 }
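
/*
 * Illustrative sketch, not part of the driver: a hibernation path is
 * expected to quiesce the GPU first and only then fix up the domains,
 * roughly:
 *
 *	ret = i915_gem_suspend(dev);
 *	if (ret == 0)
 *		ret = i915_gem_freeze_late(dev_priv);
 *
 * The pairing with i915_gem_suspend() is an assumption for illustration;
 * only the CPU-domain fixup is defined above.
 */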
5539 
5540 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
5541 {
5542 	struct drm_i915_file_private *file_priv = file->driver_priv;
5543 
5544 	/* Clean up our request list when the client is going away, so that
5545 	 * later retire_requests won't dereference our soon-to-be-gone
5546 	 * file_priv.
5547 	 */
5548 	spin_lock(&file_priv->mm.lock);
5549 	while (!list_empty(&file_priv->mm.request_list)) {
5550 		struct drm_i915_gem_request *request;
5551 
5552 		request = list_first_entry(&file_priv->mm.request_list,
5553 					   struct drm_i915_gem_request,
5554 					   client_list);
5555 		list_del(&request->client_list);
5556 		request->file_priv = NULL;
5557 	}
5558 	spin_unlock(&file_priv->mm.lock);
5559 
5560 	if (!list_empty(&file_priv->rps.link)) {
5561 		lockmgr(&to_i915(dev)->rps.client_lock, LK_EXCLUSIVE);
5562 		list_del(&file_priv->rps.link);
5563 		lockmgr(&to_i915(dev)->rps.client_lock, LK_RELEASE);
5564 	}
5565 }
5566 
5567 int
5568 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
5569     vm_ooffset_t foff, struct ucred *cred, u_short *color)
5570 {
5571 	*color = 0; /* XXXKIB */
5572 	return (0);
5573 }
5574 
5575 void
5576 i915_gem_pager_dtor(void *handle)
5577 {
5578 	struct drm_gem_object *obj;
5579 	struct drm_device *dev;
5580 
5581 	obj = handle;
5582 	dev = obj->dev;
5583 
5584 	mutex_lock(&dev->struct_mutex);
5585 	drm_gem_free_mmap_offset(obj);
5586 	i915_gem_release_mmap(to_intel_bo(obj));
5587 	drm_gem_object_unreference(obj);
5588 	mutex_unlock(&dev->struct_mutex);
5589 }
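
/*
 * Illustrative sketch, not part of the driver: on DragonFly the two pager
 * hooks above are typically wired into a cdev_pager_ops table alongside the
 * GEM fault handler, roughly:
 *
 *	static struct cdev_pager_ops i915_gem_vm_ops = {
 *		.cdev_pg_ctor	= i915_gem_pager_ctor,
 *		.cdev_pg_dtor	= i915_gem_pager_dtor,
 *		.cdev_pg_fault	= i915_gem_fault,
 *	};
 *
 * The table name and field layout are assumptions for illustration.
 */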
5590 
5591 int i915_gem_open(struct drm_device *dev, struct drm_file *file)
5592 {
5593 	struct drm_i915_file_private *file_priv;
5594 	int ret;
5595 
5596 	DRM_DEBUG_DRIVER("\n");
5597 
5598 	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
5599 	if (!file_priv)
5600 		return -ENOMEM;
5601 
5602 	file->driver_priv = file_priv;
5603 	file_priv->dev_priv = to_i915(dev);
5604 	file_priv->file = file;
5605 	INIT_LIST_HEAD(&file_priv->rps.link);
5606 
5607 	spin_init(&file_priv->mm.lock, "i915_priv");
5608 	INIT_LIST_HEAD(&file_priv->mm.request_list);
5609 
5610 	file_priv->bsd_ring = -1;
5611 
5612 	ret = i915_gem_context_open(dev, file);
5613 	if (ret)
5614 		kfree(file_priv);
5615 
5616 	return ret;
5617 }
5618 
5619 /**
5620  * i915_gem_track_fb - update frontbuffer tracking
5621  * @old: current GEM buffer for the frontbuffer slots
5622  * @new: new GEM buffer for the frontbuffer slots
5623  * @frontbuffer_bits: bitmask of frontbuffer slots
5624  *
5625  * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
5626  * from @old and setting them in @new. Both @old and @new can be NULL.
5627  */
5628 void i915_gem_track_fb(struct drm_i915_gem_object *old,
5629 		       struct drm_i915_gem_object *new,
5630 		       unsigned frontbuffer_bits)
5631 {
5632 	if (old) {
5633 		WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
5634 		WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
5635 		old->frontbuffer_bits &= ~frontbuffer_bits;
5636 	}
5637 
5638 	if (new) {
5639 		WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
5640 		WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
5641 		new->frontbuffer_bits |= frontbuffer_bits;
5642 	}
5643 }
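
/*
 * Illustrative sketch, not part of the driver: a page-flip or plane-update
 * path would move the frontbuffer bits from the outgoing framebuffer's
 * object to the incoming one, roughly:
 *
 *	i915_gem_track_fb(intel_fb_obj(old_fb), intel_fb_obj(new_fb),
 *			  frontbuffer_bits);
 *
 * The intel_fb_obj() accessors in this snippet are assumptions for
 * illustration; only the bit bookkeeping is defined above.
 */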
5644 
5645 /* Helpers for looking up an object's VMAs in the GGTT and per-process VMs */
5646 u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
5647 			struct i915_address_space *vm)
5648 {
5649 	struct drm_i915_private *dev_priv = to_i915(o->base.dev);
5650 	struct i915_vma *vma;
5651 
5652 	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
5653 
5654 	list_for_each_entry(vma, &o->vma_list, obj_link) {
5655 		if (vma->is_ggtt &&
5656 		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
5657 			continue;
5658 		if (vma->vm == vm)
5659 			return vma->node.start;
5660 	}
5661 
5662 	WARN(1, "%s vma for this object not found.\n",
5663 	     i915_is_ggtt(vm) ? "global" : "ppgtt");
5664 	return -1;
5665 }
5666 
5667 u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
5668 				  const struct i915_ggtt_view *view)
5669 {
5670 	struct i915_vma *vma;
5671 
5672 	list_for_each_entry(vma, &o->vma_list, obj_link)
5673 		if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view))
5674 			return vma->node.start;
5675 
5676 	WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
5677 	return -1;
5678 }
5679 
5680 bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
5681 			struct i915_address_space *vm)
5682 {
5683 	struct i915_vma *vma;
5684 
5685 	list_for_each_entry(vma, &o->vma_list, obj_link) {
5686 		if (vma->is_ggtt &&
5687 		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
5688 			continue;
5689 		if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
5690 			return true;
5691 	}
5692 
5693 	return false;
5694 }
5695 
5696 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
5697 				  const struct i915_ggtt_view *view)
5698 {
5699 	struct i915_vma *vma;
5700 
5701 	list_for_each_entry(vma, &o->vma_list, obj_link)
5702 		if (vma->is_ggtt &&
5703 		    i915_ggtt_view_equal(&vma->ggtt_view, view) &&
5704 		    drm_mm_node_allocated(&vma->node))
5705 			return true;
5706 
5707 	return false;
5708 }
5709 
5710 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
5711 {
5712 	struct i915_vma *vma;
5713 
5714 	list_for_each_entry(vma, &o->vma_list, obj_link)
5715 		if (drm_mm_node_allocated(&vma->node))
5716 			return true;
5717 
5718 	return false;
5719 }
5720 
5721 unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o)
5722 {
5723 	struct i915_vma *vma;
5724 
5725 	GEM_BUG_ON(list_empty(&o->vma_list));
5726 
5727 	list_for_each_entry(vma, &o->vma_list, obj_link) {
5728 		if (vma->is_ggtt &&
5729 		    vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
5730 			return vma->node.size;
5731 	}
5732 
5733 	return 0;
5734 }
5735 
5736 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
5737 {
5738 	struct i915_vma *vma;
5739 	list_for_each_entry(vma, &obj->vma_list, obj_link)
5740 		if (vma->pin_count > 0)
5741 			return true;
5742 
5743 	return false;
5744 }
5745 
5746 /* Like i915_gem_object_get_page(), but mark the returned page dirty */
5747 struct page *
5748 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
5749 {
5750 	struct page *page;
5751 
5752 	/* Only default objects have per-page dirty tracking */
5753 	if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
5754 		return NULL;
5755 
5756 	page = i915_gem_object_get_page(obj, n);
5757 	set_page_dirty(page);
5758 	return page;
5759 }
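
/*
 * Illustrative sketch, not part of the driver: this helper suits paths that
 * are about to write to a shmem-backed page through the CPU, e.g. patching
 * a dword, roughly:
 *
 *	page = i915_gem_object_get_dirty_page(obj, offset >> PAGE_SHIFT);
 *	vaddr = kmap_atomic(page);
 *	*(uint32_t *)((char *)vaddr + offset_in_page(offset)) = value;
 *	kunmap_atomic(vaddr);
 *
 * The surrounding kmap usage is an assumption for illustration; only the
 * page lookup plus set_page_dirty() is defined above.
 */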
5760 
5761 /* Allocate a new GEM object and fill it with the supplied data */
5762 struct drm_i915_gem_object *
5763 i915_gem_object_create_from_data(struct drm_device *dev,
5764 			         const void *data, size_t size)
5765 {
5766 	struct drm_i915_gem_object *obj;
5767 	struct sg_table *sg;
5768 	size_t bytes;
5769 	int ret;
5770 
5771 	obj = i915_gem_object_create(dev, round_up(size, PAGE_SIZE));
5772 	if (IS_ERR(obj))
5773 		return obj;
5774 
5775 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
5776 	if (ret)
5777 		goto fail;
5778 
5779 	ret = i915_gem_object_get_pages(obj);
5780 	if (ret)
5781 		goto fail;
5782 
5783 	i915_gem_object_pin_pages(obj);
5784 	sg = obj->pages;
5785 	bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
5786 	obj->dirty = 1;		/* Backing store is now out of date */
5787 	i915_gem_object_unpin_pages(obj);
5788 
5789 	if (WARN_ON(bytes != size)) {
5790 		DRM_ERROR("Incomplete copy, wrote %zu of %zu\n", bytes, size);
5791 		ret = -EFAULT;
5792 		goto fail;
5793 	}
5794 
5795 	return obj;
5796 
5797 fail:
5798 	drm_gem_object_unreference(&obj->base);
5799 	return ERR_PTR(ret);
5800 }
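
/*
 * Illustrative sketch, not part of the driver: a caller would use this to
 * wrap a constant blob (for example firmware that must be GPU-accessible)
 * in a GEM object, roughly:
 *
 *	obj = i915_gem_object_create_from_data(dev, blob, blob_size);
 *	if (IS_ERR(obj))
 *		return PTR_ERR(obj);
 *
 * The firmware-style caller is an assumption for illustration; any source
 * buffer of the stated size would do.
 */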
5801