1 /*
2  * Copyright © 2008,2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Chris Wilson <chris@chris-wilson.co.uk>
26  *
27  */
28 
29 #include <linux/dma_remapping.h>
30 #include <linux/reservation.h>
31 #include <linux/uaccess.h>
32 
33 #include <drm/drmP.h>
34 #include <drm/i915_drm.h>
35 
36 #include "i915_drv.h"
37 #include "i915_trace.h"
38 #include "intel_drv.h"
39 #include "intel_frontbuffer.h"
40 
41 #define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */
42 
43 #define  __EXEC_OBJECT_HAS_PIN		(1<<31)
44 #define  __EXEC_OBJECT_HAS_FENCE	(1<<30)
45 #define  __EXEC_OBJECT_NEEDS_MAP	(1<<29)
46 #define  __EXEC_OBJECT_NEEDS_BIAS	(1<<28)
47 #define  __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28) /* all of the above */
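/*
 * The __EXEC_OBJECT_* flags above live in the top bits of exec_entry->flags,
 * alongside the EXEC_OBJECT_* flags supplied by userspace; a BUILD_BUG_ON in
 * validate_exec_list() checks that they stay within the bits userspace may
 * not set (__EXEC_OBJECT_UNKNOWN_FLAGS).
 */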
48 
49 #define BATCH_OFFSET_BIAS (256*1024)
50 
51 struct i915_execbuffer_params {
52 	struct drm_device               *dev;
53 	struct drm_file                 *file;
54 	struct i915_vma			*batch;
55 	u32				dispatch_flags;
56 	u32				args_batch_start_offset;
57 	struct intel_engine_cs          *engine;
58 	struct i915_gem_context         *ctx;
59 	struct drm_i915_gem_request     *request;
60 };
61 
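/*
 * Execbuf object lookup table. When eb->and is negative, the handles are
 * already a dense index (I915_EXEC_HANDLE_LUT) and lut[] is indexed directly;
 * otherwise buckets[] is a small hash table and eb->and is the bucket mask
 * (see eb_create() and eb_get_vma()).
 */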
62 struct eb_vmas {
63 	struct drm_i915_private *i915;
64 	struct list_head vmas;
65 	int and;
66 	union {
67 		struct i915_vma *lut[0];
68 		struct hlist_head buckets[0];
69 	};
70 };
71 
72 static struct eb_vmas *
73 eb_create(struct drm_i915_private *i915,
74 	  struct drm_i915_gem_execbuffer2 *args)
75 {
76 	struct eb_vmas *eb = NULL;
77 
78 	if (args->flags & I915_EXEC_HANDLE_LUT) {
79 		unsigned size = args->buffer_count;
80 		size *= sizeof(struct i915_vma *);
81 		size += sizeof(struct eb_vmas);
82 		eb = kmalloc(size, M_DRM,
83 			     GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
84 	}
85 
86 	if (eb == NULL) {
87 		unsigned size = args->buffer_count;
88 		unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
89 		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
90 		while (count > 2*size)
91 			count >>= 1;
92 		eb = kzalloc(count*sizeof(struct hlist_head) +
93 			     sizeof(struct eb_vmas),
94 			     GFP_TEMPORARY);
95 		if (eb == NULL)
96 			return eb;
97 
98 		eb->and = count - 1;
99 	} else
100 		eb->and = -args->buffer_count;
101 
102 	eb->i915 = i915;
103 	INIT_LIST_HEAD(&eb->vmas);
104 	return eb;
105 }
106 
107 static void
108 eb_reset(struct eb_vmas *eb)
109 {
110 	if (eb->and >= 0)
111 		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
112 }
113 
114 static struct i915_vma *
115 eb_get_batch(struct eb_vmas *eb)
116 {
117 	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
118 
119 	/*
120 	 * SNA is doing fancy tricks with compressing batch buffers, which leads
121 	 * to negative relocation deltas. Usually that works out ok since the
	 * relocated address is still positive, except when the batch is placed
123 	 * very low in the GTT. Ensure this doesn't happen.
124 	 *
125 	 * Note that actual hangs have only been observed on gen7, but for
126 	 * paranoia do it everywhere.
127 	 */
128 	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
129 		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
130 
131 	return vma;
132 }
133 
134 static int
135 eb_lookup_vmas(struct eb_vmas *eb,
136 	       struct drm_i915_gem_exec_object2 *exec,
137 	       const struct drm_i915_gem_execbuffer2 *args,
138 	       struct i915_address_space *vm,
139 	       struct drm_file *file)
140 {
141 	struct drm_i915_gem_object *obj;
142 	struct list_head objects;
143 	int i, ret;
144 
145 	INIT_LIST_HEAD(&objects);
146 	lockmgr(&file->table_lock, LK_EXCLUSIVE);
	/* Grab a reference to the object and release the lock so we can
	 * look up or create the VMA without using GFP_ATOMIC */
149 	for (i = 0; i < args->buffer_count; i++) {
150 		obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
151 		if (obj == NULL) {
152 			lockmgr(&file->table_lock, LK_RELEASE);
153 			DRM_DEBUG("Invalid object handle %d at index %d\n",
154 				   exec[i].handle, i);
155 			ret = -ENOENT;
156 			goto err;
157 		}
158 
159 		if (!list_empty(&obj->obj_exec_link)) {
160 			lockmgr(&file->table_lock, LK_RELEASE);
161 			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
162 				   obj, exec[i].handle, i);
163 			ret = -EINVAL;
164 			goto err;
165 		}
166 
167 		i915_gem_object_get(obj);
168 		list_add_tail(&obj->obj_exec_link, &objects);
169 	}
170 	lockmgr(&file->table_lock, LK_RELEASE);
171 
172 	i = 0;
173 	while (!list_empty(&objects)) {
174 		struct i915_vma *vma;
175 
176 		obj = list_first_entry(&objects,
177 				       struct drm_i915_gem_object,
178 				       obj_exec_link);
179 
180 		/*
181 		 * NOTE: We can leak any vmas created here when something fails
182 		 * later on. But that's no issue since vma_unbind can deal with
183 		 * vmas which are not actually bound. And since only
184 		 * lookup_or_create exists as an interface to get at the vma
		 * from the (obj, vm) pair we don't run the risk of creating
186 		 * duplicated vmas for the same vm.
187 		 */
188 		vma = i915_gem_obj_lookup_or_create_vma(obj, vm, NULL);
189 		if (unlikely(IS_ERR(vma))) {
190 			DRM_DEBUG("Failed to lookup VMA\n");
191 			ret = PTR_ERR(vma);
192 			goto err;
193 		}
194 
195 		/* Transfer ownership from the objects list to the vmas list. */
196 		list_add_tail(&vma->exec_list, &eb->vmas);
197 		list_del_init(&obj->obj_exec_link);
198 
199 		vma->exec_entry = &exec[i];
200 		if (eb->and < 0) {
201 			eb->lut[i] = vma;
202 		} else {
203 			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
204 			vma->exec_handle = handle;
205 			hlist_add_head(&vma->exec_node,
206 				       &eb->buckets[handle & eb->and]);
207 		}
208 		++i;
209 	}
210 
211 	return 0;
212 
214 err:
215 	while (!list_empty(&objects)) {
216 		obj = list_first_entry(&objects,
217 				       struct drm_i915_gem_object,
218 				       obj_exec_link);
219 		list_del_init(&obj->obj_exec_link);
220 		i915_gem_object_put(obj);
221 	}
222 	/*
	 * Objects already transferred to the vmas list will be unreferenced by
224 	 * eb_destroy.
225 	 */
226 
227 	return ret;
228 }
229 
230 static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
231 {
232 	if (eb->and < 0) {
233 		if (handle >= -eb->and)
234 			return NULL;
235 		return eb->lut[handle];
236 	} else {
237 		struct hlist_head *head;
238 		struct i915_vma *vma;
239 
240 		head = &eb->buckets[handle & eb->and];
241 		hlist_for_each_entry(vma, head, exec_node) {
242 			if (vma->exec_handle == handle)
243 				return vma;
244 		}
245 		return NULL;
246 	}
247 }
248 
249 static void
250 i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
251 {
252 	struct drm_i915_gem_exec_object2 *entry;
253 
254 	if (!drm_mm_node_allocated(&vma->node))
255 		return;
256 
257 	entry = vma->exec_entry;
258 
259 	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
260 		i915_vma_unpin_fence(vma);
261 
262 	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
263 		__i915_vma_unpin(vma);
264 
265 	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
266 }
267 
268 static void eb_destroy(struct eb_vmas *eb)
269 {
270 	while (!list_empty(&eb->vmas)) {
271 		struct i915_vma *vma;
272 
273 		vma = list_first_entry(&eb->vmas,
274 				       struct i915_vma,
275 				       exec_list);
276 		list_del_init(&vma->exec_list);
277 		i915_gem_execbuffer_unreserve_vma(vma);
278 		i915_vma_put(vma);
279 	}
280 	kfree(eb);
281 }
282 
283 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
284 {
285 	if (!i915_gem_object_has_struct_page(obj))
286 		return false;
287 
288 	if (DBG_USE_CPU_RELOC)
289 		return DBG_USE_CPU_RELOC > 0;
290 
291 	return (HAS_LLC(to_i915(obj->base.dev)) ||
292 		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
293 		obj->cache_level != I915_CACHE_NONE);
294 }
295 
296 /* Used to convert any address to canonical form.
297  * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
298  * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
299  * addresses to be in a canonical form:
300  * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
301  * canonical form [63:48] == [47]."
302  */
303 #define GEN8_HIGH_ADDRESS_BIT 47
304 static inline uint64_t gen8_canonical_addr(uint64_t address)
305 {
306 	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
307 }
308 
309 static inline uint64_t gen8_noncanonical_addr(uint64_t address)
310 {
311 	return address & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1);
312 }
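
/*
 * Example: an address with bit 47 set, e.g. 0x0000800000000000, becomes
 * 0xffff800000000000 in canonical form, while gen8_noncanonical_addr()
 * masks it back down to the low 48 bits.
 */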
313 
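/*
 * Note that reloc->delta is a u32 in the uapi; the (int) cast below lets
 * userspace (e.g. SNA, see eb_get_batch()) pass negative deltas, which are
 * sign-extended before being added to the target offset.
 */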
314 static inline uint64_t
315 relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
316 		  uint64_t target_offset)
317 {
318 	return gen8_canonical_addr((int)reloc->delta + target_offset);
319 }
320 
321 struct reloc_cache {
322 	struct drm_i915_private *i915;
323 	struct drm_mm_node node;
324 	unsigned long vaddr;
325 	unsigned int page;
326 	bool use_64bit_reloc;
327 };
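
/*
 * The relocation cache keeps at most one page of the object mapped at a
 * time, either via kmap_atomic() of a shmem page or an atomic iomap of the
 * GGTT aperture. The low bits of cache->vaddr carry the KMAP flag and any
 * CLFLUSH_* flags (see unmask_page()/unmask_flags()); cache->node.mm is
 * reused to stash the object (kmap) or the pinned vma (iomap), unless a
 * scratch GGTT node was allocated instead.
 */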
328 
329 static void reloc_cache_init(struct reloc_cache *cache,
330 			     struct drm_i915_private *i915)
331 {
332 	cache->page = -1;
333 	cache->vaddr = 0;
334 	cache->i915 = i915;
335 	/* Must be a variable in the struct to allow GCC to unroll. */
336 	cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
337 	cache->node.allocated = false;
338 }
339 
340 static inline void *unmask_page(unsigned long p)
341 {
342 	return (void *)(uintptr_t)(p & LINUX_PAGE_MASK);
343 }
344 
345 static inline unsigned int unmask_flags(unsigned long p)
346 {
347 	return p & ~LINUX_PAGE_MASK;
348 }
349 
350 #define KMAP 0x4 /* after CLFLUSH_FLAGS */
351 
352 static void reloc_cache_fini(struct reloc_cache *cache)
353 {
354 	void *vaddr;
355 
356 	if (!cache->vaddr)
357 		return;
358 
359 	vaddr = unmask_page(cache->vaddr);
360 	if (cache->vaddr & KMAP) {
361 		if (cache->vaddr & CLFLUSH_AFTER)
362 			mb();
363 
364 		kunmap_atomic(vaddr);
365 		i915_gem_obj_finish_shmem_access((struct drm_i915_gem_object *)cache->node.mm);
366 	} else {
367 		wmb();
368 		io_mapping_unmap_atomic((void __iomem *)vaddr);
369 		if (cache->node.allocated) {
370 			struct i915_ggtt *ggtt = &cache->i915->ggtt;
371 
372 			ggtt->base.clear_range(&ggtt->base,
373 					       cache->node.start,
374 					       cache->node.size);
375 			drm_mm_remove_node(&cache->node);
376 		} else {
377 			i915_vma_unpin((struct i915_vma *)cache->node.mm);
378 		}
379 	}
380 }
381 
382 static void *reloc_kmap(struct drm_i915_gem_object *obj,
383 			struct reloc_cache *cache,
384 			int page)
385 {
386 	void *vaddr;
387 
388 	if (cache->vaddr) {
389 		kunmap_atomic(unmask_page(cache->vaddr));
390 	} else {
391 		unsigned int flushes;
392 		int ret;
393 
394 		ret = i915_gem_obj_prepare_shmem_write(obj, &flushes);
395 		if (ret)
396 			return ERR_PTR(ret);
397 
398 		BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
399 		BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & LINUX_PAGE_MASK);
400 
401 		cache->vaddr = flushes | KMAP;
402 		cache->node.mm = (void *)obj;
403 		if (flushes)
404 			mb();
405 	}
406 
407 	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
408 	cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
409 	cache->page = page;
410 
411 	return vaddr;
412 }
413 
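/*
 * GTT-based relocations: try to pin the whole object into the mappable
 * aperture; failing that, grab a single scratch GGTT page and insert_page()
 * each page on demand. Returning NULL (rather than an error) tells
 * reloc_vaddr() to fall back to CPU relocations via reloc_kmap().
 */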
414 static void *reloc_iomap(struct drm_i915_gem_object *obj,
415 			 struct reloc_cache *cache,
416 			 int page)
417 {
418 	struct i915_ggtt *ggtt = &cache->i915->ggtt;
419 	unsigned long offset;
420 	void *vaddr;
421 
422 	if (cache->vaddr) {
423 		io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
424 	} else {
425 		struct i915_vma *vma;
426 		int ret;
427 
428 		if (use_cpu_reloc(obj))
429 			return NULL;
430 
431 		ret = i915_gem_object_set_to_gtt_domain(obj, true);
432 		if (ret)
433 			return ERR_PTR(ret);
434 
435 		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
436 					       PIN_MAPPABLE | PIN_NONBLOCK);
437 		if (IS_ERR(vma)) {
438 			memset(&cache->node, 0, sizeof(cache->node));
439 			ret = drm_mm_insert_node_in_range_generic
440 				(&ggtt->base.mm, &cache->node,
441 				 4096, 0, 0,
442 				 0, ggtt->mappable_end,
443 				 DRM_MM_SEARCH_DEFAULT,
444 				 DRM_MM_CREATE_DEFAULT);
445 			if (ret) /* no inactive aperture space, use cpu reloc */
446 				return NULL;
447 		} else {
448 			ret = i915_vma_put_fence(vma);
449 			if (ret) {
450 				i915_vma_unpin(vma);
451 				return ERR_PTR(ret);
452 			}
453 
454 			cache->node.start = vma->node.start;
455 			cache->node.mm = (void *)vma;
456 		}
457 	}
458 
459 	offset = cache->node.start;
460 	if (cache->node.allocated) {
461 		wmb();
462 		ggtt->base.insert_page(&ggtt->base,
463 				       i915_gem_object_get_dma_address(obj, page),
464 				       offset, I915_CACHE_NONE, 0);
465 	} else {
466 		offset += page << PAGE_SHIFT;
467 	}
468 
469 	vaddr = (void __force *) io_mapping_map_atomic_wc(&cache->i915->ggtt.mappable, offset);
470 	cache->page = page;
471 	cache->vaddr = (unsigned long)vaddr;
472 
473 	return vaddr;
474 }
475 
476 static void *reloc_vaddr(struct drm_i915_gem_object *obj,
477 			 struct reloc_cache *cache,
478 			 int page)
479 {
480 	void *vaddr;
481 
482 	if (cache->page == page) {
483 		vaddr = unmask_page(cache->vaddr);
484 	} else {
485 		vaddr = NULL;
486 		if ((cache->vaddr & KMAP) == 0)
487 			vaddr = reloc_iomap(obj, cache, page);
488 		if (!vaddr)
489 			vaddr = reloc_kmap(obj, cache, page);
490 	}
491 
492 	return vaddr;
493 }
494 
495 static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
496 {
497 	if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
498 		if (flushes & CLFLUSH_BEFORE) {
499 			clflushopt(addr);
500 			mb();
501 		}
502 
503 		*addr = value;
504 
		/* Writes to the same cacheline are serialised by the CPU
		 * (including clflush). On the write path, we only require
		 * that it hits memory in an orderly fashion and place
		 * memory barriers at the start and end of the relocation
		 * phase to ensure ordering of the clflushes with respect
		 * to the rest of the system.
		 */
511 		if (flushes & CLFLUSH_AFTER)
512 			clflushopt(addr);
513 	} else
514 		*addr = value;
515 }
516 
517 static int
518 relocate_entry(struct drm_i915_gem_object *obj,
519 	       const struct drm_i915_gem_relocation_entry *reloc,
520 	       struct reloc_cache *cache,
521 	       u64 target_offset)
522 {
523 	u64 offset = reloc->offset;
524 	bool wide = cache->use_64bit_reloc;
525 	void *vaddr;
526 
527 	target_offset = relocation_target(reloc, target_offset);
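
	/*
	 * With 64-bit relocations (gen8+) the address is written as two
	 * 32-bit halves; the second half may land on the following page,
	 * hence the extra pass through reloc_vaddr() below.
	 */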
528 repeat:
529 	vaddr = reloc_vaddr(obj, cache, offset >> PAGE_SHIFT);
530 	if (IS_ERR(vaddr))
531 		return PTR_ERR(vaddr);
532 
533 	clflush_write32(vaddr + offset_in_page(offset),
534 			lower_32_bits(target_offset),
535 			cache->vaddr);
536 
537 	if (wide) {
538 		offset += sizeof(u32);
539 		target_offset >>= 32;
540 		wide = false;
541 		goto repeat;
542 	}
543 
544 	return 0;
545 }
546 
547 static int
548 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
549 				   struct eb_vmas *eb,
550 				   struct drm_i915_gem_relocation_entry *reloc,
551 				   struct reloc_cache *cache)
552 {
553 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
554 	struct drm_gem_object *target_obj;
555 	struct drm_i915_gem_object *target_i915_obj;
556 	struct i915_vma *target_vma;
557 	uint64_t target_offset;
558 	int ret;
559 
	/* we already hold a reference to all valid objects */
561 	target_vma = eb_get_vma(eb, reloc->target_handle);
562 	if (unlikely(target_vma == NULL))
563 		return -ENOENT;
564 	target_i915_obj = target_vma->obj;
565 	target_obj = &target_vma->obj->base;
566 
567 	target_offset = gen8_canonical_addr(target_vma->node.start);
568 
569 	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
570 	 * pipe_control writes because the gpu doesn't properly redirect them
	 * through the ppgtt for non-secure batchbuffers. */
572 	if (unlikely(IS_GEN6(dev_priv) &&
573 	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) {
574 		ret = i915_vma_bind(target_vma, target_i915_obj->cache_level,
575 				    PIN_GLOBAL);
576 		if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
577 			return ret;
578 	}
579 
580 	/* Validate that the target is in a valid r/w GPU domain */
581 	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
582 		DRM_DEBUG("reloc with multiple write domains: "
583 			  "obj %p target %d offset %d "
584 			  "read %08x write %08x",
585 			  obj, reloc->target_handle,
586 			  (int) reloc->offset,
587 			  reloc->read_domains,
588 			  reloc->write_domain);
589 		return -EINVAL;
590 	}
591 	if (unlikely((reloc->write_domain | reloc->read_domains)
592 		     & ~I915_GEM_GPU_DOMAINS)) {
593 		DRM_DEBUG("reloc with read/write non-GPU domains: "
594 			  "obj %p target %d offset %d "
595 			  "read %08x write %08x",
596 			  obj, reloc->target_handle,
597 			  (int) reloc->offset,
598 			  reloc->read_domains,
599 			  reloc->write_domain);
600 		return -EINVAL;
601 	}
602 
603 	target_obj->pending_read_domains |= reloc->read_domains;
604 	target_obj->pending_write_domain |= reloc->write_domain;
605 
606 	/* If the relocation already has the right value in it, no
607 	 * more work needs to be done.
608 	 */
609 	if (target_offset == reloc->presumed_offset)
610 		return 0;
611 
612 	/* Check that the relocation address is valid... */
613 	if (unlikely(reloc->offset >
614 		     obj->base.size - (cache->use_64bit_reloc ? 8 : 4))) {
615 		DRM_DEBUG("Relocation beyond object bounds: "
616 			  "obj %p target %d offset %d size %d.\n",
617 			  obj, reloc->target_handle,
618 			  (int) reloc->offset,
619 			  (int) obj->base.size);
620 		return -EINVAL;
621 	}
622 	if (unlikely(reloc->offset & 3)) {
623 		DRM_DEBUG("Relocation not 4-byte aligned: "
624 			  "obj %p target %d offset %d.\n",
625 			  obj, reloc->target_handle,
626 			  (int) reloc->offset);
627 		return -EINVAL;
628 	}
629 
630 	ret = relocate_entry(obj, reloc, cache, target_offset);
631 	if (ret)
632 		return ret;
633 
634 	/* and update the user's relocation entry */
635 	reloc->presumed_offset = target_offset;
636 	return 0;
637 }
638 
639 static int
640 i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
641 				 struct eb_vmas *eb)
642 {
643 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
644 	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
645 	struct drm_i915_gem_relocation_entry __user *user_relocs;
646 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
647 	struct reloc_cache cache;
648 	int remain, ret = 0;
649 
650 	user_relocs = u64_to_user_ptr(entry->relocs_ptr);
651 	reloc_cache_init(&cache, eb->i915);
652 
653 	remain = entry->relocation_count;
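
	/*
	 * Copy the relocations from userspace in bounded chunks onto the
	 * stack (at most N_RELOC(512) entries per pass), so that a large
	 * relocation list never requires a kernel allocation here.
	 */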
654 	while (remain) {
655 		struct drm_i915_gem_relocation_entry *r = stack_reloc;
656 		unsigned long unwritten;
657 		unsigned int count;
658 
659 		count = min_t(unsigned int, remain, ARRAY_SIZE(stack_reloc));
660 		remain -= count;
661 
		/* This is the fast path and we cannot handle a pagefault
		 * whilst holding the struct mutex lest the user pass in the
		 * relocations contained within a mmaped bo. In such a case
		 * the page fault handler would call i915_gem_fault() and we
		 * would try to acquire the struct mutex again. Obviously
		 * this is bad and so lockdep complains vehemently.
		 */
669 		pagefault_disable();
670 		unwritten = __copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]));
671 		pagefault_enable();
672 		if (unlikely(unwritten)) {
673 			ret = -EFAULT;
674 			goto out;
675 		}
676 
677 		do {
678 			u64 offset = r->presumed_offset;
679 
680 			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache);
681 			if (ret)
682 				goto out;
683 
684 			if (r->presumed_offset != offset) {
685 				pagefault_disable();
686 				unwritten = __put_user(r->presumed_offset,
687 						       &user_relocs->presumed_offset);
688 				pagefault_enable();
689 				if (unlikely(unwritten)) {
					/* Note that reporting an error now
					 * leaves everything in an inconsistent
					 * state as we have *already* changed
					 * the relocation value inside the
					 * object. As we have not updated the
					 * reloc.presumed_offset nor will we
					 * change the execobject.offset, on the
					 * next call we may not rewrite the
					 * value inside the object, leaving it
					 * dangling and causing a GPU hang.
					 */
701 					ret = -EFAULT;
702 					goto out;
703 				}
704 			}
705 
706 			user_relocs++;
707 			r++;
708 		} while (--count);
709 	}
710 
711 out:
712 	reloc_cache_fini(&cache);
713 	return ret;
714 #undef N_RELOC
715 }
716 
717 static int
718 i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
719 				      struct eb_vmas *eb,
720 				      struct drm_i915_gem_relocation_entry *relocs)
721 {
722 	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
723 	struct reloc_cache cache;
724 	int i, ret = 0;
725 
726 	reloc_cache_init(&cache, eb->i915);
727 	for (i = 0; i < entry->relocation_count; i++) {
728 		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache);
729 		if (ret)
730 			break;
731 	}
732 	reloc_cache_fini(&cache);
733 
734 	return ret;
735 }
736 
737 static int
738 i915_gem_execbuffer_relocate(struct eb_vmas *eb)
739 {
740 	struct i915_vma *vma;
741 	int ret = 0;
742 
743 	list_for_each_entry(vma, &eb->vmas, exec_list) {
744 		ret = i915_gem_execbuffer_relocate_vma(vma, eb);
745 		if (ret)
746 			break;
747 	}
748 
749 	return ret;
750 }
751 
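/*
 * An object needs the mappable aperture purely for relocations when we set
 * __EXEC_OBJECT_NEEDS_MAP ourselves and userspace did not also request a
 * fence. In that case failing to fit into the mappable region is not fatal:
 * i915_gem_execbuffer_reserve_vma() retries without PIN_MAPPABLE and the
 * relocation can fall back to the CPU path.
 */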
752 static bool only_mappable_for_reloc(unsigned int flags)
753 {
754 	return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) ==
755 		__EXEC_OBJECT_NEEDS_MAP;
756 }
757 
758 static int
759 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
760 				struct intel_engine_cs *engine,
761 				bool *need_reloc)
762 {
763 	struct drm_i915_gem_object *obj = vma->obj;
764 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
765 	uint64_t flags;
766 	int ret;
767 
768 	flags = PIN_USER;
769 	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
770 		flags |= PIN_GLOBAL;
771 
772 	if (!drm_mm_node_allocated(&vma->node)) {
773 		/* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
774 		 * limit address to the first 4GBs for unflagged objects.
775 		 */
776 		if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0)
777 			flags |= PIN_ZONE_4G;
778 		if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
779 			flags |= PIN_GLOBAL | PIN_MAPPABLE;
780 		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
781 			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
782 		if (entry->flags & EXEC_OBJECT_PINNED)
783 			flags |= entry->offset | PIN_OFFSET_FIXED;
784 		if ((flags & PIN_MAPPABLE) == 0)
785 			flags |= PIN_HIGH;
786 	}
787 
788 	ret = i915_vma_pin(vma,
789 			   entry->pad_to_size,
790 			   entry->alignment,
791 			   flags);
792 	if ((ret == -ENOSPC || ret == -E2BIG) &&
793 	    only_mappable_for_reloc(entry->flags))
794 		ret = i915_vma_pin(vma,
795 				   entry->pad_to_size,
796 				   entry->alignment,
797 				   flags & ~PIN_MAPPABLE);
798 	if (ret)
799 		return ret;
800 
801 	entry->flags |= __EXEC_OBJECT_HAS_PIN;
802 
803 	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
804 		ret = i915_vma_get_fence(vma);
805 		if (ret)
806 			return ret;
807 
808 		if (i915_vma_pin_fence(vma))
809 			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
810 	}
811 
812 	if (entry->offset != vma->node.start) {
813 		entry->offset = vma->node.start;
814 		*need_reloc = true;
815 	}
816 
817 	if (entry->flags & EXEC_OBJECT_WRITE) {
818 		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
819 		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
820 	}
821 
822 	return 0;
823 }
824 
825 static bool
826 need_reloc_mappable(struct i915_vma *vma)
827 {
828 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
829 
830 	if (entry->relocation_count == 0)
831 		return false;
832 
833 	if (!i915_vma_is_ggtt(vma))
834 		return false;
835 
836 	/* See also use_cpu_reloc() */
837 	if (HAS_LLC(to_i915(vma->obj->base.dev)))
838 		return false;
839 
840 	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
841 		return false;
842 
843 	return true;
844 }
845 
846 static bool
847 eb_vma_misplaced(struct i915_vma *vma)
848 {
849 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
850 
851 	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
852 		!i915_vma_is_ggtt(vma));
853 
854 	if (entry->alignment &&
855 	    vma->node.start & (entry->alignment - 1))
856 		return true;
857 
858 	if (vma->node.size < entry->pad_to_size)
859 		return true;
860 
861 	if (entry->flags & EXEC_OBJECT_PINNED &&
862 	    vma->node.start != entry->offset)
863 		return true;
864 
865 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
866 	    vma->node.start < BATCH_OFFSET_BIAS)
867 		return true;
868 
869 	/* avoid costly ping-pong once a batch bo ended up non-mappable */
870 	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
871 	    !i915_vma_is_map_and_fenceable(vma))
872 		return !only_mappable_for_reloc(entry->flags);
873 
874 	if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
875 	    (vma->node.start + vma->node.size - 1) >> 32)
876 		return true;
877 
878 	return false;
879 }
880 
881 static int
882 i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
883 			    struct list_head *vmas,
884 			    struct i915_gem_context *ctx,
885 			    bool *need_relocs)
886 {
887 	struct drm_i915_gem_object *obj;
888 	struct i915_vma *vma;
889 	struct i915_address_space *vm;
890 	struct list_head ordered_vmas;
891 	struct list_head pinned_vmas;
892 	bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4;
893 	int retry;
894 
895 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
896 
897 	INIT_LIST_HEAD(&ordered_vmas);
898 	INIT_LIST_HEAD(&pinned_vmas);
899 	while (!list_empty(vmas)) {
900 		struct drm_i915_gem_exec_object2 *entry;
901 		bool need_fence, need_mappable;
902 
903 		vma = list_first_entry(vmas, struct i915_vma, exec_list);
904 		obj = vma->obj;
905 		entry = vma->exec_entry;
906 
907 		if (ctx->flags & CONTEXT_NO_ZEROMAP)
908 			entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
909 
910 		if (!has_fenced_gpu_access)
911 			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
912 		need_fence =
913 			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
914 			i915_gem_object_is_tiled(obj);
915 		need_mappable = need_fence || need_reloc_mappable(vma);
916 
917 		if (entry->flags & EXEC_OBJECT_PINNED)
918 			list_move_tail(&vma->exec_list, &pinned_vmas);
919 		else if (need_mappable) {
920 			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
921 			list_move(&vma->exec_list, &ordered_vmas);
922 		} else
923 			list_move_tail(&vma->exec_list, &ordered_vmas);
924 
925 		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
926 		obj->base.pending_write_domain = 0;
927 	}
928 	list_splice(&ordered_vmas, vmas);
929 	list_splice(&pinned_vmas, vmas);
930 
931 	/* Attempt to pin all of the buffers into the GTT.
932 	 * This is done in 3 phases:
933 	 *
934 	 * 1a. Unbind all objects that do not match the GTT constraints for
935 	 *     the execbuffer (fenceable, mappable, alignment etc).
936 	 * 1b. Increment pin count for already bound objects.
937 	 * 2.  Bind new objects.
938 	 * 3.  Decrement pin count.
939 	 *
	 * This avoids unnecessary unbinding of later objects in order to make
941 	 * room for the earlier objects *unless* we need to defragment.
942 	 */
943 	retry = 0;
944 	do {
945 		int ret = 0;
946 
947 		/* Unbind any ill-fitting objects or pin. */
948 		list_for_each_entry(vma, vmas, exec_list) {
949 			if (!drm_mm_node_allocated(&vma->node))
950 				continue;
951 
952 			if (eb_vma_misplaced(vma))
953 				ret = i915_vma_unbind(vma);
954 			else
955 				ret = i915_gem_execbuffer_reserve_vma(vma,
956 								      engine,
957 								      need_relocs);
958 			if (ret)
959 				goto err;
960 		}
961 
962 		/* Bind fresh objects */
963 		list_for_each_entry(vma, vmas, exec_list) {
964 			if (drm_mm_node_allocated(&vma->node))
965 				continue;
966 
967 			ret = i915_gem_execbuffer_reserve_vma(vma, engine,
968 							      need_relocs);
969 			if (ret)
970 				goto err;
971 		}
972 
973 err:
974 		if (ret != -ENOSPC || retry++)
975 			return ret;
976 
977 		/* Decrement pin count for bound objects */
978 		list_for_each_entry(vma, vmas, exec_list)
979 			i915_gem_execbuffer_unreserve_vma(vma);
980 
981 		ret = i915_gem_evict_vm(vm, true);
982 		if (ret)
983 			return ret;
984 	} while (1);
985 }
986 
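/*
 * Slow path: the fast relocation pass faulted on the user relocation lists.
 * Drop struct_mutex, copy every relocation array into a kernel buffer with
 * page faults allowed, then retake the lock, re-lookup and re-reserve the
 * VMAs, and apply the relocations from the kernel copy.
 */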
987 static int
988 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
989 				  struct drm_i915_gem_execbuffer2 *args,
990 				  struct drm_file *file,
991 				  struct intel_engine_cs *engine,
992 				  struct eb_vmas *eb,
993 				  struct drm_i915_gem_exec_object2 *exec,
994 				  struct i915_gem_context *ctx)
995 {
996 	struct drm_i915_gem_relocation_entry *reloc;
997 	struct i915_address_space *vm;
998 	struct i915_vma *vma;
999 	bool need_relocs;
1000 	int *reloc_offset;
1001 	int i, total, ret;
1002 	unsigned count = args->buffer_count;
1003 
1004 	vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;
1005 
1006 	/* We may process another execbuffer during the unlock... */
1007 	while (!list_empty(&eb->vmas)) {
1008 		vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
1009 		list_del_init(&vma->exec_list);
1010 		i915_gem_execbuffer_unreserve_vma(vma);
1011 		i915_vma_put(vma);
1012 	}
1013 
1014 	mutex_unlock(&dev->struct_mutex);
1015 
1016 	total = 0;
1017 	for (i = 0; i < count; i++)
1018 		total += exec[i].relocation_count;
1019 
1020 	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
1021 	reloc = drm_malloc_ab(total, sizeof(*reloc));
1022 	if (reloc == NULL || reloc_offset == NULL) {
1023 		drm_free_large(reloc);
1024 		drm_free_large(reloc_offset);
1025 		mutex_lock(&dev->struct_mutex);
1026 		return -ENOMEM;
1027 	}
1028 
1029 	total = 0;
1030 	for (i = 0; i < count; i++) {
1031 		struct drm_i915_gem_relocation_entry __user *user_relocs;
1032 		u64 invalid_offset = (u64)-1;
1033 		int j;
1034 
1035 		user_relocs = u64_to_user_ptr(exec[i].relocs_ptr);
1036 
1037 		if (copy_from_user(reloc+total, user_relocs,
1038 				   exec[i].relocation_count * sizeof(*reloc))) {
1039 			ret = -EFAULT;
1040 			mutex_lock(&dev->struct_mutex);
1041 			goto err;
1042 		}
1043 
1044 		/* As we do not update the known relocation offsets after
1045 		 * relocating (due to the complexities in lock handling),
1046 		 * we need to mark them as invalid now so that we force the
1047 		 * relocation processing next time. Just in case the target
1048 		 * object is evicted and then rebound into its old
1049 		 * presumed_offset before the next execbuffer - if that
1050 		 * happened we would make the mistake of assuming that the
1051 		 * relocations were valid.
1052 		 */
1053 		for (j = 0; j < exec[i].relocation_count; j++) {
1054 			if (__copy_to_user(&user_relocs[j].presumed_offset,
1055 					   &invalid_offset,
1056 					   sizeof(invalid_offset))) {
1057 				ret = -EFAULT;
1058 				mutex_lock(&dev->struct_mutex);
1059 				goto err;
1060 			}
1061 		}
1062 
1063 		reloc_offset[i] = total;
1064 		total += exec[i].relocation_count;
1065 	}
1066 
1067 	ret = i915_mutex_lock_interruptible(dev);
1068 	if (ret) {
1069 		mutex_lock(&dev->struct_mutex);
1070 		goto err;
1071 	}
1072 
1073 	/* reacquire the objects */
1074 	eb_reset(eb);
1075 	ret = eb_lookup_vmas(eb, exec, args, vm, file);
1076 	if (ret)
1077 		goto err;
1078 
1079 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
1080 	ret = i915_gem_execbuffer_reserve(engine, &eb->vmas, ctx,
1081 					  &need_relocs);
1082 	if (ret)
1083 		goto err;
1084 
1085 	list_for_each_entry(vma, &eb->vmas, exec_list) {
1086 		int offset = vma->exec_entry - exec;
1087 		ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
1088 							    reloc + reloc_offset[offset]);
1089 		if (ret)
1090 			goto err;
1091 	}
1092 
	/* Leave the user relocations as they are; this is the painfully slow path,
1094 	 * and we want to avoid the complication of dropping the lock whilst
1095 	 * having buffers reserved in the aperture and so causing spurious
1096 	 * ENOSPC for random operations.
1097 	 */
1098 
1099 err:
1100 	drm_free_large(reloc);
1101 	drm_free_large(reloc_offset);
1102 	return ret;
1103 }
1104 
1105 static int
1106 i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
1107 				struct list_head *vmas)
1108 {
1109 	struct i915_vma *vma;
1110 	int ret;
1111 
1112 	list_for_each_entry(vma, vmas, exec_list) {
1113 		struct drm_i915_gem_object *obj = vma->obj;
1114 
1115 		ret = i915_gem_request_await_object
1116 			(req, obj, obj->base.pending_write_domain);
1117 		if (ret)
1118 			return ret;
1119 
1120 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
1121 			i915_gem_clflush_object(obj, false);
1122 	}
1123 
1124 	/* Unconditionally flush any chipset caches (for streaming writes). */
1125 	i915_gem_chipset_flush(req->engine->i915);
1126 
1127 	/* Unconditionally invalidate GPU caches and TLBs. */
1128 	return req->engine->emit_flush(req, EMIT_INVALIDATE);
1129 }
1130 
1131 static bool
1132 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
1133 {
1134 	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
1135 		return false;
1136 
1137 	/* Kernel clipping was a DRI1 misfeature */
1138 	if (exec->num_cliprects || exec->cliprects_ptr)
1139 		return false;
1140 
1141 	if (exec->DR4 == 0xffffffff) {
1142 		DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
1143 		exec->DR4 = 0;
1144 	}
1145 	if (exec->DR1 || exec->DR4)
1146 		return false;
1147 
1148 	if ((exec->batch_start_offset | exec->batch_len) & 0x7)
1149 		return false;
1150 
1151 	return true;
1152 }
1153 
1154 static int
1155 validate_exec_list(struct drm_device *dev,
1156 		   struct drm_i915_gem_exec_object2 *exec,
1157 		   int count)
1158 {
1159 	unsigned relocs_total = 0;
1160 	unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
1161 	unsigned invalid_flags;
1162 	int i;
1163 
1164 	/* INTERNAL flags must not overlap with external ones */
1165 	BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS);
1166 
1167 	invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
1168 	if (USES_FULL_PPGTT(dev))
1169 		invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
1170 
1171 	for (i = 0; i < count; i++) {
1172 		char __user *ptr = u64_to_user_ptr(exec[i].relocs_ptr);
1173 		int length; /* limited by fault_in_pages_readable() */
1174 
1175 		if (exec[i].flags & invalid_flags)
1176 			return -EINVAL;
1177 
1178 		/* Offset can be used as input (EXEC_OBJECT_PINNED), reject
1179 		 * any non-page-aligned or non-canonical addresses.
1180 		 */
1181 		if (exec[i].flags & EXEC_OBJECT_PINNED) {
1182 			if (exec[i].offset !=
1183 			    gen8_canonical_addr(exec[i].offset & LINUX_PAGE_MASK))
1184 				return -EINVAL;
1185 		}
1186 
		/* From the drm_mm perspective the address space is contiguous,
		 * so from this point on we always use the non-canonical form
		 * internally.
		 */
1191 		exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
1192 
1193 		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
1194 			return -EINVAL;
1195 
1196 		/* pad_to_size was once a reserved field, so sanitize it */
1197 		if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
1198 			if (offset_in_page(exec[i].pad_to_size))
1199 				return -EINVAL;
1200 		} else {
1201 			exec[i].pad_to_size = 0;
1202 		}
1203 
1204 		/* First check for malicious input causing overflow in
1205 		 * the worst case where we need to allocate the entire
1206 		 * relocation tree as a single array.
1207 		 */
1208 		if (exec[i].relocation_count > relocs_max - relocs_total)
1209 			return -EINVAL;
1210 		relocs_total += exec[i].relocation_count;
1211 
1212 		length = exec[i].relocation_count *
1213 			sizeof(struct drm_i915_gem_relocation_entry);
1214 		/*
1215 		 * We must check that the entire relocation array is safe
1216 		 * to read, but since we may need to update the presumed
1217 		 * offsets during execution, check for full write access.
1218 		 */
1219 #if 0
1220 		if (!access_ok(VERIFY_WRITE, ptr, length))
1221 			return -EFAULT;
1222 #endif
1223 
1224 		if (likely(!i915.prefault_disable)) {
1225 			if (fault_in_pages_readable(ptr, length))
1226 				return -EFAULT;
1227 		}
1228 	}
1229 
1230 	return 0;
1231 }
1232 
1233 static struct i915_gem_context *
1234 i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
1235 			  struct intel_engine_cs *engine, const u32 ctx_id)
1236 {
1237 	struct i915_gem_context *ctx;
1238 	struct i915_ctx_hang_stats *hs;
1239 
1240 	ctx = i915_gem_context_lookup(file->driver_priv, ctx_id);
1241 	if (IS_ERR(ctx))
1242 		return ctx;
1243 
1244 	hs = &ctx->hang_stats;
1245 	if (hs->banned) {
1246 		DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
1247 		return ERR_PTR(-EIO);
1248 	}
1249 
1250 	return ctx;
1251 }
1252 
1253 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
1254 {
1255 	return !(obj->cache_level == I915_CACHE_NONE ||
1256 		 obj->cache_level == I915_CACHE_WT);
1257 }
1258 
1259 void i915_vma_move_to_active(struct i915_vma *vma,
1260 			     struct drm_i915_gem_request *req,
1261 			     unsigned int flags)
1262 {
1263 	struct drm_i915_gem_object *obj = vma->obj;
1264 	const unsigned int idx = req->engine->id;
1265 
1266 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
1267 
1268 	/* Add a reference if we're newly entering the active list.
1269 	 * The order in which we add operations to the retirement queue is
1270 	 * vital here: mark_active adds to the start of the callback list,
1271 	 * such that subsequent callbacks are called first. Therefore we
1272 	 * add the active reference first and queue for it to be dropped
1273 	 * *last*.
1274 	 */
1275 	if (!i915_vma_is_active(vma))
1276 		obj->active_count++;
1277 	i915_vma_set_active(vma, idx);
1278 	i915_gem_active_set(&vma->last_read[idx], req);
1279 	list_move_tail(&vma->vm_link, &vma->vm->active_list);
1280 
1281 	if (flags & EXEC_OBJECT_WRITE) {
1282 		if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
1283 			i915_gem_active_set(&obj->frontbuffer_write, req);
1284 
1285 		/* update for the implicit flush after a batch */
1286 		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1287 		if (!obj->cache_dirty && gpu_write_needs_clflush(obj))
1288 			obj->cache_dirty = true;
1289 	}
1290 
1291 	if (flags & EXEC_OBJECT_NEEDS_FENCE)
1292 		i915_gem_active_set(&vma->last_fence, req);
1293 }
1294 
1295 static void eb_export_fence(struct drm_i915_gem_object *obj,
1296 			    struct drm_i915_gem_request *req,
1297 			    unsigned int flags)
1298 {
1299 	struct reservation_object *resv = obj->resv;
1300 
1301 	/* Ignore errors from failing to allocate the new fence, we can't
1302 	 * handle an error right now. Worst case should be missed
1303 	 * synchronisation leading to rendering corruption.
1304 	 */
1305 	ww_mutex_lock(&resv->lock, NULL);
1306 	if (flags & EXEC_OBJECT_WRITE)
1307 		reservation_object_add_excl_fence(resv, &req->fence);
1308 	else if (reservation_object_reserve_shared(resv) == 0)
1309 		reservation_object_add_shared_fence(resv, &req->fence);
1310 	ww_mutex_unlock(&resv->lock);
1311 }
1312 
1313 static void
1314 i915_gem_execbuffer_move_to_active(struct list_head *vmas,
1315 				   struct drm_i915_gem_request *req)
1316 {
1317 	struct i915_vma *vma;
1318 
1319 	list_for_each_entry(vma, vmas, exec_list) {
1320 		struct drm_i915_gem_object *obj = vma->obj;
1321 		u32 old_read = obj->base.read_domains;
1322 		u32 old_write = obj->base.write_domain;
1323 
1324 		obj->base.write_domain = obj->base.pending_write_domain;
1325 		if (obj->base.write_domain)
1326 			vma->exec_entry->flags |= EXEC_OBJECT_WRITE;
1327 		else
1328 			obj->base.pending_read_domains |= obj->base.read_domains;
1329 		obj->base.read_domains = obj->base.pending_read_domains;
1330 
1331 		i915_vma_move_to_active(vma, req, vma->exec_entry->flags);
1332 		eb_export_fence(obj, req, vma->exec_entry->flags);
1333 		trace_i915_gem_object_change_domain(obj, old_read, old_write);
1334 	}
1335 }
1336 
1337 static int
1338 i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
1339 {
1340 	struct intel_ring *ring = req->ring;
1341 	int ret, i;
1342 
1343 	if (!IS_GEN7(req->i915) || req->engine->id != RCS) {
1344 		DRM_DEBUG("sol reset is gen7/rcs only\n");
1345 		return -EINVAL;
1346 	}
1347 
1348 	ret = intel_ring_begin(req, 4 * 3);
1349 	if (ret)
1350 		return ret;
1351 
1352 	for (i = 0; i < 4; i++) {
1353 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1354 		intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i));
1355 		intel_ring_emit(ring, 0);
1356 	}
1357 
1358 	intel_ring_advance(ring);
1359 
1360 	return 0;
1361 }
1362 
1363 static struct i915_vma *
1364 i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
1365 			  struct drm_i915_gem_exec_object2 *shadow_exec_entry,
1366 			  struct drm_i915_gem_object *batch_obj,
1367 			  struct eb_vmas *eb,
1368 			  u32 batch_start_offset,
1369 			  u32 batch_len,
1370 			  bool is_master)
1371 {
1372 	struct drm_i915_gem_object *shadow_batch_obj;
1373 	struct i915_vma *vma;
1374 	int ret;
1375 
1376 	shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool,
1377 						   PAGE_ALIGN(batch_len));
1378 	if (IS_ERR(shadow_batch_obj))
1379 		return ERR_CAST(shadow_batch_obj);
1380 
1381 	ret = intel_engine_cmd_parser(engine,
1382 				      batch_obj,
1383 				      shadow_batch_obj,
1384 				      batch_start_offset,
1385 				      batch_len,
1386 				      is_master);
1387 	if (ret) {
1388 		if (ret == -EACCES) /* unhandled chained batch */
1389 			vma = NULL;
1390 		else
1391 			vma = ERR_PTR(ret);
1392 		goto out;
1393 	}
1394 
1395 	vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
1396 	if (IS_ERR(vma))
1397 		goto out;
1398 
1399 	memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
1400 
1401 	vma->exec_entry = shadow_exec_entry;
1402 	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
1403 	i915_gem_object_get(shadow_batch_obj);
1404 	list_add_tail(&vma->exec_list, &eb->vmas);
1405 
1406 out:
1407 	i915_gem_object_unpin_pages(shadow_batch_obj);
1408 	return vma;
1409 }
1410 
1411 static int
1412 execbuf_submit(struct i915_execbuffer_params *params,
1413 	       struct drm_i915_gem_execbuffer2 *args,
1414 	       struct list_head *vmas)
1415 {
1416 	u64 exec_start, exec_len;
1417 	int ret;
1418 
1419 	ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
1420 	if (ret)
1421 		return ret;
1422 
1423 	ret = i915_switch_context(params->request);
1424 	if (ret)
1425 		return ret;
1426 
1427 	if (args->flags & I915_EXEC_CONSTANTS_MASK) {
1428 		DRM_DEBUG("I915_EXEC_CONSTANTS_* unsupported\n");
1429 		return -EINVAL;
1430 	}
1431 
1432 	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1433 		ret = i915_reset_gen7_sol_offsets(params->request);
1434 		if (ret)
1435 			return ret;
1436 	}
1437 
1438 	exec_len   = args->batch_len;
1439 	exec_start = params->batch->node.start +
1440 		     params->args_batch_start_offset;
1441 
1442 	if (exec_len == 0)
1443 		exec_len = params->batch->size - params->args_batch_start_offset;
1444 
1445 	ret = params->engine->emit_bb_start(params->request,
1446 					    exec_start, exec_len,
1447 					    params->dispatch_flags);
1448 	if (ret)
1449 		return ret;
1450 
1451 	trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
1452 
1453 	i915_gem_execbuffer_move_to_active(vmas, params->request);
1454 
1455 	return 0;
1456 }
1457 
1458 /**
1459  * Find one BSD ring to dispatch the corresponding BSD command.
1460  * The engine index is returned.
1461  */
1462 static unsigned int
1463 gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
1464 			 struct drm_file *file)
1465 {
1466 	struct drm_i915_file_private *file_priv = file->driver_priv;
1467 
1468 	/* Check whether the file_priv has already selected one ring. */
1469 	if ((int)file_priv->bsd_engine < 0)
1470 		file_priv->bsd_engine = atomic_fetch_xor(1,
1471 			 &dev_priv->mm.bsd_engine_dispatch_index);
1472 
1473 	return file_priv->bsd_engine;
1474 }
1475 
1476 #define I915_USER_RINGS (4)
1477 
1478 static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
1479 	[I915_EXEC_DEFAULT]	= RCS,
1480 	[I915_EXEC_RENDER]	= RCS,
1481 	[I915_EXEC_BLT]		= BCS,
1482 	[I915_EXEC_BSD]		= VCS,
1483 	[I915_EXEC_VEBOX]	= VECS
1484 };
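
/*
 * Maps the ring selector in the low bits of execbuffer2.flags to an engine;
 * I915_EXEC_BSD may be steered further between VCS and VCS2 via the
 * I915_EXEC_BSD_MASK field in eb_select_engine().
 */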
1485 
1486 static struct intel_engine_cs *
1487 eb_select_engine(struct drm_i915_private *dev_priv,
1488 		 struct drm_file *file,
1489 		 struct drm_i915_gem_execbuffer2 *args)
1490 {
1491 	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
1492 	struct intel_engine_cs *engine;
1493 
1494 	if (user_ring_id > I915_USER_RINGS) {
1495 		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
1496 		return NULL;
1497 	}
1498 
1499 	if ((user_ring_id != I915_EXEC_BSD) &&
1500 	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
1501 		DRM_DEBUG("execbuf with non bsd ring but with invalid "
1502 			  "bsd dispatch flags: %d\n", (int)(args->flags));
1503 		return NULL;
1504 	}
1505 
1506 	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
1507 		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
1508 
1509 		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
1510 			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
1511 		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
1512 			   bsd_idx <= I915_EXEC_BSD_RING2) {
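			/*
			 * An explicit ring selection: shift the
			 * I915_EXEC_BSD_RING* value down out of the mask
			 * field and convert it to a 0-based VCS index.
			 */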
1513 			bsd_idx >>= I915_EXEC_BSD_SHIFT;
1514 			bsd_idx--;
1515 		} else {
1516 			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
1517 				  bsd_idx);
1518 			return NULL;
1519 		}
1520 
1521 		engine = dev_priv->engine[_VCS(bsd_idx)];
1522 	} else {
1523 		engine = dev_priv->engine[user_ring_map[user_ring_id]];
1524 	}
1525 
1526 	if (!engine) {
1527 		DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
1528 		return NULL;
1529 	}
1530 
1531 	return engine;
1532 }
1533 
1534 static int
1535 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1536 		       struct drm_file *file,
1537 		       struct drm_i915_gem_execbuffer2 *args,
1538 		       struct drm_i915_gem_exec_object2 *exec)
1539 {
1540 	struct drm_i915_private *dev_priv = to_i915(dev);
1541 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
1542 	struct eb_vmas *eb;
1543 	struct drm_i915_gem_exec_object2 shadow_exec_entry;
1544 	struct intel_engine_cs *engine;
1545 	struct i915_gem_context *ctx;
1546 	struct i915_address_space *vm;
1547 	struct i915_execbuffer_params params_master; /* XXX: will be removed later */
1548 	struct i915_execbuffer_params *params = &params_master;
1549 	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
1550 	u32 dispatch_flags;
1551 	int ret;
1552 	bool need_relocs;
1553 
1554 	if (!i915_gem_check_execbuffer(args))
1555 		return -EINVAL;
1556 
1557 	ret = validate_exec_list(dev, exec, args->buffer_count);
1558 	if (ret)
1559 		return ret;
1560 
1561 	dispatch_flags = 0;
1562 	if (args->flags & I915_EXEC_SECURE) {
1563 #if 0
1564 		if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
1565 		    return -EPERM;
1566 #endif
1567 
1568 		dispatch_flags |= I915_DISPATCH_SECURE;
1569 	}
1570 	if (args->flags & I915_EXEC_IS_PINNED)
1571 		dispatch_flags |= I915_DISPATCH_PINNED;
1572 
1573 	engine = eb_select_engine(dev_priv, file, args);
1574 	if (!engine)
1575 		return -EINVAL;
1576 
1577 	if (args->buffer_count < 1) {
1578 		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1579 		return -EINVAL;
1580 	}
1581 
1582 	if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
1583 		if (!HAS_RESOURCE_STREAMER(dev_priv)) {
1584 			DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n");
1585 			return -EINVAL;
1586 		}
1587 		if (engine->id != RCS) {
1588 			DRM_DEBUG("RS is not available on %s\n",
1589 				 engine->name);
1590 			return -EINVAL;
1591 		}
1592 
1593 		dispatch_flags |= I915_DISPATCH_RS;
1594 	}
1595 
1596 	/* Take a local wakeref for preparing to dispatch the execbuf as
1597 	 * we expect to access the hardware fairly frequently in the
1598 	 * process. Upon first dispatch, we acquire another prolonged
1599 	 * wakeref that we hold until the GPU has been idle for at least
1600 	 * 100ms.
1601 	 */
1602 	intel_runtime_pm_get(dev_priv);
1603 
1604 	ret = i915_mutex_lock_interruptible(dev);
1605 	if (ret)
1606 		goto pre_mutex_err;
1607 
1608 	ctx = i915_gem_validate_context(dev, file, engine, ctx_id);
1609 	if (IS_ERR(ctx)) {
1610 		mutex_unlock(&dev->struct_mutex);
1611 		ret = PTR_ERR(ctx);
1612 		goto pre_mutex_err;
1613 	}
1614 
1615 	i915_gem_context_get(ctx);
1616 
1617 	if (ctx->ppgtt)
1618 		vm = &ctx->ppgtt->base;
1619 	else
1620 		vm = &ggtt->base;
1621 
1622 	memset(&params_master, 0x00, sizeof(params_master));
1623 
1624 	eb = eb_create(dev_priv, args);
1625 	if (eb == NULL) {
1626 		i915_gem_context_put(ctx);
1627 		mutex_unlock(&dev->struct_mutex);
1628 		ret = -ENOMEM;
1629 		goto pre_mutex_err;
1630 	}
1631 
1632 	/* Look up object handles */
1633 	ret = eb_lookup_vmas(eb, exec, args, vm, file);
1634 	if (ret)
1635 		goto err;
1636 
1637 	/* take note of the batch buffer before we might reorder the lists */
1638 	params->batch = eb_get_batch(eb);
1639 
1640 	/* Move the objects en-masse into the GTT, evicting if necessary. */
1641 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
1642 	ret = i915_gem_execbuffer_reserve(engine, &eb->vmas, ctx,
1643 					  &need_relocs);
1644 	if (ret)
1645 		goto err;
1646 
1647 	/* The objects are in their final locations, apply the relocations. */
1648 	if (need_relocs)
1649 		ret = i915_gem_execbuffer_relocate(eb);
1650 	if (ret) {
1651 		if (ret == -EFAULT) {
1652 			ret = i915_gem_execbuffer_relocate_slow(dev, args, file,
1653 								engine,
1654 								eb, exec, ctx);
1655 			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1656 		}
1657 		if (ret)
1658 			goto err;
1659 	}
1660 
1661 	/* Set the pending read domains for the batch buffer to COMMAND */
1662 	if (params->batch->obj->base.pending_write_domain) {
1663 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1664 		ret = -EINVAL;
1665 		goto err;
1666 	}
1667 	if (args->batch_start_offset > params->batch->size ||
1668 	    args->batch_len > params->batch->size - args->batch_start_offset) {
1669 		DRM_DEBUG("Attempting to use out-of-bounds batch\n");
1670 		ret = -EINVAL;
1671 		goto err;
1672 	}
1673 
1674 	params->args_batch_start_offset = args->batch_start_offset;
1675 	if (intel_engine_needs_cmd_parser(engine) && args->batch_len) {
1676 		struct i915_vma *vma;
1677 
1678 		vma = i915_gem_execbuffer_parse(engine, &shadow_exec_entry,
1679 						params->batch->obj,
1680 						eb,
1681 						args->batch_start_offset,
1682 						args->batch_len,
1683 						drm_is_current_master(file));
1684 		if (IS_ERR(vma)) {
1685 			ret = PTR_ERR(vma);
1686 			goto err;
1687 		}
1688 
1689 		if (vma) {
1690 			/*
1691 			 * Batch parsed and accepted:
1692 			 *
1693 			 * Set the DISPATCH_SECURE bit to remove the NON_SECURE
1694 			 * bit from MI_BATCH_BUFFER_START commands issued in
1695 			 * the dispatch_execbuffer implementations. We
1696 			 * specifically don't want that set on batches the
1697 			 * command parser has accepted.
1698 			 */
1699 			dispatch_flags |= I915_DISPATCH_SECURE;
1700 			params->args_batch_start_offset = 0;
1701 			params->batch = vma;
1702 		}
1703 	}
1704 
1705 	params->batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1706 
1707 	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1708 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
1709 	 * hsw should have this fixed, but bdw mucks it up again. */
1710 	if (dispatch_flags & I915_DISPATCH_SECURE) {
1711 		struct drm_i915_gem_object *obj = params->batch->obj;
1712 		struct i915_vma *vma;
1713 
1714 		/*
1715 		 * So on first glance it looks freaky that we pin the batch here
1716 		 * outside of the reservation loop. But:
1717 		 * - The batch is already pinned into the relevant ppgtt, so we
1718 		 *   already have the backing storage fully allocated.
1719 		 * - No other BO uses the global gtt (well contexts, but meh),
1720 		 *   so we don't really have issues with multiple objects not
1721 		 *   fitting due to fragmentation.
1722 		 * So this is actually safe.
1723 		 */
1724 		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
1725 		if (IS_ERR(vma)) {
1726 			ret = PTR_ERR(vma);
1727 			goto err;
1728 		}
1729 
1730 		params->batch = vma;
1731 	}
1732 
1733 	/* Allocate a request for this batch buffer nice and early. */
1734 	params->request = i915_gem_request_alloc(engine, ctx);
1735 	if (IS_ERR(params->request)) {
1736 		ret = PTR_ERR(params->request);
1737 		goto err_batch_unpin;
1738 	}
1739 
1740 	/* Whilst this request exists, batch_obj will be on the
1741 	 * active_list, and so will hold the active reference. Only when this
	 * request is retired will the batch_obj be moved onto the
1743 	 * inactive_list and lose its active reference. Hence we do not need
1744 	 * to explicitly hold another reference here.
1745 	 */
1746 	params->request->batch = params->batch;
1747 
1748 	ret = i915_gem_request_add_to_client(params->request, file);
1749 	if (ret)
1750 		goto err_request;
1751 
1752 	/*
1753 	 * Save assorted stuff away to pass through to *_submission().
	 * NB: This data should be 'persistent' and not local as it will be
1755 	 * kept around beyond the duration of the IOCTL once the GPU
1756 	 * scheduler arrives.
1757 	 */
1758 	params->dev                     = dev;
1759 	params->file                    = file;
	params->engine                  = engine;
1761 	params->dispatch_flags          = dispatch_flags;
1762 	params->ctx                     = ctx;
1763 
1764 	ret = execbuf_submit(params, args, &eb->vmas);
1765 err_request:
1766 	__i915_add_request(params->request, ret == 0);
1767 
1768 err_batch_unpin:
1769 	/*
	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
	 * batch vma for correctness. To be less ugly and less fragile, this
	 * needs to be adjusted to also track the ggtt batch vma properly as
	 * active.
1774 	 */
1775 	if (dispatch_flags & I915_DISPATCH_SECURE)
1776 		i915_vma_unpin(params->batch);
1777 err:
1778 	/* the request owns the ref now */
1779 	i915_gem_context_put(ctx);
1780 	eb_destroy(eb);
1781 
1782 	mutex_unlock(&dev->struct_mutex);
1783 
1784 pre_mutex_err:
1785 	/* intel_gpu_busy should also get a ref, so it will free when the device
1786 	 * is really idle. */
1787 	intel_runtime_pm_put(dev_priv);
1788 	return ret;
1789 }
1790 
1791 /*
1792  * Legacy execbuffer just creates an exec2 list from the original exec object
1793  * list array and passes it to the real function.
1794  */
1795 int
1796 i915_gem_execbuffer(struct drm_device *dev, void *data,
1797 		    struct drm_file *file)
1798 {
1799 	struct drm_i915_gem_execbuffer *args = data;
1800 	struct drm_i915_gem_execbuffer2 exec2;
1801 	struct drm_i915_gem_exec_object *exec_list = NULL;
1802 	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1803 	int ret, i;
1804 
1805 	if (args->buffer_count < 1) {
1806 		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1807 		return -EINVAL;
1808 	}
1809 
1810 	/* Copy in the exec list from userland */
1811 	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1812 	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1813 	if (exec_list == NULL || exec2_list == NULL) {
1814 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1815 			  args->buffer_count);
1816 		drm_free_large(exec_list);
1817 		drm_free_large(exec2_list);
1818 		return -ENOMEM;
1819 	}
1820 	ret = copy_from_user(exec_list,
1821 			     u64_to_user_ptr(args->buffers_ptr),
1822 			     sizeof(*exec_list) * args->buffer_count);
1823 	if (ret != 0) {
1824 		DRM_DEBUG("copy %d exec entries failed %d\n",
1825 			  args->buffer_count, ret);
1826 		drm_free_large(exec_list);
1827 		drm_free_large(exec2_list);
1828 		return -EFAULT;
1829 	}
1830 
1831 	for (i = 0; i < args->buffer_count; i++) {
1832 		exec2_list[i].handle = exec_list[i].handle;
1833 		exec2_list[i].relocation_count = exec_list[i].relocation_count;
1834 		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1835 		exec2_list[i].alignment = exec_list[i].alignment;
1836 		exec2_list[i].offset = exec_list[i].offset;
1837 		if (INTEL_GEN(to_i915(dev)) < 4)
1838 			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1839 		else
1840 			exec2_list[i].flags = 0;
1841 	}
1842 
1843 	exec2.buffers_ptr = args->buffers_ptr;
1844 	exec2.buffer_count = args->buffer_count;
1845 	exec2.batch_start_offset = args->batch_start_offset;
1846 	exec2.batch_len = args->batch_len;
1847 	exec2.DR1 = args->DR1;
1848 	exec2.DR4 = args->DR4;
1849 	exec2.num_cliprects = args->num_cliprects;
1850 	exec2.cliprects_ptr = args->cliprects_ptr;
1851 	exec2.flags = I915_EXEC_RENDER;
1852 	i915_execbuffer2_set_context_id(exec2, 0);
1853 
1854 	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1855 	if (!ret) {
1856 		struct drm_i915_gem_exec_object __user *user_exec_list =
1857 			u64_to_user_ptr(args->buffers_ptr);
1858 
1859 		/* Copy the new buffer offsets back to the user's exec list. */
1860 		for (i = 0; i < args->buffer_count; i++) {
1861 			exec2_list[i].offset =
1862 				gen8_canonical_addr(exec2_list[i].offset);
1863 			ret = __copy_to_user(&user_exec_list[i].offset,
1864 					     &exec2_list[i].offset,
1865 					     sizeof(user_exec_list[i].offset));
1866 			if (ret) {
1867 				ret = -EFAULT;
1868 				DRM_DEBUG("failed to copy %d exec entries "
1869 					  "back to user (%d)\n",
1870 					  args->buffer_count, ret);
1871 				break;
1872 			}
1873 		}
1874 	}
1875 
1876 	drm_free_large(exec_list);
1877 	drm_free_large(exec2_list);
1878 	return ret;
1879 }
1880 
1881 int
1882 i915_gem_execbuffer2(struct drm_device *dev, void *data,
1883 		     struct drm_file *file)
1884 {
1885 	struct drm_i915_gem_execbuffer2 *args = data;
1886 	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1887 	int ret;
1888 
1889 	if (args->buffer_count < 1 ||
1890 	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1891 		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1892 		return -EINVAL;
1893 	}
1894 
1895 	if (args->rsvd2 != 0) {
		DRM_DEBUG("dirty rsvd2 field\n");
1897 		return -EINVAL;
1898 	}
1899 
1900 	exec2_list = drm_malloc_gfp(args->buffer_count,
1901 				    sizeof(*exec2_list),
1902 				    GFP_TEMPORARY);
1903 	if (exec2_list == NULL) {
1904 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1905 			  args->buffer_count);
1906 		return -ENOMEM;
1907 	}
1908 	ret = copy_from_user(exec2_list,
1909 			     u64_to_user_ptr(args->buffers_ptr),
1910 			     sizeof(*exec2_list) * args->buffer_count);
1911 	if (ret != 0) {
1912 		DRM_DEBUG("copy %d exec entries failed %d\n",
1913 			  args->buffer_count, ret);
1914 		drm_free_large(exec2_list);
1915 		return -EFAULT;
1916 	}
1917 
1918 	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1919 	if (!ret) {
1920 		/* Copy the new buffer offsets back to the user's exec list. */
1921 		struct drm_i915_gem_exec_object2 __user *user_exec_list =
1922 				   u64_to_user_ptr(args->buffers_ptr);
1923 		int i;
1924 
1925 		for (i = 0; i < args->buffer_count; i++) {
1926 			exec2_list[i].offset =
1927 				gen8_canonical_addr(exec2_list[i].offset);
1928 			ret = __copy_to_user(&user_exec_list[i].offset,
1929 					     &exec2_list[i].offset,
1930 					     sizeof(user_exec_list[i].offset));
1931 			if (ret) {
1932 				ret = -EFAULT;
1933 				DRM_DEBUG("failed to copy %d exec entries "
1934 					  "back to user\n",
1935 					  args->buffer_count);
1936 				break;
1937 			}
1938 		}
1939 	}
1940 
1941 	drm_free_large(exec2_list);
1942 	return ret;
1943 }
1944