1 /*
2  * Copyright © 2008,2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Chris Wilson <chris@chris-wilson.co.uk>
26  *
27  */
28 
29 #include <linux/dma_remapping.h>
30 #include <linux/reservation.h>
31 #include <linux/sync_file.h>
32 #include <linux/uaccess.h>
33 
34 #include <drm/drmP.h>
35 #include <drm/i915_drm.h>
36 
37 #include "i915_drv.h"
38 #include "i915_gem_clflush.h"
39 #include "i915_trace.h"
40 #include "intel_drv.h"
41 #include "intel_frontbuffer.h"
42 
43 #define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */
44 
45 #define  __EXEC_OBJECT_HAS_PIN		(1<<31)
46 #define  __EXEC_OBJECT_HAS_FENCE	(1<<30)
47 #define  __EXEC_OBJECT_NEEDS_MAP	(1<<29)
48 #define  __EXEC_OBJECT_NEEDS_BIAS	(1<<28)
49 #define  __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28) /* all of the above */
50 
51 #define BATCH_OFFSET_BIAS (256*1024)
52 
53 struct i915_execbuffer_params {
54 	struct drm_device               *dev;
55 	struct drm_file                 *file;
56 	struct i915_vma			*batch;
57 	u32				dispatch_flags;
58 	u32				args_batch_start_offset;
59 	struct intel_engine_cs          *engine;
60 	struct i915_gem_context         *ctx;
61 	struct drm_i915_gem_request     *request;
62 };
63 
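/*
 * Lookup table for the objects referenced by an execbuf call. Depending
 * on how it was created in eb_create(), handles are resolved either via
 * a flat array ("lut", used when eb->and is negative and userspace set
 * I915_EXEC_HANDLE_LUT) or via a small hash table ("buckets", in which
 * case eb->and holds the hash mask).
 */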
64 struct eb_vmas {
65 	struct drm_i915_private *i915;
66 	struct list_head vmas;
67 	int and;
68 	union {
69 		struct i915_vma *lut[0];
70 		struct hlist_head buckets[0];
71 	};
72 };
73 
74 static struct eb_vmas *
75 eb_create(struct drm_i915_private *i915,
76 	  struct drm_i915_gem_execbuffer2 *args)
77 {
78 	struct eb_vmas *eb = NULL;
79 
80 	if (args->flags & I915_EXEC_HANDLE_LUT) {
81 		unsigned size = args->buffer_count;
82 		size *= sizeof(struct i915_vma *);
83 		size += sizeof(struct eb_vmas);
84 		eb = kmalloc(size, M_DRM,
85 			     GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
86 	}
87 
88 	if (eb == NULL) {
89 		unsigned size = args->buffer_count;
90 		unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
91 		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
92 		while (count > 2*size)
93 			count >>= 1;
94 		eb = kzalloc(count*sizeof(struct hlist_head) +
95 			     sizeof(struct eb_vmas),
96 			     GFP_TEMPORARY);
97 		if (eb == NULL)
98 			return eb;
99 
100 		eb->and = count - 1;
101 	} else
102 		eb->and = -args->buffer_count;
103 
104 	eb->i915 = i915;
105 	INIT_LIST_HEAD(&eb->vmas);
106 	return eb;
107 }
108 
109 static void
110 eb_reset(struct eb_vmas *eb)
111 {
112 	if (eb->and >= 0)
113 		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
114 }
115 
116 static struct i915_vma *
117 eb_get_batch(struct eb_vmas *eb)
118 {
119 	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
120 
121 	/*
122 	 * SNA is doing fancy tricks with compressing batch buffers, which leads
123 	 * to negative relocation deltas. Usually that works out ok since the
124 	 * relocate address is still positive, except when the batch is placed
125 	 * very low in the GTT. Ensure this doesn't happen.
126 	 *
127 	 * Note that actual hangs have only been observed on gen7, but for
128 	 * paranoia do it everywhere.
129 	 */
130 	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
131 		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
132 
133 	return vma;
134 }
135 
136 static int
137 eb_lookup_vmas(struct eb_vmas *eb,
138 	       struct drm_i915_gem_exec_object2 *exec,
139 	       const struct drm_i915_gem_execbuffer2 *args,
140 	       struct i915_address_space *vm,
141 	       struct drm_file *file)
142 {
143 	struct drm_i915_gem_object *obj;
144 	struct list_head objects;
145 	int i, ret;
146 
147 	INIT_LIST_HEAD(&objects);
148 	lockmgr(&file->table_lock, LK_EXCLUSIVE);
149 	/* Grab a reference to the object and release the lock so we can
150 	 * look up or create the VMA without using GFP_ATOMIC. */
151 	for (i = 0; i < args->buffer_count; i++) {
152 		obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
153 		if (obj == NULL) {
154 			lockmgr(&file->table_lock, LK_RELEASE);
155 			DRM_DEBUG("Invalid object handle %d at index %d\n",
156 				   exec[i].handle, i);
157 			ret = -ENOENT;
158 			goto err;
159 		}
160 
161 		if (!list_empty(&obj->obj_exec_link)) {
162 			lockmgr(&file->table_lock, LK_RELEASE);
163 			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
164 				   obj, exec[i].handle, i);
165 			ret = -EINVAL;
166 			goto err;
167 		}
168 
169 		i915_gem_object_get(obj);
170 		list_add_tail(&obj->obj_exec_link, &objects);
171 	}
172 	lockmgr(&file->table_lock, LK_RELEASE);
173 
174 	i = 0;
175 	while (!list_empty(&objects)) {
176 		struct i915_vma *vma;
177 
178 		obj = list_first_entry(&objects,
179 				       struct drm_i915_gem_object,
180 				       obj_exec_link);
181 
182 		/*
183 		 * NOTE: We can leak any vmas created here when something fails
184 		 * later on. But that's no issue since vma_unbind can deal with
185 		 * vmas which are not actually bound. And since only
186 		 * lookup_or_create exists as an interface to get at the vma
187 		 * from the (obj, vm) pair, we don't run the risk of creating
188 		 * duplicated vmas for the same vm.
189 		 */
190 		vma = i915_vma_instance(obj, vm, NULL);
191 		if (unlikely(IS_ERR(vma))) {
192 			DRM_DEBUG("Failed to lookup VMA\n");
193 			ret = PTR_ERR(vma);
194 			goto err;
195 		}
196 
197 		/* Transfer ownership from the objects list to the vmas list. */
198 		list_add_tail(&vma->exec_list, &eb->vmas);
199 		list_del_init(&obj->obj_exec_link);
200 
201 		vma->exec_entry = &exec[i];
202 		if (eb->and < 0) {
203 			eb->lut[i] = vma;
204 		} else {
205 			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
206 			vma->exec_handle = handle;
207 			hlist_add_head(&vma->exec_node,
208 				       &eb->buckets[handle & eb->and]);
209 		}
210 		++i;
211 	}
212 
213 	return 0;
214 
216 err:
217 	while (!list_empty(&objects)) {
218 		obj = list_first_entry(&objects,
219 				       struct drm_i915_gem_object,
220 				       obj_exec_link);
221 		list_del_init(&obj->obj_exec_link);
222 		i915_gem_object_put(obj);
223 	}
224 	/*
225 	 * Objects already transferred to the vmas list will be unreferenced by
226 	 * eb_destroy.
227 	 */
228 
229 	return ret;
230 }
231 
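/*
 * Resolve an execbuf handle back to its vma using whichever lookup
 * structure (flat array or hash table) eb_lookup_vmas() populated.
 */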
232 static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
233 {
234 	if (eb->and < 0) {
235 		if (handle >= -eb->and)
236 			return NULL;
237 		return eb->lut[handle];
238 	} else {
239 		struct hlist_head *head;
240 		struct i915_vma *vma;
241 
242 		head = &eb->buckets[handle & eb->and];
243 		hlist_for_each_entry(vma, head, exec_node) {
244 			if (vma->exec_handle == handle)
245 				return vma;
246 		}
247 		return NULL;
248 	}
249 }
250 
251 static void
252 i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
253 {
254 	struct drm_i915_gem_exec_object2 *entry;
255 
256 	if (!drm_mm_node_allocated(&vma->node))
257 		return;
258 
259 	entry = vma->exec_entry;
260 
261 	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
262 		i915_vma_unpin_fence(vma);
263 
264 	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
265 		__i915_vma_unpin(vma);
266 
267 	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
268 }
269 
270 static void eb_destroy(struct eb_vmas *eb)
271 {
272 	while (!list_empty(&eb->vmas)) {
273 		struct i915_vma *vma;
274 
275 		vma = list_first_entry(&eb->vmas,
276 				       struct i915_vma,
277 				       exec_list);
278 		list_del_init(&vma->exec_list);
279 		i915_gem_execbuffer_unreserve_vma(vma);
280 		vma->exec_entry = NULL;
281 		i915_vma_put(vma);
282 	}
283 	kfree(eb);
284 }
285 
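/*
 * Decide whether relocations should be written through a CPU kmap of the
 * object or through a GTT iomap of the aperture. The CPU path is used
 * when the object has struct pages and either the platform has an LLC,
 * the object is already in the CPU write domain, or it is cacheable, so
 * the write does not need a detour through the uncached aperture.
 */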
286 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
287 {
288 	if (!i915_gem_object_has_struct_page(obj))
289 		return false;
290 
291 	if (DBG_USE_CPU_RELOC)
292 		return DBG_USE_CPU_RELOC > 0;
293 
294 	return (HAS_LLC(to_i915(obj->base.dev)) ||
295 		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
296 		obj->cache_level != I915_CACHE_NONE);
297 }
298 
299 /* Used to convert any address to canonical form.
300  * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
301  * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
302  * addresses to be in a canonical form:
303  * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
304  * canonical form [63:48] == [47]."
305  */
306 #define GEN8_HIGH_ADDRESS_BIT 47
307 static inline uint64_t gen8_canonical_addr(uint64_t address)
308 {
309 	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
310 }
311 
312 static inline uint64_t gen8_noncanonical_addr(uint64_t address)
313 {
314 	return address & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1);
315 }
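/*
 * Example for the two helpers above: with bit 47 as the sign bit,
 * 0x0000800000000000 becomes 0xffff800000000000 in canonical form,
 * while an address below 2^47 such as 0x00007fffffff0000 is returned
 * unchanged; gen8_noncanonical_addr() simply masks the value back down
 * to bits [47:0].
 */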
316 
317 static inline uint64_t
318 relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
319 		  uint64_t target_offset)
320 {
321 	return gen8_canonical_addr((int)reloc->delta + target_offset);
322 }
323 
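/*
 * The relocation cache remembers which page of the target object is
 * currently mapped for patching, so a run of relocations that land in
 * the same page only pays for a single kmap_atomic() or GTT iomap.
 * The low (sub-page) bits of cache->vaddr double as storage for the
 * KMAP and CLFLUSH flag bits.
 */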
324 struct reloc_cache {
325 	struct drm_i915_private *i915;
326 	struct drm_mm_node node;
327 	unsigned long vaddr;
328 	unsigned int page;
329 	bool use_64bit_reloc;
330 };
331 
332 static void reloc_cache_init(struct reloc_cache *cache,
333 			     struct drm_i915_private *i915)
334 {
335 	cache->page = -1;
336 	cache->vaddr = 0;
337 	cache->i915 = i915;
338 	/* Must be a variable in the struct to allow GCC to unroll. */
339 	cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
340 	cache->node.allocated = false;
341 }
342 
343 static inline void *unmask_page(unsigned long p)
344 {
345 	return (void *)(uintptr_t)(p & LINUX_PAGE_MASK);
346 }
347 
348 static inline unsigned int unmask_flags(unsigned long p)
349 {
350 	return p & ~LINUX_PAGE_MASK;
351 }
352 
353 #define KMAP 0x4 /* after CLFLUSH_FLAGS */
354 
355 static void reloc_cache_fini(struct reloc_cache *cache)
356 {
357 	void *vaddr;
358 
359 	if (!cache->vaddr)
360 		return;
361 
362 	vaddr = unmask_page(cache->vaddr);
363 	if (cache->vaddr & KMAP) {
364 		if (cache->vaddr & CLFLUSH_AFTER)
365 			mb();
366 
367 		kunmap_atomic(vaddr);
368 		i915_gem_obj_finish_shmem_access((struct drm_i915_gem_object *)cache->node.mm);
369 	} else {
370 		wmb();
371 		io_mapping_unmap_atomic((void __iomem *)vaddr);
372 		if (cache->node.allocated) {
373 			struct i915_ggtt *ggtt = &cache->i915->ggtt;
374 
375 			ggtt->base.clear_range(&ggtt->base,
376 					       cache->node.start,
377 					       cache->node.size);
378 			drm_mm_remove_node(&cache->node);
379 		} else {
380 			i915_vma_unpin((struct i915_vma *)cache->node.mm);
381 		}
382 	}
383 }
384 
385 static void *reloc_kmap(struct drm_i915_gem_object *obj,
386 			struct reloc_cache *cache,
387 			int page)
388 {
389 	void *vaddr;
390 
391 	if (cache->vaddr) {
392 		kunmap_atomic(unmask_page(cache->vaddr));
393 	} else {
394 		unsigned int flushes;
395 		int ret;
396 
397 		ret = i915_gem_obj_prepare_shmem_write(obj, &flushes);
398 		if (ret)
399 			return ERR_PTR(ret);
400 
401 		BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
402 		BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & LINUX_PAGE_MASK);
403 
404 		cache->vaddr = flushes | KMAP;
405 		cache->node.mm = (void *)obj;
406 		if (flushes)
407 			mb();
408 	}
409 
410 	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
411 	cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
412 	cache->page = page;
413 
414 	return vaddr;
415 }
416 
417 static void *reloc_iomap(struct drm_i915_gem_object *obj,
418 			 struct reloc_cache *cache,
419 			 int page)
420 {
421 	struct i915_ggtt *ggtt = &cache->i915->ggtt;
422 	unsigned long offset;
423 	void *vaddr;
424 
425 	if (cache->vaddr) {
426 		io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
427 	} else {
428 		struct i915_vma *vma;
429 		int ret;
430 
431 		if (use_cpu_reloc(obj))
432 			return NULL;
433 
434 		ret = i915_gem_object_set_to_gtt_domain(obj, true);
435 		if (ret)
436 			return ERR_PTR(ret);
437 
438 		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
439 					       PIN_MAPPABLE | PIN_NONBLOCK);
440 		if (IS_ERR(vma)) {
441 			memset(&cache->node, 0, sizeof(cache->node));
442 			ret = drm_mm_insert_node_in_range
443 				(&ggtt->base.mm, &cache->node,
444 				 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
445 				 0, ggtt->mappable_end,
446 				 DRM_MM_INSERT_LOW);
447 			if (ret) /* no inactive aperture space, use cpu reloc */
448 				return NULL;
449 		} else {
450 			ret = i915_vma_put_fence(vma);
451 			if (ret) {
452 				i915_vma_unpin(vma);
453 				return ERR_PTR(ret);
454 			}
455 
456 			cache->node.start = vma->node.start;
457 			cache->node.mm = (void *)vma;
458 		}
459 	}
460 
461 	offset = cache->node.start;
462 	if (cache->node.allocated) {
463 		wmb();
464 		ggtt->base.insert_page(&ggtt->base,
465 				       i915_gem_object_get_dma_address(obj, page),
466 				       offset, I915_CACHE_NONE, 0);
467 	} else {
468 		offset += page << PAGE_SHIFT;
469 	}
470 
471 	vaddr = (void __force *) io_mapping_map_atomic_wc(&cache->i915->ggtt.mappable, offset);
472 	cache->page = page;
473 	cache->vaddr = (unsigned long)vaddr;
474 
475 	return vaddr;
476 }
477 
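/*
 * Return a CPU-accessible pointer to the page containing the relocation,
 * reusing the cached mapping when the page has not changed. A GTT iomap
 * via reloc_iomap() is tried first unless we are already in KMAP mode or
 * CPU relocations were chosen; if that yields nothing we fall back to a
 * kmap of the object's backing page via reloc_kmap().
 */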
478 static void *reloc_vaddr(struct drm_i915_gem_object *obj,
479 			 struct reloc_cache *cache,
480 			 int page)
481 {
482 	void *vaddr;
483 
484 	if (cache->page == page) {
485 		vaddr = unmask_page(cache->vaddr);
486 	} else {
487 		vaddr = NULL;
488 		if ((cache->vaddr & KMAP) == 0)
489 			vaddr = reloc_iomap(obj, cache, page);
490 		if (!vaddr)
491 			vaddr = reloc_kmap(obj, cache, page);
492 	}
493 
494 	return vaddr;
495 }
496 
497 static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
498 {
499 	if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
500 		if (flushes & CLFLUSH_BEFORE) {
501 			clflushopt(addr);
502 			mb();
503 		}
504 
505 		*addr = value;
506 
507 		/* Writes to the same cacheline are serialised by the CPU
508 		 * (including clflush). On the write path, we only require
509 		 * that it hits memory in an orderly fashion and place
510 		 * mb barriers at the start and end of the relocation phase
511 		 * to ensure ordering of clflush wrt the system.
512 		 */
513 		if (flushes & CLFLUSH_AFTER)
514 			clflushopt(addr);
515 	} else
516 		*addr = value;
517 }
518 
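/*
 * Patch the relocated address into the object. On platforms with 64-bit
 * relocations the value is written as two 32-bit halves, going back
 * through reloc_vaddr() for the upper half in case it crosses into the
 * next page.
 */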
519 static int
520 relocate_entry(struct drm_i915_gem_object *obj,
521 	       const struct drm_i915_gem_relocation_entry *reloc,
522 	       struct reloc_cache *cache,
523 	       u64 target_offset)
524 {
525 	u64 offset = reloc->offset;
526 	bool wide = cache->use_64bit_reloc;
527 	void *vaddr;
528 
529 	target_offset = relocation_target(reloc, target_offset);
530 repeat:
531 	vaddr = reloc_vaddr(obj, cache, offset >> PAGE_SHIFT);
532 	if (IS_ERR(vaddr))
533 		return PTR_ERR(vaddr);
534 
535 	clflush_write32(vaddr + offset_in_page(offset),
536 			lower_32_bits(target_offset),
537 			cache->vaddr);
538 
539 	if (wide) {
540 		offset += sizeof(u32);
541 		target_offset >>= 32;
542 		wide = false;
543 		goto repeat;
544 	}
545 
546 	return 0;
547 }
548 
549 static int
550 i915_gem_execbuffer_relocate_entry(struct i915_vma *vma,
551 				   struct eb_vmas *eb,
552 				   struct drm_i915_gem_relocation_entry *reloc,
553 				   struct reloc_cache *cache)
554 {
555 	struct drm_i915_gem_object *obj = vma->obj;
556 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
557 	struct drm_gem_object *target_obj;
558 	struct drm_i915_gem_object *target_i915_obj;
559 	struct i915_vma *target_vma;
560 	uint64_t target_offset;
561 	int ret;
562 
563 	/* we already hold a reference to all valid objects */
564 	target_vma = eb_get_vma(eb, reloc->target_handle);
565 	if (unlikely(target_vma == NULL))
566 		return -ENOENT;
567 	target_i915_obj = target_vma->obj;
568 	target_obj = &target_vma->obj->base;
569 
570 	target_offset = gen8_canonical_addr(target_vma->node.start);
571 
572 	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
573 	 * pipe_control writes because the gpu doesn't properly redirect them
574 	 * through the ppgtt for non_secure batchbuffers. */
575 	if (unlikely(IS_GEN6(dev_priv) &&
576 	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) {
577 		ret = i915_vma_bind(target_vma, target_i915_obj->cache_level,
578 				    PIN_GLOBAL);
579 		if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
580 			return ret;
581 	}
582 
583 	/* Validate that the target is in a valid r/w GPU domain */
584 	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
585 		DRM_DEBUG("reloc with multiple write domains: "
586 			  "obj %p target %d offset %d "
587 			  "read %08x write %08x",
588 			  obj, reloc->target_handle,
589 			  (int) reloc->offset,
590 			  reloc->read_domains,
591 			  reloc->write_domain);
592 		return -EINVAL;
593 	}
594 	if (unlikely((reloc->write_domain | reloc->read_domains)
595 		     & ~I915_GEM_GPU_DOMAINS)) {
596 		DRM_DEBUG("reloc with read/write non-GPU domains: "
597 			  "obj %p target %d offset %d "
598 			  "read %08x write %08x",
599 			  obj, reloc->target_handle,
600 			  (int) reloc->offset,
601 			  reloc->read_domains,
602 			  reloc->write_domain);
603 		return -EINVAL;
604 	}
605 
606 	target_obj->pending_read_domains |= reloc->read_domains;
607 	target_obj->pending_write_domain |= reloc->write_domain;
608 
609 	/* If the relocation already has the right value in it, no
610 	 * more work needs to be done.
611 	 */
612 	if (target_offset == reloc->presumed_offset)
613 		return 0;
614 
615 	/* Check that the relocation address is valid... */
616 	if (unlikely(reloc->offset >
617 		     obj->base.size - (cache->use_64bit_reloc ? 8 : 4))) {
618 		DRM_DEBUG("Relocation beyond object bounds: "
619 			  "obj %p target %d offset %d size %d.\n",
620 			  obj, reloc->target_handle,
621 			  (int) reloc->offset,
622 			  (int) obj->base.size);
623 		return -EINVAL;
624 	}
625 	if (unlikely(reloc->offset & 3)) {
626 		DRM_DEBUG("Relocation not 4-byte aligned: "
627 			  "obj %p target %d offset %d.\n",
628 			  obj, reloc->target_handle,
629 			  (int) reloc->offset);
630 		return -EINVAL;
631 	}
632 
633 	/*
634 	 * If we write into the object, we need to force the synchronisation
635 	 * barrier, either with an asynchronous clflush or if we executed the
636 	 * patching using the GPU (though that should be serialised by the
637 	 * timeline). To be completely sure, and since we are required to
638 	 * do relocations we are already stalling, disable the user's opt-out
639 	 * of our synchronisation.
640 	 */
641 	vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC;
642 
643 	ret = relocate_entry(obj, reloc, cache, target_offset);
644 	if (ret)
645 		return ret;
646 
647 	/* and update the user's relocation entry */
648 	reloc->presumed_offset = target_offset;
649 	return 0;
650 }
651 
652 static int
653 i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
654 				 struct eb_vmas *eb)
655 {
656 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
657 	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
658 	struct drm_i915_gem_relocation_entry __user *user_relocs;
659 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
660 	struct reloc_cache cache;
661 	int remain, ret = 0;
662 
663 	user_relocs = u64_to_user_ptr(entry->relocs_ptr);
664 	reloc_cache_init(&cache, eb->i915);
665 
666 	remain = entry->relocation_count;
667 	while (remain) {
668 		struct drm_i915_gem_relocation_entry *r = stack_reloc;
669 		unsigned long unwritten;
670 		unsigned int count;
671 
672 		count = min_t(unsigned int, remain, ARRAY_SIZE(stack_reloc));
673 		remain -= count;
674 
675 		/* This is the fast path and we cannot handle a pagefault
676 		 * whilst holding the struct mutex lest the user pass in the
677 		 * relocations contained within a mmapped bo. In such a case
678 		 * the page fault handler would call i915_gem_fault() and
679 		 * we would try to acquire the struct mutex again. Obviously
680 		 * this is bad and so lockdep complains vehemently.
681 		 */
682 		pagefault_disable();
683 		unwritten = __copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]));
684 		pagefault_enable();
685 		if (unlikely(unwritten)) {
686 			ret = -EFAULT;
687 			goto out;
688 		}
689 
690 		do {
691 			u64 offset = r->presumed_offset;
692 
693 			ret = i915_gem_execbuffer_relocate_entry(vma, eb, r, &cache);
694 			if (ret)
695 				goto out;
696 
697 			if (r->presumed_offset != offset) {
698 				pagefault_disable();
699 				unwritten = __put_user(r->presumed_offset,
700 						       &user_relocs->presumed_offset);
701 				pagefault_enable();
702 				if (unlikely(unwritten)) {
703 					/* Note that reporting an error now
704 					 * leaves everything in an inconsistent
705 					 * state as we have *already* changed
706 					 * the relocation value inside the
707 					 * object. As we have not updated
708 					 * reloc.presumed_offset and will not
709 					 * change the execobject.offset, on a
710 					 * subsequent call we would not rewrite
711 					 * the value inside the object, leaving
712 					 * it dangling and causing a GPU hang.
713 					 */
714 					ret = -EFAULT;
715 					goto out;
716 				}
717 			}
718 
719 			user_relocs++;
720 			r++;
721 		} while (--count);
722 	}
723 
724 out:
725 	reloc_cache_fini(&cache);
726 	return ret;
727 #undef N_RELOC
728 }
729 
730 static int
731 i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
732 				      struct eb_vmas *eb,
733 				      struct drm_i915_gem_relocation_entry *relocs)
734 {
735 	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
736 	struct reloc_cache cache;
737 	int i, ret = 0;
738 
739 	reloc_cache_init(&cache, eb->i915);
740 	for (i = 0; i < entry->relocation_count; i++) {
741 		ret = i915_gem_execbuffer_relocate_entry(vma, eb, &relocs[i], &cache);
742 		if (ret)
743 			break;
744 	}
745 	reloc_cache_fini(&cache);
746 
747 	return ret;
748 }
749 
750 static int
751 i915_gem_execbuffer_relocate(struct eb_vmas *eb)
752 {
753 	struct i915_vma *vma;
754 	int ret = 0;
755 
756 	list_for_each_entry(vma, &eb->vmas, exec_list) {
757 		ret = i915_gem_execbuffer_relocate_vma(vma, eb);
758 		if (ret)
759 			break;
760 	}
761 
762 	return ret;
763 }
764 
765 static bool only_mappable_for_reloc(unsigned int flags)
766 {
767 	return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) ==
768 		__EXEC_OBJECT_NEEDS_MAP;
769 }
770 
771 static int
772 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
773 				struct intel_engine_cs *engine,
774 				bool *need_reloc)
775 {
776 	struct drm_i915_gem_object *obj = vma->obj;
777 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
778 	uint64_t flags;
779 	int ret;
780 
781 	flags = PIN_USER;
782 	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
783 		flags |= PIN_GLOBAL;
784 
785 	if (!drm_mm_node_allocated(&vma->node)) {
786 		/* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
787 		 * limit address to the first 4GBs for unflagged objects.
788 		 */
789 		if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0)
790 			flags |= PIN_ZONE_4G;
791 		if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
792 			flags |= PIN_GLOBAL | PIN_MAPPABLE;
793 		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
794 			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
795 		if (entry->flags & EXEC_OBJECT_PINNED)
796 			flags |= entry->offset | PIN_OFFSET_FIXED;
797 		if ((flags & PIN_MAPPABLE) == 0)
798 			flags |= PIN_HIGH;
799 	}
800 
801 	ret = i915_vma_pin(vma,
802 			   entry->pad_to_size,
803 			   entry->alignment,
804 			   flags);
805 	if ((ret == -ENOSPC || ret == -E2BIG) &&
806 	    only_mappable_for_reloc(entry->flags))
807 		ret = i915_vma_pin(vma,
808 				   entry->pad_to_size,
809 				   entry->alignment,
810 				   flags & ~PIN_MAPPABLE);
811 	if (ret)
812 		return ret;
813 
814 	entry->flags |= __EXEC_OBJECT_HAS_PIN;
815 
816 	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
817 		ret = i915_vma_get_fence(vma);
818 		if (ret)
819 			return ret;
820 
821 		if (i915_vma_pin_fence(vma))
822 			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
823 	}
824 
825 	if (entry->offset != vma->node.start) {
826 		entry->offset = vma->node.start;
827 		*need_reloc = true;
828 	}
829 
830 	if (entry->flags & EXEC_OBJECT_WRITE) {
831 		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
832 		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
833 	}
834 
835 	return 0;
836 }
837 
838 static bool
839 need_reloc_mappable(struct i915_vma *vma)
840 {
841 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
842 
843 	if (entry->relocation_count == 0)
844 		return false;
845 
846 	if (!i915_vma_is_ggtt(vma))
847 		return false;
848 
849 	/* See also use_cpu_reloc() */
850 	if (HAS_LLC(to_i915(vma->obj->base.dev)))
851 		return false;
852 
853 	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
854 		return false;
855 
856 	return true;
857 }
858 
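/*
 * Check whether a vma's current binding violates any of the constraints
 * requested for this execbuf (alignment, pad_to_size, a pinned offset,
 * the bias above BATCH_OFFSET_BIAS, mappability, or the 4GiB limit for
 * objects without EXEC_OBJECT_SUPPORTS_48B_ADDRESS); if so, the
 * reservation loop will unbind and re-pin it.
 */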
859 static bool
860 eb_vma_misplaced(struct i915_vma *vma)
861 {
862 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
863 
864 	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
865 		!i915_vma_is_ggtt(vma));
866 
867 	if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
868 		return true;
869 
870 	if (vma->node.size < entry->pad_to_size)
871 		return true;
872 
873 	if (entry->flags & EXEC_OBJECT_PINNED &&
874 	    vma->node.start != entry->offset)
875 		return true;
876 
877 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
878 	    vma->node.start < BATCH_OFFSET_BIAS)
879 		return true;
880 
881 	/* avoid costly ping-pong once a batch bo ended up non-mappable */
882 	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
883 	    !i915_vma_is_map_and_fenceable(vma))
884 		return !only_mappable_for_reloc(entry->flags);
885 
886 	if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
887 	    (vma->node.start + vma->node.size - 1) >> 32)
888 		return true;
889 
890 	return false;
891 }
892 
893 static int
894 i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
895 			    struct list_head *vmas,
896 			    struct i915_gem_context *ctx,
897 			    bool *need_relocs)
898 {
899 	struct drm_i915_gem_object *obj;
900 	struct i915_vma *vma;
901 	struct i915_address_space *vm;
902 	struct list_head ordered_vmas;
903 	struct list_head pinned_vmas;
904 	bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4;
905 	bool needs_unfenced_map = INTEL_INFO(engine->i915)->unfenced_needs_alignment;
906 	int retry;
907 
908 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
909 
910 	INIT_LIST_HEAD(&ordered_vmas);
911 	INIT_LIST_HEAD(&pinned_vmas);
912 	while (!list_empty(vmas)) {
913 		struct drm_i915_gem_exec_object2 *entry;
914 		bool need_fence, need_mappable;
915 
916 		vma = list_first_entry(vmas, struct i915_vma, exec_list);
917 		obj = vma->obj;
918 		entry = vma->exec_entry;
919 
920 		if (ctx->flags & CONTEXT_NO_ZEROMAP)
921 			entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
922 
923 		if (!has_fenced_gpu_access)
924 			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
925 		need_fence =
926 			(entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
927 			 needs_unfenced_map) &&
928 			i915_gem_object_is_tiled(obj);
929 		need_mappable = need_fence || need_reloc_mappable(vma);
930 
931 		if (entry->flags & EXEC_OBJECT_PINNED)
932 			list_move_tail(&vma->exec_list, &pinned_vmas);
933 		else if (need_mappable) {
934 			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
935 			list_move(&vma->exec_list, &ordered_vmas);
936 		} else
937 			list_move_tail(&vma->exec_list, &ordered_vmas);
938 
939 		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
940 		obj->base.pending_write_domain = 0;
941 	}
942 	list_splice(&ordered_vmas, vmas);
943 	list_splice(&pinned_vmas, vmas);
944 
945 	/* Attempt to pin all of the buffers into the GTT.
946 	 * This is done in 3 phases:
947 	 *
948 	 * 1a. Unbind all objects that do not match the GTT constraints for
949 	 *     the execbuffer (fenceable, mappable, alignment etc).
950 	 * 1b. Increment pin count for already bound objects.
951 	 * 2.  Bind new objects.
952 	 * 3.  Decrement pin count.
953 	 *
954 	 * This avoids unnecessary unbinding of later objects in order to make
955 	 * room for the earlier objects *unless* we need to defragment.
956 	 */
957 	retry = 0;
958 	do {
959 		int ret = 0;
960 
961 		/* Unbind any ill-fitting objects or pin. */
962 		list_for_each_entry(vma, vmas, exec_list) {
963 			if (!drm_mm_node_allocated(&vma->node))
964 				continue;
965 
966 			if (eb_vma_misplaced(vma))
967 				ret = i915_vma_unbind(vma);
968 			else
969 				ret = i915_gem_execbuffer_reserve_vma(vma,
970 								      engine,
971 								      need_relocs);
972 			if (ret)
973 				goto err;
974 		}
975 
976 		/* Bind fresh objects */
977 		list_for_each_entry(vma, vmas, exec_list) {
978 			if (drm_mm_node_allocated(&vma->node))
979 				continue;
980 
981 			ret = i915_gem_execbuffer_reserve_vma(vma, engine,
982 							      need_relocs);
983 			if (ret)
984 				goto err;
985 		}
986 
987 err:
988 		if (ret != -ENOSPC || retry++)
989 			return ret;
990 
991 		/* Decrement pin count for bound objects */
992 		list_for_each_entry(vma, vmas, exec_list)
993 			i915_gem_execbuffer_unreserve_vma(vma);
994 
995 		ret = i915_gem_evict_vm(vm, true);
996 		if (ret)
997 			return ret;
998 	} while (1);
999 }
1000 
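/*
 * Slow path: the atomic copy of the relocation entries faulted. Drop
 * struct_mutex, copy every relocation array from userspace with page
 * faults allowed (marking the user's presumed offsets invalid), then
 * retake the lock, re-reserve the objects and apply the relocations
 * from the kernel copy.
 */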
1001 static int
1002 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
1003 				  struct drm_i915_gem_execbuffer2 *args,
1004 				  struct drm_file *file,
1005 				  struct intel_engine_cs *engine,
1006 				  struct eb_vmas *eb,
1007 				  struct drm_i915_gem_exec_object2 *exec,
1008 				  struct i915_gem_context *ctx)
1009 {
1010 	struct drm_i915_gem_relocation_entry *reloc;
1011 	struct i915_address_space *vm;
1012 	struct i915_vma *vma;
1013 	bool need_relocs;
1014 	int *reloc_offset;
1015 	int i, total, ret;
1016 	unsigned count = args->buffer_count;
1017 
1018 	vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;
1019 
1020 	/* We may process another execbuffer during the unlock... */
1021 	while (!list_empty(&eb->vmas)) {
1022 		vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
1023 		list_del_init(&vma->exec_list);
1024 		i915_gem_execbuffer_unreserve_vma(vma);
1025 		i915_vma_put(vma);
1026 	}
1027 
1028 	mutex_unlock(&dev->struct_mutex);
1029 
1030 	total = 0;
1031 	for (i = 0; i < count; i++)
1032 		total += exec[i].relocation_count;
1033 
1034 	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
1035 	reloc = drm_malloc_ab(total, sizeof(*reloc));
1036 	if (reloc == NULL || reloc_offset == NULL) {
1037 		drm_free_large(reloc);
1038 		drm_free_large(reloc_offset);
1039 		mutex_lock(&dev->struct_mutex);
1040 		return -ENOMEM;
1041 	}
1042 
1043 	total = 0;
1044 	for (i = 0; i < count; i++) {
1045 		struct drm_i915_gem_relocation_entry __user *user_relocs;
1046 		u64 invalid_offset = (u64)-1;
1047 		int j;
1048 
1049 		user_relocs = u64_to_user_ptr(exec[i].relocs_ptr);
1050 
1051 		if (copy_from_user(reloc+total, user_relocs,
1052 				   exec[i].relocation_count * sizeof(*reloc))) {
1053 			ret = -EFAULT;
1054 			mutex_lock(&dev->struct_mutex);
1055 			goto err;
1056 		}
1057 
1058 		/* As we do not update the known relocation offsets after
1059 		 * relocating (due to the complexities in lock handling),
1060 		 * we need to mark them as invalid now so that we force the
1061 		 * relocation processing next time. Just in case the target
1062 		 * object is evicted and then rebound into its old
1063 		 * presumed_offset before the next execbuffer - if that
1064 		 * happened we would make the mistake of assuming that the
1065 		 * relocations were valid.
1066 		 */
1067 		for (j = 0; j < exec[i].relocation_count; j++) {
1068 			if (__copy_to_user(&user_relocs[j].presumed_offset,
1069 					   &invalid_offset,
1070 					   sizeof(invalid_offset))) {
1071 				ret = -EFAULT;
1072 				mutex_lock(&dev->struct_mutex);
1073 				goto err;
1074 			}
1075 		}
1076 
1077 		reloc_offset[i] = total;
1078 		total += exec[i].relocation_count;
1079 	}
1080 
1081 	ret = i915_mutex_lock_interruptible(dev);
1082 	if (ret) {
1083 		mutex_lock(&dev->struct_mutex);
1084 		goto err;
1085 	}
1086 
1087 	/* reacquire the objects */
1088 	eb_reset(eb);
1089 	ret = eb_lookup_vmas(eb, exec, args, vm, file);
1090 	if (ret)
1091 		goto err;
1092 
1093 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
1094 	ret = i915_gem_execbuffer_reserve(engine, &eb->vmas, ctx,
1095 					  &need_relocs);
1096 	if (ret)
1097 		goto err;
1098 
1099 	list_for_each_entry(vma, &eb->vmas, exec_list) {
1100 		int offset = vma->exec_entry - exec;
1101 		ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
1102 							    reloc + reloc_offset[offset]);
1103 		if (ret)
1104 			goto err;
1105 	}
1106 
1107 	/* Leave the user relocations as they are; this is the painfully slow path,
1108 	 * and we want to avoid the complication of dropping the lock whilst
1109 	 * having buffers reserved in the aperture and so causing spurious
1110 	 * ENOSPC for random operations.
1111 	 */
1112 
1113 err:
1114 	drm_free_large(reloc);
1115 	drm_free_large(reloc_offset);
1116 	return ret;
1117 }
1118 
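/*
 * Prepare all objects for execution: flush CPU-domain writes with
 * clflush, queue asynchronous waits on each object's outstanding
 * rendering, then flush the chipset caches and invalidate the GPU
 * caches/TLBs for the new batch.
 */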
1119 static int
1120 i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
1121 				struct list_head *vmas)
1122 {
1123 	struct i915_vma *vma;
1124 	int ret;
1125 
1126 	list_for_each_entry(vma, vmas, exec_list) {
1127 		struct drm_i915_gem_object *obj = vma->obj;
1128 
1129 		if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
1130 			continue;
1131 
1132 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) {
1133 			i915_gem_clflush_object(obj, 0);
1134 			obj->base.write_domain = 0;
1135 		}
1136 
1137 		ret = i915_gem_request_await_object
1138 			(req, obj, obj->base.pending_write_domain);
1139 		if (ret)
1140 			return ret;
1141 	}
1142 
1143 	/* Unconditionally flush any chipset caches (for streaming writes). */
1144 	i915_gem_chipset_flush(req->engine->i915);
1145 
1146 	/* Unconditionally invalidate GPU caches and TLBs. */
1147 	return req->engine->emit_flush(req, EMIT_INVALIDATE);
1148 }
1149 
1150 static bool
1151 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
1152 {
1153 	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
1154 		return false;
1155 
1156 	/* Kernel clipping was a DRI1 misfeature */
1157 	if (exec->num_cliprects || exec->cliprects_ptr)
1158 		return false;
1159 
1160 	if (exec->DR4 == 0xffffffff) {
1161 		DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
1162 		exec->DR4 = 0;
1163 	}
1164 	if (exec->DR1 || exec->DR4)
1165 		return false;
1166 
1167 	if ((exec->batch_start_offset | exec->batch_len) & 0x7)
1168 		return false;
1169 
1170 	return true;
1171 }
1172 
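/*
 * Sanity-check the user-supplied object list: reject unknown flags,
 * unaligned or non-canonical pinned offsets, non-power-of-two
 * alignments and relocation counts that would overflow the kernel
 * copy, then prefault the relocation arrays so the fast path is
 * unlikely to hit -EFAULT.
 */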
1173 static int
1174 validate_exec_list(struct drm_device *dev,
1175 		   struct drm_i915_gem_exec_object2 *exec,
1176 		   int count)
1177 {
1178 	unsigned relocs_total = 0;
1179 	unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
1180 	unsigned invalid_flags;
1181 	int i;
1182 
1183 	/* INTERNAL flags must not overlap with external ones */
1184 	BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS);
1185 
1186 	invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
1187 	if (USES_FULL_PPGTT(dev))
1188 		invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
1189 
1190 	for (i = 0; i < count; i++) {
1191 		char __user *ptr = u64_to_user_ptr(exec[i].relocs_ptr);
1192 		int length; /* limited by fault_in_pages_readable() */
1193 
1194 		if (exec[i].flags & invalid_flags)
1195 			return -EINVAL;
1196 
1197 		/* Offset can be used as input (EXEC_OBJECT_PINNED), reject
1198 		 * any non-page-aligned or non-canonical addresses.
1199 		 */
1200 		if (exec[i].flags & EXEC_OBJECT_PINNED) {
1201 			if (exec[i].offset !=
1202 			    gen8_canonical_addr(exec[i].offset & LINUX_PAGE_MASK))
1203 				return -EINVAL;
1204 		}
1205 
1206 		/* From the drm_mm perspective the address space is continuous,
1207 		 * so from this point on we always use the non-canonical
1208 		 * form internally.
1209 		 */
1210 		exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
1211 
1212 		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
1213 			return -EINVAL;
1214 
1215 		/* pad_to_size was once a reserved field, so sanitize it */
1216 		if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
1217 			if (offset_in_page(exec[i].pad_to_size))
1218 				return -EINVAL;
1219 		} else {
1220 			exec[i].pad_to_size = 0;
1221 		}
1222 
1223 		/* First check for malicious input causing overflow in
1224 		 * the worst case where we need to allocate the entire
1225 		 * relocation tree as a single array.
1226 		 */
1227 		if (exec[i].relocation_count > relocs_max - relocs_total)
1228 			return -EINVAL;
1229 		relocs_total += exec[i].relocation_count;
1230 
1231 		length = exec[i].relocation_count *
1232 			sizeof(struct drm_i915_gem_relocation_entry);
1233 		/*
1234 		 * We must check that the entire relocation array is safe
1235 		 * to read, but since we may need to update the presumed
1236 		 * offsets during execution, check for full write access.
1237 		 */
1238 #if 0
1239 		if (!access_ok(VERIFY_WRITE, ptr, length))
1240 			return -EFAULT;
1241 #endif
1242 
1243 		if (likely(!i915.prefault_disable)) {
1244 			if (fault_in_pages_readable(ptr, length))
1245 				return -EFAULT;
1246 		}
1247 	}
1248 
1249 	return 0;
1250 }
1251 
1252 static struct i915_gem_context *
1253 i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
1254 			  struct intel_engine_cs *engine, const u32 ctx_id)
1255 {
1256 	struct i915_gem_context *ctx;
1257 
1258 	ctx = i915_gem_context_lookup(file->driver_priv, ctx_id);
1259 	if (IS_ERR(ctx))
1260 		return ctx;
1261 
1262 	if (i915_gem_context_is_banned(ctx)) {
1263 		DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
1264 		return ERR_PTR(-EIO);
1265 	}
1266 
1267 	return ctx;
1268 }
1269 
1270 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
1271 {
1272 	return !(obj->cache_level == I915_CACHE_NONE ||
1273 		 obj->cache_level == I915_CACHE_WT);
1274 }
1275 
1276 void i915_vma_move_to_active(struct i915_vma *vma,
1277 			     struct drm_i915_gem_request *req,
1278 			     unsigned int flags)
1279 {
1280 	struct drm_i915_gem_object *obj = vma->obj;
1281 	const unsigned int idx = req->engine->id;
1282 
1283 	lockdep_assert_held(&req->i915->drm.struct_mutex);
1284 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
1285 
1286 	/* Add a reference if we're newly entering the active list.
1287 	 * The order in which we add operations to the retirement queue is
1288 	 * vital here: mark_active adds to the start of the callback list,
1289 	 * such that subsequent callbacks are called first. Therefore we
1290 	 * add the active reference first and queue for it to be dropped
1291 	 * *last*.
1292 	 */
1293 	if (!i915_vma_is_active(vma))
1294 		obj->active_count++;
1295 	i915_vma_set_active(vma, idx);
1296 	i915_gem_active_set(&vma->last_read[idx], req);
1297 	list_move_tail(&vma->vm_link, &vma->vm->active_list);
1298 
1299 	if (flags & EXEC_OBJECT_WRITE) {
1300 		if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
1301 			i915_gem_active_set(&obj->frontbuffer_write, req);
1302 
1303 		/* update for the implicit flush after a batch */
1304 		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1305 		if (!obj->cache_dirty && gpu_write_needs_clflush(obj))
1306 			obj->cache_dirty = true;
1307 	}
1308 
1309 	if (flags & EXEC_OBJECT_NEEDS_FENCE)
1310 		i915_gem_active_set(&vma->last_fence, req);
1311 }
1312 
1313 static void eb_export_fence(struct drm_i915_gem_object *obj,
1314 			    struct drm_i915_gem_request *req,
1315 			    unsigned int flags)
1316 {
1317 	struct reservation_object *resv = obj->resv;
1318 
1319 	/* Ignore errors from failing to allocate the new fence, we can't
1320 	 * handle an error right now. Worst case should be missed
1321 	 * synchronisation leading to rendering corruption.
1322 	 */
1323 	reservation_object_lock(resv, NULL);
1324 	if (flags & EXEC_OBJECT_WRITE)
1325 		reservation_object_add_excl_fence(resv, &req->fence);
1326 	else if (reservation_object_reserve_shared(resv) == 0)
1327 		reservation_object_add_shared_fence(resv, &req->fence);
1328 	reservation_object_unlock(resv);
1329 }
1330 
1331 static void
1332 i915_gem_execbuffer_move_to_active(struct list_head *vmas,
1333 				   struct drm_i915_gem_request *req)
1334 {
1335 	struct i915_vma *vma;
1336 
1337 	list_for_each_entry(vma, vmas, exec_list) {
1338 		struct drm_i915_gem_object *obj = vma->obj;
1339 
1340 		obj->base.write_domain = obj->base.pending_write_domain;
1341 		if (obj->base.write_domain)
1342 			vma->exec_entry->flags |= EXEC_OBJECT_WRITE;
1343 		else
1344 			obj->base.pending_read_domains |= obj->base.read_domains;
1345 		obj->base.read_domains = obj->base.pending_read_domains;
1346 
1347 		i915_vma_move_to_active(vma, req, vma->exec_entry->flags);
1348 		eb_export_fence(obj, req, vma->exec_entry->flags);
1349 	}
1350 }
1351 
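/*
 * Handle I915_EXEC_GEN7_SOL_RESET: emit MI_LOAD_REGISTER_IMM commands
 * that zero the four GEN7_SO_WRITE_OFFSET registers (the streamout
 * write offsets) before the batch runs. Only valid on the gen7 render
 * ring.
 */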
1352 static int
1353 i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
1354 {
1355 	u32 *cs;
1356 	int i;
1357 
1358 	if (!IS_GEN7(req->i915) || req->engine->id != RCS) {
1359 		DRM_DEBUG("sol reset is gen7/rcs only\n");
1360 		return -EINVAL;
1361 	}
1362 
1363 	cs = intel_ring_begin(req, 4 * 3);
1364 	if (IS_ERR(cs))
1365 		return PTR_ERR(cs);
1366 
1367 	for (i = 0; i < 4; i++) {
1368 		*cs++ = MI_LOAD_REGISTER_IMM(1);
1369 		*cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
1370 		*cs++ = 0;
1371 	}
1372 
1373 	intel_ring_advance(req, cs);
1374 
1375 	return 0;
1376 }
1377 
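/*
 * Run the command parser over the user batch, copying the validated
 * contents into a shadow batch object taken from the engine's batch
 * pool. On success the shadow is pinned into the GGTT and added to the
 * eb list so it is tracked like any other execbuf object; a NULL return
 * means an unhandled chained batch, in which case the original batch is
 * executed instead.
 */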
1378 static struct i915_vma *
1379 i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
1380 			  struct drm_i915_gem_exec_object2 *shadow_exec_entry,
1381 			  struct drm_i915_gem_object *batch_obj,
1382 			  struct eb_vmas *eb,
1383 			  u32 batch_start_offset,
1384 			  u32 batch_len,
1385 			  bool is_master)
1386 {
1387 	struct drm_i915_gem_object *shadow_batch_obj;
1388 	struct i915_vma *vma;
1389 	int ret;
1390 
1391 	shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool,
1392 						   PAGE_ALIGN(batch_len));
1393 	if (IS_ERR(shadow_batch_obj))
1394 		return ERR_CAST(shadow_batch_obj);
1395 
1396 	ret = intel_engine_cmd_parser(engine,
1397 				      batch_obj,
1398 				      shadow_batch_obj,
1399 				      batch_start_offset,
1400 				      batch_len,
1401 				      is_master);
1402 	if (ret) {
1403 		if (ret == -EACCES) /* unhandled chained batch */
1404 			vma = NULL;
1405 		else
1406 			vma = ERR_PTR(ret);
1407 		goto out;
1408 	}
1409 
1410 	vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
1411 	if (IS_ERR(vma))
1412 		goto out;
1413 
1414 	memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
1415 
1416 	vma->exec_entry = shadow_exec_entry;
1417 	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
1418 	i915_gem_object_get(shadow_batch_obj);
1419 	list_add_tail(&vma->exec_list, &eb->vmas);
1420 
1421 out:
1422 	i915_gem_object_unpin_pages(shadow_batch_obj);
1423 	return vma;
1424 }
1425 
1426 static void
1427 add_to_client(struct drm_i915_gem_request *req,
1428 	      struct drm_file *file)
1429 {
1430 	req->file_priv = file->driver_priv;
1431 	list_add_tail(&req->client_link, &req->file_priv->mm.request_list);
1432 }
1433 
1434 static int
1435 execbuf_submit(struct i915_execbuffer_params *params,
1436 	       struct drm_i915_gem_execbuffer2 *args,
1437 	       struct list_head *vmas)
1438 {
1439 	u64 exec_start, exec_len;
1440 	int ret;
1441 
1442 	ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
1443 	if (ret)
1444 		return ret;
1445 
1446 	ret = i915_switch_context(params->request);
1447 	if (ret)
1448 		return ret;
1449 
1450 	if (args->flags & I915_EXEC_CONSTANTS_MASK) {
1451 		DRM_DEBUG("I915_EXEC_CONSTANTS_* unsupported\n");
1452 		return -EINVAL;
1453 	}
1454 
1455 	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1456 		ret = i915_reset_gen7_sol_offsets(params->request);
1457 		if (ret)
1458 			return ret;
1459 	}
1460 
1461 	exec_len   = args->batch_len;
1462 	exec_start = params->batch->node.start +
1463 		     params->args_batch_start_offset;
1464 
1465 	if (exec_len == 0)
1466 		exec_len = params->batch->size - params->args_batch_start_offset;
1467 
1468 	ret = params->engine->emit_bb_start(params->request,
1469 					    exec_start, exec_len,
1470 					    params->dispatch_flags);
1471 	if (ret)
1472 		return ret;
1473 
1474 	i915_gem_execbuffer_move_to_active(vmas, params->request);
1475 
1476 	return 0;
1477 }
1478 
1479 /**
1480  * Find one BSD ring to dispatch the corresponding BSD command.
1481  * The engine index is returned.
1482  */
1483 static unsigned int
1484 gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
1485 			 struct drm_file *file)
1486 {
1487 	struct drm_i915_file_private *file_priv = file->driver_priv;
1488 
1489 	/* Check whether the file_priv has already selected one ring. */
1490 	if ((int)file_priv->bsd_engine < 0)
1491 		file_priv->bsd_engine = atomic_fetch_xor(1,
1492 			 &dev_priv->mm.bsd_engine_dispatch_index);
1493 
1494 	return file_priv->bsd_engine;
1495 }
1496 
1497 #define I915_USER_RINGS (4)
1498 
1499 static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
1500 	[I915_EXEC_DEFAULT]	= RCS,
1501 	[I915_EXEC_RENDER]	= RCS,
1502 	[I915_EXEC_BLT]		= BCS,
1503 	[I915_EXEC_BSD]		= VCS,
1504 	[I915_EXEC_VEBOX]	= VECS
1505 };
1506 
1507 static struct intel_engine_cs *
1508 eb_select_engine(struct drm_i915_private *dev_priv,
1509 		 struct drm_file *file,
1510 		 struct drm_i915_gem_execbuffer2 *args)
1511 {
1512 	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
1513 	struct intel_engine_cs *engine;
1514 
1515 	if (user_ring_id > I915_USER_RINGS) {
1516 		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
1517 		return NULL;
1518 	}
1519 
1520 	if ((user_ring_id != I915_EXEC_BSD) &&
1521 	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
1522 		DRM_DEBUG("execbuf with non bsd ring but with invalid "
1523 			  "bsd dispatch flags: %d\n", (int)(args->flags));
1524 		return NULL;
1525 	}
1526 
1527 	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
1528 		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
1529 
1530 		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
1531 			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
1532 		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
1533 			   bsd_idx <= I915_EXEC_BSD_RING2) {
1534 			bsd_idx >>= I915_EXEC_BSD_SHIFT;
1535 			bsd_idx--;
1536 		} else {
1537 			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
1538 				  bsd_idx);
1539 			return NULL;
1540 		}
1541 
1542 		engine = dev_priv->engine[_VCS(bsd_idx)];
1543 	} else {
1544 		engine = dev_priv->engine[user_ring_map[user_ring_id]];
1545 	}
1546 
1547 	if (!engine) {
1548 		DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
1549 		return NULL;
1550 	}
1551 
1552 	return engine;
1553 }
1554 
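/*
 * Main execbuffer path shared by both ioctls: validate the arguments,
 * select the engine and context, look up and reserve every object,
 * apply relocations (falling back to the slow path on -EFAULT),
 * optionally run the command parser, then allocate a request, emit the
 * batch and move the objects onto the active lists, attaching explicit
 * (sync_file) and implicit (reservation object) fences along the way.
 */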
1555 static int
1556 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1557 		       struct drm_file *file,
1558 		       struct drm_i915_gem_execbuffer2 *args,
1559 		       struct drm_i915_gem_exec_object2 *exec)
1560 {
1561 	struct drm_i915_private *dev_priv = to_i915(dev);
1562 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
1563 	struct eb_vmas *eb;
1564 	struct drm_i915_gem_exec_object2 shadow_exec_entry;
1565 	struct intel_engine_cs *engine;
1566 	struct i915_gem_context *ctx;
1567 	struct i915_address_space *vm;
1568 	struct i915_execbuffer_params params_master; /* XXX: will be removed later */
1569 	struct i915_execbuffer_params *params = &params_master;
1570 	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
1571 	u32 dispatch_flags;
1572 	struct dma_fence *in_fence = NULL;
1573 	struct sync_file *out_fence = NULL;
1574 	int out_fence_fd = -1;
1575 	int ret;
1576 	bool need_relocs;
1577 
1578 	if (!i915_gem_check_execbuffer(args))
1579 		return -EINVAL;
1580 
1581 	ret = validate_exec_list(dev, exec, args->buffer_count);
1582 	if (ret)
1583 		return ret;
1584 
1585 	dispatch_flags = 0;
1586 	if (args->flags & I915_EXEC_SECURE) {
1587 #if 0
1588 		if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
1589 		    return -EPERM;
1590 #endif
1591 
1592 		dispatch_flags |= I915_DISPATCH_SECURE;
1593 	}
1594 	if (args->flags & I915_EXEC_IS_PINNED)
1595 		dispatch_flags |= I915_DISPATCH_PINNED;
1596 
1597 	engine = eb_select_engine(dev_priv, file, args);
1598 	if (!engine)
1599 		return -EINVAL;
1600 
1601 	if (args->buffer_count < 1) {
1602 		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1603 		return -EINVAL;
1604 	}
1605 
1606 	if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
1607 		if (!HAS_RESOURCE_STREAMER(dev_priv)) {
1608 			DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n");
1609 			return -EINVAL;
1610 		}
1611 		if (engine->id != RCS) {
1612 			DRM_DEBUG("RS is not available on %s\n",
1613 				 engine->name);
1614 			return -EINVAL;
1615 		}
1616 
1617 		dispatch_flags |= I915_DISPATCH_RS;
1618 	}
1619 
1620 	if (args->flags & I915_EXEC_FENCE_IN) {
1621 		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
1622 		if (!in_fence)
1623 			return -EINVAL;
1624 	}
1625 
1626 	if (args->flags & I915_EXEC_FENCE_OUT) {
1627 		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
1628 		if (out_fence_fd < 0) {
1629 			ret = out_fence_fd;
1630 			goto err_in_fence;
1631 		}
1632 	}
1633 
1634 	/* Take a local wakeref for preparing to dispatch the execbuf as
1635 	 * we expect to access the hardware fairly frequently in the
1636 	 * process. Upon first dispatch, we acquire another prolonged
1637 	 * wakeref that we hold until the GPU has been idle for at least
1638 	 * 100ms.
1639 	 */
1640 	intel_runtime_pm_get(dev_priv);
1641 
1642 	ret = i915_mutex_lock_interruptible(dev);
1643 	if (ret)
1644 		goto pre_mutex_err;
1645 
1646 	ctx = i915_gem_validate_context(dev, file, engine, ctx_id);
1647 	if (IS_ERR(ctx)) {
1648 		mutex_unlock(&dev->struct_mutex);
1649 		ret = PTR_ERR(ctx);
1650 		goto pre_mutex_err;
1651 	}
1652 
1653 	i915_gem_context_get(ctx);
1654 
1655 	if (ctx->ppgtt)
1656 		vm = &ctx->ppgtt->base;
1657 	else
1658 		vm = &ggtt->base;
1659 
1660 	memset(&params_master, 0x00, sizeof(params_master));
1661 
1662 	eb = eb_create(dev_priv, args);
1663 	if (eb == NULL) {
1664 		i915_gem_context_put(ctx);
1665 		mutex_unlock(&dev->struct_mutex);
1666 		ret = -ENOMEM;
1667 		goto pre_mutex_err;
1668 	}
1669 
1670 	/* Look up object handles */
1671 	ret = eb_lookup_vmas(eb, exec, args, vm, file);
1672 	if (ret)
1673 		goto err;
1674 
1675 	/* take note of the batch buffer before we might reorder the lists */
1676 	params->batch = eb_get_batch(eb);
1677 
1678 	/* Move the objects en-masse into the GTT, evicting if necessary. */
1679 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
1680 	ret = i915_gem_execbuffer_reserve(engine, &eb->vmas, ctx,
1681 					  &need_relocs);
1682 	if (ret)
1683 		goto err;
1684 
1685 	/* The objects are in their final locations, apply the relocations. */
1686 	if (need_relocs)
1687 		ret = i915_gem_execbuffer_relocate(eb);
1688 	if (ret) {
1689 		if (ret == -EFAULT) {
1690 			ret = i915_gem_execbuffer_relocate_slow(dev, args, file,
1691 								engine,
1692 								eb, exec, ctx);
1693 			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1694 		}
1695 		if (ret)
1696 			goto err;
1697 	}
1698 
1699 	/* Set the pending read domains for the batch buffer to COMMAND */
1700 	if (params->batch->obj->base.pending_write_domain) {
1701 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1702 		ret = -EINVAL;
1703 		goto err;
1704 	}
1705 	if (args->batch_start_offset > params->batch->size ||
1706 	    args->batch_len > params->batch->size - args->batch_start_offset) {
1707 		DRM_DEBUG("Attempting to use out-of-bounds batch\n");
1708 		ret = -EINVAL;
1709 		goto err;
1710 	}
1711 
1712 	params->args_batch_start_offset = args->batch_start_offset;
1713 	if (engine->needs_cmd_parser && args->batch_len) {
1714 		struct i915_vma *vma;
1715 
1716 		vma = i915_gem_execbuffer_parse(engine, &shadow_exec_entry,
1717 						params->batch->obj,
1718 						eb,
1719 						args->batch_start_offset,
1720 						args->batch_len,
1721 						drm_is_current_master(file));
1722 		if (IS_ERR(vma)) {
1723 			ret = PTR_ERR(vma);
1724 			goto err;
1725 		}
1726 
1727 		if (vma) {
1728 			/*
1729 			 * Batch parsed and accepted:
1730 			 *
1731 			 * Set the DISPATCH_SECURE bit to remove the NON_SECURE
1732 			 * bit from MI_BATCH_BUFFER_START commands issued in
1733 			 * the dispatch_execbuffer implementations. We
1734 			 * specifically don't want that set on batches the
1735 			 * command parser has accepted.
1736 			 */
1737 			dispatch_flags |= I915_DISPATCH_SECURE;
1738 			params->args_batch_start_offset = 0;
1739 			params->batch = vma;
1740 		}
1741 	}
1742 
1743 	params->batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1744 
1745 	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1746 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
1747 	 * hsw should have this fixed, but bdw mucks it up again. */
1748 	if (dispatch_flags & I915_DISPATCH_SECURE) {
1749 		struct drm_i915_gem_object *obj = params->batch->obj;
1750 		struct i915_vma *vma;
1751 
1752 		/*
1753 		 * So on first glance it looks freaky that we pin the batch here
1754 		 * outside of the reservation loop. But:
1755 		 * - The batch is already pinned into the relevant ppgtt, so we
1756 		 *   already have the backing storage fully allocated.
1757 		 * - No other BO uses the global gtt (well contexts, but meh),
1758 		 *   so we don't really have issues with multiple objects not
1759 		 *   fitting due to fragmentation.
1760 		 * So this is actually safe.
1761 		 */
1762 		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
1763 		if (IS_ERR(vma)) {
1764 			ret = PTR_ERR(vma);
1765 			goto err;
1766 		}
1767 
1768 		params->batch = vma;
1769 	}
1770 
1771 	/* Allocate a request for this batch buffer nice and early. */
1772 	params->request = i915_gem_request_alloc(engine, ctx);
1773 	if (IS_ERR(params->request)) {
1774 		ret = PTR_ERR(params->request);
1775 		goto err_batch_unpin;
1776 	}
1777 
1778 	if (in_fence) {
1779 		ret = i915_gem_request_await_dma_fence(params->request,
1780 						       in_fence);
1781 		if (ret < 0)
1782 			goto err_request;
1783 	}
1784 
1785 	if (out_fence_fd != -1) {
1786 		out_fence = sync_file_create(&params->request->fence);
1787 		if (!out_fence) {
1788 			ret = -ENOMEM;
1789 			goto err_request;
1790 		}
1791 	}
1792 
1793 	/* Whilst this request exists, batch_obj will be on the
1794 	 * active_list, and so will hold the active reference. Only when this
1795 	 * request is retired will the batch_obj be moved onto the
1796 	 * inactive_list and lose its active reference. Hence we do not need
1797 	 * to explicitly hold another reference here.
1798 	 */
1799 	params->request->batch = params->batch;
1800 
1801 	/*
1802 	 * Save assorted stuff away to pass through to *_submission().
1803 	 * NB: This data should be 'persistent' and not local as it will be
1804 	 * kept around beyond the duration of the IOCTL once the GPU
1805 	 * scheduler arrives.
1806 	 */
1807 	params->dev                     = dev;
1808 	params->file                    = file;
1809 	params->engine                  = engine;
1810 	params->dispatch_flags          = dispatch_flags;
1811 	params->ctx                     = ctx;
1812 
1813 	trace_i915_gem_request_queue(params->request, dispatch_flags);
1814 
1815 	ret = execbuf_submit(params, args, &eb->vmas);
1816 err_request:
1817 	__i915_add_request(params->request, ret == 0);
1818 	add_to_client(params->request, file);
1819 
1820 	if (out_fence) {
1821 		if (ret == 0) {
1822 			fd_install(out_fence_fd, out_fence->file);
1823 			args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
1824 			args->rsvd2 |= (u64)out_fence_fd << 32;
1825 			out_fence_fd = -1;
1826 		} else {
1827 			fput(out_fence->file);
1828 		}
1829 	}
1830 
1831 err_batch_unpin:
1832 	/*
1833 	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
1834 	 * batch vma for correctness. To be less ugly and less fragile, this
1835 	 * needs to be adjusted to also track the ggtt batch vma properly as
1836 	 * active.
1837 	 */
1838 	if (dispatch_flags & I915_DISPATCH_SECURE)
1839 		i915_vma_unpin(params->batch);
1840 err:
1841 	/* the request owns the ref now */
1842 	i915_gem_context_put(ctx);
1843 	eb_destroy(eb);
1844 
1845 	mutex_unlock(&dev->struct_mutex);
1846 
1847 pre_mutex_err:
1848 	/* intel_gpu_busy should also get a ref, so it will free when the device
1849 	 * is really idle. */
1850 	intel_runtime_pm_put(dev_priv);
1851 	if (out_fence_fd != -1)
1852 		put_unused_fd(out_fence_fd);
1853 err_in_fence:
1854 	dma_fence_put(in_fence);
1855 	return ret;
1856 }
1857 
1858 /*
1859  * Legacy execbuffer just creates an exec2 list from the original exec object
1860  * list array and passes it to the real function.
1861  */
1862 int
1863 i915_gem_execbuffer(struct drm_device *dev, void *data,
1864 		    struct drm_file *file)
1865 {
1866 	struct drm_i915_gem_execbuffer *args = data;
1867 	struct drm_i915_gem_execbuffer2 exec2;
1868 	struct drm_i915_gem_exec_object *exec_list = NULL;
1869 	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1870 	int ret, i;
1871 
1872 	if (args->buffer_count < 1) {
1873 		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1874 		return -EINVAL;
1875 	}
1876 
1877 	/* Copy in the exec list from userland */
1878 	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1879 	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1880 	if (exec_list == NULL || exec2_list == NULL) {
1881 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1882 			  args->buffer_count);
1883 		drm_free_large(exec_list);
1884 		drm_free_large(exec2_list);
1885 		return -ENOMEM;
1886 	}
1887 	ret = copy_from_user(exec_list,
1888 			     u64_to_user_ptr(args->buffers_ptr),
1889 			     sizeof(*exec_list) * args->buffer_count);
1890 	if (ret != 0) {
1891 		DRM_DEBUG("copy %d exec entries failed %d\n",
1892 			  args->buffer_count, ret);
1893 		drm_free_large(exec_list);
1894 		drm_free_large(exec2_list);
1895 		return -EFAULT;
1896 	}
1897 
1898 	for (i = 0; i < args->buffer_count; i++) {
1899 		exec2_list[i].handle = exec_list[i].handle;
1900 		exec2_list[i].relocation_count = exec_list[i].relocation_count;
1901 		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1902 		exec2_list[i].alignment = exec_list[i].alignment;
1903 		exec2_list[i].offset = exec_list[i].offset;
1904 		if (INTEL_GEN(to_i915(dev)) < 4)
1905 			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1906 		else
1907 			exec2_list[i].flags = 0;
1908 	}
1909 
1910 	exec2.buffers_ptr = args->buffers_ptr;
1911 	exec2.buffer_count = args->buffer_count;
1912 	exec2.batch_start_offset = args->batch_start_offset;
1913 	exec2.batch_len = args->batch_len;
1914 	exec2.DR1 = args->DR1;
1915 	exec2.DR4 = args->DR4;
1916 	exec2.num_cliprects = args->num_cliprects;
1917 	exec2.cliprects_ptr = args->cliprects_ptr;
1918 	exec2.flags = I915_EXEC_RENDER;
1919 	i915_execbuffer2_set_context_id(exec2, 0);
1920 
1921 	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1922 	if (!ret) {
1923 		struct drm_i915_gem_exec_object __user *user_exec_list =
1924 			u64_to_user_ptr(args->buffers_ptr);
1925 
1926 		/* Copy the new buffer offsets back to the user's exec list. */
1927 		for (i = 0; i < args->buffer_count; i++) {
1928 			exec2_list[i].offset =
1929 				gen8_canonical_addr(exec2_list[i].offset);
1930 			ret = __copy_to_user(&user_exec_list[i].offset,
1931 					     &exec2_list[i].offset,
1932 					     sizeof(user_exec_list[i].offset));
1933 			if (ret) {
1934 				ret = -EFAULT;
1935 				DRM_DEBUG("failed to copy %d exec entries "
1936 					  "back to user (%d)\n",
1937 					  args->buffer_count, ret);
1938 				break;
1939 			}
1940 		}
1941 	}
1942 
1943 	drm_free_large(exec_list);
1944 	drm_free_large(exec2_list);
1945 	return ret;
1946 }
1947 
1948 int
1949 i915_gem_execbuffer2(struct drm_device *dev, void *data,
1950 		     struct drm_file *file)
1951 {
1952 	struct drm_i915_gem_execbuffer2 *args = data;
1953 	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1954 	int ret;
1955 
1956 	if (args->buffer_count < 1 ||
1957 	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1958 		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1959 		return -EINVAL;
1960 	}
1961 
1962 	exec2_list = drm_malloc_gfp(args->buffer_count,
1963 				    sizeof(*exec2_list),
1964 				    GFP_TEMPORARY);
1965 	if (exec2_list == NULL) {
1966 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1967 			  args->buffer_count);
1968 		return -ENOMEM;
1969 	}
1970 	ret = copy_from_user(exec2_list,
1971 			     u64_to_user_ptr(args->buffers_ptr),
1972 			     sizeof(*exec2_list) * args->buffer_count);
1973 	if (ret != 0) {
1974 		DRM_DEBUG("copy %d exec entries failed %d\n",
1975 			  args->buffer_count, ret);
1976 		drm_free_large(exec2_list);
1977 		return -EFAULT;
1978 	}
1979 
1980 	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1981 	if (!ret) {
1982 		/* Copy the new buffer offsets back to the user's exec list. */
1983 		struct drm_i915_gem_exec_object2 __user *user_exec_list =
1984 				   u64_to_user_ptr(args->buffers_ptr);
1985 		int i;
1986 
1987 		for (i = 0; i < args->buffer_count; i++) {
1988 			exec2_list[i].offset =
1989 				gen8_canonical_addr(exec2_list[i].offset);
1990 			ret = __copy_to_user(&user_exec_list[i].offset,
1991 					     &exec2_list[i].offset,
1992 					     sizeof(user_exec_list[i].offset));
1993 			if (ret) {
1994 				ret = -EFAULT;
1995 				DRM_DEBUG("failed to copy %d exec entries "
1996 					  "back to user\n",
1997 					  args->buffer_count);
1998 				break;
1999 			}
2000 		}
2001 	}
2002 
2003 	drm_free_large(exec2_list);
2004 	return ret;
2005 }
2006