1 /*
2  * Copyright © 2008,2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Chris Wilson <chris@chris-wilson.co.uk>
26  *
27  */
28 
29 #include <linux/dma_remapping.h>
30 #include <linux/reservation.h>
31 #include <linux/uaccess.h>
32 
33 #include <drm/drmP.h>
34 #include <drm/i915_drm.h>
35 
36 #include "i915_drv.h"
37 #include "i915_gem_dmabuf.h"
38 #include "i915_trace.h"
39 #include "intel_drv.h"
40 #include "intel_frontbuffer.h"
41 
42 #define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */
43 
44 #define  __EXEC_OBJECT_HAS_PIN		(1<<31)
45 #define  __EXEC_OBJECT_HAS_FENCE	(1<<30)
46 #define  __EXEC_OBJECT_NEEDS_MAP	(1<<29)
47 #define  __EXEC_OBJECT_NEEDS_BIAS	(1<<28)
48 #define  __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28) /* all of the above */
49 
50 #define BATCH_OFFSET_BIAS (256*1024)
51 
52 struct i915_execbuffer_params {
53 	struct drm_device               *dev;
54 	struct drm_file                 *file;
55 	struct i915_vma			*batch;
56 	u32				dispatch_flags;
57 	u32				args_batch_start_offset;
58 	struct intel_engine_cs          *engine;
59 	struct i915_gem_context         *ctx;
60 	struct drm_i915_gem_request     *request;
61 };
62 
63 struct eb_vmas {
64 	struct drm_i915_private *i915;
65 	struct list_head vmas;
66 	int and;
67 	union {
68 		struct i915_vma *lut[0];
69 		struct hlist_head buckets[0];
70 	};
71 };
72 
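/*
 * Allocate the execbuffer handle->vma lookup structure. If userspace
 * set I915_EXEC_HANDLE_LUT it promises to use sequential indices, so a
 * flat array of buffer_count pointers suffices; otherwise fall back to
 * a small hash table of at most half a page of buckets.
 */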
73 static struct eb_vmas *
74 eb_create(struct drm_i915_private *i915,
75 	  struct drm_i915_gem_execbuffer2 *args)
76 {
77 	struct eb_vmas *eb = NULL;
78 
79 	if (args->flags & I915_EXEC_HANDLE_LUT) {
80 		unsigned size = args->buffer_count;
81 		size *= sizeof(struct i915_vma *);
82 		size += sizeof(struct eb_vmas);
83 		eb = kmalloc(size, M_DRM, GFP_TEMPORARY);
84 	}
85 
86 	if (eb == NULL) {
87 		unsigned size = args->buffer_count;
88 		unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
89 		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
90 		while (count > 2*size)
91 			count >>= 1;
92 		eb = kzalloc(count*sizeof(struct hlist_head) +
93 			     sizeof(struct eb_vmas),
94 			     GFP_TEMPORARY);
95 		if (eb == NULL)
96 			return eb;
97 
98 		eb->and = count - 1;
99 	} else
100 		eb->and = -args->buffer_count;
101 
102 	eb->i915 = i915;
103 	INIT_LIST_HEAD(&eb->vmas);
104 	return eb;
105 }
106 
107 static void
108 eb_reset(struct eb_vmas *eb)
109 {
110 	if (eb->and >= 0)
111 		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
112 }
113 
114 static struct i915_vma *
115 eb_get_batch(struct eb_vmas *eb)
116 {
117 	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
118 
119 	/*
120 	 * SNA is doing fancy tricks with compressing batch buffers, which leads
121 	 * to negative relocation deltas. Usually that works out ok since the
	 * relocated address is still positive, except when the batch is placed
123 	 * very low in the GTT. Ensure this doesn't happen.
124 	 *
125 	 * Note that actual hangs have only been observed on gen7, but for
126 	 * paranoia do it everywhere.
127 	 */
128 	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
129 		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
130 
131 	return vma;
132 }
133 
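/*
 * Resolve every handle in the exec list to a vma in the target address
 * space, taking a reference on each object and recording the vma in
 * either the flat LUT or the handle hash table for later lookups.
 */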
134 static int
135 eb_lookup_vmas(struct eb_vmas *eb,
136 	       struct drm_i915_gem_exec_object2 *exec,
137 	       const struct drm_i915_gem_execbuffer2 *args,
138 	       struct i915_address_space *vm,
139 	       struct drm_file *file)
140 {
141 	struct drm_i915_gem_object *obj;
142 	struct list_head objects;
143 	int i, ret;
144 
145 	INIT_LIST_HEAD(&objects);
146 	lockmgr(&file->table_lock, LK_EXCLUSIVE);
	/* Grab a reference to the object and release the lock so we can
	 * look up or create the VMA without using GFP_ATOMIC. */
149 	for (i = 0; i < args->buffer_count; i++) {
150 		obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
151 		if (obj == NULL) {
152 			lockmgr(&file->table_lock, LK_RELEASE);
153 			DRM_DEBUG("Invalid object handle %d at index %d\n",
154 				   exec[i].handle, i);
155 			ret = -ENOENT;
156 			goto err;
157 		}
158 
159 		if (!list_empty(&obj->obj_exec_link)) {
160 			lockmgr(&file->table_lock, LK_RELEASE);
161 			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
162 				   obj, exec[i].handle, i);
163 			ret = -EINVAL;
164 			goto err;
165 		}
166 
167 		i915_gem_object_get(obj);
168 		list_add_tail(&obj->obj_exec_link, &objects);
169 	}
170 	lockmgr(&file->table_lock, LK_RELEASE);
171 
172 	i = 0;
173 	while (!list_empty(&objects)) {
174 		struct i915_vma *vma;
175 
176 		obj = list_first_entry(&objects,
177 				       struct drm_i915_gem_object,
178 				       obj_exec_link);
179 
180 		/*
181 		 * NOTE: We can leak any vmas created here when something fails
182 		 * later on. But that's no issue since vma_unbind can deal with
183 		 * vmas which are not actually bound. And since only
184 		 * lookup_or_create exists as an interface to get at the vma
		 * from the (obj, vm) pair we don't run the risk of creating
		 * duplicate vmas for the same vm.
187 		 */
188 		vma = i915_gem_obj_lookup_or_create_vma(obj, vm, NULL);
189 		if (unlikely(IS_ERR(vma))) {
190 			DRM_DEBUG("Failed to lookup VMA\n");
191 			ret = PTR_ERR(vma);
192 			goto err;
193 		}
194 
195 		/* Transfer ownership from the objects list to the vmas list. */
196 		list_add_tail(&vma->exec_list, &eb->vmas);
197 		list_del_init(&obj->obj_exec_link);
198 
199 		vma->exec_entry = &exec[i];
200 		if (eb->and < 0) {
201 			eb->lut[i] = vma;
202 		} else {
203 			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
204 			vma->exec_handle = handle;
205 			hlist_add_head(&vma->exec_node,
206 				       &eb->buckets[handle & eb->and]);
207 		}
208 		++i;
209 	}
210 
211 	return 0;
212 
214 err:
215 	while (!list_empty(&objects)) {
216 		obj = list_first_entry(&objects,
217 				       struct drm_i915_gem_object,
218 				       obj_exec_link);
219 		list_del_init(&obj->obj_exec_link);
220 		i915_gem_object_put(obj);
221 	}
222 	/*
	 * Objects already transferred to the vmas list will be unreferenced by
224 	 * eb_destroy.
225 	 */
226 
227 	return ret;
228 }
229 
230 static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
231 {
232 	if (eb->and < 0) {
233 		if (handle >= -eb->and)
234 			return NULL;
235 		return eb->lut[handle];
236 	} else {
237 		struct hlist_head *head;
238 		struct i915_vma *vma;
239 
240 		head = &eb->buckets[handle & eb->and];
241 		hlist_for_each_entry(vma, head, exec_node) {
242 			if (vma->exec_handle == handle)
243 				return vma;
244 		}
245 		return NULL;
246 	}
247 }
248 
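/*
 * Drop the fence and pin references taken while reserving this vma for
 * execbuf, leaving the vma bound but no longer pinned.
 */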
249 static void
250 i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
251 {
252 	struct drm_i915_gem_exec_object2 *entry;
253 
254 	if (!drm_mm_node_allocated(&vma->node))
255 		return;
256 
257 	entry = vma->exec_entry;
258 
259 	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
260 		i915_vma_unpin_fence(vma);
261 
262 	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
263 		__i915_vma_unpin(vma);
264 
265 	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
266 }
267 
268 static void eb_destroy(struct eb_vmas *eb)
269 {
270 	while (!list_empty(&eb->vmas)) {
271 		struct i915_vma *vma;
272 
273 		vma = list_first_entry(&eb->vmas,
274 				       struct i915_vma,
275 				       exec_list);
276 		list_del_init(&vma->exec_list);
277 		i915_gem_execbuffer_unreserve_vma(vma);
278 		i915_vma_put(vma);
279 	}
280 	kfree(eb);
281 }
282 
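/*
 * Decide whether relocations should be written through a CPU kmap
 * rather than the GTT: only shmem-backed objects qualify, and the CPU
 * path is preferred on LLC platforms or when the object is already in
 * the CPU write domain or mapped cacheably.
 */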
283 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
284 {
285 	if (!i915_gem_object_has_struct_page(obj))
286 		return false;
287 
288 	if (DBG_USE_CPU_RELOC)
289 		return DBG_USE_CPU_RELOC > 0;
290 
291 	return (HAS_LLC(obj->base.dev) ||
292 		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
293 		obj->cache_level != I915_CACHE_NONE);
294 }
295 
296 /* Used to convert any address to canonical form.
297  * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
298  * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
299  * addresses to be in a canonical form:
300  * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
301  * canonical form [63:48] == [47]."
302  */
303 #define GEN8_HIGH_ADDRESS_BIT 47
304 static inline uint64_t gen8_canonical_addr(uint64_t address)
305 {
306 	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
307 }
308 
309 static inline uint64_t gen8_noncanonical_addr(uint64_t address)
310 {
311 	return address & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1);
312 }
313 
314 static inline uint64_t
315 relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
316 		  uint64_t target_offset)
317 {
318 	return gen8_canonical_addr((int)reloc->delta + target_offset);
319 }
320 
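/*
 * The relocation cache remembers the most recently mapped page of the
 * object being relocated (either a CPU kmap or a GTT iomap) so that
 * consecutive relocations within the same page avoid remapping.
 */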
321 struct reloc_cache {
322 	struct drm_i915_private *i915;
323 	struct drm_mm_node node;
324 	unsigned long vaddr;
325 	unsigned int page;
326 	bool use_64bit_reloc;
327 };
328 
329 static void reloc_cache_init(struct reloc_cache *cache,
330 			     struct drm_i915_private *i915)
331 {
332 	cache->page = -1;
333 	cache->vaddr = 0;
334 	cache->i915 = i915;
335 	cache->use_64bit_reloc = INTEL_GEN(cache->i915) >= 8;
336 	cache->node.allocated = false;
337 }
338 
339 static inline void *unmask_page(unsigned long p)
340 {
341 	return (void *)(uintptr_t)(p & LINUX_PAGE_MASK);
342 }
343 
344 static inline unsigned int unmask_flags(unsigned long p)
345 {
346 	return p & ~LINUX_PAGE_MASK;
347 }
348 
349 #define KMAP 0x4 /* after CLFLUSH_FLAGS */
350 
351 static void reloc_cache_fini(struct reloc_cache *cache)
352 {
353 	void *vaddr;
354 
355 	if (!cache->vaddr)
356 		return;
357 
358 	vaddr = unmask_page(cache->vaddr);
359 	if (cache->vaddr & KMAP) {
360 		if (cache->vaddr & CLFLUSH_AFTER)
361 			mb();
362 
363 		kunmap_atomic(vaddr);
364 		i915_gem_obj_finish_shmem_access((struct drm_i915_gem_object *)cache->node.mm);
365 	} else {
366 		wmb();
367 		io_mapping_unmap_atomic((void __iomem *)vaddr);
368 		if (cache->node.allocated) {
369 			struct i915_ggtt *ggtt = &cache->i915->ggtt;
370 
371 			ggtt->base.clear_range(&ggtt->base,
372 					       cache->node.start,
373 					       cache->node.size);
374 			drm_mm_remove_node(&cache->node);
375 		} else {
376 			i915_vma_unpin((struct i915_vma *)cache->node.mm);
377 		}
378 	}
379 }
380 
381 static void *reloc_kmap(struct drm_i915_gem_object *obj,
382 			struct reloc_cache *cache,
383 			int page)
384 {
385 	void *vaddr;
386 
387 	if (cache->vaddr) {
388 		kunmap_atomic(unmask_page(cache->vaddr));
389 	} else {
390 		unsigned int flushes;
391 		int ret;
392 
393 		ret = i915_gem_obj_prepare_shmem_write(obj, &flushes);
394 		if (ret)
395 			return ERR_PTR(ret);
396 
397 		BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
398 		BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & LINUX_PAGE_MASK);
399 
400 		cache->vaddr = flushes | KMAP;
401 		cache->node.mm = (void *)obj;
402 		if (flushes)
403 			mb();
404 	}
405 
406 	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
407 	cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
408 	cache->page = page;
409 
410 	return vaddr;
411 }
412 
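/*
 * Map a page of the object through the GTT aperture for relocation
 * writes, either via a pinned mappable vma or, if the aperture is
 * contended, by inserting single pages into a temporary GGTT node.
 * Returns NULL to ask the caller to fall back to a CPU kmap.
 */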
413 static void *reloc_iomap(struct drm_i915_gem_object *obj,
414 			 struct reloc_cache *cache,
415 			 int page)
416 {
417 	struct i915_ggtt *ggtt = &cache->i915->ggtt;
418 	unsigned long offset;
419 	void *vaddr;
420 
421 	if (cache->node.allocated) {
422 		wmb();
423 		ggtt->base.insert_page(&ggtt->base,
424 				       i915_gem_object_get_dma_address(obj, page),
425 				       cache->node.start, I915_CACHE_NONE, 0);
426 		cache->page = page;
427 		return unmask_page(cache->vaddr);
428 	}
429 
430 	if (cache->vaddr) {
431 		io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
432 	} else {
433 		struct i915_vma *vma;
434 		int ret;
435 
436 		if (use_cpu_reloc(obj))
437 			return NULL;
438 
439 		ret = i915_gem_object_set_to_gtt_domain(obj, true);
440 		if (ret)
441 			return ERR_PTR(ret);
442 
443 		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
444 					       PIN_MAPPABLE | PIN_NONBLOCK);
445 		if (IS_ERR(vma)) {
446 			memset(&cache->node, 0, sizeof(cache->node));
447 			ret = drm_mm_insert_node_in_range_generic
448 				(&ggtt->base.mm, &cache->node,
449 				 4096, 0, 0,
450 				 0, ggtt->mappable_end,
451 				 DRM_MM_SEARCH_DEFAULT,
452 				 DRM_MM_CREATE_DEFAULT);
453 			if (ret) /* no inactive aperture space, use cpu reloc */
454 				return NULL;
455 		} else {
456 			ret = i915_vma_put_fence(vma);
457 			if (ret) {
458 				i915_vma_unpin(vma);
459 				return ERR_PTR(ret);
460 			}
461 
462 			cache->node.start = vma->node.start;
463 			cache->node.mm = (void *)vma;
464 		}
465 	}
466 
467 	offset = cache->node.start;
468 	if (cache->node.allocated) {
469 		ggtt->base.insert_page(&ggtt->base,
470 				       i915_gem_object_get_dma_address(obj, page),
471 				       offset, I915_CACHE_NONE, 0);
472 	} else {
473 		offset += page << PAGE_SHIFT;
474 	}
475 
476 	vaddr = (void __force *) io_mapping_map_atomic_wc(&cache->i915->ggtt.mappable, offset);
477 	cache->page = page;
478 	cache->vaddr = (unsigned long)vaddr;
479 
480 	return vaddr;
481 }
482 
483 static void *reloc_vaddr(struct drm_i915_gem_object *obj,
484 			 struct reloc_cache *cache,
485 			 int page)
486 {
487 	void *vaddr;
488 
489 	if (cache->page == page) {
490 		vaddr = unmask_page(cache->vaddr);
491 	} else {
492 		vaddr = NULL;
493 		if ((cache->vaddr & KMAP) == 0)
494 			vaddr = reloc_iomap(obj, cache, page);
495 		if (!vaddr)
496 			vaddr = reloc_kmap(obj, cache, page);
497 	}
498 
499 	return vaddr;
500 }
501 
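/*
 * Write a relocation value through a CPU mapping, flushing the
 * cacheline before and/or after as needed for non-coherent objects.
 */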
502 static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
503 {
504 	if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
505 		if (flushes & CLFLUSH_BEFORE) {
506 			clflushopt(addr);
507 			mb();
508 		}
509 
510 		*addr = value;
511 
512 		/* Writes to the same cacheline are serialised by the CPU
513 		 * (including clflush). On the write path, we only require
514 		 * that it hits memory in an orderly fashion and place
515 		 * mb barriers at the start and end of the relocation phase
516 		 * to ensure ordering of clflush wrt to the system.
517 		 */
518 		if (flushes & CLFLUSH_AFTER)
519 			clflushopt(addr);
520 	} else
521 		*addr = value;
522 }
523 
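/*
 * Patch a single relocation into the target page: write the lower
 * 32 bits, then repeat for the upper 32 bits on gen8+ where GPU
 * addresses are 64 bits wide.
 */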
524 static int
525 relocate_entry(struct drm_i915_gem_object *obj,
526 	       const struct drm_i915_gem_relocation_entry *reloc,
527 	       struct reloc_cache *cache,
528 	       u64 target_offset)
529 {
530 	u64 offset = reloc->offset;
531 	bool wide = cache->use_64bit_reloc;
532 	void *vaddr;
533 
534 	target_offset = relocation_target(reloc, target_offset);
535 repeat:
536 	vaddr = reloc_vaddr(obj, cache, offset >> PAGE_SHIFT);
537 	if (IS_ERR(vaddr))
538 		return PTR_ERR(vaddr);
539 
540 	clflush_write32(vaddr + offset_in_page(offset),
541 			lower_32_bits(target_offset),
542 			cache->vaddr);
543 
544 	if (wide) {
545 		offset += sizeof(u32);
546 		target_offset >>= 32;
547 		wide = false;
548 		goto repeat;
549 	}
550 
551 	return 0;
552 }
553 
554 static int
555 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
556 				   struct eb_vmas *eb,
557 				   struct drm_i915_gem_relocation_entry *reloc,
558 				   struct reloc_cache *cache)
559 {
560 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
561 	struct drm_gem_object *target_obj;
562 	struct drm_i915_gem_object *target_i915_obj;
563 	struct i915_vma *target_vma;
564 	uint64_t target_offset;
565 	int ret;
566 
	/* We already hold a reference to all valid objects. */
568 	target_vma = eb_get_vma(eb, reloc->target_handle);
569 	if (unlikely(target_vma == NULL))
570 		return -ENOENT;
571 	target_i915_obj = target_vma->obj;
572 	target_obj = &target_vma->obj->base;
573 
574 	target_offset = gen8_canonical_addr(target_vma->node.start);
575 
576 	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
577 	 * pipe_control writes because the gpu doesn't properly redirect them
	 * through the ppgtt for non-secure batchbuffers. */
579 	if (unlikely(IS_GEN6(dev_priv) &&
580 	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) {
581 		ret = i915_vma_bind(target_vma, target_i915_obj->cache_level,
582 				    PIN_GLOBAL);
583 		if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
584 			return ret;
585 	}
586 
587 	/* Validate that the target is in a valid r/w GPU domain */
588 	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
589 		DRM_DEBUG("reloc with multiple write domains: "
590 			  "obj %p target %d offset %d "
591 			  "read %08x write %08x",
592 			  obj, reloc->target_handle,
593 			  (int) reloc->offset,
594 			  reloc->read_domains,
595 			  reloc->write_domain);
596 		return -EINVAL;
597 	}
598 	if (unlikely((reloc->write_domain | reloc->read_domains)
599 		     & ~I915_GEM_GPU_DOMAINS)) {
600 		DRM_DEBUG("reloc with read/write non-GPU domains: "
601 			  "obj %p target %d offset %d "
602 			  "read %08x write %08x",
603 			  obj, reloc->target_handle,
604 			  (int) reloc->offset,
605 			  reloc->read_domains,
606 			  reloc->write_domain);
607 		return -EINVAL;
608 	}
609 
610 	target_obj->pending_read_domains |= reloc->read_domains;
611 	target_obj->pending_write_domain |= reloc->write_domain;
612 
613 	/* If the relocation already has the right value in it, no
614 	 * more work needs to be done.
615 	 */
616 	if (target_offset == reloc->presumed_offset)
617 		return 0;
618 
619 	/* Check that the relocation address is valid... */
620 	if (unlikely(reloc->offset >
621 		     obj->base.size - (cache->use_64bit_reloc ? 8 : 4))) {
622 		DRM_DEBUG("Relocation beyond object bounds: "
623 			  "obj %p target %d offset %d size %d.\n",
624 			  obj, reloc->target_handle,
625 			  (int) reloc->offset,
626 			  (int) obj->base.size);
627 		return -EINVAL;
628 	}
629 	if (unlikely(reloc->offset & 3)) {
630 		DRM_DEBUG("Relocation not 4-byte aligned: "
631 			  "obj %p target %d offset %d.\n",
632 			  obj, reloc->target_handle,
633 			  (int) reloc->offset);
634 		return -EINVAL;
635 	}
636 
637 	ret = relocate_entry(obj, reloc, cache, target_offset);
638 	if (ret)
639 		return ret;
640 
641 	/* and update the user's relocation entry */
642 	reloc->presumed_offset = target_offset;
643 	return 0;
644 }
645 
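/*
 * Fast relocation path: copy the user's relocation entries in small
 * batches with pagefaults disabled (we hold struct_mutex), apply each
 * one, and write back any presumed offsets that changed.
 */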
646 static int
647 i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
648 				 struct eb_vmas *eb)
649 {
650 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
651 	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
652 	struct drm_i915_gem_relocation_entry __user *user_relocs;
653 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
654 	struct reloc_cache cache;
655 	int remain, ret = 0;
656 
657 	user_relocs = u64_to_user_ptr(entry->relocs_ptr);
658 	reloc_cache_init(&cache, eb->i915);
659 
660 	remain = entry->relocation_count;
661 	while (remain) {
662 		struct drm_i915_gem_relocation_entry *r = stack_reloc;
663 		unsigned long unwritten;
664 		unsigned int count;
665 
666 		count = min_t(unsigned int, remain, ARRAY_SIZE(stack_reloc));
667 		remain -= count;
668 
669 		/* This is the fast path and we cannot handle a pagefault
670 		 * whilst holding the struct mutex lest the user pass in the
		 * relocations contained within a mmaped bo. In such a case
		 * the page fault handler would call i915_gem_fault() and
		 * we would try to acquire the struct mutex again. Obviously
674 		 * this is bad and so lockdep complains vehemently.
675 		 */
676 		pagefault_disable();
677 		unwritten = __copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]));
678 		pagefault_enable();
679 		if (unlikely(unwritten)) {
680 			ret = -EFAULT;
681 			goto out;
682 		}
683 
684 		do {
685 			u64 offset = r->presumed_offset;
686 
687 			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache);
688 			if (ret)
689 				goto out;
690 
691 			if (r->presumed_offset != offset) {
692 				pagefault_disable();
693 				unwritten = __put_user(r->presumed_offset,
694 						       &user_relocs->presumed_offset);
695 				pagefault_enable();
696 				if (unlikely(unwritten)) {
697 					/* Note that reporting an error now
698 					 * leaves everything in an inconsistent
699 					 * state as we have *already* changed
700 					 * the relocation value inside the
					 * object. As we have not updated
					 * reloc.presumed_offset and will not
					 * change the execobject.offset, on the
					 * next call we may not rewrite the
					 * value inside the object, leaving it
					 * dangling and causing a GPU hang.
707 					 */
708 					ret = -EFAULT;
709 					goto out;
710 				}
711 			}
712 
713 			user_relocs++;
714 			r++;
715 		} while (--count);
716 	}
717 
718 out:
719 	reloc_cache_fini(&cache);
720 	return ret;
721 #undef N_RELOC
722 }
723 
724 static int
725 i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
726 				      struct eb_vmas *eb,
727 				      struct drm_i915_gem_relocation_entry *relocs)
728 {
729 	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
730 	struct reloc_cache cache;
731 	int i, ret = 0;
732 
733 	reloc_cache_init(&cache, eb->i915);
734 	for (i = 0; i < entry->relocation_count; i++) {
735 		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache);
736 		if (ret)
737 			break;
738 	}
739 	reloc_cache_fini(&cache);
740 
741 	return ret;
742 }
743 
744 static int
745 i915_gem_execbuffer_relocate(struct eb_vmas *eb)
746 {
747 	struct i915_vma *vma;
748 	int ret = 0;
749 
750 	list_for_each_entry(vma, &eb->vmas, exec_list) {
751 		ret = i915_gem_execbuffer_relocate_vma(vma, eb);
752 		if (ret)
753 			break;
754 	}
755 
756 	return ret;
757 }
758 
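/*
 * True if the mappable requirement comes solely from relocation
 * handling rather than from a fence register.
 */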
759 static bool only_mappable_for_reloc(unsigned int flags)
760 {
761 	return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) ==
762 		__EXEC_OBJECT_NEEDS_MAP;
763 }
764 
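/*
 * Pin a single vma into its address space according to the execbuffer
 * flags (mappable, fenced, fixed offset, 48b support), noting whether
 * the object moved and therefore needs its relocations reapplied.
 */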
765 static int
766 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
767 				struct intel_engine_cs *engine,
768 				bool *need_reloc)
769 {
770 	struct drm_i915_gem_object *obj = vma->obj;
771 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
772 	uint64_t flags;
773 	int ret;
774 
775 	flags = PIN_USER;
776 	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
777 		flags |= PIN_GLOBAL;
778 
779 	if (!drm_mm_node_allocated(&vma->node)) {
780 		/* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
781 		 * limit address to the first 4GBs for unflagged objects.
782 		 */
783 		if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0)
784 			flags |= PIN_ZONE_4G;
785 		if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
786 			flags |= PIN_GLOBAL | PIN_MAPPABLE;
787 		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
788 			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
789 		if (entry->flags & EXEC_OBJECT_PINNED)
790 			flags |= entry->offset | PIN_OFFSET_FIXED;
791 		if ((flags & PIN_MAPPABLE) == 0)
792 			flags |= PIN_HIGH;
793 	}
794 
795 	ret = i915_vma_pin(vma,
796 			   entry->pad_to_size,
797 			   entry->alignment,
798 			   flags);
799 	if ((ret == -ENOSPC || ret == -E2BIG) &&
800 	    only_mappable_for_reloc(entry->flags))
801 		ret = i915_vma_pin(vma,
802 				   entry->pad_to_size,
803 				   entry->alignment,
804 				   flags & ~PIN_MAPPABLE);
805 	if (ret)
806 		return ret;
807 
808 	entry->flags |= __EXEC_OBJECT_HAS_PIN;
809 
810 	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
811 		ret = i915_vma_get_fence(vma);
812 		if (ret)
813 			return ret;
814 
815 		if (i915_vma_pin_fence(vma))
816 			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
817 	}
818 
819 	if (entry->offset != vma->node.start) {
820 		entry->offset = vma->node.start;
821 		*need_reloc = true;
822 	}
823 
824 	if (entry->flags & EXEC_OBJECT_WRITE) {
825 		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
826 		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
827 	}
828 
829 	return 0;
830 }
831 
832 static bool
833 need_reloc_mappable(struct i915_vma *vma)
834 {
835 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
836 
837 	if (entry->relocation_count == 0)
838 		return false;
839 
840 	if (!i915_vma_is_ggtt(vma))
841 		return false;
842 
843 	/* See also use_cpu_reloc() */
844 	if (HAS_LLC(vma->obj->base.dev))
845 		return false;
846 
847 	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
848 		return false;
849 
850 	return true;
851 }
852 
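/*
 * Check whether a currently bound vma violates any of the placement
 * constraints requested for this execbuffer.
 */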
853 static bool
854 eb_vma_misplaced(struct i915_vma *vma)
855 {
856 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
857 
858 	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
859 		!i915_vma_is_ggtt(vma));
860 
861 	if (entry->alignment &&
862 	    vma->node.start & (entry->alignment - 1))
863 		return true;
864 
865 	if (vma->node.size < entry->pad_to_size)
866 		return true;
867 
868 	if (entry->flags & EXEC_OBJECT_PINNED &&
869 	    vma->node.start != entry->offset)
870 		return true;
871 
872 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
873 	    vma->node.start < BATCH_OFFSET_BIAS)
874 		return true;
875 
876 	/* avoid costly ping-pong once a batch bo ended up non-mappable */
877 	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
878 	    !i915_vma_is_map_and_fenceable(vma))
879 		return !only_mappable_for_reloc(entry->flags);
880 
881 	if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
882 	    (vma->node.start + vma->node.size - 1) >> 32)
883 		return true;
884 
885 	return false;
886 }
887 
888 static int
889 i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
890 			    struct list_head *vmas,
891 			    struct i915_gem_context *ctx,
892 			    bool *need_relocs)
893 {
894 	struct drm_i915_gem_object *obj;
895 	struct i915_vma *vma;
896 	struct i915_address_space *vm;
897 	struct list_head ordered_vmas;
898 	struct list_head pinned_vmas;
899 	bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4;
900 	int retry;
901 
902 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
903 
904 	INIT_LIST_HEAD(&ordered_vmas);
905 	INIT_LIST_HEAD(&pinned_vmas);
906 	while (!list_empty(vmas)) {
907 		struct drm_i915_gem_exec_object2 *entry;
908 		bool need_fence, need_mappable;
909 
910 		vma = list_first_entry(vmas, struct i915_vma, exec_list);
911 		obj = vma->obj;
912 		entry = vma->exec_entry;
913 
914 		if (ctx->flags & CONTEXT_NO_ZEROMAP)
915 			entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
916 
917 		if (!has_fenced_gpu_access)
918 			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
919 		need_fence =
920 			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
921 			i915_gem_object_is_tiled(obj);
922 		need_mappable = need_fence || need_reloc_mappable(vma);
923 
924 		if (entry->flags & EXEC_OBJECT_PINNED)
925 			list_move_tail(&vma->exec_list, &pinned_vmas);
926 		else if (need_mappable) {
927 			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
928 			list_move(&vma->exec_list, &ordered_vmas);
929 		} else
930 			list_move_tail(&vma->exec_list, &ordered_vmas);
931 
932 		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
933 		obj->base.pending_write_domain = 0;
934 	}
935 	list_splice(&ordered_vmas, vmas);
936 	list_splice(&pinned_vmas, vmas);
937 
938 	/* Attempt to pin all of the buffers into the GTT.
939 	 * This is done in 3 phases:
940 	 *
941 	 * 1a. Unbind all objects that do not match the GTT constraints for
942 	 *     the execbuffer (fenceable, mappable, alignment etc).
943 	 * 1b. Increment pin count for already bound objects.
944 	 * 2.  Bind new objects.
945 	 * 3.  Decrement pin count.
946 	 *
	 * This avoids unnecessary unbinding of later objects in order to make
948 	 * room for the earlier objects *unless* we need to defragment.
949 	 */
950 	retry = 0;
951 	do {
952 		int ret = 0;
953 
954 		/* Unbind any ill-fitting objects or pin. */
955 		list_for_each_entry(vma, vmas, exec_list) {
956 			if (!drm_mm_node_allocated(&vma->node))
957 				continue;
958 
959 			if (eb_vma_misplaced(vma))
960 				ret = i915_vma_unbind(vma);
961 			else
962 				ret = i915_gem_execbuffer_reserve_vma(vma,
963 								      engine,
964 								      need_relocs);
965 			if (ret)
966 				goto err;
967 		}
968 
969 		/* Bind fresh objects */
970 		list_for_each_entry(vma, vmas, exec_list) {
971 			if (drm_mm_node_allocated(&vma->node))
972 				continue;
973 
974 			ret = i915_gem_execbuffer_reserve_vma(vma, engine,
975 							      need_relocs);
976 			if (ret)
977 				goto err;
978 		}
979 
980 err:
981 		if (ret != -ENOSPC || retry++)
982 			return ret;
983 
984 		/* Decrement pin count for bound objects */
985 		list_for_each_entry(vma, vmas, exec_list)
986 			i915_gem_execbuffer_unreserve_vma(vma);
987 
988 		ret = i915_gem_evict_vm(vm, true);
989 		if (ret)
990 			return ret;
991 	} while (1);
992 }
993 
994 static int
995 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
996 				  struct drm_i915_gem_execbuffer2 *args,
997 				  struct drm_file *file,
998 				  struct intel_engine_cs *engine,
999 				  struct eb_vmas *eb,
1000 				  struct drm_i915_gem_exec_object2 *exec,
1001 				  struct i915_gem_context *ctx)
1002 {
1003 	struct drm_i915_gem_relocation_entry *reloc;
1004 	struct i915_address_space *vm;
1005 	struct i915_vma *vma;
1006 	bool need_relocs;
1007 	int *reloc_offset;
1008 	int i, total, ret;
1009 	unsigned count = args->buffer_count;
1010 
1011 	vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;
1012 
1013 	/* We may process another execbuffer during the unlock... */
1014 	while (!list_empty(&eb->vmas)) {
1015 		vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
1016 		list_del_init(&vma->exec_list);
1017 		i915_gem_execbuffer_unreserve_vma(vma);
1018 		i915_vma_put(vma);
1019 	}
1020 
1021 	mutex_unlock(&dev->struct_mutex);
1022 
1023 	total = 0;
1024 	for (i = 0; i < count; i++)
1025 		total += exec[i].relocation_count;
1026 
1027 	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
1028 	reloc = drm_malloc_ab(total, sizeof(*reloc));
1029 	if (reloc == NULL || reloc_offset == NULL) {
1030 		drm_free_large(reloc);
1031 		drm_free_large(reloc_offset);
1032 		mutex_lock(&dev->struct_mutex);
1033 		return -ENOMEM;
1034 	}
1035 
1036 	total = 0;
1037 	for (i = 0; i < count; i++) {
1038 		struct drm_i915_gem_relocation_entry __user *user_relocs;
1039 		u64 invalid_offset = (u64)-1;
1040 		int j;
1041 
1042 		user_relocs = u64_to_user_ptr(exec[i].relocs_ptr);
1043 
1044 		if (copy_from_user(reloc+total, user_relocs,
1045 				   exec[i].relocation_count * sizeof(*reloc))) {
1046 			ret = -EFAULT;
1047 			mutex_lock(&dev->struct_mutex);
1048 			goto err;
1049 		}
1050 
1051 		/* As we do not update the known relocation offsets after
1052 		 * relocating (due to the complexities in lock handling),
1053 		 * we need to mark them as invalid now so that we force the
1054 		 * relocation processing next time. Just in case the target
1055 		 * object is evicted and then rebound into its old
1056 		 * presumed_offset before the next execbuffer - if that
1057 		 * happened we would make the mistake of assuming that the
1058 		 * relocations were valid.
1059 		 */
1060 		for (j = 0; j < exec[i].relocation_count; j++) {
1061 			if (__copy_to_user(&user_relocs[j].presumed_offset,
1062 					   &invalid_offset,
1063 					   sizeof(invalid_offset))) {
1064 				ret = -EFAULT;
1065 				mutex_lock(&dev->struct_mutex);
1066 				goto err;
1067 			}
1068 		}
1069 
1070 		reloc_offset[i] = total;
1071 		total += exec[i].relocation_count;
1072 	}
1073 
1074 	ret = i915_mutex_lock_interruptible(dev);
1075 	if (ret) {
1076 		mutex_lock(&dev->struct_mutex);
1077 		goto err;
1078 	}
1079 
1080 	/* reacquire the objects */
1081 	eb_reset(eb);
1082 	ret = eb_lookup_vmas(eb, exec, args, vm, file);
1083 	if (ret)
1084 		goto err;
1085 
1086 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
1087 	ret = i915_gem_execbuffer_reserve(engine, &eb->vmas, ctx,
1088 					  &need_relocs);
1089 	if (ret)
1090 		goto err;
1091 
1092 	list_for_each_entry(vma, &eb->vmas, exec_list) {
1093 		int offset = vma->exec_entry - exec;
1094 		ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
1095 							    reloc + reloc_offset[offset]);
1096 		if (ret)
1097 			goto err;
1098 	}
1099 
	/* Leave the user relocations as they are: this is the painfully slow path,
1101 	 * and we want to avoid the complication of dropping the lock whilst
1102 	 * having buffers reserved in the aperture and so causing spurious
1103 	 * ENOSPC for random operations.
1104 	 */
1105 
1106 err:
1107 	drm_free_large(reloc);
1108 	drm_free_large(reloc_offset);
1109 	return ret;
1110 }
1111 
1112 static unsigned int eb_other_engines(struct drm_i915_gem_request *req)
1113 {
1114 	unsigned int mask;
1115 
1116 	mask = ~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK;
1117 	mask <<= I915_BO_ACTIVE_SHIFT;
1118 
1119 	return mask;
1120 }
1121 
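/*
 * Serialise the request against outstanding work on other engines and
 * any dma-buf reservations, flush dirty CPU caches and then invalidate
 * the GPU caches before the batch runs.
 */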
1122 static int
1123 i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
1124 				struct list_head *vmas)
1125 {
1126 	const unsigned int other_rings = eb_other_engines(req);
1127 	struct i915_vma *vma;
1128 	int ret;
1129 
1130 	list_for_each_entry(vma, vmas, exec_list) {
1131 		struct drm_i915_gem_object *obj = vma->obj;
1132 		struct reservation_object *resv;
1133 
1134 		if (obj->flags & other_rings) {
1135 			ret = i915_gem_request_await_object
1136 				(req, obj, obj->base.pending_write_domain);
1137 			if (ret)
1138 				return ret;
1139 		}
1140 
1141 		resv = i915_gem_object_get_dmabuf_resv(obj);
1142 		if (resv) {
1143 			ret = i915_sw_fence_await_reservation
1144 				(&req->submit, resv, &i915_fence_ops,
1145 				 obj->base.pending_write_domain, 10*HZ,
1146 				 GFP_KERNEL | __GFP_NOWARN);
1147 			if (ret < 0)
1148 				return ret;
1149 		}
1150 
1151 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
1152 			i915_gem_clflush_object(obj, false);
1153 	}
1154 
1155 	/* Unconditionally flush any chipset caches (for streaming writes). */
1156 	i915_gem_chipset_flush(req->engine->i915);
1157 
1158 	/* Unconditionally invalidate GPU caches and TLBs. */
1159 	return req->engine->emit_flush(req, EMIT_INVALIDATE);
1160 }
1161 
1162 static bool
1163 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
1164 {
1165 	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
1166 		return false;
1167 
1168 	/* Kernel clipping was a DRI1 misfeature */
1169 	if (exec->num_cliprects || exec->cliprects_ptr)
1170 		return false;
1171 
1172 	if (exec->DR4 == 0xffffffff) {
1173 		DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
1174 		exec->DR4 = 0;
1175 	}
1176 	if (exec->DR1 || exec->DR4)
1177 		return false;
1178 
1179 	if ((exec->batch_start_offset | exec->batch_len) & 0x7)
1180 		return false;
1181 
1182 	return true;
1183 }
1184 
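/*
 * Sanity check the user-supplied exec list: reject unknown flags,
 * misaligned or non-canonical pinned offsets, bogus alignments and
 * pad_to_size values, and relocation counts that would overflow, and
 * prefault the relocation arrays for the fast path.
 */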
1185 static int
1186 validate_exec_list(struct drm_device *dev,
1187 		   struct drm_i915_gem_exec_object2 *exec,
1188 		   int count)
1189 {
1190 	unsigned relocs_total = 0;
1191 	unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
1192 	unsigned invalid_flags;
1193 	int i;
1194 
1195 	/* INTERNAL flags must not overlap with external ones */
1196 	BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS);
1197 
1198 	invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
1199 	if (USES_FULL_PPGTT(dev))
1200 		invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
1201 
1202 	for (i = 0; i < count; i++) {
1203 		char __user *ptr = u64_to_user_ptr(exec[i].relocs_ptr);
1204 		int length; /* limited by fault_in_pages_readable() */
1205 
1206 		if (exec[i].flags & invalid_flags)
1207 			return -EINVAL;
1208 
1209 		/* Offset can be used as input (EXEC_OBJECT_PINNED), reject
1210 		 * any non-page-aligned or non-canonical addresses.
1211 		 */
1212 		if (exec[i].flags & EXEC_OBJECT_PINNED) {
1213 			if (exec[i].offset !=
1214 			    gen8_canonical_addr(exec[i].offset & I915_GTT_PAGE_MASK))
1215 				return -EINVAL;
1216 
			/* From the drm_mm perspective the address space is
			 * continuous, so from this point on we always use the
			 * non-canonical form internally.
1220 			 */
1221 			exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
1222 		}
1223 
1224 		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
1225 			return -EINVAL;
1226 
1227 		/* pad_to_size was once a reserved field, so sanitize it */
1228 		if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
1229 			if (offset_in_page(exec[i].pad_to_size))
1230 				return -EINVAL;
1231 		} else {
1232 			exec[i].pad_to_size = 0;
1233 		}
1234 
1235 		/* First check for malicious input causing overflow in
1236 		 * the worst case where we need to allocate the entire
1237 		 * relocation tree as a single array.
1238 		 */
1239 		if (exec[i].relocation_count > relocs_max - relocs_total)
1240 			return -EINVAL;
1241 		relocs_total += exec[i].relocation_count;
1242 
1243 		length = exec[i].relocation_count *
1244 			sizeof(struct drm_i915_gem_relocation_entry);
1245 		/*
1246 		 * We must check that the entire relocation array is safe
1247 		 * to read, but since we may need to update the presumed
1248 		 * offsets during execution, check for full write access.
1249 		 */
1250 #if 0
1251 		if (!access_ok(VERIFY_WRITE, ptr, length))
1252 			return -EFAULT;
1253 #endif
1254 
1255 		if (likely(!i915.prefault_disable)) {
1256 			if (fault_in_pages_readable(ptr, length))
1257 				return -EFAULT;
1258 		}
1259 	}
1260 
1261 	return 0;
1262 }
1263 
1264 static struct i915_gem_context *
1265 i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
1266 			  struct intel_engine_cs *engine, const u32 ctx_id)
1267 {
1268 	struct i915_gem_context *ctx;
1269 	struct i915_ctx_hang_stats *hs;
1270 
1271 	ctx = i915_gem_context_lookup(file->driver_priv, ctx_id);
1272 	if (IS_ERR(ctx))
1273 		return ctx;
1274 
1275 	hs = &ctx->hang_stats;
1276 	if (hs->banned) {
1277 		DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
1278 		return ERR_PTR(-EIO);
1279 	}
1280 
1281 	return ctx;
1282 }
1283 
1284 void i915_vma_move_to_active(struct i915_vma *vma,
1285 			     struct drm_i915_gem_request *req,
1286 			     unsigned int flags)
1287 {
1288 	struct drm_i915_gem_object *obj = vma->obj;
1289 	const unsigned int idx = req->engine->id;
1290 
1291 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
1292 
1293 	obj->dirty = 1; /* be paranoid  */
1294 
1295 	/* Add a reference if we're newly entering the active list.
1296 	 * The order in which we add operations to the retirement queue is
1297 	 * vital here: mark_active adds to the start of the callback list,
1298 	 * such that subsequent callbacks are called first. Therefore we
1299 	 * add the active reference first and queue for it to be dropped
1300 	 * *last*.
1301 	 */
1302 	if (!i915_gem_object_is_active(obj))
1303 		i915_gem_object_get(obj);
1304 	i915_gem_object_set_active(obj, idx);
1305 	i915_gem_active_set(&obj->last_read[idx], req);
1306 
1307 	if (flags & EXEC_OBJECT_WRITE) {
1308 		i915_gem_active_set(&obj->last_write, req);
1309 
1310 		intel_fb_obj_invalidate(obj, ORIGIN_CS);
1311 
1312 		/* update for the implicit flush after a batch */
1313 		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1314 	}
1315 
1316 	if (flags & EXEC_OBJECT_NEEDS_FENCE)
1317 		i915_gem_active_set(&vma->last_fence, req);
1318 
1319 	i915_vma_set_active(vma, idx);
1320 	i915_gem_active_set(&vma->last_read[idx], req);
1321 	list_move_tail(&vma->vm_link, &vma->vm->active_list);
1322 }
1323 
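/*
 * Publish the request's fence into the object's dma-buf reservation so
 * that external (prime) users can synchronise against this batch.
 */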
1324 static void eb_export_fence(struct drm_i915_gem_object *obj,
1325 			    struct drm_i915_gem_request *req,
1326 			    unsigned int flags)
1327 {
1328 	struct reservation_object *resv;
1329 
1330 	resv = i915_gem_object_get_dmabuf_resv(obj);
1331 	if (!resv)
1332 		return;
1333 
1334 	/* Ignore errors from failing to allocate the new fence, we can't
1335 	 * handle an error right now. Worst case should be missed
1336 	 * synchronisation leading to rendering corruption.
1337 	 */
1338 	ww_mutex_lock(&resv->lock, NULL);
1339 	if (flags & EXEC_OBJECT_WRITE)
1340 		reservation_object_add_excl_fence(resv, &req->fence);
1341 	else if (reservation_object_reserve_shared(resv) == 0)
1342 		reservation_object_add_shared_fence(resv, &req->fence);
1343 	ww_mutex_unlock(&resv->lock);
1344 }
1345 
1346 static void
1347 i915_gem_execbuffer_move_to_active(struct list_head *vmas,
1348 				   struct drm_i915_gem_request *req)
1349 {
1350 	struct i915_vma *vma;
1351 
1352 	list_for_each_entry(vma, vmas, exec_list) {
1353 		struct drm_i915_gem_object *obj = vma->obj;
1354 		u32 old_read = obj->base.read_domains;
1355 		u32 old_write = obj->base.write_domain;
1356 
1357 		obj->base.write_domain = obj->base.pending_write_domain;
1358 		if (obj->base.write_domain)
1359 			vma->exec_entry->flags |= EXEC_OBJECT_WRITE;
1360 		else
1361 			obj->base.pending_read_domains |= obj->base.read_domains;
1362 		obj->base.read_domains = obj->base.pending_read_domains;
1363 
1364 		i915_vma_move_to_active(vma, req, vma->exec_entry->flags);
1365 		eb_export_fence(obj, req, vma->exec_entry->flags);
1366 		trace_i915_gem_object_change_domain(obj, old_read, old_write);
1367 	}
1368 }
1369 
1370 static int
1371 i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
1372 {
1373 	struct intel_ring *ring = req->ring;
1374 	int ret, i;
1375 
1376 	if (!IS_GEN7(req->i915) || req->engine->id != RCS) {
1377 		DRM_DEBUG("sol reset is gen7/rcs only\n");
1378 		return -EINVAL;
1379 	}
1380 
1381 	ret = intel_ring_begin(req, 4 * 3);
1382 	if (ret)
1383 		return ret;
1384 
1385 	for (i = 0; i < 4; i++) {
1386 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1387 		intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i));
1388 		intel_ring_emit(ring, 0);
1389 	}
1390 
1391 	intel_ring_advance(ring);
1392 
1393 	return 0;
1394 }
1395 
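/*
 * Run the command parser over the batch, copying it into a shadow
 * buffer taken from the engine's batch pool. Returns the shadow vma,
 * NULL if the parser declined (e.g. a chained batch), or an error.
 */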
1396 static struct i915_vma *
1397 i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
1398 			  struct drm_i915_gem_exec_object2 *shadow_exec_entry,
1399 			  struct drm_i915_gem_object *batch_obj,
1400 			  struct eb_vmas *eb,
1401 			  u32 batch_start_offset,
1402 			  u32 batch_len,
1403 			  bool is_master)
1404 {
1405 	struct drm_i915_gem_object *shadow_batch_obj;
1406 	struct i915_vma *vma;
1407 	int ret;
1408 
1409 	shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool,
1410 						   PAGE_ALIGN(batch_len));
1411 	if (IS_ERR(shadow_batch_obj))
1412 		return ERR_CAST(shadow_batch_obj);
1413 
1414 	ret = intel_engine_cmd_parser(engine,
1415 				      batch_obj,
1416 				      shadow_batch_obj,
1417 				      batch_start_offset,
1418 				      batch_len,
1419 				      is_master);
1420 	if (ret) {
1421 		if (ret == -EACCES) /* unhandled chained batch */
1422 			vma = NULL;
1423 		else
1424 			vma = ERR_PTR(ret);
1425 		goto out;
1426 	}
1427 
1428 	vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
1429 	if (IS_ERR(vma))
1430 		goto out;
1431 
1432 	memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
1433 
1434 	vma->exec_entry = shadow_exec_entry;
1435 	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
1436 	i915_gem_object_get(shadow_batch_obj);
1437 	list_add_tail(&vma->exec_list, &eb->vmas);
1438 
1439 out:
1440 	i915_gem_object_unpin_pages(shadow_batch_obj);
1441 	return vma;
1442 }
1443 
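/*
 * Final submission step: flush the objects to the GPU domain, switch
 * context, apply the relative-constants and gen7 SOL-reset flags if
 * requested, and emit the batchbuffer start for this request.
 */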
1444 static int
1445 execbuf_submit(struct i915_execbuffer_params *params,
1446 	       struct drm_i915_gem_execbuffer2 *args,
1447 	       struct list_head *vmas)
1448 {
1449 	struct drm_i915_private *dev_priv = params->request->i915;
1450 	u64 exec_start, exec_len;
1451 	int instp_mode;
1452 	u32 instp_mask;
1453 	int ret;
1454 
1455 	ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
1456 	if (ret)
1457 		return ret;
1458 
1459 	ret = i915_switch_context(params->request);
1460 	if (ret)
1461 		return ret;
1462 
1463 	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1464 	instp_mask = I915_EXEC_CONSTANTS_MASK;
1465 	switch (instp_mode) {
1466 	case I915_EXEC_CONSTANTS_REL_GENERAL:
1467 	case I915_EXEC_CONSTANTS_ABSOLUTE:
1468 	case I915_EXEC_CONSTANTS_REL_SURFACE:
1469 		if (instp_mode != 0 && params->engine->id != RCS) {
1470 			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
1471 			return -EINVAL;
1472 		}
1473 
1474 		if (instp_mode != dev_priv->relative_constants_mode) {
1475 			if (INTEL_INFO(dev_priv)->gen < 4) {
1476 				DRM_DEBUG("no rel constants on pre-gen4\n");
1477 				return -EINVAL;
1478 			}
1479 
1480 			if (INTEL_INFO(dev_priv)->gen > 5 &&
1481 			    instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
1482 				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
1483 				return -EINVAL;
1484 			}
1485 
1486 			/* The HW changed the meaning on this bit on gen6 */
1487 			if (INTEL_INFO(dev_priv)->gen >= 6)
1488 				instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1489 		}
1490 		break;
1491 	default:
1492 		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
1493 		return -EINVAL;
1494 	}
1495 
1496 	if (params->engine->id == RCS &&
1497 	    instp_mode != dev_priv->relative_constants_mode) {
1498 		struct intel_ring *ring = params->request->ring;
1499 
1500 		ret = intel_ring_begin(params->request, 4);
1501 		if (ret)
1502 			return ret;
1503 
1504 		intel_ring_emit(ring, MI_NOOP);
1505 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1506 		intel_ring_emit_reg(ring, INSTPM);
1507 		intel_ring_emit(ring, instp_mask << 16 | instp_mode);
1508 		intel_ring_advance(ring);
1509 
1510 		dev_priv->relative_constants_mode = instp_mode;
1511 	}
1512 
1513 	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1514 		ret = i915_reset_gen7_sol_offsets(params->request);
1515 		if (ret)
1516 			return ret;
1517 	}
1518 
1519 	exec_len   = args->batch_len;
1520 	exec_start = params->batch->node.start +
1521 		     params->args_batch_start_offset;
1522 
1523 	if (exec_len == 0)
1524 		exec_len = params->batch->size - params->args_batch_start_offset;
1525 
1526 	ret = params->engine->emit_bb_start(params->request,
1527 					    exec_start, exec_len,
1528 					    params->dispatch_flags);
1529 	if (ret)
1530 		return ret;
1531 
1532 	trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
1533 
1534 	i915_gem_execbuffer_move_to_active(vmas, params->request);
1535 
1536 	return 0;
1537 }
1538 
1539 /**
1540  * Find one BSD ring to dispatch the corresponding BSD command.
1541  * The engine index is returned.
1542  */
1543 static unsigned int
1544 gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
1545 			 struct drm_file *file)
1546 {
1547 	struct drm_i915_file_private *file_priv = file->driver_priv;
1548 
1549 	/* Check whether the file_priv has already selected one ring. */
1550 	if ((int)file_priv->bsd_engine < 0)
1551 		file_priv->bsd_engine = atomic_fetch_xor(1,
1552 			 &dev_priv->mm.bsd_engine_dispatch_index);
1553 
1554 	return file_priv->bsd_engine;
1555 }
1556 
1557 #define I915_USER_RINGS (4)
1558 
1559 static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
1560 	[I915_EXEC_DEFAULT]	= RCS,
1561 	[I915_EXEC_RENDER]	= RCS,
1562 	[I915_EXEC_BLT]		= BCS,
1563 	[I915_EXEC_BSD]		= VCS,
1564 	[I915_EXEC_VEBOX]	= VECS
1565 };
1566 
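/*
 * Translate the execbuffer ring selector (plus the BSD dispatch flags)
 * into an engine, returning NULL for invalid combinations.
 */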
1567 static struct intel_engine_cs *
1568 eb_select_engine(struct drm_i915_private *dev_priv,
1569 		 struct drm_file *file,
1570 		 struct drm_i915_gem_execbuffer2 *args)
1571 {
1572 	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
1573 	struct intel_engine_cs *engine;
1574 
1575 	if (user_ring_id > I915_USER_RINGS) {
1576 		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
1577 		return NULL;
1578 	}
1579 
1580 	if ((user_ring_id != I915_EXEC_BSD) &&
1581 	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
1582 		DRM_DEBUG("execbuf with non bsd ring but with invalid "
1583 			  "bsd dispatch flags: %d\n", (int)(args->flags));
1584 		return NULL;
1585 	}
1586 
1587 	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
1588 		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
1589 
1590 		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
1591 			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
1592 		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
1593 			   bsd_idx <= I915_EXEC_BSD_RING2) {
1594 			bsd_idx >>= I915_EXEC_BSD_SHIFT;
1595 			bsd_idx--;
1596 		} else {
1597 			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
1598 				  bsd_idx);
1599 			return NULL;
1600 		}
1601 
1602 		engine = dev_priv->engine[_VCS(bsd_idx)];
1603 	} else {
1604 		engine = dev_priv->engine[user_ring_map[user_ring_id]];
1605 	}
1606 
1607 	if (!engine) {
1608 		DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
1609 		return NULL;
1610 	}
1611 
1612 	return engine;
1613 }
1614 
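/*
 * Core of the execbuffer ioctls: validate the arguments, look up and
 * reserve all objects, apply relocations, optionally run the command
 * parser over the batch, then build and submit a request on the
 * selected engine.
 */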
1615 static int
1616 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1617 		       struct drm_file *file,
1618 		       struct drm_i915_gem_execbuffer2 *args,
1619 		       struct drm_i915_gem_exec_object2 *exec)
1620 {
1621 	struct drm_i915_private *dev_priv = to_i915(dev);
1622 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
1623 	struct eb_vmas *eb;
1624 	struct drm_i915_gem_exec_object2 shadow_exec_entry;
1625 	struct intel_engine_cs *engine;
1626 	struct i915_gem_context *ctx;
1627 	struct i915_address_space *vm;
1628 	struct i915_execbuffer_params params_master; /* XXX: will be removed later */
1629 	struct i915_execbuffer_params *params = &params_master;
1630 	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
1631 	u32 dispatch_flags;
1632 	int ret;
1633 	bool need_relocs;
1634 
1635 	if (!i915_gem_check_execbuffer(args))
1636 		return -EINVAL;
1637 
1638 	ret = validate_exec_list(dev, exec, args->buffer_count);
1639 	if (ret)
1640 		return ret;
1641 
1642 	dispatch_flags = 0;
1643 	if (args->flags & I915_EXEC_SECURE) {
1644 #if 0
1645 		if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
1646 		    return -EPERM;
1647 #endif
1648 
1649 		dispatch_flags |= I915_DISPATCH_SECURE;
1650 	}
1651 	if (args->flags & I915_EXEC_IS_PINNED)
1652 		dispatch_flags |= I915_DISPATCH_PINNED;
1653 
1654 	engine = eb_select_engine(dev_priv, file, args);
1655 	if (!engine)
1656 		return -EINVAL;
1657 
1658 	if (args->buffer_count < 1) {
1659 		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1660 		return -EINVAL;
1661 	}
1662 
1663 	if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
1664 		if (!HAS_RESOURCE_STREAMER(dev)) {
1665 			DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n");
1666 			return -EINVAL;
1667 		}
1668 		if (engine->id != RCS) {
1669 			DRM_DEBUG("RS is not available on %s\n",
1670 				 engine->name);
1671 			return -EINVAL;
1672 		}
1673 
1674 		dispatch_flags |= I915_DISPATCH_RS;
1675 	}
1676 
1677 	/* Take a local wakeref for preparing to dispatch the execbuf as
1678 	 * we expect to access the hardware fairly frequently in the
1679 	 * process. Upon first dispatch, we acquire another prolonged
1680 	 * wakeref that we hold until the GPU has been idle for at least
1681 	 * 100ms.
1682 	 */
1683 	intel_runtime_pm_get(dev_priv);
1684 
1685 	ret = i915_mutex_lock_interruptible(dev);
1686 	if (ret)
1687 		goto pre_mutex_err;
1688 
1689 	ctx = i915_gem_validate_context(dev, file, engine, ctx_id);
1690 	if (IS_ERR(ctx)) {
1691 		mutex_unlock(&dev->struct_mutex);
1692 		ret = PTR_ERR(ctx);
1693 		goto pre_mutex_err;
1694 	}
1695 
1696 	i915_gem_context_get(ctx);
1697 
1698 	if (ctx->ppgtt)
1699 		vm = &ctx->ppgtt->base;
1700 	else
1701 		vm = &ggtt->base;
1702 
1703 	memset(&params_master, 0x00, sizeof(params_master));
1704 
1705 	eb = eb_create(dev_priv, args);
1706 	if (eb == NULL) {
1707 		i915_gem_context_put(ctx);
1708 		mutex_unlock(&dev->struct_mutex);
1709 		ret = -ENOMEM;
1710 		goto pre_mutex_err;
1711 	}
1712 
1713 	/* Look up object handles */
1714 	ret = eb_lookup_vmas(eb, exec, args, vm, file);
1715 	if (ret)
1716 		goto err;
1717 
1718 	/* take note of the batch buffer before we might reorder the lists */
1719 	params->batch = eb_get_batch(eb);
1720 
1721 	/* Move the objects en-masse into the GTT, evicting if necessary. */
1722 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
1723 	ret = i915_gem_execbuffer_reserve(engine, &eb->vmas, ctx,
1724 					  &need_relocs);
1725 	if (ret)
1726 		goto err;
1727 
1728 	/* The objects are in their final locations, apply the relocations. */
1729 	if (need_relocs)
1730 		ret = i915_gem_execbuffer_relocate(eb);
1731 	if (ret) {
1732 		if (ret == -EFAULT) {
1733 			ret = i915_gem_execbuffer_relocate_slow(dev, args, file,
1734 								engine,
1735 								eb, exec, ctx);
1736 			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1737 		}
1738 		if (ret)
1739 			goto err;
1740 	}
1741 
1742 	/* Set the pending read domains for the batch buffer to COMMAND */
1743 	if (params->batch->obj->base.pending_write_domain) {
1744 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1745 		ret = -EINVAL;
1746 		goto err;
1747 	}
1748 	if (args->batch_start_offset > params->batch->size ||
1749 	    args->batch_len > params->batch->size - args->batch_start_offset) {
1750 		DRM_DEBUG("Attempting to use out-of-bounds batch\n");
1751 		ret = -EINVAL;
1752 		goto err;
1753 	}
1754 
1755 	params->args_batch_start_offset = args->batch_start_offset;
1756 	if (intel_engine_needs_cmd_parser(engine) && args->batch_len) {
1757 		struct i915_vma *vma;
1758 
1759 		vma = i915_gem_execbuffer_parse(engine, &shadow_exec_entry,
1760 						params->batch->obj,
1761 						eb,
1762 						args->batch_start_offset,
1763 						args->batch_len,
1764 						drm_is_current_master(file));
1765 		if (IS_ERR(vma)) {
1766 			ret = PTR_ERR(vma);
1767 			goto err;
1768 		}
1769 
1770 		if (vma) {
1771 			/*
1772 			 * Batch parsed and accepted:
1773 			 *
1774 			 * Set the DISPATCH_SECURE bit to remove the NON_SECURE
1775 			 * bit from MI_BATCH_BUFFER_START commands issued in
1776 			 * the dispatch_execbuffer implementations. We
1777 			 * specifically don't want that set on batches the
1778 			 * command parser has accepted.
1779 			 */
1780 			dispatch_flags |= I915_DISPATCH_SECURE;
1781 			params->args_batch_start_offset = 0;
1782 			params->batch = vma;
1783 		}
1784 	}
1785 
1786 	params->batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1787 
1788 	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1789 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
1790 	 * hsw should have this fixed, but bdw mucks it up again. */
1791 	if (dispatch_flags & I915_DISPATCH_SECURE) {
1792 		struct drm_i915_gem_object *obj = params->batch->obj;
1793 		struct i915_vma *vma;
1794 
1795 		/*
1796 		 * So on first glance it looks freaky that we pin the batch here
1797 		 * outside of the reservation loop. But:
1798 		 * - The batch is already pinned into the relevant ppgtt, so we
1799 		 *   already have the backing storage fully allocated.
1800 		 * - No other BO uses the global gtt (well contexts, but meh),
1801 		 *   so we don't really have issues with multiple objects not
1802 		 *   fitting due to fragmentation.
1803 		 * So this is actually safe.
1804 		 */
1805 		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
1806 		if (IS_ERR(vma)) {
1807 			ret = PTR_ERR(vma);
1808 			goto err;
1809 		}
1810 
1811 		params->batch = vma;
1812 	}
1813 
1814 	/* Allocate a request for this batch buffer nice and early. */
1815 	params->request = i915_gem_request_alloc(engine, ctx);
1816 	if (IS_ERR(params->request)) {
1817 		ret = PTR_ERR(params->request);
1818 		goto err_batch_unpin;
1819 	}
1820 
1821 	/* Whilst this request exists, batch_obj will be on the
1822 	 * active_list, and so will hold the active reference. Only when this
	 * request is retired will the batch_obj be moved onto the
1824 	 * inactive_list and lose its active reference. Hence we do not need
1825 	 * to explicitly hold another reference here.
1826 	 */
1827 	params->request->batch = params->batch;
1828 
1829 	ret = i915_gem_request_add_to_client(params->request, file);
1830 	if (ret)
1831 		goto err_request;
1832 
1833 	/*
1834 	 * Save assorted stuff away to pass through to *_submission().
	 * NB: This data should be 'persistent' and not local as it will be
	 * kept around beyond the duration of the IOCTL once the GPU
1837 	 * scheduler arrives.
1838 	 */
1839 	params->dev                     = dev;
1840 	params->file                    = file;
	params->engine                  = engine;
1842 	params->dispatch_flags          = dispatch_flags;
1843 	params->ctx                     = ctx;
1844 
1845 	ret = execbuf_submit(params, args, &eb->vmas);
1846 err_request:
1847 	__i915_add_request(params->request, ret == 0);
1848 
1849 err_batch_unpin:
1850 	/*
1851 	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
1852 	 * batch vma for correctness. For less ugly and less fragility this
1853 	 * needs to be adjusted to also track the ggtt batch vma properly as
1854 	 * active.
1855 	 */
1856 	if (dispatch_flags & I915_DISPATCH_SECURE)
1857 		i915_vma_unpin(params->batch);
1858 err:
1859 	/* the request owns the ref now */
1860 	i915_gem_context_put(ctx);
1861 	eb_destroy(eb);
1862 
1863 	mutex_unlock(&dev->struct_mutex);
1864 
1865 pre_mutex_err:
1866 	/* intel_gpu_busy should also get a ref, so it will free when the device
1867 	 * is really idle. */
1868 	intel_runtime_pm_put(dev_priv);
1869 	return ret;
1870 }
1871 
1872 /*
1873  * Legacy execbuffer just creates an exec2 list from the original exec object
1874  * list array and passes it to the real function.
1875  */
1876 int
1877 i915_gem_execbuffer(struct drm_device *dev, void *data,
1878 		    struct drm_file *file)
1879 {
1880 	struct drm_i915_gem_execbuffer *args = data;
1881 	struct drm_i915_gem_execbuffer2 exec2;
1882 	struct drm_i915_gem_exec_object *exec_list = NULL;
1883 	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1884 	int ret, i;
1885 
1886 	if (args->buffer_count < 1) {
1887 		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1888 		return -EINVAL;
1889 	}
1890 
1891 	/* Copy in the exec list from userland */
1892 	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1893 	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1894 	if (exec_list == NULL || exec2_list == NULL) {
1895 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1896 			  args->buffer_count);
1897 		drm_free_large(exec_list);
1898 		drm_free_large(exec2_list);
1899 		return -ENOMEM;
1900 	}
1901 	ret = copy_from_user(exec_list,
1902 			     u64_to_user_ptr(args->buffers_ptr),
1903 			     sizeof(*exec_list) * args->buffer_count);
1904 	if (ret != 0) {
1905 		DRM_DEBUG("copy %d exec entries failed %d\n",
1906 			  args->buffer_count, ret);
1907 		drm_free_large(exec_list);
1908 		drm_free_large(exec2_list);
1909 		return -EFAULT;
1910 	}
1911 
1912 	for (i = 0; i < args->buffer_count; i++) {
1913 		exec2_list[i].handle = exec_list[i].handle;
1914 		exec2_list[i].relocation_count = exec_list[i].relocation_count;
1915 		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1916 		exec2_list[i].alignment = exec_list[i].alignment;
1917 		exec2_list[i].offset = exec_list[i].offset;
1918 		if (INTEL_INFO(dev)->gen < 4)
1919 			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1920 		else
1921 			exec2_list[i].flags = 0;
1922 	}
1923 
1924 	exec2.buffers_ptr = args->buffers_ptr;
1925 	exec2.buffer_count = args->buffer_count;
1926 	exec2.batch_start_offset = args->batch_start_offset;
1927 	exec2.batch_len = args->batch_len;
1928 	exec2.DR1 = args->DR1;
1929 	exec2.DR4 = args->DR4;
1930 	exec2.num_cliprects = args->num_cliprects;
1931 	exec2.cliprects_ptr = args->cliprects_ptr;
1932 	exec2.flags = I915_EXEC_RENDER;
1933 	i915_execbuffer2_set_context_id(exec2, 0);
1934 
1935 	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1936 	if (!ret) {
1937 		struct drm_i915_gem_exec_object __user *user_exec_list =
1938 			u64_to_user_ptr(args->buffers_ptr);
1939 
1940 		/* Copy the new buffer offsets back to the user's exec list. */
1941 		for (i = 0; i < args->buffer_count; i++) {
1942 			exec2_list[i].offset =
1943 				gen8_canonical_addr(exec2_list[i].offset);
1944 			ret = __copy_to_user(&user_exec_list[i].offset,
1945 					     &exec2_list[i].offset,
1946 					     sizeof(user_exec_list[i].offset));
1947 			if (ret) {
1948 				ret = -EFAULT;
1949 				DRM_DEBUG("failed to copy %d exec entries "
1950 					  "back to user (%d)\n",
1951 					  args->buffer_count, ret);
1952 				break;
1953 			}
1954 		}
1955 	}
1956 
1957 	drm_free_large(exec_list);
1958 	drm_free_large(exec2_list);
1959 	return ret;
1960 }
1961 
1962 int
1963 i915_gem_execbuffer2(struct drm_device *dev, void *data,
1964 		     struct drm_file *file)
1965 {
1966 	struct drm_i915_gem_execbuffer2 *args = data;
1967 	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1968 	int ret;
1969 
1970 	if (args->buffer_count < 1 ||
1971 	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1972 		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1973 		return -EINVAL;
1974 	}
1975 
1976 	if (args->rsvd2 != 0) {
		DRM_DEBUG("dirty rsvd2 field\n");
1978 		return -EINVAL;
1979 	}
1980 
1981 	exec2_list = drm_malloc_gfp(args->buffer_count,
1982 				    sizeof(*exec2_list),
1983 				    GFP_TEMPORARY);
1984 	if (exec2_list == NULL) {
1985 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1986 			  args->buffer_count);
1987 		return -ENOMEM;
1988 	}
1989 	ret = copy_from_user(exec2_list,
1990 			     u64_to_user_ptr(args->buffers_ptr),
1991 			     sizeof(*exec2_list) * args->buffer_count);
1992 	if (ret != 0) {
1993 		DRM_DEBUG("copy %d exec entries failed %d\n",
1994 			  args->buffer_count, ret);
1995 		drm_free_large(exec2_list);
1996 		return -EFAULT;
1997 	}
1998 
1999 	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
2000 	if (!ret) {
2001 		/* Copy the new buffer offsets back to the user's exec list. */
2002 		struct drm_i915_gem_exec_object2 __user *user_exec_list =
2003 				   u64_to_user_ptr(args->buffers_ptr);
2004 		int i;
2005 
2006 		for (i = 0; i < args->buffer_count; i++) {
2007 			exec2_list[i].offset =
2008 				gen8_canonical_addr(exec2_list[i].offset);
2009 			ret = __copy_to_user(&user_exec_list[i].offset,
2010 					     &exec2_list[i].offset,
2011 					     sizeof(user_exec_list[i].offset));
2012 			if (ret) {
2013 				ret = -EFAULT;
2014 				DRM_DEBUG("failed to copy %d exec entries "
2015 					  "back to user\n",
2016 					  args->buffer_count);
2017 				break;
2018 			}
2019 		}
2020 	}
2021 
2022 	drm_free_large(exec2_list);
2023 	return ret;
2024 }
2025