/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/pagemap.h>

#define  __EXEC_OBJECT_HAS_PIN (1<<31)
#define  __EXEC_OBJECT_HAS_FENCE (1<<30)

struct eb_vmas {
	struct list_head vmas;
	int and;
	union {
		struct i915_vma *lut[0];
		struct hlist_head buckets[0];
	};
};

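/*
 * Allocate the execbuffer VMA lookup structure. With I915_EXEC_HANDLE_LUT
 * the handles supplied by userspace are indices into the exec list, so a
 * flat pointer array is tried first; otherwise (or if that allocation
 * fails) a hash table is used, sized down from half a page of buckets
 * towards twice the buffer count. eb->and holds the bucket mask, or the
 * negated buffer count when the flat LUT is in use.
 */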
static struct eb_vmas *
eb_create(struct drm_i915_gem_execbuffer2 *args)
{
	struct eb_vmas *eb = NULL;

	if (args->flags & I915_EXEC_HANDLE_LUT) {
		unsigned size = args->buffer_count;
		size *= sizeof(struct i915_vma *);
		size += sizeof(struct eb_vmas);
		eb = kmalloc(size, M_DRM, M_NOWAIT);
	}

	if (eb == NULL) {
		unsigned size = args->buffer_count;
		unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
		while (count > 2*size)
			count >>= 1;
		eb = kzalloc(count*sizeof(struct hlist_head) +
			     sizeof(struct eb_vmas),
			     GFP_TEMPORARY);
		if (eb == NULL)
			return eb;

		eb->and = count - 1;
	} else
		eb->and = -args->buffer_count;

	INIT_LIST_HEAD(&eb->vmas);
	return eb;
}

static void
eb_reset(struct eb_vmas *eb)
{
	if (eb->and >= 0)
		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
}

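/*
 * Resolve every object handle in the exec list to a GEM object, take a
 * reference on it, then look up (or create) its VMA in the target address
 * space and add the VMA to the eb lookup structure. A handle appearing
 * more than once in the list is rejected with -EINVAL.
 */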
static int
eb_lookup_vmas(struct eb_vmas *eb,
	       struct drm_i915_gem_exec_object2 *exec,
	       const struct drm_i915_gem_execbuffer2 *args,
	       struct i915_address_space *vm,
	       struct drm_file *file)
{
	struct drm_i915_gem_object *obj;
	struct list_head objects;
	int i, ret;

	INIT_LIST_HEAD(&objects);
	lockmgr(&file->table_lock, LK_EXCLUSIVE);
	/* Grab a reference to the object and release the lock so we can lookup
	 * or create the VMA without using GFP_ATOMIC */
	for (i = 0; i < args->buffer_count; i++) {
		obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
		if (obj == NULL) {
			lockmgr(&file->table_lock, LK_RELEASE);
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				   exec[i].handle, i);
			ret = -ENOENT;
			goto err;
		}

		if (!list_empty(&obj->obj_exec_link)) {
			lockmgr(&file->table_lock, LK_RELEASE);
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
				   obj, exec[i].handle, i);
			ret = -EINVAL;
			goto err;
		}

		drm_gem_object_reference(&obj->base);
		list_add_tail(&obj->obj_exec_link, &objects);
	}
	lockmgr(&file->table_lock, LK_RELEASE);

	i = 0;
	while (!list_empty(&objects)) {
		struct i915_vma *vma;

		obj = list_first_entry(&objects,
				       struct drm_i915_gem_object,
				       obj_exec_link);

		/*
		 * NOTE: We can leak any vmas created here when something fails
		 * later on. But that's no issue since vma_unbind can deal with
		 * vmas which are not actually bound. And since only
		 * lookup_or_create exists as an interface to get at the vma
		 * from the (obj, vm) we don't run the risk of creating
		 * duplicated vmas for the same vm.
		 */
		vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
		if (IS_ERR(vma)) {
			DRM_DEBUG("Failed to lookup VMA\n");
			ret = PTR_ERR(vma);
			goto err;
		}

		/* Transfer ownership from the objects list to the vmas list. */
		list_add_tail(&vma->exec_list, &eb->vmas);
		list_del_init(&obj->obj_exec_link);

		vma->exec_entry = &exec[i];
		if (eb->and < 0) {
			eb->lut[i] = vma;
		} else {
			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
			vma->exec_handle = handle;
			hlist_add_head(&vma->exec_node,
				       &eb->buckets[handle & eb->and]);
		}
		++i;
	}

	return 0;

err:
	while (!list_empty(&objects)) {
		obj = list_first_entry(&objects,
				       struct drm_i915_gem_object,
				       obj_exec_link);
		list_del_init(&obj->obj_exec_link);
		drm_gem_object_unreference(&obj->base);
	}
	/*
	 * Objects already transferred to the vmas list will be unreferenced by
	 * eb_destroy.
	 */

	return ret;
}

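/*
 * Look up a VMA previously registered by eb_lookup_vmas: a direct index
 * into the flat LUT when eb->and is negative, otherwise a walk of the
 * matching hash bucket.
 */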
static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
{
	if (eb->and < 0) {
		if (handle >= -eb->and)
			return NULL;
		return eb->lut[handle];
	} else {
		struct hlist_head *head;
		struct hlist_node *node;

		head = &eb->buckets[handle & eb->and];
		hlist_for_each(node, head) {
			struct i915_vma *vma;

			vma = hlist_entry(node, struct i915_vma, exec_node);
			if (vma->exec_handle == handle)
				return vma;
		}
		return NULL;
	}
}

static void
i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
{
	struct drm_i915_gem_exec_object2 *entry;
	struct drm_i915_gem_object *obj = vma->obj;

	if (!drm_mm_node_allocated(&vma->node))
		return;

	entry = vma->exec_entry;

	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
		i915_gem_object_unpin_fence(obj);

	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
		i915_gem_object_unpin(obj);

	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
}

static void eb_destroy(struct eb_vmas *eb)
{
	while (!list_empty(&eb->vmas)) {
		struct i915_vma *vma;

		vma = list_first_entry(&eb->vmas,
				       struct i915_vma,
				       exec_list);
		list_del_init(&vma->exec_list);
		i915_gem_execbuffer_unreserve_vma(vma);
		drm_gem_object_unreference(&vma->obj->base);
	}
	kfree(eb);
}

static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
	return (HAS_LLC(obj->base.dev) ||
		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
		!obj->map_and_fenceable ||
		obj->cache_level != I915_CACHE_NONE);
}

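/*
 * Apply a relocation through a CPU mapping: move the object to the CPU
 * write domain, kmap the page containing the relocation and write the
 * value. On gen8+ the relocation is 64 bits wide, so the upper dword may
 * fall on the following page and needs a second kmap.
 */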
static int
relocate_entry_cpu(struct drm_i915_gem_object *obj,
		   struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_device *dev = obj->base.dev;
	uint32_t page_offset = offset_in_page(reloc->offset);
	char *vaddr;
	int ret;

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret)
		return ret;

	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
				reloc->offset >> PAGE_SHIFT));
	*(uint32_t *)(vaddr + page_offset) = reloc->delta;

	if (INTEL_INFO(dev)->gen >= 8) {
		page_offset = offset_in_page(page_offset + sizeof(uint32_t));

		if (page_offset == 0) {
			kunmap_atomic(vaddr);
			vaddr = kmap_atomic(i915_gem_object_get_page(obj,
			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
		}

		*(uint32_t *)(vaddr + page_offset) = 0;
	}

	kunmap_atomic(vaddr);

	return 0;
}

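/*
 * Apply a relocation through the GTT: move the object to the GTT write
 * domain, drop any fence, and write the value through a write-combining
 * mapping of the aperture page that contains the relocation, remapping
 * for the upper dword on gen8+ when it crosses a page boundary.
 */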
static int
relocate_entry_gtt(struct drm_i915_gem_object *obj,
		   struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_device *dev = obj->base.dev;
	uint32_t __iomem *reloc_entry;
	void __iomem *reloc_page;
	int ret;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		return ret;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		return ret;

	/* Map the page containing the relocation we're going to perform.  */
	reloc->offset += i915_gem_obj_ggtt_offset(obj);
	reloc_page = pmap_mapdev_attr(dev->agp->base + (reloc->offset &
		    ~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING);
	reloc_entry = (uint32_t __iomem *)
		((char *)reloc_page + offset_in_page(reloc->offset));
	iowrite32(reloc->delta, reloc_entry);

	if (INTEL_INFO(dev)->gen >= 8) {
		reloc_entry += 1;

		if (offset_in_page(reloc->offset + sizeof(uint32_t)) == 0) {
			pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);
			reloc_page = pmap_mapdev_attr(
					dev->agp->base +
					reloc->offset + sizeof(uint32_t),
					PAGE_SIZE, PAT_WRITE_COMBINING);
			reloc_entry = reloc_page;
		}

		iowrite32(0, reloc_entry);
	}

	pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);

	return 0;
}

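/*
 * Process a single relocation entry: look up the target VMA, validate the
 * requested read/write domains and the relocation offset, and, unless the
 * presumed offset already matches the target's actual placement, write the
 * new value through either the CPU or the GTT path and update the entry's
 * presumed_offset.
 */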
static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
				   struct eb_vmas *eb,
				   struct drm_i915_gem_relocation_entry *reloc,
				   struct i915_address_space *vm)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_object *target_obj;
	struct drm_i915_gem_object *target_i915_obj;
	struct i915_vma *target_vma;
	uint32_t target_offset;
	int ret;

	/* we already hold a reference to all valid objects */
	target_vma = eb_get_vma(eb, reloc->target_handle);
	if (unlikely(target_vma == NULL))
		return -ENOENT;
	target_i915_obj = target_vma->obj;
	target_obj = &target_vma->obj->base;

	target_offset = target_vma->node.start;

	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
	 * pipe_control writes because the gpu doesn't properly redirect them
	 * through the ppgtt for non-secure batchbuffers. */
	if (unlikely(IS_GEN6(dev) &&
	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
	    !target_i915_obj->has_global_gtt_mapping)) {
		i915_gem_gtt_bind_object(target_i915_obj,
					 target_i915_obj->cache_level);
	}

	/* Validate that the target is in a valid r/w GPU domain */
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
		DRM_DEBUG("reloc with multiple write domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return -EINVAL;
	}
	if (unlikely((reloc->write_domain | reloc->read_domains)
		     & ~I915_GEM_GPU_DOMAINS)) {
		DRM_DEBUG("reloc with read/write non-GPU domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return -EINVAL;
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;

	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
		return 0;

	/* Check that the relocation address is valid... */
	if (unlikely(reloc->offset >
		obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
		DRM_DEBUG("Relocation beyond object bounds: "
			  "obj %p target %d offset %d size %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  (int) obj->base.size);
		return -EINVAL;
	}
	if (unlikely(reloc->offset & 3)) {
		DRM_DEBUG("Relocation not 4-byte aligned: "
			  "obj %p target %d offset %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset);
		return -EINVAL;
	}

	/* We can't wait for rendering with pagefaults disabled */
	if (obj->active && (curthread->td_flags & TDF_NOFAULT))
		return -EFAULT;

	reloc->delta += target_offset;
	if (use_cpu_reloc(obj))
		ret = relocate_entry_cpu(obj, reloc);
	else
		ret = relocate_entry_gtt(obj, reloc);

	if (ret)
		return ret;

	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;

	return 0;
}

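/*
 * Fast-path relocation for one VMA: copy the user's relocation entries onto
 * the stack in chunks with the non-faulting copy helpers, apply each one,
 * and write any changed presumed offset back to userspace. Called with
 * pagefaults disabled, so a fault here surfaces as -EFAULT and sends the
 * caller to the slow path.
 */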
static int
i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
				 struct eb_vmas *eb)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
	struct drm_i915_gem_relocation_entry __user *user_relocs;
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	int remain, ret;

	user_relocs = to_user_ptr(entry->relocs_ptr);

	remain = entry->relocation_count;
	while (remain) {
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
		int count = remain;
		if (count > ARRAY_SIZE(stack_reloc))
			count = ARRAY_SIZE(stack_reloc);
		remain -= count;

		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
			return -EFAULT;

		do {
			u64 offset = r->presumed_offset;

			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r,
								 vma->vm);
			if (ret)
				return ret;

			if (r->presumed_offset != offset &&
			    __copy_to_user_inatomic(&user_relocs->presumed_offset,
						    &r->presumed_offset,
						    sizeof(r->presumed_offset))) {
				return -EFAULT;
			}

			user_relocs++;
			r++;
		} while (--count);
	}

	return 0;
#undef N_RELOC
}

static int
i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
				      struct eb_vmas *eb,
				      struct drm_i915_gem_relocation_entry *relocs)
{
	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	int i, ret;

	for (i = 0; i < entry->relocation_count; i++) {
		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i],
							 vma->vm);
		if (ret)
			return ret;
	}

	return 0;
}

static int
i915_gem_execbuffer_relocate(struct eb_vmas *eb)
{
	struct i915_vma *vma;
	int ret = 0;

	/* This is the fast path and we cannot handle a pagefault whilst
	 * holding the struct mutex lest the user pass in the relocations
	 * contained within a mmaped bo. For in such a case, the page
	 * fault handler would call i915_gem_fault() and we would try to
	 * acquire the struct mutex again. Obviously this is bad and so
	 * lockdep complains vehemently.
	 */
	pagefault_disable();
	list_for_each_entry(vma, &eb->vmas, exec_list) {
		ret = i915_gem_execbuffer_relocate_vma(vma, eb);
		if (ret)
			break;
	}
	pagefault_enable();

	return ret;
}

static int
need_reloc_mappable(struct i915_vma *vma)
{
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
		i915_is_ggtt(vma->vm);
}

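/*
 * Pin one VMA for execution: pin it into its address space (mappable if a
 * fence or a GTT relocation is required), acquire and pin a fence register
 * when requested, make sure the aliasing PPGTT and (for
 * EXEC_OBJECT_NEEDS_GTT) global GTT bindings exist, and flag *need_reloc
 * if the object has moved from its recorded offset.
 */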
static int
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
				struct intel_ring_buffer *ring,
				bool *need_reloc)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	bool need_fence, need_mappable;
	struct drm_i915_gem_object *obj = vma->obj;
	int ret;

	need_fence =
		has_fenced_gpu_access &&
		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
		obj->tiling_mode != I915_TILING_NONE;
	need_mappable = need_fence || need_reloc_mappable(vma);

	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, need_mappable,
				  false);
	if (ret)
		return ret;

	entry->flags |= __EXEC_OBJECT_HAS_PIN;

	if (has_fenced_gpu_access) {
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
			ret = i915_gem_object_get_fence(obj);
			if (ret)
				return ret;

			if (i915_gem_object_pin_fence(obj))
				entry->flags |= __EXEC_OBJECT_HAS_FENCE;

			obj->pending_fenced_gpu_access = true;
		}
	}

	/* Ensure ppgtt mapping exists if needed */
	if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
		i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
				       obj, obj->cache_level);

		obj->has_aliasing_ppgtt_mapping = 1;
	}

	if (entry->offset != vma->node.start) {
		entry->offset = vma->node.start;
		*need_reloc = true;
	}

	if (entry->flags & EXEC_OBJECT_WRITE) {
		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
	}

	if (entry->flags & EXEC_OBJECT_NEEDS_GTT &&
	    !obj->has_global_gtt_mapping)
		i915_gem_gtt_bind_object(obj, obj->cache_level);

	return 0;
}

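/*
 * Reserve address space for every VMA on the exec list. Objects needing a
 * mappable or fenced placement are sorted to the front, then everything is
 * pinned; on the first -ENOSPC the pins are dropped, the whole address
 * space is evicted, and the list is retried once.
 */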
static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
			    struct list_head *vmas,
			    bool *need_relocs)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	struct i915_address_space *vm;
	struct list_head ordered_vmas;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	int retry;

	if (list_empty(vmas))
		return 0;

	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;

	INIT_LIST_HEAD(&ordered_vmas);
	while (!list_empty(vmas)) {
		struct drm_i915_gem_exec_object2 *entry;
		bool need_fence, need_mappable;

		vma = list_first_entry(vmas, struct i915_vma, exec_list);
		obj = vma->obj;
		entry = vma->exec_entry;

		need_fence =
			has_fenced_gpu_access &&
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
			obj->tiling_mode != I915_TILING_NONE;
		need_mappable = need_fence || need_reloc_mappable(vma);

		if (need_mappable)
			list_move(&vma->exec_list, &ordered_vmas);
		else
			list_move_tail(&vma->exec_list, &ordered_vmas);

		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
		obj->base.pending_write_domain = 0;
		obj->pending_fenced_gpu_access = false;
	}
	list_splice(&ordered_vmas, vmas);

	/* Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
	 * This avoids unnecessary unbinding of later objects in order to make
	 * room for the earlier objects *unless* we need to defragment.
	 */
	retry = 0;
	do {
		int ret = 0;

		/* Unbind any ill-fitting objects or pin. */
		list_for_each_entry(vma, vmas, exec_list) {
			struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
			bool need_fence, need_mappable;

			obj = vma->obj;

			if (!drm_mm_node_allocated(&vma->node))
				continue;

			need_fence =
				has_fenced_gpu_access &&
				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
				obj->tiling_mode != I915_TILING_NONE;
			need_mappable = need_fence || need_reloc_mappable(vma);

			WARN_ON((need_mappable || need_fence) &&
			       !i915_is_ggtt(vma->vm));

			if ((entry->alignment &&
			     vma->node.start & (entry->alignment - 1)) ||
			    (need_mappable && !obj->map_and_fenceable))
				ret = i915_vma_unbind(vma);
			else
				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
			if (ret)
				goto err;
		}

		/* Bind fresh objects */
		list_for_each_entry(vma, vmas, exec_list) {
			if (drm_mm_node_allocated(&vma->node))
				continue;

			ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
			if (ret)
				goto err;
		}

err:
		if (ret != -ENOSPC || retry++)
			return ret;

		/* Decrement pin count for bound objects */
		list_for_each_entry(vma, vmas, exec_list)
			i915_gem_execbuffer_unreserve_vma(vma);

		ret = i915_gem_evict_vm(vm, true);
		if (ret)
			return ret;
	} while (1);
}

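/*
 * Slow-path relocation, entered when the fast path faulted on user memory.
 * Drop struct_mutex, copy every relocation array into kernel memory with
 * pagefaults enabled (invalidating the user's presumed offsets on the way),
 * then retake the lock, re-look-up and re-reserve the VMAs, and apply the
 * relocations from the kernel copy.
 */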
static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
				  struct drm_i915_gem_execbuffer2 *args,
				  struct drm_file *file,
				  struct intel_ring_buffer *ring,
				  struct eb_vmas *eb,
				  struct drm_i915_gem_exec_object2 *exec)
{
	struct drm_i915_gem_relocation_entry *reloc;
	struct i915_address_space *vm;
	struct i915_vma *vma;
	bool need_relocs;
	int *reloc_offset;
	int i, total, ret;
	unsigned count = args->buffer_count;

	if (WARN_ON(list_empty(&eb->vmas)))
		return 0;

	vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;

	/* We may process another execbuffer during the unlock... */
	while (!list_empty(&eb->vmas)) {
		vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
		list_del_init(&vma->exec_list);
		i915_gem_execbuffer_unreserve_vma(vma);
		drm_gem_object_unreference(&vma->obj->base);
	}

	mutex_unlock(&dev->struct_mutex);

	total = 0;
	for (i = 0; i < count; i++)
		total += exec[i].relocation_count;

	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
	reloc = drm_malloc_ab(total, sizeof(*reloc));
	if (reloc == NULL || reloc_offset == NULL) {
		drm_free_large(reloc);
		drm_free_large(reloc_offset);
		mutex_lock(&dev->struct_mutex);
		return -ENOMEM;
	}

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry __user *user_relocs;
		u64 invalid_offset = (u64)-1;
		int j;

		user_relocs = to_user_ptr(exec[i].relocs_ptr);

		if (copy_from_user(reloc+total, user_relocs,
				   exec[i].relocation_count * sizeof(*reloc))) {
			ret = -EFAULT;
			mutex_lock(&dev->struct_mutex);
			goto err;
		}

		/* As we do not update the known relocation offsets after
		 * relocating (due to the complexities in lock handling),
		 * we need to mark them as invalid now so that we force the
		 * relocation processing next time. Just in case the target
		 * object is evicted and then rebound into its old
		 * presumed_offset before the next execbuffer - if that
		 * happened we would make the mistake of assuming that the
		 * relocations were valid.
		 */
		for (j = 0; j < exec[i].relocation_count; j++) {
			if (copy_to_user(&user_relocs[j].presumed_offset,
					 &invalid_offset,
					 sizeof(invalid_offset))) {
				ret = -EFAULT;
				mutex_lock(&dev->struct_mutex);
				goto err;
			}
		}

		reloc_offset[i] = total;
		total += exec[i].relocation_count;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		mutex_lock(&dev->struct_mutex);
		goto err;
	}

	/* reacquire the objects */
	eb_reset(eb);
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
	if (ret)
		goto err;

	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
	if (ret)
		goto err;

	list_for_each_entry(vma, &eb->vmas, exec_list) {
		int offset = vma->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
							    reloc + reloc_offset[offset]);
		if (ret)
			goto err;
	}

	/* Leave the user relocations as they are; this is the painfully slow
	 * path, and we want to avoid the complication of dropping the lock
	 * whilst having buffers reserved in the aperture and so causing
	 * spurious ENOSPC for random operations.
	 */

err:
	drm_free_large(reloc);
	drm_free_large(reloc_offset);
	return ret;
}

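/*
 * Prepare every object on the exec list for GPU access: synchronise with
 * outstanding rendering on other rings, flush CPU caches (and the chipset
 * if needed), and finally invalidate the GPU caches for the new batch.
 */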
static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
				struct list_head *vmas)
{
	struct i915_vma *vma;
	uint32_t flush_domains = 0;
	bool flush_chipset = false;
	int ret;

	list_for_each_entry(vma, vmas, exec_list) {
		struct drm_i915_gem_object *obj = vma->obj;
		ret = i915_gem_object_sync(obj, ring);
		if (ret)
			return ret;

		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
			flush_chipset |= i915_gem_clflush_object(obj, false);

		flush_domains |= obj->base.write_domain;
	}

	if (flush_chipset)
		i915_gem_chipset_flush(ring->dev);

	if (flush_domains & I915_GEM_DOMAIN_GTT)
		wmb();

	/* Unconditionally invalidate gpu caches and ensure that we do flush
	 * any residual writes from the previous batch.
	 */
	return intel_ring_invalidate_all_caches(ring);
}

static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
		return false;

	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
}

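/*
 * Sanity-check the user's exec list before taking any locks: reject unknown
 * per-object flags, guard against the total relocation count overflowing
 * the single allocation made by the slow path, and prefault the relocation
 * arrays so the fast path is unlikely to hit a page fault.
 */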
static int
validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
		   int count)
{
	int i;
	unsigned relocs_total = 0;
	unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);

	for (i = 0; i < count; i++) {
		char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
		int length; /* limited by fault_in_pages_readable() */

		if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS)
			return -EINVAL;

		/* First check for malicious input causing overflow in
		 * the worst case where we need to allocate the entire
		 * relocation tree as a single array.
		 */
		if (exec[i].relocation_count > relocs_max - relocs_total)
			return -EINVAL;
		relocs_total += exec[i].relocation_count;

		length = exec[i].relocation_count *
			sizeof(struct drm_i915_gem_relocation_entry);
		/*
		 * We must check that the entire relocation array is safe
		 * to read, but since we may need to update the presumed
		 * offsets during execution, check for full write access.
		 */
#if 0
		if (!access_ok(VERIFY_WRITE, ptr, length))
			return -EFAULT;
#endif

		if (likely(!i915_prefault_disable)) {
			if (fault_in_multipages_readable(ptr, length))
				return -EFAULT;
		}
	}

	return 0;
}

static int
i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
			  const u32 ctx_id)
{
	struct i915_ctx_hang_stats *hs;

	hs = i915_gem_context_get_hang_stats(dev, file, ctx_id);
	if (IS_ERR(hs))
		return PTR_ERR(hs);

	if (hs->banned) {
		DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
		return -EIO;
	}

	return 0;
}

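/*
 * After a successful dispatch, commit the pending read/write domains on
 * every object and move its VMA onto the ring's active list, recording the
 * write seqno (and flagging framebuffer activity for pinned scanouts) on
 * objects the batch writes.
 */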
static void
i915_gem_execbuffer_move_to_active(struct list_head *vmas,
				   struct intel_ring_buffer *ring)
{
	struct i915_vma *vma;

	list_for_each_entry(vma, vmas, exec_list) {
		struct drm_i915_gem_object *obj = vma->obj;
		u32 old_read = obj->base.read_domains;
		u32 old_write = obj->base.write_domain;

		obj->base.write_domain = obj->base.pending_write_domain;
		if (obj->base.write_domain == 0)
			obj->base.pending_read_domains |= obj->base.read_domains;
		obj->base.read_domains = obj->base.pending_read_domains;
		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

		i915_vma_move_to_active(vma, ring);
		if (obj->base.write_domain) {
			obj->dirty = 1;
			obj->last_write_seqno = intel_ring_get_seqno(ring);
			if (obj->pin_count) /* check for potential scanout */
				intel_mark_fb_busy(obj, ring);
		}

		trace_i915_gem_object_change_domain(obj, old_read, old_write);
	}
}

static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
				    struct drm_file *file,
				    struct intel_ring_buffer *ring,
				    struct drm_i915_gem_object *obj)
{
	/* Unconditionally force add_request to emit a full flush. */
	ring->gpu_caches_dirty = true;

	/* Add a breadcrumb for the completion of the batch buffer */
	(void)__i915_add_request(ring, file, obj, NULL);
}

static int
i915_reset_gen7_sol_offsets(struct drm_device *dev,
			    struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret, i;

	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS])
		return 0;

	ret = intel_ring_begin(ring, 4 * 3);
	if (ret)
		return ret;

	for (i = 0; i < 4; i++) {
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
		intel_ring_emit(ring, 0);
	}

	intel_ring_advance(ring);

	return 0;
}

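/*
 * Common execbuffer path shared by the legacy and execbuffer2 ioctls:
 * validate the arguments, pick and check the ring, look up and reserve all
 * objects, perform relocations, flush caches, switch context and dispatch
 * the batch, then retire the request.
 */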
static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec,
		       struct i915_address_space *vm)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct eb_vmas *eb;
	struct drm_i915_gem_object *batch_obj;
	struct drm_clip_rect *cliprects = NULL;
	struct intel_ring_buffer *ring;
	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
	u32 exec_start, exec_len;
	u32 mask, flags;
	int ret, mode, i;
	bool need_relocs;

	if (!i915_gem_check_execbuffer(args))
		return -EINVAL;

	ret = validate_exec_list(exec, args->buffer_count);
	if (ret)
		return ret;

	flags = 0;
	if (args->flags & I915_EXEC_SECURE)
		flags |= I915_DISPATCH_SECURE;
	if (args->flags & I915_EXEC_IS_PINNED)
		flags |= I915_DISPATCH_PINNED;

	switch (args->flags & I915_EXEC_RING_MASK) {
	case I915_EXEC_DEFAULT:
	case I915_EXEC_RENDER:
		ring = &dev_priv->ring[RCS];
		break;
	case I915_EXEC_BSD:
		ring = &dev_priv->ring[VCS];
		if (ctx_id != DEFAULT_CONTEXT_ID) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			return -EPERM;
		}
		break;
	case I915_EXEC_BLT:
		ring = &dev_priv->ring[BCS];
		if (ctx_id != DEFAULT_CONTEXT_ID) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			return -EPERM;
		}
		break;
	case I915_EXEC_VEBOX:
		ring = &dev_priv->ring[VECS];
		if (ctx_id != DEFAULT_CONTEXT_ID) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			return -EPERM;
		}
		break;

	default:
		DRM_DEBUG("execbuf with unknown ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
	}
	if (!intel_ring_initialized(ring)) {
		DRM_DEBUG("execbuf with invalid ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
	}

	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
	mask = I915_EXEC_CONSTANTS_MASK;
	switch (mode) {
	case I915_EXEC_CONSTANTS_REL_GENERAL:
	case I915_EXEC_CONSTANTS_ABSOLUTE:
	case I915_EXEC_CONSTANTS_REL_SURFACE:
		if (ring == &dev_priv->ring[RCS] &&
		    mode != dev_priv->relative_constants_mode) {
			if (INTEL_INFO(dev)->gen < 4)
				return -EINVAL;

			if (INTEL_INFO(dev)->gen > 5 &&
			    mode == I915_EXEC_CONSTANTS_REL_SURFACE)
				return -EINVAL;

			/* The HW changed the meaning of this bit on gen6 */
			if (INTEL_INFO(dev)->gen >= 6)
				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
		return -EINVAL;
	}

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	if (args->num_cliprects != 0) {
		if (ring != &dev_priv->ring[RCS]) {
			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
			return -EINVAL;
		}

		if (INTEL_INFO(dev)->gen >= 5) {
			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
			return -EINVAL;
		}

		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
			DRM_DEBUG("execbuf with %u cliprects\n",
				  args->num_cliprects);
			return -EINVAL;
		}

		cliprects = kcalloc(args->num_cliprects,
				    sizeof(*cliprects),
				    GFP_KERNEL);
		if (cliprects == NULL) {
			ret = -ENOMEM;
			goto pre_mutex_err;
		}

		if (copy_from_user(cliprects,
				   to_user_ptr(args->cliprects_ptr),
				   sizeof(*cliprects)*args->num_cliprects)) {
			ret = -EFAULT;
			goto pre_mutex_err;
		}
	}

	intel_runtime_pm_get(dev_priv);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_mutex_err;

	if (dev_priv->ums.mm_suspended) {
		mutex_unlock(&dev->struct_mutex);
		ret = -EBUSY;
		goto pre_mutex_err;
	}

	ret = i915_gem_validate_context(dev, file, ctx_id);
	if (ret) {
		mutex_unlock(&dev->struct_mutex);
		goto pre_mutex_err;
	}

	eb = eb_create(args);
	if (eb == NULL) {
		mutex_unlock(&dev->struct_mutex);
		ret = -ENOMEM;
		goto pre_mutex_err;
	}

	/* Look up object handles */
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
	if (ret)
		goto err;

	/* take note of the batch buffer before we might reorder the lists */
	batch_obj = list_entry(eb->vmas.prev, struct i915_vma, exec_list)->obj;

	/* Move the objects en-masse into the GTT, evicting if necessary. */
	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
	if (need_relocs)
		ret = i915_gem_execbuffer_relocate(eb);
	if (ret) {
		if (ret == -EFAULT) {
			ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
								eb, exec);
			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	if (batch_obj->base.pending_write_domain) {
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
	 * batch" bit. Hence we need to pin secure batches into the global gtt.
	 * hsw should have this fixed, but bdw mucks it up again. */
	if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
		i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);

	ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas);
	if (ret)
		goto err;

	ret = i915_switch_context(ring, file, ctx_id);
	if (ret)
		goto err;

	if (ring == &dev_priv->ring[RCS] &&
	    mode != dev_priv->relative_constants_mode) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			goto err;

		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, INSTPM);
		intel_ring_emit(ring, mask << 16 | mode);
		intel_ring_advance(ring);

		dev_priv->relative_constants_mode = mode;
	}

	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
		ret = i915_reset_gen7_sol_offsets(dev, ring);
		if (ret)
			goto err;
	}

	exec_start = i915_gem_obj_offset(batch_obj, vm) +
		args->batch_start_offset;
	exec_len = args->batch_len;
	if (cliprects) {
		for (i = 0; i < args->num_cliprects; i++) {
			ret = i915_emit_box(dev, &cliprects[i],
					    args->DR1, args->DR4);
			if (ret)
				goto err;

			ret = ring->dispatch_execbuffer(ring,
							exec_start, exec_len,
							flags);
			if (ret)
				goto err;
		}
	} else {
		ret = ring->dispatch_execbuffer(ring,
						exec_start, exec_len,
						flags);
		if (ret)
			goto err;
	}

	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);

	i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);

err:
	eb_destroy(eb);

	mutex_unlock(&dev->struct_mutex);

pre_mutex_err:
	kfree(cliprects);

	/* intel_gpu_busy should also get a ref, so it will free when the device
	 * is really idle. */
	intel_runtime_pm_put(dev_priv);
	return ret;
}

1263 
1264 /*
1265  * Legacy execbuffer just creates an exec2 list from the original exec object
1266  * list array and passes it to the real function.
1267  */
1268 int
1269 i915_gem_execbuffer(struct drm_device *dev, void *data,
1270 		    struct drm_file *file)
1271 {
1272 	struct drm_i915_private *dev_priv = dev->dev_private;
1273 	struct drm_i915_gem_execbuffer *args = data;
1274 	struct drm_i915_gem_execbuffer2 exec2;
1275 	struct drm_i915_gem_exec_object *exec_list = NULL;
1276 	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1277 	int ret, i;
1278 
1279 	if (args->buffer_count < 1) {
1280 		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1281 		return -EINVAL;
1282 	}
1283 
1284 	/* Copy in the exec list from userland */
1285 	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1286 	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1287 	if (exec_list == NULL || exec2_list == NULL) {
1288 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1289 			  args->buffer_count);
1290 		drm_free_large(exec_list);
1291 		drm_free_large(exec2_list);
1292 		return -ENOMEM;
1293 	}
1294 	ret = copy_from_user(exec_list,
1295 			     to_user_ptr(args->buffers_ptr),
1296 			     sizeof(*exec_list) * args->buffer_count);
1297 	if (ret != 0) {
1298 		DRM_DEBUG("copy %d exec entries failed %d\n",
1299 			  args->buffer_count, ret);
1300 		drm_free_large(exec_list);
1301 		drm_free_large(exec2_list);
1302 		return -EFAULT;
1303 	}
1304 
1305 	for (i = 0; i < args->buffer_count; i++) {
1306 		exec2_list[i].handle = exec_list[i].handle;
1307 		exec2_list[i].relocation_count = exec_list[i].relocation_count;
1308 		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1309 		exec2_list[i].alignment = exec_list[i].alignment;
1310 		exec2_list[i].offset = exec_list[i].offset;
1311 		if (INTEL_INFO(dev)->gen < 4)
1312 			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1313 		else
1314 			exec2_list[i].flags = 0;
1315 	}
1316 
1317 	exec2.buffers_ptr = args->buffers_ptr;
1318 	exec2.buffer_count = args->buffer_count;
1319 	exec2.batch_start_offset = args->batch_start_offset;
1320 	exec2.batch_len = args->batch_len;
1321 	exec2.DR1 = args->DR1;
1322 	exec2.DR4 = args->DR4;
1323 	exec2.num_cliprects = args->num_cliprects;
1324 	exec2.cliprects_ptr = args->cliprects_ptr;
1325 	exec2.flags = I915_EXEC_RENDER;
1326 	i915_execbuffer2_set_context_id(exec2, 0);
1327 
1328 	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list,
1329 				     &dev_priv->gtt.base);
1330 	if (!ret) {
1331 		/* Copy the new buffer offsets back to the user's exec list. */
1332 		for (i = 0; i < args->buffer_count; i++)
1333 			exec_list[i].offset = exec2_list[i].offset;
1334 		/* ... and back out to userspace */
1335 		ret = copy_to_user(to_user_ptr(args->buffers_ptr),
1336 				   exec_list,
1337 				   sizeof(*exec_list) * args->buffer_count);
1338 		if (ret) {
1339 			ret = -EFAULT;
1340 			DRM_DEBUG("failed to copy %d exec entries "
1341 				  "back to user (%d)\n",
1342 				  args->buffer_count, ret);
1343 		}
1344 	}
1345 
1346 	drm_free_large(exec_list);
1347 	drm_free_large(exec2_list);
1348 	return ret;
1349 }
1350 
1351 int
1352 i915_gem_execbuffer2(struct drm_device *dev, void *data,
1353 		     struct drm_file *file)
1354 {
1355 	struct drm_i915_private *dev_priv = dev->dev_private;
1356 	struct drm_i915_gem_execbuffer2 *args = data;
1357 	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1358 	int ret;
1359 
1360 	if (args->buffer_count < 1 ||
1361 	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1362 		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1363 		return -EINVAL;
1364 	}
1365 
1366 	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
1367 			     M_DRM, M_NOWAIT);
1368 	if (exec2_list == NULL)
1369 		exec2_list = drm_malloc_ab(sizeof(*exec2_list),
1370 					   args->buffer_count);
1371 	if (exec2_list == NULL) {
1372 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1373 			  args->buffer_count);
1374 		return -ENOMEM;
1375 	}
1376 	ret = copy_from_user(exec2_list,
1377 			     to_user_ptr(args->buffers_ptr),
1378 			     sizeof(*exec2_list) * args->buffer_count);
1379 	if (ret != 0) {
1380 		DRM_DEBUG("copy %d exec entries failed %d\n",
1381 			  args->buffer_count, ret);
1382 		drm_free_large(exec2_list);
1383 		return -EFAULT;
1384 	}
1385 
1386 	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list,
1387 				     &dev_priv->gtt.base);
1388 	if (!ret) {
1389 		/* Copy the new buffer offsets back to the user's exec list. */
1390 		ret = copy_to_user(to_user_ptr(args->buffers_ptr),
1391 				   exec2_list,
1392 				   sizeof(*exec2_list) * args->buffer_count);
1393 		if (ret) {
1394 			ret = -EFAULT;
1395 			DRM_DEBUG("failed to copy %d exec entries "
1396 				  "back to user (%d)\n",
1397 				  args->buffer_count, ret);
1398 		}
1399 	}
1400 
1401 	drm_free_large(exec2_list);
1402 	return ret;
1403 }
1404