1 /*
2  * Copyright © 2008,2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Chris Wilson <chris@chris-wilson.co.uk>
26  *
27  */
28 
29 #include <drm/drmP.h>
30 #include <drm/i915_drm.h>
31 #include "i915_drv.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34 #include <linux/pagemap.h>
35 
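/*
 * The following flags are internal bookkeeping kept in the high bits of each
 * exec entry's flags field while the execbuffer is processed, out of the way
 * of the EXEC_OBJECT_* flags that userspace may set. BATCH_OFFSET_BIAS keeps
 * the batch buffer away from the very start of the GTT (see eb_get_batch()).
 */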
36 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
37 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
38 #define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
39 #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
40 
41 #define BATCH_OFFSET_BIAS (256*1024)
42 
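/*
 * Per-execbuffer bookkeeping: 'vmas' lists every vma referenced by the exec
 * list, and the union provides handle->vma lookup either as a direct LUT
 * (when userspace set I915_EXEC_HANDLE_LUT; 'and' then holds -buffer_count)
 * or as a small hash table ('and' is then the bucket mask).
 */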
43 struct eb_vmas {
44 	struct list_head vmas;
45 	int and;
46 	union {
47 		struct i915_vma *lut[0];
48 		struct hlist_head buckets[0];
49 	};
50 };
51 
52 static struct eb_vmas *
53 eb_create(struct drm_i915_gem_execbuffer2 *args)
54 {
55 	struct eb_vmas *eb = NULL;
56 
57 	if (args->flags & I915_EXEC_HANDLE_LUT) {
58 		unsigned size = args->buffer_count;
59 		size *= sizeof(struct i915_vma *);
60 		size += sizeof(struct eb_vmas);
61 		eb = kmalloc(size, M_DRM, M_NOWAIT);
62 	}
63 
64 	if (eb == NULL) {
65 		unsigned size = args->buffer_count;
66 		unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
67 		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
68 		while (count > 2*size)
69 			count >>= 1;
70 		eb = kzalloc(count*sizeof(struct hlist_head) +
71 			     sizeof(struct eb_vmas),
72 			     GFP_TEMPORARY);
73 		if (eb == NULL)
74 			return eb;
75 
76 		eb->and = count - 1;
77 	} else
78 		eb->and = -args->buffer_count;
79 
80 	INIT_LIST_HEAD(&eb->vmas);
81 	return eb;
82 }
83 
84 static void
85 eb_reset(struct eb_vmas *eb)
86 {
87 	if (eb->and >= 0)
88 		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
89 }
90 
91 static int
92 eb_lookup_vmas(struct eb_vmas *eb,
93 	       struct drm_i915_gem_exec_object2 *exec,
94 	       const struct drm_i915_gem_execbuffer2 *args,
95 	       struct i915_address_space *vm,
96 	       struct drm_file *file)
97 {
98 	struct drm_i915_gem_object *obj;
99 	struct list_head objects;
100 	int i, ret;
101 
102 	INIT_LIST_HEAD(&objects);
103 	lockmgr(&file->table_lock, LK_EXCLUSIVE);
104 	/* Grab a reference to the object and release the lock so we can look up
105 	 * or create the VMA without using GFP_ATOMIC. */
106 	for (i = 0; i < args->buffer_count; i++) {
107 		obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
108 		if (obj == NULL) {
109 			lockmgr(&file->table_lock, LK_RELEASE);
110 			DRM_DEBUG("Invalid object handle %d at index %d\n",
111 				   exec[i].handle, i);
112 			ret = -ENOENT;
113 			goto err;
114 		}
115 
116 		if (!list_empty(&obj->obj_exec_link)) {
117 			lockmgr(&file->table_lock, LK_RELEASE);
118 			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
119 				   obj, exec[i].handle, i);
120 			ret = -EINVAL;
121 			goto err;
122 		}
123 
124 		drm_gem_object_reference(&obj->base);
125 		list_add_tail(&obj->obj_exec_link, &objects);
126 	}
127 	lockmgr(&file->table_lock, LK_RELEASE);
128 
129 	i = 0;
130 	while (!list_empty(&objects)) {
131 		struct i915_vma *vma;
132 
133 		obj = list_first_entry(&objects,
134 				       struct drm_i915_gem_object,
135 				       obj_exec_link);
136 
137 		/*
138 		 * NOTE: We can leak any vmas created here when something fails
139 		 * later on. But that's no issue since vma_unbind can deal with
140 		 * vmas which are not actually bound. And since only
141 		 * lookup_or_create exists as an interface to get at the vma
142 		 * from the (obj, vm) pair we don't run the risk of creating
143 		 * duplicated vmas for the same vm.
144 		 */
145 		vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
146 		if (IS_ERR(vma)) {
147 			DRM_DEBUG("Failed to lookup VMA\n");
148 			ret = PTR_ERR(vma);
149 			goto err;
150 		}
151 
152 		/* Transfer ownership from the objects list to the vmas list. */
153 		list_add_tail(&vma->exec_list, &eb->vmas);
154 		list_del_init(&obj->obj_exec_link);
155 
156 		vma->exec_entry = &exec[i];
157 		if (eb->and < 0) {
158 			eb->lut[i] = vma;
159 		} else {
160 			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
161 			vma->exec_handle = handle;
162 			hlist_add_head(&vma->exec_node,
163 				       &eb->buckets[handle & eb->and]);
164 		}
165 		++i;
166 	}
167 
168 	return 0;
169 
170 
171 err:
172 	while (!list_empty(&objects)) {
173 		obj = list_first_entry(&objects,
174 				       struct drm_i915_gem_object,
175 				       obj_exec_link);
176 		list_del_init(&obj->obj_exec_link);
177 		drm_gem_object_unreference(&obj->base);
178 	}
179 	/*
180 	 * Objects already transferred to the vmas list will be unreferenced by
181 	 * eb_destroy.
182 	 */
183 
184 	return ret;
185 }
186 
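/*
 * Resolve an exec-object handle to its vma: index straight into the LUT when
 * one was allocated (eb->and < 0), otherwise walk the matching hash bucket.
 */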
187 static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
188 {
189 	if (eb->and < 0) {
190 		if (handle >= -eb->and)
191 			return NULL;
192 		return eb->lut[handle];
193 	} else {
194 		struct hlist_head *head;
195 		struct hlist_node *node;
196 
197 		head = &eb->buckets[handle & eb->and];
198 		hlist_for_each(node, head) {
199 			struct i915_vma *vma;
200 
201 			vma = hlist_entry(node, struct i915_vma, exec_node);
202 			if (vma->exec_handle == handle)
203 				return vma;
204 		}
205 		return NULL;
206 	}
207 }
208 
209 static void
210 i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
211 {
212 	struct drm_i915_gem_exec_object2 *entry;
213 	struct drm_i915_gem_object *obj = vma->obj;
214 
215 	if (!drm_mm_node_allocated(&vma->node))
216 		return;
217 
218 	entry = vma->exec_entry;
219 
220 	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
221 		i915_gem_object_unpin_fence(obj);
222 
223 	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
224 		vma->pin_count--;
225 
226 	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
227 }
228 
229 static void eb_destroy(struct eb_vmas *eb)
230 {
231 	while (!list_empty(&eb->vmas)) {
232 		struct i915_vma *vma;
233 
234 		vma = list_first_entry(&eb->vmas,
235 				       struct i915_vma,
236 				       exec_list);
237 		list_del_init(&vma->exec_list);
238 		i915_gem_execbuffer_unreserve_vma(vma);
239 		drm_gem_object_unreference(&vma->obj->base);
240 	}
241 	kfree(eb);
242 }
243 
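/*
 * Decide whether a relocation should be written through a CPU mapping rather
 * than through the GTT aperture: preferred on LLC platforms, when the object
 * is already in the CPU write domain, is not mappable and fenceable, or is
 * not uncached.
 */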
244 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
245 {
246 	return (HAS_LLC(obj->base.dev) ||
247 		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
248 		!obj->map_and_fenceable ||
249 		obj->cache_level != I915_CACHE_NONE);
250 }
251 
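/*
 * Perform a single relocation through a kmap of the object's backing page:
 * write the low 32 bits of the relocated address, and on gen8+ also the high
 * 32 bits, remapping if the value straddles a page boundary.
 */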
252 static int
253 relocate_entry_cpu(struct drm_i915_gem_object *obj,
254 		   struct drm_i915_gem_relocation_entry *reloc,
255 		   uint64_t target_offset)
256 {
257 	struct drm_device *dev = obj->base.dev;
258 	uint32_t page_offset = offset_in_page(reloc->offset);
259 	uint64_t delta = reloc->delta + target_offset;
260 	char *vaddr;
261 	int ret;
262 
263 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
264 	if (ret)
265 		return ret;
266 
267 	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
268 				reloc->offset >> PAGE_SHIFT));
269 	*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
270 
271 	if (INTEL_INFO(dev)->gen >= 8) {
272 		page_offset = offset_in_page(page_offset + sizeof(uint32_t));
273 
274 		if (page_offset == 0) {
275 			kunmap_atomic(vaddr);
276 			vaddr = kmap_atomic(i915_gem_object_get_page(obj,
277 			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
278 		}
279 
280 		*(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
281 	}
282 
283 	kunmap_atomic(vaddr);
284 
285 	return 0;
286 }
287 
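/*
 * Perform a single relocation by writing through an atomic WC mapping of the
 * GGTT aperture; used when use_cpu_reloc() says a CPU write is unsuitable.
 */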
288 static int
289 relocate_entry_gtt(struct drm_i915_gem_object *obj,
290 		   struct drm_i915_gem_relocation_entry *reloc,
291 		   uint64_t target_offset)
292 {
293 	struct drm_device *dev = obj->base.dev;
294 	struct drm_i915_private *dev_priv = dev->dev_private;
295 	uint64_t delta = reloc->delta + target_offset;
296 	uint64_t offset;
297 	void __iomem *reloc_page;
298 	int ret;
299 
300 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
301 	if (ret)
302 		return ret;
303 
304 	ret = i915_gem_object_put_fence(obj);
305 	if (ret)
306 		return ret;
307 
308 	/* Map the page containing the relocation we're going to perform.  */
309 	offset = i915_gem_obj_ggtt_offset(obj);
310 	offset += reloc->offset;
311 	reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
312 					      offset & ~PAGE_MASK);
313 	iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
314 
315 	if (INTEL_INFO(dev)->gen >= 8) {
316 		offset += sizeof(uint32_t);
317 
318 		if (offset_in_page(offset) == 0) {
319 			io_mapping_unmap_atomic(reloc_page);
320 			reloc_page =
321 				io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
322 							 offset);
323 		}
324 
325 		iowrite32(upper_32_bits(delta),
326 			  reloc_page + offset_in_page(offset));
327 	}
328 
329 	io_mapping_unmap_atomic(reloc_page);
330 
331 	return 0;
332 }
333 
334 static int
335 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
336 				   struct eb_vmas *eb,
337 				   struct drm_i915_gem_relocation_entry *reloc)
338 {
339 	struct drm_device *dev = obj->base.dev;
340 	struct drm_gem_object *target_obj;
341 	struct drm_i915_gem_object *target_i915_obj;
342 	struct i915_vma *target_vma;
343 	uint64_t target_offset;
344 	int ret;
345 
346 	/* we already hold a reference to all valid objects */
347 	target_vma = eb_get_vma(eb, reloc->target_handle);
348 	if (unlikely(target_vma == NULL))
349 		return -ENOENT;
350 	target_i915_obj = target_vma->obj;
351 	target_obj = &target_vma->obj->base;
352 
353 	target_offset = target_vma->node.start;
354 
355 	/* Sandybridge PPGTT erratum: we need a global gtt mapping for MI and
356 	 * pipe_control writes because the gpu doesn't properly redirect them
357 	 * through the ppgtt for non-secure batchbuffers. */
358 	if (unlikely(IS_GEN6(dev) &&
359 	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
360 	    !target_i915_obj->has_global_gtt_mapping)) {
361 		struct i915_vma *vma =
362 			list_first_entry(&target_i915_obj->vma_list,
363 					 typeof(*vma), vma_link);
364 		vma->bind_vma(vma, target_i915_obj->cache_level, GLOBAL_BIND);
365 	}
366 
367 	/* Validate that the target is in a valid r/w GPU domain */
368 	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
369 		DRM_DEBUG("reloc with multiple write domains: "
370 			  "obj %p target %d offset %d "
371 			  "read %08x write %08x",
372 			  obj, reloc->target_handle,
373 			  (int) reloc->offset,
374 			  reloc->read_domains,
375 			  reloc->write_domain);
376 		return -EINVAL;
377 	}
378 	if (unlikely((reloc->write_domain | reloc->read_domains)
379 		     & ~I915_GEM_GPU_DOMAINS)) {
380 		DRM_DEBUG("reloc with read/write non-GPU domains: "
381 			  "obj %p target %d offset %d "
382 			  "read %08x write %08x",
383 			  obj, reloc->target_handle,
384 			  (int) reloc->offset,
385 			  reloc->read_domains,
386 			  reloc->write_domain);
387 		return -EINVAL;
388 	}
389 
390 	target_obj->pending_read_domains |= reloc->read_domains;
391 	target_obj->pending_write_domain |= reloc->write_domain;
392 
393 	/* If the relocation already has the right value in it, no
394 	 * more work needs to be done.
395 	 */
396 	if (target_offset == reloc->presumed_offset)
397 		return 0;
398 
399 	/* Check that the relocation address is valid... */
400 	if (unlikely(reloc->offset >
401 		obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
402 		DRM_DEBUG("Relocation beyond object bounds: "
403 			  "obj %p target %d offset %d size %d.\n",
404 			  obj, reloc->target_handle,
405 			  (int) reloc->offset,
406 			  (int) obj->base.size);
407 		return -EINVAL;
408 	}
409 	if (unlikely(reloc->offset & 3)) {
410 		DRM_DEBUG("Relocation not 4-byte aligned: "
411 			  "obj %p target %d offset %d.\n",
412 			  obj, reloc->target_handle,
413 			  (int) reloc->offset);
414 		return -EINVAL;
415 	}
416 
417 	/* We can't wait for rendering with pagefaults disabled */
418 	if (obj->active && (curthread->td_flags & TDF_NOFAULT))
419 		return -EFAULT;
420 
421 	if (use_cpu_reloc(obj))
422 		ret = relocate_entry_cpu(obj, reloc, target_offset);
423 	else
424 		ret = relocate_entry_gtt(obj, reloc, target_offset);
425 
426 	if (ret)
427 		return ret;
428 
429 	/* and update the user's relocation entry */
430 	reloc->presumed_offset = target_offset;
431 
432 	return 0;
433 }
434 
435 static int
436 i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
437 				 struct eb_vmas *eb)
438 {
439 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
440 	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
441 	struct drm_i915_gem_relocation_entry __user *user_relocs;
442 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
443 	int remain, ret;
444 
445 	user_relocs = to_user_ptr(entry->relocs_ptr);
446 
447 	remain = entry->relocation_count;
448 	while (remain) {
449 		struct drm_i915_gem_relocation_entry *r = stack_reloc;
450 		int count = remain;
451 		if (count > ARRAY_SIZE(stack_reloc))
452 			count = ARRAY_SIZE(stack_reloc);
453 		remain -= count;
454 
455 		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
456 			return -EFAULT;
457 
458 		do {
459 			u64 offset = r->presumed_offset;
460 
461 			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
462 			if (ret)
463 				return ret;
464 
465 			if (r->presumed_offset != offset &&
466 			    __copy_to_user_inatomic(&user_relocs->presumed_offset,
467 						    &r->presumed_offset,
468 						    sizeof(r->presumed_offset))) {
469 				return -EFAULT;
470 			}
471 
472 			user_relocs++;
473 			r++;
474 		} while (--count);
475 	}
476 
477 	return 0;
478 #undef N_RELOC
479 }
480 
481 static int
482 i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
483 				      struct eb_vmas *eb,
484 				      struct drm_i915_gem_relocation_entry *relocs)
485 {
486 	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
487 	int i, ret;
488 
489 	for (i = 0; i < entry->relocation_count; i++) {
490 		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
491 		if (ret)
492 			return ret;
493 	}
494 
495 	return 0;
496 }
497 
498 static int
499 i915_gem_execbuffer_relocate(struct eb_vmas *eb)
500 {
501 	struct i915_vma *vma;
502 	int ret = 0;
503 
504 	/* This is the fast path and we cannot handle a pagefault whilst
505 	 * holding the struct mutex lest the user pass in the relocations
506 	 * contained within a mmaped bo. In such a case the page
507 	 * fault handler would call i915_gem_fault() and we would try to
508 	 * acquire the struct mutex again. Obviously this is bad and so
509 	 * lockdep complains vehemently.
510 	 */
511 	pagefault_disable();
512 	list_for_each_entry(vma, &eb->vmas, exec_list) {
513 		ret = i915_gem_execbuffer_relocate_vma(vma, eb);
514 		if (ret)
515 			break;
516 	}
517 	pagefault_enable();
518 
519 	return ret;
520 }
521 
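/*
 * Pin one vma for execution: translate the exec entry's flags into pin flags
 * (mappable, global, offset bias), optionally grab and pin a fence register,
 * and note whether the object moved so that relocations get (re)processed.
 */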
522 static int
523 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
524 				struct intel_engine_cs *ring,
525 				bool *need_reloc)
526 {
527 	struct drm_i915_gem_object *obj = vma->obj;
528 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
529 	uint64_t flags;
530 	int ret;
531 
532 	flags = 0;
533 	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
534 		flags |= PIN_MAPPABLE;
535 	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
536 		flags |= PIN_GLOBAL;
537 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
538 		flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
539 
540 	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
541 	if (ret)
542 		return ret;
543 
544 	entry->flags |= __EXEC_OBJECT_HAS_PIN;
545 
546 	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
547 		ret = i915_gem_object_get_fence(obj);
548 		if (ret)
549 			return ret;
550 
551 		if (i915_gem_object_pin_fence(obj))
552 			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
553 	}
554 
555 	if (entry->offset != vma->node.start) {
556 		entry->offset = vma->node.start;
557 		*need_reloc = true;
558 	}
559 
560 	if (entry->flags & EXEC_OBJECT_WRITE) {
561 		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
562 		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
563 	}
564 
565 	return 0;
566 }
567 
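/*
 * Relocations into a GGTT-bound object on non-LLC platforms (with the object
 * outside the CPU write domain) must be performed through the mappable
 * aperture, so such a vma has to be placed in the mappable region.
 */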
568 static bool
569 need_reloc_mappable(struct i915_vma *vma)
570 {
571 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
572 
573 	if (entry->relocation_count == 0)
574 		return false;
575 
576 	if (!i915_is_ggtt(vma->vm))
577 		return false;
578 
579 	/* See also use_cpu_reloc() */
580 	if (HAS_LLC(vma->obj->base.dev))
581 		return false;
582 
583 	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
584 		return false;
585 
586 	return true;
587 }
588 
589 static bool
590 eb_vma_misplaced(struct i915_vma *vma)
591 {
592 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
593 	struct drm_i915_gem_object *obj = vma->obj;
594 
595 	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
596 	       !i915_is_ggtt(vma->vm));
597 
598 	if (entry->alignment &&
599 	    vma->node.start & (entry->alignment - 1))
600 		return true;
601 
602 	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
603 		return true;
604 
605 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
606 	    vma->node.start < BATCH_OFFSET_BIAS)
607 		return true;
608 
609 	return false;
610 }
611 
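/*
 * Reserve every vma on the exec list: vmas that need the mappable aperture
 * (for fencing or relocations) are sorted to the front, then everything is
 * pinned in the phased loop below, evicting the entire VM and retrying once
 * if the first pass runs out of space.
 */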
612 static int
613 i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
614 			    struct list_head *vmas,
615 			    bool *need_relocs)
616 {
617 	struct drm_i915_gem_object *obj;
618 	struct i915_vma *vma;
619 	struct i915_address_space *vm;
620 	struct list_head ordered_vmas;
621 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
622 	int retry;
623 
624 	i915_gem_retire_requests_ring(ring);
625 
626 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
627 
628 	INIT_LIST_HEAD(&ordered_vmas);
629 	while (!list_empty(vmas)) {
630 		struct drm_i915_gem_exec_object2 *entry;
631 		bool need_fence, need_mappable;
632 
633 		vma = list_first_entry(vmas, struct i915_vma, exec_list);
634 		obj = vma->obj;
635 		entry = vma->exec_entry;
636 
637 		if (!has_fenced_gpu_access)
638 			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
639 		need_fence =
640 			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
641 			obj->tiling_mode != I915_TILING_NONE;
642 		need_mappable = need_fence || need_reloc_mappable(vma);
643 
644 		if (need_mappable) {
645 			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
646 			list_move(&vma->exec_list, &ordered_vmas);
647 		} else
648 			list_move_tail(&vma->exec_list, &ordered_vmas);
649 
650 		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
651 		obj->base.pending_write_domain = 0;
652 	}
653 	list_splice(&ordered_vmas, vmas);
654 
655 	/* Attempt to pin all of the buffers into the GTT.
656 	 * This is done in 3 phases:
657 	 *
658 	 * 1a. Unbind all objects that do not match the GTT constraints for
659 	 *     the execbuffer (fenceable, mappable, alignment etc).
660 	 * 1b. Increment pin count for already bound objects.
661 	 * 2.  Bind new objects.
662 	 * 3.  Decrement pin count.
663 	 *
664 	 * This avoids unnecessary unbinding of later objects in order to make
665 	 * room for the earlier objects *unless* we need to defragment.
666 	 */
667 	retry = 0;
668 	do {
669 		int ret = 0;
670 
671 		/* Unbind any ill-fitting objects or pin. */
672 		list_for_each_entry(vma, vmas, exec_list) {
673 			if (!drm_mm_node_allocated(&vma->node))
674 				continue;
675 
676 			if (eb_vma_misplaced(vma))
677 				ret = i915_vma_unbind(vma);
678 			else
679 				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
680 			if (ret)
681 				goto err;
682 		}
683 
684 		/* Bind fresh objects */
685 		list_for_each_entry(vma, vmas, exec_list) {
686 			if (drm_mm_node_allocated(&vma->node))
687 				continue;
688 
689 			ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
690 			if (ret)
691 				goto err;
692 		}
693 
694 err:
695 		if (ret != -ENOSPC || retry++)
696 			return ret;
697 
698 		/* Decrement pin count for bound objects */
699 		list_for_each_entry(vma, vmas, exec_list)
700 			i915_gem_execbuffer_unreserve_vma(vma);
701 
702 		ret = i915_gem_evict_vm(vm, true);
703 		if (ret)
704 			return ret;
705 	} while (1);
706 }
707 
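/*
 * Slow relocation path: drop all reservations and struct_mutex, copy the
 * relocation entries from userspace with pagefaults enabled (invalidating the
 * user's presumed offsets so a later pass cannot trust them), then retake the
 * lock, re-look up and re-reserve the objects and apply the copied relocations.
 */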
708 static int
709 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
710 				  struct drm_i915_gem_execbuffer2 *args,
711 				  struct drm_file *file,
712 				  struct intel_engine_cs *ring,
713 				  struct eb_vmas *eb,
714 				  struct drm_i915_gem_exec_object2 *exec)
715 {
716 	struct drm_i915_gem_relocation_entry *reloc;
717 	struct i915_address_space *vm;
718 	struct i915_vma *vma;
719 	bool need_relocs;
720 	int *reloc_offset;
721 	int i, total, ret;
722 	unsigned count = args->buffer_count;
723 
724 	vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;
725 
726 	/* We may process another execbuffer during the unlock... */
727 	while (!list_empty(&eb->vmas)) {
728 		vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
729 		list_del_init(&vma->exec_list);
730 		i915_gem_execbuffer_unreserve_vma(vma);
731 		drm_gem_object_unreference(&vma->obj->base);
732 	}
733 
734 	mutex_unlock(&dev->struct_mutex);
735 
736 	total = 0;
737 	for (i = 0; i < count; i++)
738 		total += exec[i].relocation_count;
739 
740 	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
741 	reloc = drm_malloc_ab(total, sizeof(*reloc));
742 	if (reloc == NULL || reloc_offset == NULL) {
743 		drm_free_large(reloc);
744 		drm_free_large(reloc_offset);
745 		mutex_lock(&dev->struct_mutex);
746 		return -ENOMEM;
747 	}
748 
749 	total = 0;
750 	for (i = 0; i < count; i++) {
751 		struct drm_i915_gem_relocation_entry __user *user_relocs;
752 		u64 invalid_offset = (u64)-1;
753 		int j;
754 
755 		user_relocs = to_user_ptr(exec[i].relocs_ptr);
756 
757 		if (copy_from_user(reloc+total, user_relocs,
758 				   exec[i].relocation_count * sizeof(*reloc))) {
759 			ret = -EFAULT;
760 			mutex_lock(&dev->struct_mutex);
761 			goto err;
762 		}
763 
764 		/* As we do not update the known relocation offsets after
765 		 * relocating (due to the complexities in lock handling),
766 		 * we need to mark them as invalid now so that we force the
767 		 * relocation processing next time. Just in case the target
768 		 * object is evicted and then rebound into its old
769 		 * presumed_offset before the next execbuffer - if that
770 		 * happened we would make the mistake of assuming that the
771 		 * relocations were valid.
772 		 */
773 		for (j = 0; j < exec[i].relocation_count; j++) {
774 			if (__copy_to_user(&user_relocs[j].presumed_offset,
775 					   &invalid_offset,
776 					   sizeof(invalid_offset))) {
777 				ret = -EFAULT;
778 				mutex_lock(&dev->struct_mutex);
779 				goto err;
780 			}
781 		}
782 
783 		reloc_offset[i] = total;
784 		total += exec[i].relocation_count;
785 	}
786 
787 	ret = i915_mutex_lock_interruptible(dev);
788 	if (ret) {
789 		mutex_lock(&dev->struct_mutex);
790 		goto err;
791 	}
792 
793 	/* reacquire the objects */
794 	eb_reset(eb);
795 	ret = eb_lookup_vmas(eb, exec, args, vm, file);
796 	if (ret)
797 		goto err;
798 
799 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
800 	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
801 	if (ret)
802 		goto err;
803 
804 	list_for_each_entry(vma, &eb->vmas, exec_list) {
805 		int offset = vma->exec_entry - exec;
806 		ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
807 							    reloc + reloc_offset[offset]);
808 		if (ret)
809 			goto err;
810 	}
811 
812 	/* Leave the user relocations as they are; this is the painfully slow path,
813 	 * and we want to avoid the complication of dropping the lock whilst
814 	 * having buffers reserved in the aperture and so causing spurious
815 	 * ENOSPC for random operations.
816 	 */
817 
818 err:
819 	drm_free_large(reloc);
820 	drm_free_large(reloc_offset);
821 	return ret;
822 }
823 
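/*
 * Prepare every object for execution on 'ring': synchronize with outstanding
 * work, flush CPU writes (clflush/chipset flush, wmb for GTT writes) and
 * invalidate the ring's caches so the batch sees coherent data.
 */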
824 static int
825 i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
826 				struct list_head *vmas)
827 {
828 	struct i915_vma *vma;
829 	uint32_t flush_domains = 0;
830 	bool flush_chipset = false;
831 	int ret;
832 
833 	list_for_each_entry(vma, vmas, exec_list) {
834 		struct drm_i915_gem_object *obj = vma->obj;
835 		ret = i915_gem_object_sync(obj, ring);
836 		if (ret)
837 			return ret;
838 
839 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
840 			flush_chipset |= i915_gem_clflush_object(obj, false);
841 
842 		flush_domains |= obj->base.write_domain;
843 	}
844 
845 	if (flush_chipset)
846 		i915_gem_chipset_flush(ring->dev);
847 
848 	if (flush_domains & I915_GEM_DOMAIN_GTT)
849 		wmb();
850 
851 	/* Unconditionally invalidate gpu caches and ensure that we do flush
852 	 * any residual writes from the previous batch.
853 	 */
854 	return intel_ring_invalidate_all_caches(ring);
855 }
856 
857 static bool
858 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
859 {
860 	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
861 		return false;
862 
863 	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
864 }
865 
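/*
 * Sanity-check the user-supplied exec list: reject unknown per-object flags
 * (and EXEC_OBJECT_NEEDS_GTT when full PPGTT is in use), guard against
 * overflow of the total relocation count, and prefault the relocation arrays
 * so the fast relocation path is less likely to hit -EFAULT.
 */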
866 static int
867 validate_exec_list(struct drm_device *dev,
868 		   struct drm_i915_gem_exec_object2 *exec,
869 		   int count)
870 {
871 	unsigned relocs_total = 0;
872 	unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
873 	unsigned invalid_flags;
874 	int i;
875 
876 	invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
877 	if (USES_FULL_PPGTT(dev))
878 		invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
879 
880 	for (i = 0; i < count; i++) {
881 		char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
882 		int length; /* limited by fault_in_pages_readable() */
883 
884 		if (exec[i].flags & invalid_flags)
885 			return -EINVAL;
886 
887 		/* First check for malicious input causing overflow in
888 		 * the worst case where we need to allocate the entire
889 		 * relocation tree as a single array.
890 		 */
891 		if (exec[i].relocation_count > relocs_max - relocs_total)
892 			return -EINVAL;
893 		relocs_total += exec[i].relocation_count;
894 
895 		length = exec[i].relocation_count *
896 			sizeof(struct drm_i915_gem_relocation_entry);
897 		/*
898 		 * We must check that the entire relocation array is safe
899 		 * to read, but since we may need to update the presumed
900 		 * offsets during execution, check for full write access.
901 		 */
902 #if 0
903 		if (!access_ok(VERIFY_WRITE, ptr, length))
904 			return -EFAULT;
905 #endif
906 
907 		if (likely(!i915.prefault_disable)) {
908 			if (fault_in_multipages_readable(ptr, length))
909 				return -EFAULT;
910 		}
911 	}
912 
913 	return 0;
914 }
915 
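/*
 * Look up and validate the context for this submission: non-default contexts
 * are only accepted on the render ring, banned contexts are refused, and with
 * execlists enabled the logical ring context is created on first use.
 */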
916 static struct intel_context *
917 i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
918 			  struct intel_engine_cs *ring, const u32 ctx_id)
919 {
920 	struct intel_context *ctx = NULL;
921 	struct i915_ctx_hang_stats *hs;
922 
923 	if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
924 		return ERR_PTR(-EINVAL);
925 
926 	ctx = i915_gem_context_get(file->driver_priv, ctx_id);
927 	if (IS_ERR(ctx))
928 		return ctx;
929 
930 	hs = &ctx->hang_stats;
931 	if (hs->banned) {
932 		DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
933 		return ERR_PTR(-EIO);
934 	}
935 
936 	if (i915.enable_execlists && !ctx->engine[ring->id].state) {
937 		int ret = intel_lr_context_deferred_create(ctx, ring);
938 		if (ret) {
939 			DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret);
940 			return ERR_PTR(ret);
941 		}
942 	}
943 
944 	return ctx;
945 }
946 
947 void
948 i915_gem_execbuffer_move_to_active(struct list_head *vmas,
949 				   struct intel_engine_cs *ring)
950 {
951 	u32 seqno = intel_ring_get_seqno(ring);
952 	struct i915_vma *vma;
953 
954 	list_for_each_entry(vma, vmas, exec_list) {
955 		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
956 		struct drm_i915_gem_object *obj = vma->obj;
957 		u32 old_read = obj->base.read_domains;
958 		u32 old_write = obj->base.write_domain;
959 
960 		obj->base.write_domain = obj->base.pending_write_domain;
961 		if (obj->base.write_domain == 0)
962 			obj->base.pending_read_domains |= obj->base.read_domains;
963 		obj->base.read_domains = obj->base.pending_read_domains;
964 
965 		i915_vma_move_to_active(vma, ring);
966 		if (obj->base.write_domain) {
967 			obj->dirty = 1;
968 			obj->last_write_seqno = seqno;
969 
970 			intel_fb_obj_invalidate(obj, ring);
971 
972 			/* update for the implicit flush after a batch */
973 			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
974 		}
975 		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
976 			obj->last_fenced_seqno = seqno;
977 			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
978 				struct drm_i915_private *dev_priv = to_i915(ring->dev);
979 				list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
980 					       &dev_priv->mm.fence_list);
981 			}
982 		}
983 
984 		trace_i915_gem_object_change_domain(obj, old_read, old_write);
985 	}
986 }
987 
988 void
989 i915_gem_execbuffer_retire_commands(struct drm_device *dev,
990 				    struct drm_file *file,
991 				    struct intel_engine_cs *ring,
992 				    struct drm_i915_gem_object *obj)
993 {
994 	/* Unconditionally force add_request to emit a full flush. */
995 	ring->gpu_caches_dirty = true;
996 
997 	/* Add a breadcrumb for the completion of the batch buffer */
998 	(void)__i915_add_request(ring, file, obj, NULL);
999 }
1000 
1001 static int
1002 i915_reset_gen7_sol_offsets(struct drm_device *dev,
1003 			    struct intel_engine_cs *ring)
1004 {
1005 	struct drm_i915_private *dev_priv = dev->dev_private;
1006 	int ret, i;
1007 
1008 	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) {
1009 		DRM_DEBUG("sol reset is gen7/rcs only\n");
1010 		return -EINVAL;
1011 	}
1012 
1013 	ret = intel_ring_begin(ring, 4 * 3);
1014 	if (ret)
1015 		return ret;
1016 
1017 	for (i = 0; i < 4; i++) {
1018 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1019 		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
1020 		intel_ring_emit(ring, 0);
1021 	}
1022 
1023 	intel_ring_advance(ring);
1024 
1025 	return 0;
1026 }
1027 
1028 int
1029 i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
1030 			       struct intel_engine_cs *ring,
1031 			       struct intel_context *ctx,
1032 			       struct drm_i915_gem_execbuffer2 *args,
1033 			       struct list_head *vmas,
1034 			       struct drm_i915_gem_object *batch_obj,
1035 			       u64 exec_start, u32 flags)
1036 {
1037 	struct drm_clip_rect *cliprects = NULL;
1038 	struct drm_i915_private *dev_priv = dev->dev_private;
1039 	u64 exec_len;
1040 	int instp_mode;
1041 	u32 instp_mask;
1042 	int i, ret = 0;
1043 
1044 	if (args->num_cliprects != 0) {
1045 		if (ring != &dev_priv->ring[RCS]) {
1046 			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
1047 			return -EINVAL;
1048 		}
1049 
1050 		if (INTEL_INFO(dev)->gen >= 5) {
1051 			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
1052 			return -EINVAL;
1053 		}
1054 
1055 		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
1056 			DRM_DEBUG("execbuf with %u cliprects\n",
1057 				  args->num_cliprects);
1058 			return -EINVAL;
1059 		}
1060 
1061 		cliprects = kcalloc(args->num_cliprects,
1062 				    sizeof(*cliprects),
1063 				    GFP_KERNEL);
1064 		if (cliprects == NULL) {
1065 			ret = -ENOMEM;
1066 			goto error;
1067 		}
1068 
1069 		if (copy_from_user(cliprects,
1070 				   to_user_ptr(args->cliprects_ptr),
1071 				   sizeof(*cliprects)*args->num_cliprects)) {
1072 			ret = -EFAULT;
1073 			goto error;
1074 		}
1075 	} else {
1076 		if (args->DR4 == 0xffffffff) {
1077 			DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
1078 			args->DR4 = 0;
1079 		}
1080 
1081 		if (args->DR1 || args->DR4 || args->cliprects_ptr) {
1082 			DRM_DEBUG("0 cliprects but dirt in cliprects fields\n");
1083 			return -EINVAL;
1084 		}
1085 	}
1086 
1087 	ret = i915_gem_execbuffer_move_to_gpu(ring, vmas);
1088 	if (ret)
1089 		goto error;
1090 
1091 	ret = i915_switch_context(ring, ctx);
1092 	if (ret)
1093 		goto error;
1094 
1095 	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1096 	instp_mask = I915_EXEC_CONSTANTS_MASK;
1097 	switch (instp_mode) {
1098 	case I915_EXEC_CONSTANTS_REL_GENERAL:
1099 	case I915_EXEC_CONSTANTS_ABSOLUTE:
1100 	case I915_EXEC_CONSTANTS_REL_SURFACE:
1101 		if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
1102 			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
1103 			ret = -EINVAL;
1104 			goto error;
1105 		}
1106 
1107 		if (instp_mode != dev_priv->relative_constants_mode) {
1108 			if (INTEL_INFO(dev)->gen < 4) {
1109 				DRM_DEBUG("no rel constants on pre-gen4\n");
1110 				ret = -EINVAL;
1111 				goto error;
1112 			}
1113 
1114 			if (INTEL_INFO(dev)->gen > 5 &&
1115 			    instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
1116 				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
1117 				ret = -EINVAL;
1118 				goto error;
1119 			}
1120 
1121 			/* The HW changed the meaning of this bit on gen6 */
1122 			if (INTEL_INFO(dev)->gen >= 6)
1123 				instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1124 		}
1125 		break;
1126 	default:
1127 		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
1128 		ret = -EINVAL;
1129 		goto error;
1130 	}
1131 
1132 	if (ring == &dev_priv->ring[RCS] &&
1133 			instp_mode != dev_priv->relative_constants_mode) {
1134 		ret = intel_ring_begin(ring, 4);
1135 		if (ret)
1136 			goto error;
1137 
1138 		intel_ring_emit(ring, MI_NOOP);
1139 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1140 		intel_ring_emit(ring, INSTPM);
1141 		intel_ring_emit(ring, instp_mask << 16 | instp_mode);
1142 		intel_ring_advance(ring);
1143 
1144 		dev_priv->relative_constants_mode = instp_mode;
1145 	}
1146 
1147 	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1148 		ret = i915_reset_gen7_sol_offsets(dev, ring);
1149 		if (ret)
1150 			goto error;
1151 	}
1152 
1153 	exec_len = args->batch_len;
1154 	if (cliprects) {
1155 		for (i = 0; i < args->num_cliprects; i++) {
1156 			ret = i915_emit_box(dev, &cliprects[i],
1157 					    args->DR1, args->DR4);
1158 			if (ret)
1159 				goto error;
1160 
1161 			ret = ring->dispatch_execbuffer(ring,
1162 							exec_start, exec_len,
1163 							flags);
1164 			if (ret)
1165 				goto error;
1166 		}
1167 	} else {
1168 		ret = ring->dispatch_execbuffer(ring,
1169 						exec_start, exec_len,
1170 						flags);
1171 		if (ret)
1172 			return ret;
1173 	}
1174 
1175 	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
1176 
1177 	i915_gem_execbuffer_move_to_active(vmas, ring);
1178 	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
1179 
1180 error:
1181 	kfree(cliprects);
1182 	return ret;
1183 }
1184 
1185 /**
1186  * Find one BSD ring on which to dispatch the corresponding BSD command
1187  * and return its ring ID.
1188  */
1189 static int gen8_dispatch_bsd_ring(struct drm_device *dev,
1190 				  struct drm_file *file)
1191 {
1192 	struct drm_i915_private *dev_priv = dev->dev_private;
1193 	struct drm_i915_file_private *file_priv = file->driver_priv;
1194 
1195 	/* Check whether this file_priv has already been assigned a BSD ring */
1196 	if (file_priv->bsd_ring)
1197 		return file_priv->bsd_ring->id;
1198 	else {
1199 		/* If not, use the ping-pong mechanism to select one ring */
1200 		int ring_id;
1201 
1202 		mutex_lock(&dev->struct_mutex);
1203 		if (dev_priv->mm.bsd_ring_dispatch_index == 0) {
1204 			ring_id = VCS;
1205 			dev_priv->mm.bsd_ring_dispatch_index = 1;
1206 		} else {
1207 			ring_id = VCS2;
1208 			dev_priv->mm.bsd_ring_dispatch_index = 0;
1209 		}
1210 		file_priv->bsd_ring = &dev_priv->ring[ring_id];
1211 		mutex_unlock(&dev->struct_mutex);
1212 		return ring_id;
1213 	}
1214 }
1215 
1216 static struct drm_i915_gem_object *
1217 eb_get_batch(struct eb_vmas *eb)
1218 {
1219 	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
1220 
1221 	/*
1222 	 * SNA is doing fancy tricks with compressing batch buffers, which leads
1223 	 * to negative relocation deltas. Usually that works out ok since the
1224 	 * relocated address is still positive, except when the batch is placed
1225 	 * very low in the GTT. Ensure this doesn't happen.
1226 	 *
1227 	 * Note that actual hangs have only been observed on gen7, but for
1228 	 * paranoia do it everywhere.
1229 	 */
1230 	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
1231 
1232 	return vma->obj;
1233 }
1234 
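/*
 * Common execbuffer path: validate the arguments, select the target ring,
 * look up the context and its address space, reserve and relocate every
 * buffer (falling back to the slow path on -EFAULT), optionally run the
 * command parser and pin secure batches into the GGTT, and finally hand the
 * batch to the legacy ring or execlists submission backend.
 */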
1235 static int
1236 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1237 		       struct drm_file *file,
1238 		       struct drm_i915_gem_execbuffer2 *args,
1239 		       struct drm_i915_gem_exec_object2 *exec)
1240 {
1241 	struct drm_i915_private *dev_priv = dev->dev_private;
1242 	struct eb_vmas *eb;
1243 	struct drm_i915_gem_object *batch_obj;
1244 	struct intel_engine_cs *ring;
1245 	struct intel_context *ctx;
1246 	struct i915_address_space *vm;
1247 	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
1248 	u64 exec_start = args->batch_start_offset;
1249 	u32 flags;
1250 	int ret;
1251 	bool need_relocs;
1252 
1253 	if (!i915_gem_check_execbuffer(args))
1254 		return -EINVAL;
1255 
1256 	ret = validate_exec_list(dev, exec, args->buffer_count);
1257 	if (ret)
1258 		return ret;
1259 
1260 	flags = 0;
1261 	if (args->flags & I915_EXEC_SECURE) {
1262 		flags |= I915_DISPATCH_SECURE;
1263 	}
1264 	if (args->flags & I915_EXEC_IS_PINNED)
1265 		flags |= I915_DISPATCH_PINNED;
1266 
1267 	if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) {
1268 		DRM_DEBUG("execbuf with unknown ring: %d\n",
1269 			  (int)(args->flags & I915_EXEC_RING_MASK));
1270 		return -EINVAL;
1271 	}
1272 
1273 	if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
1274 		ring = &dev_priv->ring[RCS];
1275 	else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
1276 		if (HAS_BSD2(dev)) {
1277 			int ring_id;
1278 			ring_id = gen8_dispatch_bsd_ring(dev, file);
1279 			ring = &dev_priv->ring[ring_id];
1280 		} else
1281 			ring = &dev_priv->ring[VCS];
1282 	} else
1283 		ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];
1284 
1285 	if (!intel_ring_initialized(ring)) {
1286 		DRM_DEBUG("execbuf with invalid ring: %d\n",
1287 			  (int)(args->flags & I915_EXEC_RING_MASK));
1288 		return -EINVAL;
1289 	}
1290 
1291 	if (args->buffer_count < 1) {
1292 		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1293 		return -EINVAL;
1294 	}
1295 
1296 	intel_runtime_pm_get(dev_priv);
1297 
1298 	ret = i915_mutex_lock_interruptible(dev);
1299 	if (ret)
1300 		goto pre_mutex_err;
1301 
1302 	if (dev_priv->ums.mm_suspended) {
1303 		mutex_unlock(&dev->struct_mutex);
1304 		ret = -EBUSY;
1305 		goto pre_mutex_err;
1306 	}
1307 
1308 	ctx = i915_gem_validate_context(dev, file, ring, ctx_id);
1309 	if (IS_ERR(ctx)) {
1310 		mutex_unlock(&dev->struct_mutex);
1311 		ret = PTR_ERR(ctx);
1312 		goto pre_mutex_err;
1313 	}
1314 
1315 	i915_gem_context_reference(ctx);
1316 
1317 	if (ctx->ppgtt)
1318 		vm = &ctx->ppgtt->base;
1319 	else
1320 		vm = &dev_priv->gtt.base;
1321 
1322 	eb = eb_create(args);
1323 	if (eb == NULL) {
1324 		i915_gem_context_unreference(ctx);
1325 		mutex_unlock(&dev->struct_mutex);
1326 		ret = -ENOMEM;
1327 		goto pre_mutex_err;
1328 	}
1329 
1330 	/* Look up object handles */
1331 	ret = eb_lookup_vmas(eb, exec, args, vm, file);
1332 	if (ret)
1333 		goto err;
1334 
1335 	/* take note of the batch buffer before we might reorder the lists */
1336 	batch_obj = eb_get_batch(eb);
1337 
1338 	/* Move the objects en-masse into the GTT, evicting if necessary. */
1339 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
1340 	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
1341 	if (ret)
1342 		goto err;
1343 
1344 	/* The objects are in their final locations, apply the relocations. */
1345 	if (need_relocs)
1346 		ret = i915_gem_execbuffer_relocate(eb);
1347 	if (ret) {
1348 		if (ret == -EFAULT) {
1349 			ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
1350 								eb, exec);
1351 			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1352 		}
1353 		if (ret)
1354 			goto err;
1355 	}
1356 
1357 	/* Set the pending read domains for the batch buffer to COMMAND */
1358 	if (batch_obj->base.pending_write_domain) {
1359 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1360 		ret = -EINVAL;
1361 		goto err;
1362 	}
1363 	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1364 
1365 	if (i915_needs_cmd_parser(ring)) {
1366 		ret = i915_parse_cmds(ring,
1367 				      batch_obj,
1368 				      args->batch_start_offset,
1369 				      file->is_master);
1370 		if (ret)
1371 			goto err;
1372 
1373 		/*
1374 		 * XXX: Actually do this when enabling batch copy...
1375 		 *
1376 		 * Set the DISPATCH_SECURE bit to remove the NON_SECURE bit
1377 		 * from MI_BATCH_BUFFER_START commands issued in the
1378 		 * dispatch_execbuffer implementations. We specifically don't
1379 		 * want that set when the command parser is enabled.
1380 		 */
1381 	}
1382 
1383 	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1384 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
1385 	 * hsw should have this fixed, but bdw mucks it up again. */
1386 	if (flags & I915_DISPATCH_SECURE) {
1387 		/*
1388 		 * So on first glance it looks freaky that we pin the batch here
1389 		 * outside of the reservation loop. But:
1390 		 * - The batch is already pinned into the relevant ppgtt, so we
1391 		 *   already have the backing storage fully allocated.
1392 		 * - No other BO uses the global gtt (well contexts, but meh),
1393 		 *   so we don't really have issues with multiple objects not
1394 		 *   fitting due to fragmentation.
1395 		 * So this is actually safe.
1396 		 */
1397 		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
1398 		if (ret)
1399 			goto err;
1400 
1401 		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
1402 	} else
1403 		exec_start += i915_gem_obj_offset(batch_obj, vm);
1404 
1405 	ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
1406 				      &eb->vmas, batch_obj, exec_start, flags);
1407 
1408 	/*
1409 	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
1410 	 * batch vma for correctness. To make this less ugly and fragile, it
1411 	 * needs to be adjusted to also track the ggtt batch vma properly as
1412 	 * active.
1413 	 */
1414 	if (flags & I915_DISPATCH_SECURE)
1415 		i915_gem_object_ggtt_unpin(batch_obj);
1416 err:
1417 	/* the request owns the ref now */
1418 	i915_gem_context_unreference(ctx);
1419 	eb_destroy(eb);
1420 
1421 	mutex_unlock(&dev->struct_mutex);
1422 
1423 pre_mutex_err:
1424 	/* intel_gpu_busy should also get a ref, so it will free when the device
1425 	 * is really idle. */
1426 	intel_runtime_pm_put(dev_priv);
1427 	return ret;
1428 }
1429 
1430 /*
1431  * Legacy execbuffer just creates an exec2 list from the original exec object
1432  * list array and passes it to the real function.
1433  */
1434 int
1435 i915_gem_execbuffer(struct drm_device *dev, void *data,
1436 		    struct drm_file *file)
1437 {
1438 	struct drm_i915_gem_execbuffer *args = data;
1439 	struct drm_i915_gem_execbuffer2 exec2;
1440 	struct drm_i915_gem_exec_object *exec_list = NULL;
1441 	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1442 	int ret, i;
1443 
1444 	if (args->buffer_count < 1) {
1445 		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1446 		return -EINVAL;
1447 	}
1448 
1449 	/* Copy in the exec list from userland */
1450 	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1451 	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1452 	if (exec_list == NULL || exec2_list == NULL) {
1453 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1454 			  args->buffer_count);
1455 		drm_free_large(exec_list);
1456 		drm_free_large(exec2_list);
1457 		return -ENOMEM;
1458 	}
1459 	ret = copy_from_user(exec_list,
1460 			     to_user_ptr(args->buffers_ptr),
1461 			     sizeof(*exec_list) * args->buffer_count);
1462 	if (ret != 0) {
1463 		DRM_DEBUG("copy %d exec entries failed %d\n",
1464 			  args->buffer_count, ret);
1465 		drm_free_large(exec_list);
1466 		drm_free_large(exec2_list);
1467 		return -EFAULT;
1468 	}
1469 
1470 	for (i = 0; i < args->buffer_count; i++) {
1471 		exec2_list[i].handle = exec_list[i].handle;
1472 		exec2_list[i].relocation_count = exec_list[i].relocation_count;
1473 		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1474 		exec2_list[i].alignment = exec_list[i].alignment;
1475 		exec2_list[i].offset = exec_list[i].offset;
1476 		if (INTEL_INFO(dev)->gen < 4)
1477 			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1478 		else
1479 			exec2_list[i].flags = 0;
1480 	}
1481 
1482 	exec2.buffers_ptr = args->buffers_ptr;
1483 	exec2.buffer_count = args->buffer_count;
1484 	exec2.batch_start_offset = args->batch_start_offset;
1485 	exec2.batch_len = args->batch_len;
1486 	exec2.DR1 = args->DR1;
1487 	exec2.DR4 = args->DR4;
1488 	exec2.num_cliprects = args->num_cliprects;
1489 	exec2.cliprects_ptr = args->cliprects_ptr;
1490 	exec2.flags = I915_EXEC_RENDER;
1491 	i915_execbuffer2_set_context_id(exec2, 0);
1492 
1493 	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1494 	if (!ret) {
1495 		struct drm_i915_gem_exec_object __user *user_exec_list =
1496 			to_user_ptr(args->buffers_ptr);
1497 
1498 		/* Copy the new buffer offsets back to the user's exec list. */
1499 		for (i = 0; i < args->buffer_count; i++) {
1500 			ret = __copy_to_user(&user_exec_list[i].offset,
1501 					     &exec2_list[i].offset,
1502 					     sizeof(user_exec_list[i].offset));
1503 			if (ret) {
1504 				ret = -EFAULT;
1505 				DRM_DEBUG("failed to copy %d exec entries "
1506 					  "back to user (%d)\n",
1507 					  args->buffer_count, ret);
1508 				break;
1509 			}
1510 		}
1511 	}
1512 
1513 	drm_free_large(exec_list);
1514 	drm_free_large(exec2_list);
1515 	return ret;
1516 }
1517 
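/*
 * Userspace reaches this path through the DRM_IOCTL_I915_GEM_EXECBUFFER2
 * ioctl. A rough sketch of a minimal caller follows (illustrative only;
 * the handles are placeholders and error handling is omitted). Note that
 * the batch buffer must be the last object in the list:
 *
 *	struct drm_i915_gem_exec_object2 objs[2] = {
 *		{ .handle = data_handle },
 *		{ .handle = batch_handle },	/- batch object goes last -/
 *	};
 *	struct drm_i915_gem_execbuffer2 eb = {
 *		.buffers_ptr = (uintptr_t)objs,
 *		.buffer_count = 2,
 *		.batch_len = batch_bytes,
 *		.flags = I915_EXEC_RENDER,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &eb);
 */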
1518 int
1519 i915_gem_execbuffer2(struct drm_device *dev, void *data,
1520 		     struct drm_file *file)
1521 {
1522 	struct drm_i915_gem_execbuffer2 *args = data;
1523 	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1524 	int ret;
1525 
1526 	if (args->buffer_count < 1 ||
1527 	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1528 		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1529 		return -EINVAL;
1530 	}
1531 
1532 	if (args->rsvd2 != 0) {
1533 		DRM_DEBUG("dirty rsvd2 field\n");
1534 		return -EINVAL;
1535 	}
1536 
1537 	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
1538 			     M_DRM, M_NOWAIT);
1539 	if (exec2_list == NULL)
1540 		exec2_list = drm_malloc_ab(sizeof(*exec2_list),
1541 					   args->buffer_count);
1542 	if (exec2_list == NULL) {
1543 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1544 			  args->buffer_count);
1545 		return -ENOMEM;
1546 	}
1547 	ret = copy_from_user(exec2_list,
1548 			     to_user_ptr(args->buffers_ptr),
1549 			     sizeof(*exec2_list) * args->buffer_count);
1550 	if (ret != 0) {
1551 		DRM_DEBUG("copy %d exec entries failed %d\n",
1552 			  args->buffer_count, ret);
1553 		drm_free_large(exec2_list);
1554 		return -EFAULT;
1555 	}
1556 
1557 	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1558 	if (!ret) {
1559 		/* Copy the new buffer offsets back to the user's exec list. */
1560 		struct drm_i915_gem_exec_object2 __user *user_exec_list =
1561 				   to_user_ptr(args->buffers_ptr);
1562 		int i;
1563 
1564 		for (i = 0; i < args->buffer_count; i++) {
1565 			ret = __copy_to_user(&user_exec_list[i].offset,
1566 					     &exec2_list[i].offset,
1567 					     sizeof(user_exec_list[i].offset));
1568 			if (ret) {
1569 				ret = -EFAULT;
1570 				DRM_DEBUG("failed to copy %d exec entries "
1571 					  "back to user\n",
1572 					  args->buffer_count);
1573 				break;
1574 			}
1575 		}
1576 	}
1577 
1578 	drm_free_large(exec2_list);
1579 	return ret;
1580 }
1581